In [1]:
# Set spark environments
import os
os.environ["SPARK_HOME"] = '/home/ypang6/spark-2.4.7-bin-hadoop2.7'
os.environ["PYTHONPATH"] = '/home/ypang6/anaconda3/bin/python3.7'
os.environ['PYSPARK_PYTHON'] = '/home/ypang6/anaconda3/bin/python3.7'
os.environ['PYSPARK_DRIVER_PYTHON'] = '/home/ypang6/anaconda3/bin/python3.7'

In [2]:
from pyspark.sql.types import StructType, StructField, ShortType, StringType, LongType, IntegerType, DoubleType
from pyspark.sql import SparkSession
from geospark.register import GeoSparkRegistrator
from geospark.utils import GeoSparkKryoRegistrator, KryoSerializer
from geospark.register import upload_jars
from geospark.core.geom.envelope import Envelope
from geospark.core.enums import IndexType
from geospark.core.spatialOperator import RangeQuery
from geospark.utils.adapter import Adapter
upload_jars()
spark = SparkSession.builder. \
    master("local[*]"). \
    appName("TestApp"). \
    config("spark.serializer", KryoSerializer.getName). \
    config("spark.kryo.registrator", GeoSparkKryoRegistrator.getName). \
    getOrCreate()
GeoSparkRegistrator.registerAll(spark)

True

In [3]:
myschema = StructType([
    StructField("recType", ShortType(), True),  #1  //track point record type number
    StructField("recTime", StringType(), True),  #2  //seconds since midnigght 1/1/70 UTC
    StructField("fltKey", LongType(), True),  #3  //flight key
    StructField("bcnCode", IntegerType(), True),  #4  //digit range from 0 to 7
    StructField("cid", IntegerType(), True),  #5  //computer flight id
    StructField("Source", StringType(), True),  #6  //source of the record
    StructField("msgType", StringType(), True),  #7
    StructField("acId", StringType(), True),  #8  //call sign
    StructField("recTypeCat", StringType(), True),  #9
    StructField("lat", DoubleType(), True),  #10
    StructField("lon", DoubleType(), True),  #11
    StructField("alt", DoubleType(), True),  #12  //in 100s of feet
    StructField("significance", ShortType(), True),  #13 //digit range from 1 to 10
    StructField("latAcc", DoubleType(), True),  #14
    StructField("lonAcc", DoubleType(), True),  #15
    StructField("altAcc", DoubleType(), True),  #16
    StructField("groundSpeed", IntegerType(), True),  #17 //in knots
    StructField("course", DoubleType(), True),  #18  //in degrees from true north
    StructField("rateOfClimb", DoubleType(), True),  #19  //in feet per minute
    StructField("altQualifier", StringType(), True),  #20  //Altitude qualifier (the “B4 character”)
    StructField("altIndicator", StringType(), True),  #21  //Altitude indicator (the “C4 character”)
    StructField("trackPtStatus", StringType(), True),  #22  //Track point status (e.g., ‘C’ for coast)
    StructField("leaderDir", IntegerType(), True),  #23  //int 0-8 representing the direction of the leader line
    StructField("scratchPad", StringType(), True),  #24
    StructField("msawInhibitInd", ShortType(), True),  #25 // MSAW Inhibit Indicator (0=not inhibited, 1=inhibited)
    StructField("assignedAltString", StringType(), True),  #26
    StructField("controllingFac", StringType(), True),  #27
    StructField("controllingSec", StringType(), True),  #28
    StructField("receivingFac", StringType(), True),  #29
    StructField("receivingSec", StringType(), True),  #30
    StructField("activeContr", IntegerType(), True),  #31  // the active control number
    StructField("primaryContr", IntegerType(), True),  #32  //The primary(previous, controlling, or possible next)controller number
    StructField("kybrdSubset", StringType(), True),  #33  //identifies a subset of controller keyboards
    StructField("kybrdSymbol", StringType(), True),  #34  //identifies a keyboard within the keyboard subsets
    StructField("adsCode", IntegerType(), True),  #35  //arrival departure status code
    StructField("opsType", StringType(), True),  #36  //Operations type (O/E/A/D/I/U)from ARTS and ARTS 3A data
    StructField("airportCode", StringType(), True),  #37
    StructField("trackNumber", IntegerType(), True),  #38
    StructField("tptReturnType", StringType(), True),  #39
    StructField("modeSCode", StringType(), True),  #40
    StructField("sensorTrackNumberList", StringType(), True), #41 //a list of sensor/track number combinations
    StructField("spi", StringType(), True),  #42 // representing the Ident feature
    StructField("dvs", StringType(), True), #43 // indicate the aircraft is within a suppresion volumn area
    StructField("dupM3a", StringType(), True),  #44 // indicate 2 aircraft have the same mode 3a code
    StructField("tid", StringType(), True),  #45 //Aircraft Ident entered by pilot
])


In [4]:
import glob
date=20190801
iff_file_path = glob.glob("/media/ypang6/paralab/Research/data/ZTL/IFF_ZTL_{}*.csv".format(date))[0]

In [5]:
# load iff sector data
df = spark.read.csv(iff_file_path, header=False, sep=",", schema=myschema)

# select columns
cols = ['recType', 'recTime', 'acId', 'lat', 'lon', 'alt']
df = df.select(*cols).filter(df['recType'] == 3).withColumn("recTime", df['recTime'].cast(IntegerType()))

# register pyspark df in SQL
df.registerTempTable("pointtable")

# create shape column in geospark
spatialdf = spark.sql(
    """
    SELECT ST_Point(CAST(lat AS Decimal(24, 20)), CAST(lon AS Decimal(24, 20))) AS geom, recTime, acId
    FROM pointtable
    """)

spatialdf.createOrReplaceTempView("spatialdf")

# register pyspark spatialdf in SQL
spatialdf.registerTempTable("spatialdf")

In [6]:
df.count()

1984654

In [14]:
df.show(truncate=False)

+-------+----------+-----+--------+---------+----+
|recType|recTime   |acId |lat     |lon      |alt |
+-------+----------+-----+--------+---------+----+
|3      |1564634988|SKQ74|36.10444|-79.43917|23.0|
|3      |1564634999|SKQ74|36.11417|-79.43611|26.0|
|3      |1564635012|SKQ74|36.12389|-79.4325 |29.0|
|3      |1564635024|SKQ74|36.13333|-79.42889|32.0|
|3      |1564635036|SKQ74|36.14278|-79.42583|37.0|
|3      |1564635048|SKQ74|36.1525 |-79.42278|40.0|
|3      |1564635059|SKQ74|36.16083|-79.42   |43.0|
|3      |1564635072|SKQ74|36.17139|-79.41667|46.0|
|3      |1564635084|SKQ74|36.18056|-79.41389|48.0|
|3      |1564635096|SKQ74|36.19028|-79.41056|51.0|
|3      |1564635108|SKQ74|36.2    |-79.40722|54.0|
|3      |1564635120|SKQ74|36.21   |-79.40389|58.0|
|3      |1564635132|SKQ74|36.21944|-79.40056|60.0|
|3      |1564635143|SKQ74|36.22833|-79.39778|63.0|
|3      |1564635155|SKQ74|36.23833|-79.39444|66.0|
|3      |1564635167|SKQ74|36.24806|-79.39083|69.0|
|3      |1564635180|SKQ74|36.25

In [7]:
SQL_range_query_result = spark.sql(
  """
    SELECT *
    FROM spatialdf
    WHERE ST_Contains(ST_PolygonFromEnvelope(33.62, 33.64, -84.54, -84.56), geom)
  """)

In [8]:
SQL_range_query_result.show(10, truncate=False)

+--------------------------+----------+-------+
|geom                      |recTime   |acId   |
+--------------------------+----------+-------+
|POINT (29.7275 -82.54639) |1564632158|LN441PC|
|POINT (29.74972 -82.56111)|1564632170|LN441PC|
|POINT (29.7725 -82.57694) |1564632182|LN441PC|
|POINT (29.79444 -82.59222)|1564632194|LN441PC|
|POINT (29.81667 -82.60694)|1564632206|LN441PC|
|POINT (29.83944 -82.62167)|1564632218|LN441PC|
|POINT (29.86167 -82.63611)|1564632230|LN441PC|
|POINT (29.885 -82.65083)  |1564632242|LN441PC|
|POINT (29.90778 -82.66472)|1564632254|LN441PC|
|POINT (29.93111 -82.67889)|1564632266|LN441PC|
+--------------------------+----------+-------+
only showing top 10 rows



In [9]:
SQL_radius_query_result = spark.sql(
  """
  SELECT *
  FROM spatialdf
  WHERE ST_Distance(ST_Point(33.63172, -84.54941), geom) < 0.01
  """)

In [10]:
SQL_radius_query_result.count()

193

In [11]:
spatial_rdd = Adapter.toSpatialRdd(spatialdf, "geom")
range_query_window = Envelope(-84.56, -84.54, 33.62, 33.64)

consider_boundary_intersection = False ## Only return gemeotries fully covered by the window
build_on_spatial_partitioned_rdd = False ## Set to TRUE only if run join query
spatial_rdd.buildIndex(IndexType.QUADTREE, build_on_spatial_partitioned_rdd)

using_index = True

query_result = RangeQuery.SpatialRangeQuery(
    spatial_rdd,
    range_query_window,
    consider_boundary_intersection,
    using_index
)

In [12]:
query_result.count()

0

In [13]:
query_result.map(lambda x: x.geom.length).collect()


[]