In [1]:
import geopandas as gpd
from sedona.spark import *
from delta import *
from pyspark.sql import SparkSession

# Maven coordinates that Spark resolves when the session starts.
# Each coordinate is its own list element; they are comma-joined below into
# the single string that `spark.jars.packages` expects.
# NOTE(review): delta-core 2.1.x is published for Spark 3.3, while the Sedona
# artifact below is the Spark 3.4 build — confirm both match the Spark version
# actually installed in this environment.
packages = [
    'io.delta:delta-core_2.12:2.1.1',
    'org.apache.sedona:sedona-spark-shaded-3.4_2.12:1.5.0',
    'org.datasyslab:geotools-wrapper:1.5.0-28.2',
]

# Build (or reuse) the session with the Delta SQL extension and catalog
# configured and Hive support enabled.
config = (
    SedonaContext.builder()
    .config("spark.jars.packages", ",".join(packages))
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
    .enableHiveSupport()
    .getOrCreate()
)

# Wrap the session in a Sedona context; the cells below read data through `sedona`.
sedona = SedonaContext.create(config)

In [4]:
# Read the tab-separated county file. It has no header row, so Spark assigns
# positional column names (_c0, _c1, ...), all typed as strings.
rawDf = (
    sedona.read
    .format("csv")
    .option("delimiter", "\t")
    .option("header", "false")
    .load("county_small.tsv")
)

# Expose the frame to SQL under the view name "rawdf".
rawDf.createOrReplaceTempView("rawdf")

# Print the schema that was read.
rawDf.printSchema()

root
 |-- _c0: string (nullable = true)
 |-- _c1: string (nullable = true)
 |-- _c2: string (nullable = true)
 |-- _c3: string (nullable = true)
 |-- _c4: string (nullable = true)
 |-- _c5: string (nullable = true)
 |-- _c6: string (nullable = true)
 |-- _c7: string (nullable = true)
 |-- _c8: string (nullable = true)
 |-- _c9: string (nullable = true)
 |-- _c10: string (nullable = true)
 |-- _c11: string (nullable = true)
 |-- _c12: string (nullable = true)
 |-- _c13: string (nullable = true)
 |-- _c14: string (nullable = true)
 |-- _c15: string (nullable = true)
 |-- _c16: string (nullable = true)
 |-- _c17: string (nullable = true)



In [6]:
# Load the sparksql_magic IPython extension; later cells use its %%sparksql cell magic.
%load_ext sparksql_magic

In [8]:
%%sparksql
-- List the tables and temporary views visible in the default namespace.
SHOW TABLES IN default

0,1,2
namespace,tableName,isTemporary
default,device_data_events,False
default,people10m,False
default,taxi_zone_lookup,False
default,taxi_zones,False
default,trips_data,False
,rawdf,False


In [10]:
%%sparksql
-- Turn a geohash string into a polygon; the second argument (4) is the precision.
SELECT
    ST_GeomFromGeoHash('s00twy01mt', 4)

0
"st_geomfromgeohash(s00twy01mt, 4)"
"POLYGON ((0.703125 0.87890625, 0.703125 1.0546875, 1.0546875 1.0546875, 1.0546875 0.87890625, 0.703125 0.87890625))"


In [12]:
%%sparksql
-- Build a geometry from a GeoJSON Feature document.
-- The result shown below the cell is the polygon from the geometry member only.
SELECT ST_GeomFromGeoJSON('''{
   "type":"Feature",
   "properties":{
      "STATEFP":"01",
      "COUNTYFP":"077",
      "TRACTCE":"011501",
      "BLKGRPCE":"5",
      "AFFGEOID":"1500000US010770115015",
      "GEOID":"010770115015",
      "NAME":"5",
      "LSAD":"BG",
      "ALAND":6844991,
      "AWATER":32636
   },
   "geometry":{
      "type":"Polygon",
      "coordinates":[
         [
            [-87.621765, 34.873444],
            [-87.617535, 34.873369],
            [-87.62119, 34.85053],
            [-87.62144, 34.865379],
            [-87.621765, 34.873444]
         ]
      ]
   }
}''')

0
"st_geomfromgeojson({  ""type"":""Feature"",  ""properties"":{  ""STATEFP"":""01"",  ""COUNTYFP"":""077"",  ""TRACTCE"":""011501"",  ""BLKGRPCE"":""5"",  ""AFFGEOID"":""1500000US010770115015"",  ""GEOID"":""010770115015"",  ""NAME"":""5"",  ""LSAD"":""BG"",  ""ALAND"":6844991,  ""AWATER"":32636  },  ""geometry"":{  ""type"":""Polygon"",  ""coordinates"":[  [  [-87.621765, 34.873444],  [-87.617535, 34.873369],  [-87.62119, 34.85053],  [-87.62144, 34.865379],  [-87.621765, 34.873444]  ]  ]  } })"
"POLYGON ((-87.621765 34.873444, -87.617535 34.873369, -87.62119 34.85053, -87.62144 34.865379, -87.621765 34.873444))"


In [13]:
%%sparksql
-- Parse a WKT linestring and report the geometry type name of the result.
SELECT
    GeometryType(
        ST_GeomFromText('LINESTRING(77.29 29.07,77.42 29.26,77.27 29.31,77.29 29.07)')
    );

0
"geometrytype(st_geomfromtext(LINESTRING(77.29 29.07,77.42 29.26,77.27 29.31,77.29 29.07), 0))"
LINESTRING
