In [0]:
import pandas as pd
import geopandas as gpd

from pyspark.sql.types import *
from shapely import wkt

In [0]:
# Explicit schema for the boroughs CSV. Every column is declared nullable;
# the geometry column is read as a plain string and the two area columns
# ("hectares", "nonld_area") as doubles.
geo_schema = (
  StructType()
    .add("geometry", StringType(), True)
    .add("name", StringType(), True)
    .add("gss_code", StringType(), True)
    .add("hectares", DoubleType(), True)
    .add("nonld_area", DoubleType(), True)
    .add("ons_inner", StringType(), True)
    .add("sub_2009", StringType(), True)
    .add("sub_2006", StringType(), True)
)

In [0]:
# Read the boroughs CSV using the explicit schema instead of inferring types;
# the first line of the file is treated as a header row.
boroughs_df = (spark.read
   .format("csv")
   .option("header", True)
   .schema(geo_schema)
   .load("/FileStore/London_Borough_Excluding_MHW.csv"))

In [0]:
# Discard the columns that are not used later in the notebook, leaving
# "geometry", "name" and "hectares".
boroughs_df = boroughs_df.drop(
  "gss_code",
  "nonld_area",
  "ons_inner",
  "sub_2009",
  "sub_2006",
)

In [0]:
# Print up to 33 rows of the Spark DataFrame (presumably one row per borough —
# TODO confirm against the CSV).
boroughs_df.show(33)

In [0]:
# Collect the Spark DataFrame to the driver as a pandas DataFrame so it can be
# handed to GeoPandas/Shapely.
boroughs_pandas_df = boroughs_df.toPandas()

In [0]:
# Parse each WKT string in the "geometry" column into a Shapely geometry object.
# Note: this overwrites the column in place, so the cell cannot be re-run
# without first re-creating boroughs_pandas_df.
boroughs_pandas_df["geometry"] = boroughs_pandas_df["geometry"].map(wkt.loads)

In [0]:
# Wrap the pandas frame as a GeoDataFrame, using the parsed "geometry" column
# as the active geometry.
# NOTE(review): no `crs` is passed here, so the frame carries no coordinate
# reference system — confirm whether the source data's CRS should be declared.
boroughs_geo_df = gpd.GeoDataFrame(boroughs_pandas_df, geometry = "geometry")

In [0]:
# Split multi-part geometries so that each part gets its own row.
# index_parts = False keeps the original index values (repeated for each part)
# instead of adding an extra per-part index level.
boroughs_geo_df = boroughs_geo_df.explode(column = "geometry", index_parts = False)

In [0]:
# Display the exploded GeoDataFrame as the cell output.
boroughs_geo_df

In [0]:
# Draw a choropleth of the borough geometries, shaded by the "hectares" column
# using the "OrRd" colour map, with a legend.
# The returned axes are bound to `boroughs_ax` rather than `map`, so that
# Python's built-in `map()` is not shadowed for the rest of the notebook.
boroughs_ax = boroughs_geo_df.plot(column = "hectares", cmap = "OrRd", legend = True)

# Hide the axis lines, ticks and labels around the plot.
boroughs_ax.set_axis_off()

"Contains National Statistics data © Crown copyright and database right [2015]" and "Contains Ordnance Survey data © Crown copyright and database right [2015]"

In [0]:
# Add a "centroid" column holding the centroid of each row's geometry.
# This runs after the explode step, so the centroid is per exploded part.
boroughs_geo_df = boroughs_geo_df.assign(
  centroid = lambda exploded_df: exploded_df["geometry"].centroid
)

In [0]:
# Print a summary of the frame (columns, non-null counts, dtypes) to check the
# result of adding the centroid column.
boroughs_geo_df.info()

In [0]:
# Serialise both geometry-valued columns back to WKT strings before the frame
# is handed to Spark. The columns are overwritten in place, so this cell is
# not safe to run twice on the same frame.
for wkt_column in ("geometry", "centroid"):
  boroughs_geo_df[wkt_column] = boroughs_geo_df[wkt_column].apply(wkt.dumps)

In [0]:
# Convert the frame (geometry and centroid now held as WKT strings) back into
# a Spark DataFrame. This rebinds `boroughs_df`, replacing the frame read from
# the CSV earlier.
boroughs_df = spark.createDataFrame(boroughs_geo_df)

In [0]:
%run ./Setup

In [0]:
# Session-level settings for the SingleStore Spark connector.
# `cluster` and `password` are not defined in this notebook; presumably they
# come from the ./Setup notebook run above — verify.
singlestore_settings = {
  "spark.datasource.singlestore.ddlEndpoint": cluster,
  "spark.datasource.singlestore.user": "admin",
  "spark.datasource.singlestore.password": password,
  "spark.datasource.singlestore.disablePushdown": "false",
}

for setting_key, setting_value in singlestore_settings.items():
  spark.conf.set(setting_key, setting_value)

In [0]:
# Write the boroughs to the SingleStore table geo_db.london_boroughs through
# the "singlestore" data source, passing the loadDataCompression = "LZ4" option.
# mode = "ignore": if the target already exists, the write is skipped and the
# existing contents are left untouched.
boroughs_df.write.save(
  "geo_db.london_boroughs",
  format = "singlestore",
  mode = "ignore",
  loadDataCompression = "LZ4",
)