In [None]:
import geoanalytics
import geoanalytics.sql.functions as ST
import geoanalytics.tracks.functions as TRK
from geoanalytics.tools import *

import pyspark.sql.functions as F
from pyspark.sql import Window

In [None]:
# Base directory for optional exports. While this is None (falsy) the
# `if output:` guard further down skips writing the segments to disk.
# Example value: "C:/dev/demo/uc2025/sandbox/tucson_demo"
output = None

In [None]:
# Spark's DataFrame class and the notebook's map helper module.
from pyspark.sql import DataFrame
from esrimap import EsriJSMap, Renderers, Labels, Popups

# Attach EsriJSMap.display_layer to DataFrame so that a DataFrame can be drawn
# directly with `df.display_layer(...)` in later cells.
DataFrame.display_layer = EsriJSMap.display_layer

In [None]:
# Read the route points from parquet and keep the track id, the `index` column,
# the DATE column parsed to a timestamp, and a point geometry built from x1/y1
# (3857 is passed to ST.point as the spatial reference).
points = (
    spark.read.parquet(r"C:\dev\demo\uc2025\sandbox\routes_10000_i15_d50")
    .select(
        "id",
        "index",
        F.to_timestamp("DATE").alias("date"),
        ST.point("x1", "y1", 3857).alias("geometry"),
    )
    .persist()  # the frame is reused by the map and sampling cells below
)

# Preview a few rows, then report the total number of points.
points.show(5, truncate=False)
print(f"Point Count: {points.count()}")

# Load street data generated from OpenStreetMap (OSM) data

Example of enhancing a network dataset to include from/to node fields: https://community.esri.com/t5/geoanalytics-engine-blog/enhancing-a-network-dataset-to-include-from-and-to/ba-p/1372018

In [None]:
# Load the streets shapefile, keeping only the id, the from/to node columns,
# the direction column and the geometry.
streets = (
    spark.read.format("shapefile")
    .load(r"C:/dev/demo/uc2025/sandbox/Tucson1000Int15Dev300/streets")
    .select("id", "from_node", "to_node", "direction", "geometry")
)

# Print the resulting column names and types.
streets.printSchema()

# Draw streets and points

* Note the connectivity fields (`from_node`, `to_node`) on the streets layer; they are shown in each street's popup.

In [None]:
# Overview map: the street network in black with all input points in red.
map = EsriJSMap(basemap="streets-navigation-vector")
map.add_layer(
    streets,
    color="black",
    width=1,
    # The popup lists the id, connectivity and direction attributes of a street.
    popup=["id", "from_node", "to_node", "direction"],
)
# The marker size is passed as a number (it was the string "4"), consistent
# with every other size argument used in this notebook.
map.add_layer(points, color="red", size=4)
map.display()

In [None]:
# Restrict the following cells to the points whose track id is below 5.
sample_filter = "id < 5"
sample_points = points.filter(sample_filter)

# Optional preview of the subset on a map (disabled):
# sample_points.display_layer(basemap="streets-navigation-vector", color="red", size=5, label="index")

# Number of points in the subset; shown as the cell output.
sample_points.count()

In [None]:
# Configure the SnapTracks tool: tracks are identified by "id", the search
# distance is 70 meters with the planar distance method, street connectivity
# comes from from_node/to_node, and the "direction" field is matched using the
# values FT (forward), TF (backward) and N (none).
snapper = (
    SnapTracks()
    .setTrackFields("id")
    .setSearchDistance(70, "meters")
    .setDistanceMethod("planar")
    .setConnectivityFields("from_node", "to_node")
    .setDirectionFieldMatching("direction", forward_value="FT", backward_value="TF", none_value="N")
)

# Snap the sampled points to the streets; persisted because the result feeds
# the maps, the route comparison and the reverse geocoding below.
snapped_points = snapper.run(sample_points, streets).persist()

map = EsriJSMap(basemap="gray-vector")

# Streets whose bounding box intersects the extent of the snapped points.
snap_extent = snapped_points.st.get_extent()
streets_in_extent = streets.where(ST.bbox_intersects("geometry", *snap_extent))
map.add_layer(streets_in_extent, color="black", width=1, popup=["from_node", "to_node", "direction"])

# A line from each input point ("geometry") to its "match_geometry".
snap_connectors = snapped_points.select(
    ST.linestring(F.array("geometry", "match_geometry")).alias("geom")
)
map.add_layer(snap_connectors, color="red", width=2)

# The matched locations themselves.
map.add_layer(snapped_points.select("match_geometry"), color="red", size=6)
map.display()

In [None]:
# For every track id, take the geometry with the smallest date as the start
# point and the geometry with the largest date as the end point.
track_start = F.min_by("geometry", "date").alias("start_point")
track_end = F.max_by("geometry", "date").alias("end_point")

end_points = sample_points.groupBy("id").agg(track_start, track_end)

In [None]:
# Configure the CreateRoutes tool with the "Driving Time" travel mode, the
# Arizona mobile map package as the network, and start_point/end_point as stops.
route_tool = (
    CreateRoutes()
    .setTravelMode("Driving Time")
    .setNetwork("C:/dev/smp/Arizona.mmpk")
    .setStops("start_point", "end_point")
)

# One row of end_points per track id goes in; the output is used below for its
# "route_geometry" column.
expected_routes = route_tool.run(end_points)

# Draw the computed routes in blue with the snapped point locations in red.
map = EsriJSMap(basemap="streets-navigation-vector")
map.add_layer(
    expected_routes.select("route_geometry"),
    color="blue",
    width=2,
)
map.add_layer(
    snapped_points.select("match_geometry"),
    color="red",
    size=6,
)
map.display()

In [None]:
# Largest distance between a snapped point and its track's expected route for
# the point to count as "on route".
# NOTE(review): the value is compared against the raw ST.distance result, so it
# is presumably in the units of the geometries' spatial reference - confirm.
on_route_max_distance = 300

# Per track id: how many snapped points lie on / off the expected route and the
# resulting percentage.
#
# Changes compared with the previous version of this cell:
#   * The intermediate select(*snapped_points.columns, ...) was removed. The
#     groupBy only needs "id" and "on_route", and the select would fail with an
#     ambiguous-column error if both joined frames shared a non-key column name.
#   * The percentage divides by nullif(total, 0), so a track with no countable
#     points yields NULL instead of a divide-by-zero error under ANSI mode.
#
# Rows whose distance is NULL have a NULL "on_route" flag and are counted in
# neither num_on_route nor num_off_route.
distance_from_expected = (
    snapped_points.join(expected_routes, "id")
    .withColumn("dist_from_route", ST.distance("route_geometry", "match_geometry"))
    .withColumn("on_route", F.col("dist_from_route") < on_route_max_distance)
    .groupBy("id")
    .agg(
        F.count_if("on_route").alias("num_on_route"),
        F.count_if(F.expr("not on_route")).alias("num_off_route"),
    )
    .withColumn(
        "percent_on_route",
        F.expr("(num_on_route / nullif(num_on_route + num_off_route, 0)) * 100"),
    )
)

distance_from_expected.show()

In [None]:
# Address attributes kept from the geocoder output; the same set is used for
# the stored columns, the preview table and the map popup.
address_fields = [
    "LongLabel",
    "ShortLabel",
    "Subregion",
    "Region",
    "Neighborhood",
    "Postal",
    "PostalExt",
]

# Reverse geocode the snapped points with the Arizona mobile map package as
# locator, restricted to the "streetaddress" and "streetname" feature types.
# The input "geometry" column is dropped first, leaving "match_geometry" in the
# frame - presumably so the matched location is the one geocoded; confirm.
reverse_geocode = (
    ReverseGeocode()
    .setLocator(r"C:\dev\smp\Arizona.mmpk")
    .setFeatureTypes("streetaddress", "streetname")
    .setOutFields("all")
    .run(snapped_points.drop("geometry"))
    .select("id", "date", "match_geometry", *address_fields)
    .persist()
)

# Preview the address attributes of the first rows.
reverse_geocode.select(*address_fields).show(5)

# Map the geocoded points, labelled by postal code, with all address fields in the popup.
reverse_geocode.display_layer(label="Postal", popup=list(address_fields))

In [None]:
# Order the points of each track by time so every row can look at its successor.
window = Window.partitionBy("id").orderBy("date")

# Build one segment per point: the following point's date and location, the
# difference between the two timestamps after casting both to long, and the
# distance between the two locations. The last point of a track has no
# successor, so its next_* columns, duration and length are NULL.
# NOTE(review): "length" is the raw ST.distance result; the later summary labels
# its sum as meters - confirm the geometries' spatial reference uses meters.
segments = (
    reverse_geocode
    .withColumn("next_date", F.lead("date").over(window))
    .withColumn("next_location", F.lead("match_geometry").over(window))
    .withColumn(
        "duration",
        F.col("next_date").cast("long") - F.col("date").cast("long"),
    )
    .withColumn("length", ST.distance("next_location", "match_geometry"))
    .persist()
)

segments.show(2, vertical=True, truncate=False)

# Export only when an output directory has been configured; the data is
# repartitioned to a single partition before being written as geoparquet.
if output:
    segments_writer = segments.repartition(1).write.format("geoparquet")
    segments_writer.save(f"{output}/segments")

In [None]:
# Summarise the segments per postal code: number of rows, summed duration and
# summed length, listed from the most to the least frequent postal code.
postal_summary = segments.groupBy("postal").agg(
    F.count("*").alias("count"),
    F.sum("duration").alias("total_duration_seconds"),
    F.sum("length").alias("total_length_meters"),
)

postal_summary.orderBy(F.col("count").desc()).show()