In [0]:
%pip install keplergl geopandas --quiet


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m26.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [0]:
import geopandas as gpd
import pandas as pd
from keplergl import KeplerGl
from pathlib import Path



## BRONZE - Vessel AIS and Anchorage Data

In [0]:
%sql
-- Count the rows in the raw AIS records table.
SELECT
  count(*)
FROM users.david_hurley.ais_records_raw;

count(*)
243511076


In [0]:
%sql
-- Show a single row of the raw AIS records table to inspect its columns.
SELECT *
FROM users.david_hurley.ais_records_raw
LIMIT 1;

mmsi,base_date_time,latitude,longitude,sog,cog,heading,vessel_name,imo,call_sign,vessel_type,status,length,width,draft,Cargo,TransceiverClass
367792830,2024-03-27T11:34:30.000Z,47.62924,-122.38967,0.0,360.0,511.0,ROPOJO,IMO0000000,WDJ4944,37,,14,5,,,B


In [0]:
%sql
-- Show a single row of the raw anchorages table to inspect its columns.
SELECT *
FROM users.david_hurley.anchorages_raw
LIMIT 1;

RCID,anchorage_area
534,"POLYGON((-73.125788 46.054167,-73.12587427409099 46.05329104445415,-73.12612978089903 46.05244875138868,-73.12654470144076 46.05167248965374,-73.12710309055248 46.05099209055247,-73.12778348965375 46.05043370144076,-73.12855975138868 46.05001878089902,-73.12940204445415 46.04976327409099,-73.130278 46.049677,-73.13115395554586 46.04976327409099,-73.13199624861133 46.05001878089902,-73.13277251034626 46.05043370144076,-73.13345290944753 46.05099209055247,-73.13401129855924 46.05167248965374,-73.13442621910097 46.05244875138868,-73.13468172590902 46.05329104445415,-73.13476800000001 46.054167,-73.13468172590902 46.05504295554585,-73.13442621910097 46.05588524861132,-73.13401129855924 46.05666151034626,-73.13345290944753 46.05734190944753,-73.13277251034626 46.05790029855924,-73.13199624861133 46.058315219100976,-73.13115395554586 46.05857072590901,-73.130278 46.058657,-73.12940204445415 46.05857072590901,-73.12855975138868 46.058315219100976,-73.12778348965375 46.05790029855924,-73.12710309055248 46.05734190944753,-73.12654470144076 46.05666151034626,-73.12612978089903 46.05588524861132,-73.12587427409099 46.05504295554585,-73.125788 46.054167))"


## SILVER - Construct Spatial Types

In [0]:
%sql
-- Build the transformed AIS table: keep the vessel identity, timestamp and
-- position columns from the raw table and add a point geometry (SRID 4326).
CREATE OR REPLACE TABLE users.david_hurley.ais_records_transformed AS
SELECT
  mmsi,
  base_date_time,
  vessel_name,
  vessel_type,
  latitude,
  longitude,
  -- Argument order is (longitude, latitude), i.e. x then y.
  ST_POINT(longitude, latitude, 4326) AS point_geom
FROM users.david_hurley.ais_records_raw;

-- Preview one row of the table just written.
SELECT *
FROM users.david_hurley.ais_records_transformed
LIMIT 1;

mmsi,base_date_time,vessel_name,vessel_type,latitude,longitude,point_geom
368344830,2024-03-10T04:25:20.000Z,KOPENG,36,30.33043,-81.43754,SRID=4326;POINT(-81.43754 30.33043)


In [0]:
%sql
CREATE OR REPLACE TABLE users.david_hurley.anchorages_transformed AS (
  SELECT 
    *,
    ST_GEOMFROMTEXT(anchorage_area, 4326) as polygon_geom
  FROM users.david_hurley.anchorages_raw
);

SELECT * FROM users.david_hurley.anchorages_transformed LIMIT 1;

RCID,anchorage_area,polygon_geom
534,"POLYGON((-73.125788 46.054167,-73.12587427409099 46.05329104445415,-73.12612978089903 46.05244875138868,-73.12654470144076 46.05167248965374,-73.12710309055248 46.05099209055247,-73.12778348965375 46.05043370144076,-73.12855975138868 46.05001878089902,-73.12940204445415 46.04976327409099,-73.130278 46.049677,-73.13115395554586 46.04976327409099,-73.13199624861133 46.05001878089902,-73.13277251034626 46.05043370144076,-73.13345290944753 46.05099209055247,-73.13401129855924 46.05167248965374,-73.13442621910097 46.05244875138868,-73.13468172590902 46.05329104445415,-73.13476800000001 46.054167,-73.13468172590902 46.05504295554585,-73.13442621910097 46.05588524861132,-73.13401129855924 46.05666151034626,-73.13345290944753 46.05734190944753,-73.13277251034626 46.05790029855924,-73.13199624861133 46.058315219100976,-73.13115395554586 46.05857072590901,-73.130278 46.058657,-73.12940204445415 46.05857072590901,-73.12855975138868 46.058315219100976,-73.12778348965375 46.05790029855924,-73.12710309055248 46.05734190944753,-73.12654470144076 46.05666151034626,-73.12612978089903 46.05588524861132,-73.12587427409099 46.05504295554585,-73.125788 46.054167))","SRID=4326;POLYGON((-73.125788 46.054167,-73.12587427409099 46.05329104445415,-73.12612978089903 46.05244875138868,-73.12654470144076 46.05167248965374,-73.12710309055248 46.05099209055247,-73.12778348965375 46.05043370144076,-73.12855975138868 46.05001878089902,-73.12940204445415 46.04976327409099,-73.130278 46.049677,-73.13115395554586 46.04976327409099,-73.13199624861133 46.05001878089902,-73.13277251034626 46.05043370144076,-73.13345290944753 46.05099209055247,-73.13401129855924 46.05167248965374,-73.13442621910097 46.05244875138868,-73.13468172590902 46.05329104445415,-73.13476800000001 46.054167,-73.13468172590902 46.05504295554585,-73.13442621910097 46.05588524861132,-73.13401129855924 46.05666151034626,-73.13345290944753 46.05734190944753,-73.13277251034626 46.05790029855924,-73.13199624861133 
46.058315219100976,-73.13115395554586 46.05857072590901,-73.130278 46.058657,-73.12940204445415 46.05857072590901,-73.12855975138868 46.058315219100976,-73.12778348965375 46.05790029855924,-73.12710309055248 46.05734190944753,-73.12654470144076 46.05666151034626,-73.12612978089903 46.05588524861132,-73.12587427409099 46.05504295554585,-73.125788 46.054167))"


## SILVER - Perform Spatial Join

In [0]:
# Spatial join: keep each AIS ping (vessel_type = 70) whose point geometry lies
# inside an anchorage polygon. SELECT * returns the columns of both tables.
#
# NOTE(review): the filter and messages call type 70 "tanker", but in the
# standard AIS ship-type table 70-79 is cargo and 80-89 is tanker. Confirm
# whether the filter should be 80-89 or the label should read "cargo".
df = spark.sql(
  """
    SELECT
      *
    FROM users.david_hurley.ais_records_transformed t1
    JOIN users.david_hurley.anchorages_transformed t2
      ON ST_CONTAINS(t2.polygon_geom, t1.point_geom)
    WHERE t1.vessel_type = 70 -- code 70 is tanker type
    ORDER BY base_date_time, vessel_name DESC
  """
)

# count() is an action, so this line executes the join.
print("Number of Tanker Ship AIS Pings at Anchorage: ", df.count())

Number of Tanker Ship AIS Pings at Anchorage:  412871


In [0]:
# Interactive Kepler.gl map of the anchorage join result.
# NOTE: the name `map` shadows Python's builtin map(); it is kept unchanged
# here because other cells may refer to it.
map = KeplerGl(height=600)

# Vessel layer: timestamp, name and position columns, capped at 40000 rows
# before conversion to pandas.
vessels_pdf = (
  df.select('base_date_time', 'vessel_name', 'latitude', 'longitude')
    .limit(40000)
    .toPandas()
)
map.add_data(data=vessels_pdf, name='vessels')

# Anchorage layer: the distinct (RCID, anchorage_area) pairs in the join result.
anchorages_pdf = df.select("RCID", "anchorage_area").distinct().toPandas()
map.add_data(data=anchorages_pdf, name='anchorages')

# Bare last expression renders the map widget as the cell output.
map

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter


KeplerGl(data={'vessels': {'index': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,…

## GOLD - H3 Aggregation

In [0]:
%sql
-- Build the curated table: one row per (H3 resolution-7 cell, hour) with the
-- number of AIS records (vessel_type = 70) that fall in that cell and hour.
--
-- Only the resolution-7 cell is read by the aggregation, so the CTE computes
-- just that one; add further h3_pointash3(..., <res>) columns here if other
-- resolutions are needed later.
--
-- NOTE(review): ship_count is COUNT(*), i.e. the number of AIS pings, not the
-- number of distinct vessels. If distinct ships are intended, carry mmsi
-- through the CTE and use COUNT(DISTINCT mmsi) - confirm the intended metric.
-- NOTE(review): type 70 is labelled "tanker" below, but the standard AIS
-- ship-type table lists 70-79 as cargo and 80-89 as tanker - confirm.
CREATE OR REPLACE TABLE users.david_hurley.ais_records_curated AS
WITH base AS (
  SELECT
    date_trunc('hour', base_date_time) AS hour_ts,
    h3_pointash3(ST_AsText(point_geom), 7) AS h3_res7
  FROM users.david_hurley.ais_records_transformed
  WHERE vessel_type = 70 -- code 70 is tanker type
)
SELECT
  -- Sequential id ordered by hour, then cell.
  ROW_NUMBER() OVER (ORDER BY hour_ts, h3_res7) AS id,
  7 AS resolution,
  h3_res7 AS h3_cell,
  hour_ts,
  COUNT(*) AS ship_count
FROM base
GROUP BY h3_res7, hour_ts;

-- Preview one row of the table just written.
SELECT * FROM users.david_hurley.ais_records_curated LIMIT 1;

id,resolution,h3_cell,hour_ts,ship_count
1,7,608658873108135935,2024-03-01T00:00:00.000Z,26
