# Northwest Port Alliance Report for CARB RFP

This notebook provides a static report for the Seattle and Tacoma port system to support TRG's submission for the CARB RFP. The data and the majority of the code base comes from the [Port Performance Project](https://github.com/epistemetrica/Port-Performance-Project) represents the data work related to the [WSU TRG's](https://ses.wsu.edu/trg/). 

In [1]:
#prelims
import polars as pl
import pandas as pd
import geopandas as gpd
import time
import plotly.express as px
import matplotlib.pyplot as plt
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import contextily as cx
import numpy as np
import glob

#enable string cache for polars categoricals
pl.enable_string_cache()
#display settings
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pl.Config(tbl_rows=50);

## Load and Inspect Data

In [2]:
main_df = (
    #read
    pl.read_parquet('port data/dashboard/main.parquet')
    #limit to only Northwest Seaport Alliance
    .filter(pl.col('port_name').is_in(['Seattle, WA', 'Tacoma, WA']))
)
display(main_df.describe())
main_df.head()

statistic,docking_id,mmsi,time,speed,course,heading,status,vessel_name,vessel_type,imo,length,width,draft,cargo,status_previous,status_duration,short,port_type,port_name,port_rank,port_dist,year,month,dock_id,dock_dist,port_lat,port_lon,dock_lat,dock_lon
str,f64,str,str,f64,f64,f64,f64,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,str,f64,f64,f64,str,str,f64,f64,f64,f64,f64
"""count""",26440.0,"""26440""","""26440""",26440.0,26407.0,26404.0,26440.0,"""26440""",26440.0,26438.0,26440.0,25915.0,25930.0,20318.0,25692.0,26396.0,26440.0,"""26440""","""26440""",26440.0,26440.0,26440.0,"""26440""","""26440""",26369.0,26440.0,26440.0,26369.0,26369.0
"""null_count""",0.0,"""0""","""0""",0.0,33.0,36.0,0.0,"""0""",0.0,2.0,0.0,525.0,510.0,6122.0,748.0,44.0,0.0,"""0""","""0""",0.0,0.0,0.0,"""0""","""0""",71.0,0.0,0.0,71.0,71.0
"""mean""",922891.387141,,"""2021-06-30 12:25:42.641301""",2.031623,181.334283,189.478867,2.183585,,70.608623,9895600.0,246.99236,34.268532,11.366537,71.251304,2.23085,23235.131687,0.0,,,31.44497,587906.551713,2021.009871,,,587362.205844,47.422284,-122.410437,47.40879,-122.378918
"""std""",517281.269378,,,4.644211,101.208465,100.338901,2.442724,,2.114414,23051000.0,53.345669,6.484736,2.887599,6.053708,2.407398,109073.806092,,,,0.496972,1595200.0,1.898144,,,1597300.0,0.148123,0.045862,0.159534,0.031979
"""min""",620.0,"""205221000""","""2018-01-01 01:23:57""",0.0,0.0,0.0,0.0,,70.0,6413924.0,104.0,15.0,-12.8,0.0,0.0,0.0,0.0,"""C""","""Seattle, WA""",31.0,259.542763,2018.0,"""201801""","""016F""",0.164943,47.28966,-122.4515,47.253889,-122.44305
"""25%""",527083.0,,"""2019-11-26 23:35:31""",0.0,106.4,126.0,0.0,,70.0,9232280.0,200.0,32.0,9.7,70.0,0.0,500.0,,,,31.0,4880.921804,2019.0,,,200.420219,47.28966,-122.4515,47.266389,-122.405
"""50%""",927800.0,,"""2021-07-29 10:26:37""",0.1,180.0,180.0,1.0,,70.0,9347566.0,229.0,32.0,11.5,70.0,1.0,1577.0,,,,31.0,9007.294693,2021.0,,,3667.666482,47.28966,-122.4515,47.274444,-122.38333
"""75%""",1276985.0,,"""2023-01-22 00:45:42""",0.7,267.6,296.0,5.0,,70.0,9542295.0,294.0,40.0,13.6,71.0,5.0,5652.0,,,,32.0,298665.572736,2023.0,,,302277.859243,47.587711,-122.359218,47.582943,-122.34305
"""max""",1862019.0,"""636093294""","""2024-09-30 21:57:37""",102.3,359.9,359.0,15.0,,89.0,946470400.0,366.0,58.0,22.5,159.0,15.0,3357536.0,0.0,"""C""","""Tacoma, WA""",32.0,31725000.0,2024.0,"""202409""","""nan""",31723000.0,47.587711,-122.359218,47.626389,-122.34


docking_id,mmsi,time,speed,course,heading,status,vessel_name,vessel_type,imo,length,width,draft,cargo,status_previous,status_duration,short,port_type,port_name,port_rank,port_dist,year,month,dock_id,dock_dist,port_lat,port_lon,dock_lat,dock_lon
f64,str,datetime[μs],f64,f64,f64,f64,cat,f64,f64,f64,f64,f64,f64,f64,f64,bool,str,str,f64,f64,i32,str,str,f64,f64,f64,f64,f64
620.0,"""205221000""",2024-09-10 21:20:44,0.2,222.1,129.0,0.0,"""LOWLANDS PELIKAAN""",70.0,9700005.0,180.0,30.0,6.8,70.0,5.0,548.0,False,"""C""","""Tacoma, WA""",31.0,1333900.0,2024,"""202409""","""0ZF3""",1329700.0,47.28966,-122.4515,47.268056,-122.36888
620.0,"""205221000""",2024-09-11 06:29:24,0.3,116.9,186.0,1.0,"""LOWLANDS PELIKAAN""",70.0,9700005.0,180.0,30.0,6.8,70.0,0.0,566.0,False,"""C""","""Tacoma, WA""",31.0,1449400.0,2024,"""202409""","""0ZF3""",1445800.0,47.28966,-122.4515,47.268056,-122.36888
620.0,"""205221000""",2024-09-11 15:55:54,7.6,249.3,248.0,0.0,"""LOWLANDS PELIKAAN""",70.0,9700005.0,180.0,30.0,6.8,70.0,1.0,18.0,False,"""C""","""Tacoma, WA""",31.0,1403300.0,2024,"""202409""","""0ZF3""",1399500.0,47.28966,-122.4515,47.268056,-122.36888
620.0,"""205221000""",2024-09-11 16:14:30,0.1,336.0,234.0,1.0,"""LOWLANDS PELIKAAN""",70.0,9700005.0,180.0,30.0,6.8,70.0,0.0,9415.0,False,"""C""","""Tacoma, WA""",31.0,1449500.0,2024,"""202409""","""0ZF3""",1445900.0,47.28966,-122.4515,47.268056,-122.36888
620.0,"""205221000""",2024-09-18 05:09:36,8.9,336.5,330.0,0.0,"""LOWLANDS PELIKAAN""",70.0,9700005.0,180.0,30.0,6.8,70.0,1.0,5414.0,False,"""C""","""Tacoma, WA""",31.0,1447200.0,2024,"""202409""","""0ZF3""",1443600.0,47.28966,-122.4515,47.268056,-122.36888


## Generate stats

In [3]:
#init handy variables
earliest_date = main_df['time'].min().date()
latest_date = main_df['time'].max().date()

#define port stats function
def port_stats(df, start_date=earliest_date, end_date=latest_date):
    #create ports stats
    portstats_df = (
        #convert main gdf to polars
        df
        #filter to given date
        .filter(pl.col('time').is_between(start_date, end_date))
        #ensure sorting
        .sort(['mmsi', 'time'])
        #drop messages not associated with a docking event
        .drop_nulls(subset='docking_id')
        .with_columns(
            #sum anchorage time for each docking event
            time_at_anchor = (
                pl.when(pl.col('status')==1)
                .then(pl.col('status_duration'))
                .otherwise(pl.lit(None))
                .sum().over('docking_id')
            ),
            #get monthly vessels and visits
            vessels = pl.col('mmsi').n_unique().over('port_name', 'month'),
            visits = pl.col('docking_id').n_unique().over('port_name', 'month')
        )
        #aggregate to ports
        .group_by('port_name')
        .agg(
            #keep lat and long
            port_lat = pl.col('port_lat').first(),
            port_lon = pl.col('port_lon').first(),
            #get monthly average of unique vessels seen at each port
            vessels_avg = pl.col('vessels').mean(),
            #get monthly average of vessel visits at each port
            visits_avg = pl.col('visits').mean(),
            #get median time at berth in hours
            time_at_berth_median = (
                pl.when(pl.col('status')==5)
                .then(pl.col('status_duration'))
                .otherwise(pl.lit(None))
            ).median()/60,
            #get median time at anchor in hours
            time_at_anchor_median = pl.col('time_at_anchor').median()/60,
            #get mean time at anchor in hours
            time_at_anchor_mean = pl.col('time_at_anchor').mean()/60
        )
        #convert to pandas to that geopandas is happy
        .to_pandas()
    )
    #convert back to geodataframe
    portstats_gdf = (
        gpd.GeoDataFrame(
            portstats_df, 
            geometry=gpd.points_from_xy(portstats_df.port_lon, 
                                        portstats_df.port_lat),
            crs=3857
        )
    )
    return portstats_gdf

#define dock stats function
def dock_stats(df, start_date=earliest_date, end_date=latest_date):
    #create dock stats
    dockstats_df = (
        #convert main gdf to polars
        df
        #filter to given date
        .filter(pl.col('time').is_between(start_date, end_date))
        #ensure sorting
        .sort(['mmsi', 'time'])
        #drop messages not associated with a docking event
        .drop_nulls(subset='docking_id')
        .with_columns(
            #sum anchorage time for each docking event
            time_at_anchor = (
                pl.when(pl.col('status')==1)
                .then(pl.col('status_duration'))
                .otherwise(pl.lit(None))
                .sum().over('docking_id')
            ),
            #get monthly vessels and visits
            vessels = pl.col('mmsi').n_unique().over('dock_id', 'month'),
            visits = pl.col('docking_id').n_unique().over('dock_id', 'month')
        )
        #aggregate to ports
        .group_by('dock_id', 'port_name')
        .agg(
            #keep lat and long
            dock_lat = pl.col('dock_lat').first(),
            dock_lon = pl.col('dock_lon').first(),
            #get monthly average of unique vessels seen at each port
            vessels_avg = pl.col('vessels').mean(),
            #get monthly average of vessel visits at each port
            visits_avg = pl.col('visits').mean(),
            #get median time at berth in hours
            time_at_berth_median = (
                pl.when(pl.col('status')==5)
                .then(pl.col('status_duration'))
                .otherwise(pl.lit(None))
            ).median()/60,
            #get median time at anchor in hours
            time_at_anchor_median = pl.col('time_at_anchor').median()/60,
            #get mean time at anchor in hours
            time_at_anchor_mean = pl.col('time_at_anchor').mean()/60
        )
        #convert to pandas to that geopandas is happy
        .to_pandas()
    )
    #convert back to geodataframe
    dockstats_gdf = (
        gpd.GeoDataFrame(
            dockstats_df, 
            geometry=gpd.points_from_xy(dockstats_df.dock_lon, 
                                        dockstats_df.dock_lat),
            crs=3857
        )
    )
    return dockstats_gdf

In [4]:
#generate stats for each member port
portstats_df = port_stats(main_df)
#inspect
portstats_df

Unnamed: 0,port_name,port_lat,port_lon,vessels_avg,visits_avg,time_at_berth_median,time_at_anchor_median,time_at_anchor_mean,geometry
0,"Tacoma, WA",47.28966,-122.4515,71.517925,108.658261,23.858333,0.0,37.994551,POINT (-122.451 47.290)
1,"Seattle, WA",47.587711,-122.359218,55.795885,76.343365,29.316667,0.0,65.377577,POINT (-122.359 47.588)


In [5]:
#get combined stats for NW Port Alliance
nwpa_stats = (
    #drop port name
    main_df.drop('port_name')
    #create new port name
    .with_columns(port_name = pl.lit('NW Seaport Alliance'))
)
#get stats
nwpa_stats = port_stats(nwpa_stats).drop(['port_lat', 'port_lon', 'geometry'], axis=1)
#inspect
nwpa_stats

Unnamed: 0,port_name,vessels_avg,visits_avg,time_at_berth_median,time_at_anchor_median,time_at_anchor_mean
0,NW Seaport Alliance,124.875809,181.221373,25.583333,0.0,50.179402


In [6]:
dockstats_df = dock_stats(main_df)

In [7]:
print(dockstats_df.head())

  dock_id    port_name   dock_lat    dock_lon  vessels_avg  visits_avg  \
0    0TD0  Seattle, WA  47.589167 -122.356660     1.000000    1.000000   
1    0ZPT  Seattle, WA  47.552222 -122.342500     1.000000    1.000000   
2    0Z63   Tacoma, WA  47.262500 -122.389720     9.008170   16.365196   
3    0UFJ   Tacoma, WA  47.259722 -122.384440    11.995355   14.086264   
4    01CX   Tacoma, WA  47.277566 -122.430666     1.000000    1.000000   

   time_at_berth_median  time_at_anchor_median  time_at_anchor_mean  \
0             47.500000                    0.0             0.000000   
1             92.633333                    0.0             0.000000   
2             49.300000                    0.0             9.372331   
3             15.216667                    0.0            26.589084   
4             49.066667                    0.0             0.000000   

                  geometry  
0  POINT (-122.357 47.589)  
1  POINT (-122.343 47.552)  
2  POINT (-122.390 47.263)  
3  POINT (-1

In [8]:
seattle_df = dockstats_df[dockstats_df.port_name == 'Seattle, WA']

# seattle mapbox
fig_seattle = px.scatter_mapbox(
    seattle_df,
    lon='dock_lon',
    lat='dock_lat',
    size='visits_avg',
    color='time_at_berth_median',
    range_color=[0, 50],
    hover_name='dock_id',
    size_max=20,
    title='Average Visits per Month & Mean Hours at Berth',
    color_continuous_scale=px.colors.sequential.Viridis,
    labels={'time_at_berth_median': 'Median Hours at Berth'},
    height=500, width=800
)

# Set Mapbox style
fig_seattle.update_layout(
    mapbox_style="carto-positron", 
    mapbox_zoom=10.5, 
    mapbox_center={"lat": seattle_df['dock_lat'].mean(), 
                   "lon": seattle_df['dock_lon'].mean()},
)

# Add footnote using add_annotation
fig_seattle.add_annotation(
    text="Note: Circle size corresponds to average vessel visits per month since 2018",
    xref="paper", yref="paper",
    x=0, y=-0.05,
    showarrow=False,
    font=dict(size=14, color="black"),
    align="left"
)

fig_seattle.show()

In [9]:
tacoma_df = dockstats_df[dockstats_df.port_name == 'Tacoma, WA']

# Example: Mapbox version of your scatter_geo map
fig_seattle = px.scatter_mapbox(
    tacoma_df,
    lon='dock_lon',
    lat='dock_lat',
    size='visits_avg',
    color='time_at_berth_median',
    range_color=[0, 50],
    hover_name='dock_id',
    size_max=20,
    title='Average Visits per Month & Mean Hours at Berth',
    color_continuous_scale=px.colors.sequential.Viridis,
    labels={'time_at_berth_median': 'Median Hours at Berth'},
    height=500, width=800
)

# Set Mapbox style
fig_seattle.update_layout(
    mapbox_style="carto-positron", 
    mapbox_zoom=11, 
    mapbox_center={"lat": tacoma_df['dock_lat'].mean(), 
                   "lon": tacoma_df['dock_lon'].mean()},
)

# Add footnote using add_annotation
fig_seattle.add_annotation(
    text="Note: Circle size corresponds to average vessel visits per month since 2018",
    xref="paper", yref="paper",
    x=0, y=-0.05,
    showarrow=False,
    font=dict(size=14, color="black"),
    align="left"
)

fig_seattle.show()

In [10]:
# Example: Mapbox version of your scatter_geo map
fig2 = px.scatter_mapbox(
    dockstats_df,
    lon='dock_lon',
    lat='dock_lat',
    size='visits_avg',
    color='time_at_anchor_mean',
    range_color=[0, 50],
    hover_name='dock_id',
    size_max=20,
    title='Average Visits per Month & Mean Hours at Anchor',
    color_continuous_scale=px.colors.sequential.Viridis,
    labels={'time_at_anchor_mean': 'Mean Hours at Anchor'},
    height=1000, width=800
)

# Set Mapbox style (e.g., 'carto-positron', 'stamen-terrain', 'open-street-map')
fig2.update_layout(
    mapbox_style="carto-positron",  # Choose a style
    mapbox_zoom=9.5,  # Adjust zoom level
    mapbox_center={"lat": dockstats_df['dock_lat'].mean(), "lon": dockstats_df['dock_lon'].mean()},  # Center map
)

# Add footnote using add_annotation
fig2.add_annotation(
    text="Note: Circle size corresponds to average vessel visits per month",
    xref="paper", yref="paper",
    x=0, y=-0.05,
    showarrow=False,
    font=dict(size=14, color="black"),
    align="left"
)

fig2.show()

In [11]:
#create map figure for ports
fig = px.scatter_geo(
    portstats_df,
    lon='port_lon',
    lat='port_lat',
    size='visits_avg',
    color='time_at_anchor_median',
    range_color=[0,50],
    hover_name='port_name',
    size_max=20,
    title='Average Visits per Month & Median Hours at Anchor',
    color_continuous_scale=px.colors.sequential.Viridis,
    labels={
        'time_at_anchor_median':'Median Hours at Anchor'
    }
)
# Fit the view to ports
fig.update_geos(fitbounds="locations")
# Add footnote using add_annotation
fig.add_annotation(
    text="Note: Circle size corresponds to average vessel visits per month",  # Footnote text
    xref="paper", yref="paper",  # Position relative to the plot area
    x=0, y=0-0.05,  # Adjust to footnote position
    showarrow=False,  # No arrow, just text
    font=dict(size=14, color="black"),  # Customize the font style
    align="left"
)

fig.show()