# Northwest Port Alliance Report for CARB RFP

This notebook provides a static report for the Seattle and Tacoma port system to support TRG's submission for the CARB RFP. The data and the majority of the code base comes from the [Port Performance Project](https://github.com/epistemetrica/Port-Performance-Project) represents the data work related to the [WSU TRG's](https://ses.wsu.edu/trg/). 

In [2]:
#prelims
import polars as pl
import pandas as pd
import geopandas as gpd
import time
import plotly.express as px
import matplotlib.pyplot as plt
import contextily as cx
import numpy as np
import glob

#enable string cache for polars categoricals
pl.enable_string_cache()
#display settings
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pl.Config(tbl_rows=50);

## Load and Inspect Data

In [3]:
main_df = (
    #read
    pl.read_parquet('port data/dashboard/main.parquet')
    #limit to only Northwest Seaport Alliance
    .filter(pl.col('port_name').is_in(['Seattle, WA', 'Tacoma, WA']))
)
display(main_df.describe())
main_df.head()

statistic,docking_id,mmsi,time,speed,course,heading,status,vessel_name,vessel_type,imo,length,width,draft,cargo,status_previous,status_duration,short,port_type,port_name,port_rank,port_dist,year,month,dock_id,dock_dist,port_lat,port_lon,dock_lat,dock_lon
str,f64,str,str,f64,f64,f64,f64,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,str,f64,f64,f64,str,str,f64,f64,f64,f64,f64
"""count""",25415.0,"""25415""","""25415""",25415.0,25379.0,25380.0,25415.0,"""25415""",25415.0,25415.0,25415.0,24901.0,24908.0,19299.0,24688.0,25371.0,25415.0,"""25415""","""25415""",25415.0,25415.0,25415.0,"""25415""","""25415""",25345.0,25415.0,25415.0,25345.0,25345.0
"""null_count""",0.0,"""0""","""0""",0.0,36.0,35.0,0.0,"""0""",0.0,0.0,0.0,514.0,507.0,6116.0,727.0,44.0,0.0,"""0""","""0""",0.0,0.0,0.0,"""0""","""0""",70.0,0.0,0.0,70.0,70.0
"""mean""",878478.695849,,"""2021-05-17 00:19:43.182884""",2.033287,181.024374,188.942711,2.189022,,70.58375,9915300.0,247.226598,34.240593,11.412374,71.282294,2.242182,23053.583028,0.0,,,31.445367,596237.285973,2020.892741,,,595719.829008,47.422402,-122.410401,47.40896,-122.379048
"""std""",493301.089577,,,4.731636,101.50017,100.532552,2.447175,,2.087255,23510000.0,53.193671,6.453565,2.906082,6.04698,2.416935,105832.308527,,,,0.497016,1612900.0,1.836575,,,1615100.0,0.148136,0.045866,0.15958,0.032165
"""min""",3249.0,"""205717000""","""2018-01-01 01:23:57""",0.0,0.0,0.0,0.0,,70.0,6413924.0,104.0,15.0,-12.8,0.0,0.0,0.0,0.0,"""C""","""Seattle, WA""",31.0,132.600895,2018.0,"""201801""","""016F""",1.640486,47.28966,-122.4515,47.253889,-122.502587
"""25%""",505146.0,,"""2019-11-04 13:58:57""",0.0,105.0,125.0,0.0,,70.0,9232280.0,200.0,32.0,9.8,70.0,0.0,496.0,,,,31.0,4841.719471,2019.0,,,200.286598,47.28966,-122.4515,47.266389,-122.405
"""50%""",887209.0,,"""2021-06-27 12:31:16""",0.1,180.0,180.0,1.0,,70.0,9347554.0,229.0,32.0,11.5,70.0,1.0,1566.0,,,,31.0,9005.271394,2021.0,,,3599.989453,47.28966,-122.4515,47.274444,-122.38333
"""75%""",1215511.0,,"""2022-11-20 23:36:22""",0.8,267.3,296.0,5.0,,70.0,9532197.0,294.0,40.0,13.8,71.0,5.0,5596.0,,,,32.0,298755.347558,2022.0,,,302727.151039,47.587711,-122.359218,47.582943,-122.34305
"""max""",1779092.0,"""636093158""","""2024-06-30 23:45:34""",102.3,359.9,359.0,15.0,,89.0,946470400.0,366.0,58.0,22.5,159.0,15.0,3357503.0,0.0,"""C""","""Tacoma, WA""",32.0,31611000.0,2024.0,"""202406""","""nan""",31609000.0,47.587711,-122.359218,47.626389,-122.34


docking_id,mmsi,time,speed,course,heading,status,vessel_name,vessel_type,imo,length,width,draft,cargo,status_previous,status_duration,short,port_type,port_name,port_rank,port_dist,year,month,dock_id,dock_dist,port_lat,port_lon,dock_lat,dock_lon
f64,str,datetime[μs],f64,f64,f64,f64,cat,f64,f64,f64,f64,f64,f64,f64,f64,bool,str,str,f64,f64,i32,str,str,f64,f64,f64,f64,f64
3249.0,"""205717000""",2019-01-11 04:03:17,0.5,113.8,142.0,0.0,"""LA TONDA""",70.0,9748485.0,199.0,,,,5.0,11.0,False,"""C""","""Seattle, WA""",32.0,260345.939838,2019,"""201901""","""0UMV""",265830.127742,47.587711,-122.359218,47.623333,-122.37
3249.0,"""205717000""",2019-01-11 04:15:16,0.1,252.9,154.0,1.0,"""LA TONDA""",70.0,9748485.0,199.0,,,,0.0,53.0,False,"""C""","""Seattle, WA""",32.0,260151.124906,2019,"""201901""","""0UMV""",265633.735792,47.587711,-122.359218,47.623333,-122.37
3249.0,"""205717000""",2019-01-11 05:08:59,9.2,356.4,357.0,0.0,"""LA TONDA""",70.0,9748485.0,199.0,,,,5.0,145609.0,False,"""C""","""Seattle, WA""",32.0,257937.991121,2019,"""201901""","""0UMV""",263410.67524,47.587711,-122.359218,47.623333,-122.37
3249.0,"""205717000""",2019-04-22 07:58:55,0.1,167.8,67.0,1.0,"""LA TONDA""",70.0,9748485.0,199.0,,,,0.0,2116.0,False,"""C""","""Seattle, WA""",32.0,6803.927572,2019,"""201904""","""0UMV""",3174.702145,47.587711,-122.359218,47.623333,-122.37
3249.0,"""205717000""",2019-04-23 19:15:35,0.4,161.6,111.0,0.0,"""LA TONDA""",70.0,9748485.0,199.0,,,,1.0,53.0,False,"""C""","""Seattle, WA""",32.0,6787.748447,2019,"""201904""","""0UMV""",2807.10319,47.587711,-122.359218,47.623333,-122.37


## Generate stats

In [4]:
#init handy variables
earliest_date = main_df['time'].min().date()
latest_date = main_df['time'].max().date()

#define port stats function
def port_stats(df, start_date=earliest_date, end_date=latest_date):
    #create ports stats
    portstats_df = (
        #convert main gdf to polars
        df
        #filter to given date
        .filter(pl.col('time').is_between(start_date, end_date))
        #ensure sorting
        .sort(['mmsi', 'time'])
        #drop messages not associated with a docking event
        .drop_nulls(subset='docking_id')
        .with_columns(
            #sum anchorage time for each docking event
            time_at_anchor = (
                pl.when(pl.col('status')==1)
                .then(pl.col('status_duration'))
                .otherwise(pl.lit(None))
                .sum().over('docking_id')
            ),
            #get monthly vessels and visits
            vessels = pl.col('mmsi').n_unique().over('port_name', 'month'),
            visits = pl.col('docking_id').n_unique().over('port_name', 'month')
        )
        #aggregate to ports
        .group_by('port_name')
        .agg(
            #keep lat and long
            port_lat = pl.col('port_lat').first(),
            port_lon = pl.col('port_lon').first(),
            #get monthly average of unique vessels seen at each port
            vessels_avg = pl.col('vessels').mean(),
            #get monthly average of vessel visits at each port
            visits_avg = pl.col('visits').mean(),
            #get median time at berth in hours
            time_at_berth_median = (
                pl.when(pl.col('status')==5)
                .then(pl.col('status_duration'))
                .otherwise(pl.lit(None))
            ).median()/60,
            #get median time at anchor in hours
            time_at_anchor_median = pl.col('time_at_anchor').median()/60,
            #get mean time at anchor in hours
            time_at_anchor_mean = pl.col('time_at_anchor').mean()/60
        )
        #convert to pandas to that geopandas is happy
        .to_pandas()
    )
    #convert back to geodataframe
    portstats_gdf = (
        gpd.GeoDataFrame(
            portstats_df, 
            geometry=gpd.points_from_xy(portstats_df.port_lon, 
                                        portstats_df.port_lat),
            crs=3857
        )
    )
    return portstats_gdf

#define dock stats function
def dock_stats(df, start_date=earliest_date, end_date=latest_date):
    #create dock stats
    dockstats_df = (
        #convert main gdf to polars
        df
        #filter to given date
        .filter(pl.col('time').is_between(start_date, end_date))
        #ensure sorting
        .sort(['mmsi', 'time'])
        #drop messages not associated with a docking event
        .drop_nulls(subset='docking_id')
        .with_columns(
            #sum anchorage time for each docking event
            time_at_anchor = (
                pl.when(pl.col('status')==1)
                .then(pl.col('status_duration'))
                .otherwise(pl.lit(None))
                .sum().over('docking_id')
            ),
            #get monthly vessels and visits
            vessels = pl.col('mmsi').n_unique().over('dock_id', 'month'),
            visits = pl.col('docking_id').n_unique().over('dock_id', 'month')
        )
        #aggregate to ports
        .group_by('dock_id')
        .agg(
            #keep lat and long
            dock_lat = pl.col('dock_lat').first(),
            dock_lon = pl.col('dock_lon').first(),
            #get monthly average of unique vessels seen at each port
            vessels_avg = pl.col('vessels').mean(),
            #get monthly average of vessel visits at each port
            visits_avg = pl.col('visits').mean(),
            #get median time at berth in hours
            time_at_berth_median = (
                pl.when(pl.col('status')==5)
                .then(pl.col('status_duration'))
                .otherwise(pl.lit(None))
            ).median()/60,
            #get median time at anchor in hours
            time_at_anchor_median = pl.col('time_at_anchor').median()/60,
            #get mean time at anchor in hours
            time_at_anchor_mean = pl.col('time_at_anchor').mean()/60
        )
        #convert to pandas to that geopandas is happy
        .to_pandas()
    )
    #convert back to geodataframe
    dockstats_gdf = (
        gpd.GeoDataFrame(
            dockstats_df, 
            geometry=gpd.points_from_xy(dockstats_df.dock_lon, 
                                        dockstats_df.dock_lat),
            crs=3857
        )
    )
    return dockstats_gdf

In [5]:
#generate stats for each member port
portstats_df = port_stats(main_df)
#inspect
portstats_df

Unnamed: 0,port_name,port_lat,port_lon,vessels_avg,visits_avg,time_at_berth_median,time_at_anchor_median,time_at_anchor_mean,geometry
0,"Tacoma, WA",47.28966,-122.4515,71.683721,108.749645,23.883333,0.0,37.257579,POINT (-122.451 47.290)
1,"Seattle, WA",47.587711,-122.359218,55.838046,76.483831,28.991667,0.0,56.510983,POINT (-122.359 47.588)


In [6]:
#get combined stats for NW Port Alliance
nwpa_stats = (
    #drop port name
    main_df.drop('port_name')
    #create new port name
    .with_columns(port_name = pl.lit('NW Seaport Alliance'))
)
#get stats
nwpa_stats = port_stats(nwpa_stats).drop(['port_lat', 'port_lon', 'geometry'], axis=1)
#inspect
nwpa_stats

Unnamed: 0,port_name,vessels_avg,visits_avg,time_at_berth_median,time_at_anchor_median,time_at_anchor_mean
0,NW Seaport Alliance,125.124321,181.365329,25.516667,0.0,45.833337


In [7]:
dockstats_df = dock_stats(main_df)

In [19]:
# Example: Mapbox version of your scatter_geo map
fig2 = px.scatter_mapbox(
    dockstats_df,
    lon='dock_lon',
    lat='dock_lat',
    size='visits_avg',
    color='time_at_berth_median',
    range_color=[0, 50],
    hover_name='dock_id',
    size_max=20,
    title='Average Visits per Month & Mean Hours at Berth',
    color_continuous_scale=px.colors.sequential.Viridis,
    labels={'time_at_berth_median': 'Median Hours at Berth'},
    height=1000, width=800
)

# Set Mapbox style
fig2.update_layout(
    mapbox_style="carto-positron", 
    mapbox_zoom=9.7, 
    mapbox_center={"lat": dockstats_df['dock_lat'].mean(), 
                   "lon": dockstats_df['dock_lon'].mean()},
)

# Add footnote using add_annotation
fig2.add_annotation(
    text="Note: Circle size corresponds to average vessel visits per month since 2018",
    xref="paper", yref="paper",
    x=0, y=-0.05,
    showarrow=False,
    font=dict(size=14, color="black"),
    align="left"
)

fig2.show()

In [65]:
# Example: Mapbox version of your scatter_geo map
fig2 = px.scatter_mapbox(
    dockstats_df,
    lon='dock_lon',
    lat='dock_lat',
    size='visits_avg',
    color='time_at_anchor_mean',
    range_color=[0, 50],
    hover_name='dock_id',
    size_max=20,
    title='Average Visits per Month & Mean Hours at Anchor',
    color_continuous_scale=px.colors.sequential.Viridis,
    labels={'time_at_anchor_mean': 'Mean Hours at Anchor'},
    height=1000, width=800
)

# Set Mapbox style (e.g., 'carto-positron', 'stamen-terrain', 'open-street-map')
fig2.update_layout(
    mapbox_style="carto-positron",  # Choose a style
    mapbox_zoom=3,  # Adjust zoom level
    mapbox_center={"lat": dockstats_df['dock_lat'].mean(), "lon": dockstats_df['dock_lon'].mean()},  # Center map
)

# Add footnote using add_annotation
fig2.add_annotation(
    text="Note: Circle size corresponds to average vessel visits per month",
    xref="paper", yref="paper",
    x=0, y=-0.05,
    showarrow=False,
    font=dict(size=14, color="black"),
    align="left"
)

fig2.show()

In [30]:
#create map figure for ports
fig = px.scatter_geo(
    portstats_df,
    lon='port_lon',
    lat='port_lat',
    size='visits_avg',
    color='time_at_anchor_median',
    range_color=[0,50],
    hover_name='port_name',
    size_max=20,
    title='Average Visits per Month & Median Hours at Anchor',
    color_continuous_scale=px.colors.sequential.Viridis,
    labels={
        'time_at_anchor_median':'Median Hours at Anchor'
    }
)
# Fit the view to ports
fig.update_geos(fitbounds="locations")
# Add footnote using add_annotation
fig.add_annotation(
    text="Note: Circle size corresponds to average vessel visits per month",  # Footnote text
    xref="paper", yref="paper",  # Position relative to the plot area
    x=0, y=0-0.05,  # Adjust to footnote position
    showarrow=False,  # No arrow, just text
    font=dict(size=14, color="black"),  # Customize the font style
    align="left"
)

fig.show()