# Port Statistics

This notebook develops and explores the various port statistics used in the [Port Performance Project](https://github.com/epistemetrica/Port-Performance-Project). See the README.md file in the main directory for more info.

The primary data set comes from a combination of AIS vessel data and port data, processed in the Port Geodata notebook.

Statistics and final dataframes developed here are used in the Port Performance Dashboard.



In [1]:
#prelims
import polars as pl
import pandas as pd
import geopandas as gpd
import time
import plotly.express as px
import matplotlib.pyplot as plt
import contextily as cx
import numpy as np
import glob
import folium
from folium.plugins import HeatMap

#enable string cache for polars categoricals
pl.enable_string_cache()
#display settings
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pl.Config(tbl_rows=100);

In [2]:
#load and inspect
lf = pl.scan_parquet('port data/dashboard/main.parquet')
display(lf.describe())
lf.limit(5).collect()

statistic,docking_id,mmsi,time,speed,course,heading,status,vessel_name,vessel_type,imo,length,width,draft,cargo,"Albany Port District, NY_in_port_waters","Anacortes, WA_in_port_waters","Baltimore, MD_in_port_waters","Beaumont, TX_in_port_waters","Boston, MA_in_port_waters","Bridgeport, CT_in_port_waters","Brownsville, TX_in_port_waters","Calhoun Port Authority, TX_in_port_waters","Canaveral Port District, FL_in_port_waters","Coos Bay OR, Port of_in_port_waters","Corpus Christi, TX_in_port_waters","Galveston, TX_in_port_waters","Grays Harbor Port District, WA_in_port_waters","Greater Lafourche Port, LA_in_port_waters","Guayama, PR_in_port_waters","Guaynabo, PR_in_port_waters","Hilo, Hawai'i, HI_in_port_waters","Honolulu, O'ahu, HI_in_port_waters","Houston Port Authority, TX_in_port_waters","Jacksonville, FL_in_port_waters","Kahului, Maui, HI_in_port_waters","Kalaeloa Barbers Point, HI_in_port_waters",…,"San Juan, PR_in_port_waters","Searsport, ME_in_port_waters","Seattle, WA_in_port_waters","South Jersey Port Corp, NJ_in_port_waters","South Louisiana, LA, Port of_in_port_waters","Stockton, CA_in_port_waters","Tacoma, WA_in_port_waters","Tampa Port Authority, FL_in_port_waters","Terrebonne Parish Port, LA_in_port_waters","Texas City, TX_in_port_waters","Unalaska Island, AK_in_port_waters","Valdez, AK_in_port_waters","Victoria, TX_in_port_waters","Virgin Islands - St. Croix, VI_in_port_waters","Virginia, VA, Port of_in_port_waters","West St. Mary Parish Port, LA_in_port_waters","Wilmington, DE_in_port_waters","Wilmington, NC_in_port_waters","Yabucoa, PR_in_port_waters",in_port_waters,status_duration,dock_id,dock_name,facility_type,port_name,port_area_desc,port_area_name,port_area_id,dist_to_dock,year,month,vessel_lat,vessel_lon,dock_lat,dock_lon,port_lat,port_lon
str,f64,str,str,f64,f64,f64,f64,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,str,str,str,str,str,str,f64,f64,str,f64,f64,f64,f64,f64,f64
"""count""",6531864.0,"""6531864""","""6531864""",6531864.0,6511179.0,6422552.0,6531864.0,"""6531864""",6531864.0,6531836.0,6531864.0,6034333.0,5979758.0,5055474.0,6531864.0,6531864.0,6531864.0,6531864.0,6531864.0,6531864.0,6531864.0,6531864.0,6531864.0,6531864.0,6531864.0,6531864.0,6531864.0,6531864.0,6531864.0,6531864.0,6531864.0,6531864.0,6531864.0,6531864.0,6531864.0,6531864.0,…,6531864.0,6531864.0,6531864.0,6531864.0,6531864.0,6531864.0,6531864.0,6531864.0,6531864.0,6531864.0,6531864.0,6531864.0,6531864.0,6531864.0,6531864.0,6531864.0,6531864.0,6531864.0,6531864.0,6531864.0,6530351.0,"""6531864""","""6531864""","""6518493""","""6531864""","""6531864""","""3554364""","""6531864""",6531864.0,6531864.0,"""6531864""",6531864.0,6531864.0,6531864.0,6531864.0,6531864.0,6531864.0
"""null_count""",0.0,"""0""","""0""",0.0,20685.0,109312.0,0.0,"""0""",0.0,28.0,0.0,497531.0,552106.0,1476390.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1513.0,"""0""","""0""","""13371""","""0""","""0""","""2977500""","""0""",0.0,0.0,"""0""",0.0,0.0,0.0,0.0,0.0,0.0
"""mean""",5228500.0,,"""2021-04-02 19:12:45.842675""",0.2848,184.639669,183.188419,2.760437,,73.378543,10025000.0,209.19864,32.331777,11.013516,74.033752,0.001512,0.002196,0.026336,0.011178,0.001473,4.1e-05,0.003137,0.001351,0.004604,0.000568,0.011701,0.010328,1.5e-05,0.000412,0.000242,0.010688,0.0,0.006796,0.021497,0.002805,0.000385,0.003124,…,0.010688,0.000743,0.008124,0.003476,0.012415,0.008934,0.00532,1e-05,0.0,0.009896,0.000956,3e-06,0.0,0.0,0.011972,0.0,0.004647,0.005983,0.001882,0.465712,2045.06717,,,,,,,,47289.818318,2020.772401,,32.953566,-94.786453,3920600.0,-10555000.0,3920300.0,-10558000.0
"""std""",2992000.0,,,3.52349,90.161092,104.843372,2.484746,,4.928406,23680000.0,56.970343,7.652324,2.956588,8.277722,,,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,,,,30915.504658,,,,,,,,404861.581364,1.926209,,6.020352,17.558937,830455.640448,1940300.0,829848.510842,1940400.0
"""min""",148.0,"""205042000""","""2018-01-01 00:09:04""",0.0,0.0,0.0,0.0,,70.0,0.0,101.0,0.0,-12.8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"""00P8""","""76 LUBRICANTS CO SAVANNAH TERM…","""Anchorage""","""Albany Port District, NY""","""""Portland Harbor"" means the en…","""Albany Port District, NY""","""1506""",1.0245e-08,2018.0,"""201801""",1.31245,-177.04225,2001700.0,-18544000.0,2008600.0,-18540000.0
"""25%""",2520932.0,,"""2019-07-17 22:03:28""",0.0,126.3,90.0,0.0,,70.0,9324629.0,179.0,28.0,9.0,70.0,,,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,,,,21.0,,,,,,,,88.540379,2019.0,,29.73723,-97.26162,3470000.0,-10829000.0,3471100.0,-10842000.0
"""50%""",5298226.0,,"""2021-04-21 11:29:57""",0.0,184.9,181.0,5.0,,70.0,9499448.0,189.0,32.0,11.0,70.0,,,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,,,,51.0,,,,,,,,147.327802,2021.0,,30.13001,-91.19952,3521200.0,-10152000.0,3520800.0,-10152000.0
"""75%""",7873517.0,,"""2022-10-31 18:53:30""",0.1,250.6,274.0,5.0,,80.0,9684976.0,229.0,32.0,13.1,80.0,,,,,,,,,,,,,,,,,,,,,,,…,,,,,,,,,,,,,,,,,,,,,123.0,,,,,,,,265.247433,2022.0,,36.94817,-81.13598,4428800.0,-9032100.0,4420500.0,-9027500.0
"""max""",10251314.0,"""725019920""","""2024-09-30 23:53:26""",102.3,361.9,436.0,15.0,,89.0,984903300.0,901.0,86.0,25.5,230.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,…,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,3501927.0,"""1JHK""","""ZEN-NOH GRAIN CORP. WHARF.""","""Tie Off""","""Yabucoa, PR""","""U.S. Census Bureau municipal l…","""Yabucoa, PR""","""99""",33858000.0,2024.0,"""202409""",82.36032,179.09033,8681800.0,-7207500.0,8680600.0,-7206000.0


docking_id,mmsi,time,speed,course,heading,status,vessel_name,vessel_type,imo,length,width,draft,cargo,"Albany Port District, NY_in_port_waters","Anacortes, WA_in_port_waters","Baltimore, MD_in_port_waters","Beaumont, TX_in_port_waters","Boston, MA_in_port_waters","Bridgeport, CT_in_port_waters","Brownsville, TX_in_port_waters","Calhoun Port Authority, TX_in_port_waters","Canaveral Port District, FL_in_port_waters","Coos Bay OR, Port of_in_port_waters","Corpus Christi, TX_in_port_waters","Galveston, TX_in_port_waters","Grays Harbor Port District, WA_in_port_waters","Greater Lafourche Port, LA_in_port_waters","Guayama, PR_in_port_waters","Guaynabo, PR_in_port_waters","Hilo, Hawai'i, HI_in_port_waters","Honolulu, O'ahu, HI_in_port_waters","Houston Port Authority, TX_in_port_waters","Jacksonville, FL_in_port_waters","Kahului, Maui, HI_in_port_waters","Kalaeloa Barbers Point, HI_in_port_waters","Kawaihae, Hawai'i, HI_in_port_waters",…,"San Juan, PR_in_port_waters","Searsport, ME_in_port_waters","Seattle, WA_in_port_waters","South Jersey Port Corp, NJ_in_port_waters","South Louisiana, LA, Port of_in_port_waters","Stockton, CA_in_port_waters","Tacoma, WA_in_port_waters","Tampa Port Authority, FL_in_port_waters","Terrebonne Parish Port, LA_in_port_waters","Texas City, TX_in_port_waters","Unalaska Island, AK_in_port_waters","Valdez, AK_in_port_waters","Victoria, TX_in_port_waters","Virgin Islands - St. Croix, VI_in_port_waters","Virginia, VA, Port of_in_port_waters","West St. Mary Parish Port, LA_in_port_waters","Wilmington, DE_in_port_waters","Wilmington, NC_in_port_waters","Yabucoa, PR_in_port_waters",in_port_waters,status_duration,dock_id,dock_name,facility_type,port_name,port_area_desc,port_area_name,port_area_id,dist_to_dock,year,month,vessel_lat,vessel_lon,dock_lat,dock_lon,port_lat,port_lon
u32,str,datetime[μs],f64,f64,f64,f64,cat,f64,f64,f64,f64,f64,f64,bool,bool,bool,bool,bool,bool,bool,bool,bool,bool,bool,bool,bool,bool,bool,bool,bool,bool,bool,bool,bool,bool,bool,…,bool,bool,bool,bool,bool,bool,bool,bool,bool,bool,bool,bool,bool,bool,bool,bool,bool,bool,bool,bool,f64,str,str,str,str,str,str,str,f64,i32,str,f64,f64,f64,f64,f64,f64
148,"""205042000""",2021-07-24 13:04:45,0.1,344.3,289.0,0.0,"""DELOS""",80.0,9877767.0,336.0,60.0,11.0,80.0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,…,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,176240.0,"""110V""","""ENBRIDGE INGLESIDE ENERGY CENT…","""Dock""","""Corpus Christi, TX""","""Per Port of Corpus Chisti legi…",,"""2436""",810817.734973,2021,"""202107""",28.86163,-90.0203,3226500.0,-10821000.0,3225400.0,-10842000.0
148,"""205042000""",2021-11-23 22:25:16,0.3,3.5,70.0,1.0,"""DELOS""",80.0,9877767.0,336.0,60.0,11.0,80.0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,…,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,2221.0,"""110V""","""ENBRIDGE INGLESIDE ENERGY CENT…","""Dock""","""Corpus Christi, TX""","""Per Port of Corpus Chisti legi…",,"""2436""",40077.792485,2021,"""202111""",27.77591,-96.85123,3226500.0,-10821000.0,3225400.0,-10842000.0
148,"""205042000""",2021-11-25 11:26:30,0.6,183.0,148.0,0.0,"""DELOS""",80.0,9877767.0,336.0,60.0,11.0,80.0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,…,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,219.0,"""110V""","""ENBRIDGE INGLESIDE ENERGY CENT…","""Dock""","""Corpus Christi, TX""","""Per Port of Corpus Chisti legi…",,"""2436""",40038.429637,2021,"""202111""",27.77814,-96.85123,3226500.0,-10821000.0,3225400.0,-10842000.0
148,"""205042000""",2021-11-25 15:05:50,0.0,202.0,202.0,5.0,"""DELOS""",80.0,9877767.0,336.0,60.0,11.0,80.0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,…,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,3054.0,"""110V""","""ENBRIDGE INGLESIDE ENERGY CENT…","""Dock""","""Corpus Christi, TX""","""Per Port of Corpus Chisti legi…","""Corpus Christi, TX""","""2436""",204.903865,2021,"""202111""",27.82006,-97.20766,3226500.0,-10821000.0,3225400.0,-10842000.0
190,"""205042000""",2022-12-12 12:11:45,0.8,17.4,293.0,1.0,"""DELOS""",80.0,9877767.0,336.0,60.0,16.5,80.0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,…,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,18.0,"""0V0U""","""ARCO WESTERN PIPELINE CO BERTH…","""Dock""","""Port of Long Beach, CA""","""As defined per legislation by …",,"""4110""",25964.938571,2022,"""202212""",33.62575,-118.04731,3996200.0,-13160000.0,3993900.0,-13159000.0


In [3]:
#create basic monthly port stats dataframe
port_monththly_df = (
    lf
    #create end of status datetime column
    .with_columns(
        end_time = (pl.col('time') + 
                    pl.duration(minutes=pl.col('status_duration')))
    )
    #group by port and month
    .group_by(['port_name', 'month'])
    .agg(
        #count number of vessels
        vessels = pl.n_unique('mmsi'),
        #count number of vessel calls
        calls = pl.n_unique('docking_id'),
        #time at dock stats for each vessel in hours
        hrs_at_berth_median = (
            pl.when(pl.col('status')==5)
            .then(pl.col('status_duration'))
            .otherwise(pl.lit(None))
        ).median()/60,
        hrs_at_berth_mean = (
            pl.when(pl.col('status')==5)
            .then(pl.col('status_duration'))
            .otherwise(pl.lit(None))
        ).mean()/60,
        #time at anchor stats for each vessel in hours
        hrs_at_anchor_median = (
            pl.when(pl.col('status')==1)
            .then(pl.col('status_duration'))
            .otherwise(pl.lit(None))
        ).median()/60,
        hrs_at_anchor_mean = (
            pl.when(pl.col('status')==1)
            .then(pl.col('status_duration'))
            .otherwise(pl.lit(None))
        ).mean()/60
    )
    #sort by port then month
    .sort(['port_name', 'month'])
    #collect
    .collect()
)


In [4]:
#df for point in time statistics
df = (
    lf
    #create end of status and date columns
    .with_columns(
        end_time = (pl.col('time') + 
                    pl.duration(minutes=pl.col('status_duration'))),
        date = pl.col('time').dt.date()
    )
    #group by port and date
    .group_by(['port_name', 'month', 'date'])
    .agg(
        #number of vessels at dock at noon (should this be at 6am? or 6pm? or all over the above and averaged?)
        vessels_at_dock = (
            #when moored at noon
            pl.when((pl.col('status')==5) & 
                    (pl.time(12).dt.hour()
                     .is_between(pl.col('time').dt.hour(), 
                                 pl.col('end_time').dt.hour())))
            #then count the individual vessels each day
            .then(pl.col('mmsi'))
            .otherwise(pl.lit(None))
            .drop_nulls() #n_unique counts nulls as unique values
            .n_unique()
        ),
        #number of vessels at anchor at noon
        vessels_at_anchor = (
            #when anchored at noon
            pl.when((pl.col('status')==1) & 
                    (pl.time(12).dt.hour()
                     .is_between(pl.col('time').dt.hour(), 
                                 pl.col('end_time').dt.hour())))
            #then count the individual vessels each day
            .then(pl.col('mmsi'))
            .otherwise(pl.lit(None))
            .drop_nulls() #n_unique counts nulls as unique values
            .n_unique()
        )
    )
    #group by month to get averages
    .group_by(['port_name', 'month'])
    .agg(
        #average number of vessels at dock
        vessels_at_dock_at_noon_mean = pl.mean('vessels_at_dock'),
        #average number of vessels at anchor
        vessels_at_anchor_at_noon_mean = pl.mean('vessels_at_anchor')
    )
    #sort by port then date
    .sort(['port_name', 'month'])
    #collect
    .collect()
)

#join to port_monthly_df
port_monththly_df = (
    port_monththly_df
    .join(df, on=['port_name', 'month'], how='left')
)



In [5]:
#inspect
port_monththly_df.head()

port_name,month,vessels,calls,hrs_at_berth_median,hrs_at_berth_mean,hrs_at_anchor_median,hrs_at_anchor_mean,vessels_at_dock_at_noon_mean,vessels_at_anchor_at_noon_mean
str,str,u32,u32,f64,f64,f64,f64,f64,f64
"""Albany Port District, NY""","""201801""",3,43,0.3,0.414634,5.333333,5.333333,0.166667,0.166667
"""Albany Port District, NY""","""201802""",6,58,1.333333,9.267251,0.616667,3.837963,0.333333,0.133333
"""Albany Port District, NY""","""201803""",5,7,15.775,14.529167,39.466667,39.466667,0.333333,0.0
"""Albany Port District, NY""","""201804""",9,30,2.35,17.604938,11.991667,20.558333,0.263158,0.105263
"""Albany Port District, NY""","""201805""",8,53,1.8,8.064103,20.55,29.84,0.6,0.066667


In [6]:
#get means for each port over all time
port_alltime_df = (
    port_monththly_df
    .group_by(['port_name'])
    .agg(
        #average number of vessels at dock
        vessels_at_dock_at_noon_mean = pl.mean('vessels_at_dock_at_noon_mean'),
        #average number of vessels at anchor
        vessels_at_anchor_at_noon_mean = pl.mean('vessels_at_anchor_at_noon_mean'),
        #average time at dock in hours
        hrs_at_berth_median = pl.mean('hrs_at_berth_median'),
        hrs_at_berth_mean = pl.mean('hrs_at_berth_mean'),
        #average time at anchor in hours
        hrs_at_anchor_median = pl.mean('hrs_at_anchor_median'),
        hrs_at_anchor_mean = pl.mean('hrs_at_anchor_mean')
    )
    #sort by port
    .sort(['port_name'])
)

#inspect
port_alltime_df.head()

port_name,vessels_at_dock_at_noon_mean,vessels_at_anchor_at_noon_mean,hrs_at_berth_median,hrs_at_berth_mean,hrs_at_anchor_median,hrs_at_anchor_mean
str,f64,f64,f64,f64,f64,f64
"""Albany Port District, NY""",0.360985,0.047641,9.389506,15.038997,13.703333,18.1363
"""Anacortes, WA""",0.510336,0.077529,3.805864,9.666307,28.969264,40.798921
"""Baltimore, MD""",5.141655,0.257404,1.034877,6.098101,20.794033,124.246689
"""Beaumont, TX""",3.915332,0.671338,1.14465,7.312186,19.619856,38.896071
"""Boston, MA""",0.642309,0.060388,1.930041,11.229007,17.306962,81.831545
