# Ocean Carrier Alliances: Analysis and Modeling 

This notebook analyzes the data processed in the "oca_data_prep" notebook; see [that notebook in the project repo](https://github.com/epistemetrica/Ocean-Carrier-Alliances-Project/blob/main/analysis/oca_data_prep.ipynb) for full details. 

In [1]:
#preliminaries: imports, polars configuration, and lazy data loading
import pandas as pd #v2.1.3
import numpy as np
import polars as pl #v1.1.0
import plotly_express as px #v0.4.1 
import plotly.graph_objects as go
from datetime import datetime
import statsmodels.api as sm
import scipy
import matplotlib.pyplot as plt
import seaborn as sns

#turn on the global string cache for polars categoricals
#(must happen before the categorical columns below are scanned and combined)
pl.enable_string_cache()

#lazily scan the parquet sources; nothing is read into memory until a collect
#NOTE modify this step once unified data is finished clustering
imports_lf = pl.scan_parquet('../data/imports/*.parquet')
exports_lf = pl.scan_parquet('../data/exports/exports.parquet') 
#stack imports on top of exports into the main lf; 'diagonal' takes the union of
#the two schemas and null-fills columns that exist on only one side
main_lf = pl.concat([imports_lf, exports_lf], how='diagonal')

## Data Summary 

In [2]:
#preview: materialize only the first five rows of the lazy frame
display(main_lf.head(5).collect())
#summary statistics for every column; describe() collects its result, and as the
#last expression in the cell it is rendered by the notebook
main_lf.describe()

teus,date,origin_territory,origin_region,arrival_port_code,arrival_port_name,departure_port_code,departure_port_name,coast_region,hs_code,carrier_name,carrier_scac,vessel_name,voyage_number,vessel_id,direction,bol_id,year,month,lane_id,lane_name,unified_carrier_name,unified_carrier_scac,vessel_owner,primary_cargo,vessel_lane_pair,date_arrival,dest_territory,dest_region,date_departure
f64,datetime[μs],cat,cat,cat,cat,cat,cat,cat,str,cat,cat,str,str,i32,cat,str,i32,str,cat,cat,cat,cat,cat,bool,cat,datetime[μs],cat,cat,datetime[μs]
2.198764,2005-12-31 00:00:00,"""PHILIPPINES""","""SOUTH EAST ASIA""","""2704""","""LOS ANGELES""","""58309""","""KAOHSIUNG""","""WEST""","""730721""","""HYUNDAI""","""HYMM""","""APL CHINA""","""97""",9074389,"""import""","""HYMM_62321214L12""",2005,"""200512""","""58309_2704""","""Kaohsiung — Los Angeles""","""HYUNDAI""","""HDMU""","""APLU""",False,"""9074389_58309_2704""",2005-12-31 00:00:00,,,
2.198764,2005-12-31 00:00:00,"""MALAYSIA""","""SOUTH EAST ASIA""","""1601""","""CHARLESTON""","""22519""","""COLON PA""","""EAST""","""400121""","""EVERGREEN LINE""","""EVER""","""EVER DAINTY""","""200""",9134232,"""import""","""EVER_090550113466""",2005,"""200512""","""22519_1601""","""Colon Pa — Charleston""","""EVERGREEN LINE""","""SLCC""","""SLCC""",True,"""9134232_22519_1601""",2005-12-31 00:00:00,,,
2.198764,2005-12-31 00:00:00,"""CHINA (MAINLAND)""","""NORTH EAST ASIA""","""2704""","""LOS ANGELES""","""57078""","""YANTIAN""","""WEST""","""009669""","""HATSU MARINE LTD""","""HTML""","""HATSU EXCEL""","""344""",9241322,"""import""","""HTML_149502055056""",2005,"""200512""","""57078_2704""","""Yantian — Los Angeles""","""HATSU MARINE LTD""","""HTML""","""SLCC""",False,"""9241322_57078_2704""",2005-12-31 00:00:00,,,
2.198764,2005-12-31 00:00:00,"""CHINA (MAINLAND)""","""NORTH EAST ASIA""","""2704""","""LOS ANGELES""","""57069""","""XIAMEN""","""WEST""","""640590""","""HYUNDAI""","""HYMM""","""APL CHINA""","""97""",9074389,"""import""","""HYMM_615737517""",2005,"""200512""","""57069_2704""","""Xiamen — Los Angeles""","""HYUNDAI""","""HDMU""","""APLU""",False,"""9074389_57069_2704""",2005-12-31 00:00:00,,,
2.198764,2005-12-31 00:00:00,"""CHINA (MAINLAND)""","""NORTH EAST ASIA""","""2704""","""LOS ANGELES""","""57069""","""XIAMEN""","""WEST""","""732393""","""ORIENT OVERSEAS CONTAINER LINE""","""OOCL""","""NYK ATHENA""","""26""",9247766,"""import""","""OOCL_96352730""",2005,"""200512""","""57069_2704""","""Xiamen — Los Angeles""","""ORIENT OVERSEAS CONTAINER LINE""","""SMMB""","""NYKS""",False,"""9247766_57069_2704""",2005-12-31 00:00:00,,,


statistic,teus,date,origin_territory,origin_region,arrival_port_code,arrival_port_name,departure_port_code,departure_port_name,coast_region,hs_code,carrier_name,carrier_scac,vessel_name,voyage_number,vessel_id,direction,bol_id,year,month,lane_id,lane_name,unified_carrier_name,unified_carrier_scac,vessel_owner,primary_cargo,vessel_lane_pair,date_arrival,dest_territory,dest_region,date_departure
str,f64,str,str,str,str,str,str,str,str,str,str,str,str,str,f64,str,str,f64,str,str,str,str,str,str,f64,str,str,str,str,str
"""count""",237242716.0,"""237242716""","""170344621""","""170344621""","""237242716""","""237242716""","""237242716""","""237242716""","""237135817""","""237241270""","""236978373""","""237242716""","""237242716""","""233594720""",237242716.0,"""237242716""","""237240807""",237242716.0,"""237242716""","""237242716""","""237242716""","""237200871""","""237242716""","""237242716""",237242716.0,"""237242716""","""170844507""","""66354759""","""66354759""","""66329578"""
"""null_count""",0.0,"""0""","""66898095""","""66898095""","""0""","""0""","""0""","""0""","""106899""","""1446""","""264343""","""0""","""0""","""3647996""",0.0,"""0""","""1909""",0.0,"""0""","""0""","""0""","""41845""","""0""","""0""",0.0,"""0""","""66398209""","""170887957""","""170887957""","""170913138"""
"""mean""",2.604211,"""2015-11-12 09:32:47.660812""",,,,,,,,,,,,,9291200.0,,,2015.363832,,,,,,,0.631104,,"""2016-05-03 06:53:29.688093""",,,"""2014-07-29 18:53:10.259862"""
"""std""",3.791839,,,,,,,,,,,,,,387539.223857,,,5.347143,,,,,,,,,,,,
"""min""",0.01,"""2005-01-01 00:00:00""",,,,,,,,"""-1""",,,"""102 SUNG SHIN""","""#c""",196.0,,"""-1_CSHSE0009996""",2005.0,"""200501""",,,,,,0.0,,"""2005-01-01 00:00:00""",,,"""2005-01-01 00:00:00"""
"""25%""",2.0,"""2011-09-02 00:00:00""",,,,,,,,,,,,,9232759.0,,,2011.0,,,,,,,,,"""2012-06-17 00:00:00""",,,"""2009-10-02 00:00:00"""
"""50%""",2.198764,"""2016-04-05 00:00:00""",,,,,,,,,,,,,9320257.0,,,2016.0,,,,,,,,,"""2016-11-08 00:00:00""",,,"""2014-07-19 00:00:00"""
"""75%""",2.65,"""2020-08-06 00:00:00""",,,,,,,,,,,,,9450648.0,,,2020.0,,,,,,,,,"""2020-12-15 00:00:00""",,,"""2019-02-15 00:00:00"""
"""max""",3729.25,"""2024-03-31 00:00:00""",,,,,,,,"""ddedo""",,,"""xin yang shan""","""|SAL5""",9993688.0,,"""zzzz_ZZZZ""",2024.0,"""202403""",,,,,,1.0,,"""2024-03-31 00:00:00""",,,"""2024-03-31 00:00:00"""
