# 

# Lane Matching between PIERS and Drewery databases

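Problem: the ports/lanes recorded in the PIERS bill-of-lading (BOL) data do not line up with the lane categories used by Drewery, so the two sources cannot be joined directly.

Strategy: 
- geocode the ports from both databases
- match each PIERS lane to its nearest Drewery lane on haversine (great-circle) distance, summing the origin-to-origin and destination-to-destination distances (many:1 matching, PIERS -> Drewery)
- merge the matched Drewery lanes and their associated price info into main_lf to allow aggregation and analysis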

## Prelims and load data

In [1]:
#preliminaries 
import pandas as pd 
import polars as pl
import numpy as np
import geopy
import geopy.distance
from geopy.geocoders import Bing
from geopy.extra.rate_limiter import RateLimiter

#display settings: do not truncate the column list when pandas frames are rendered
pd.set_option('display.max_columns', None)

#enable string cache for polars categoricals
#(categorical columns from different frames are joined on further down)
pl.enable_string_cache()

#lazily scan the PIERS parquet files and keep only the export records
is_export = pl.col('direction') == 'export'
exports_lf = pl.scan_parquet('../data/main/*.parquet').filter(is_export)

#load the tidy Drewery rates from CSV, keeping only routes that
# - involve a US port, and
# - do not contain "via" (those are not coast ports)
touches_us = pl.col('route').str.contains('US ')
routed_via = pl.col('route').str.contains(' via ')
drewery_df = pl.read_csv('../data/rates/tidy_rates.csv').filter(touches_us & ~routed_via)

#get unified ports, regions and territories (NOTE this step may live in oca_data_prep)
#each column is replaced by its most common non-null value within the paired key column
unify_over = {
    'origin_territory': 'lane_id',
    'origin_region': 'lane_id',
    'dest_territory': 'lane_id',
    'dest_region': 'lane_id',
    'arrival_port_name': 'arrival_port_code',
    'departure_port_name': 'departure_port_code',
    'us_port': 'departure_port_code',
}
exports_lf = exports_lf.with_columns(
    [
        pl.col(col).drop_nulls().mode().first().over(key).alias(col)
        for col, key in unify_over.items()
    ]
)

In [2]:
#get piers_lanes_df (used to merge back into main_lf)
#one row per lane with the origin/destination strings that are sent to the geocoder

#helper columns that are only needed to build the geocoder strings
lane_source_cols = ['origin_territory', 'departure_port_name', 'coast_region',
                    'dest_territory', 'arrival_port_name', 'direction']

#the US side of a lane is written as "US Port of <name>", the foreign side as "<territory> <name>"
is_import = pl.col('direction') == 'import'
us_departure = 'US Port of ' + pl.col('departure_port_name').cast(pl.Utf8)
foreign_departure = pl.col('origin_territory').cast(pl.Utf8) + ' ' + pl.col('departure_port_name').cast(pl.Utf8)
us_arrival = 'US Port of ' + pl.col('arrival_port_name').cast(pl.Utf8)
foreign_arrival = pl.col('dest_territory').cast(pl.Utf8) + ' ' + pl.col('arrival_port_name').cast(pl.Utf8)

piers_lanes_df = (
    exports_lf
    .select('lane_id', 'lane_name', *lane_source_cols)
    #take the modal value per lane (NOTE: territory data is uncommonly messy/incorrect; this step gets around that issue)
    .group_by('direction', 'lane_id')
    .agg(pl.all().mode().first())
    #construct origin and destination port names for geocoder
    .with_columns(
        pl.when(is_import).then(foreign_departure).otherwise(us_departure).alias('piers_origin'),
        pl.when(is_import).then(us_arrival).otherwise(foreign_arrival).alias('piers_dest'),
    )
    #drop unnecessary cols
    .drop(*lane_source_cols)
    #recast to categorical data
    .cast(pl.Categorical)
    #drop duplicates, then rows with any null
    .unique()
    .drop_nulls()
    #collect to memory
    .collect()
)

#get piers_ports_df: every PIERS origin and destination string in one 'piers_ports' column
#unique origin ports as a series
piers_ports = (
    piers_lanes_df
    .select(pl.col('piers_origin').alias('piers_ports'))
    .drop_nulls()
    .unique()
    .to_series()
)
#unique dest ports as a series
piers_dest_ports = (
    piers_lanes_df
    .select('piers_dest')
    .drop_nulls()
    .unique()
    .to_series()
)
#append dest ports to the origin ports, cast to strings and convert to pandas
# NOTE(review): Series.append appears to extend piers_ports itself as well - confirm before reusing piers_ports
piers_ports_df = pl.DataFrame(piers_ports.append(piers_dest_ports)).cast(pl.Utf8).to_pandas()

In [3]:
#preview the first rows of the PIERS lanes table
piers_lanes_df.head()

lane_id,lane_name,piers_origin,piers_dest
cat,cat,cat,cat
"""5204_23630""","""W Palm Bch — Arawak Cay""","""US Port of W PALM BCH""","""BAHAMAS ARAWAK CAY"""
"""2904_20100""","""Portland Or — Colima""","""US Port of PORTLAND OR""","""MEXICO ALTAMIRA"""
"""4107_05515""","""Sandusky — Courtright""","""US Port of SANDUSKY""","""CANADA COURTRIGHT"""
"""2908_55751""","""Vancouver Wa — Pt Kelang""","""US Port of VANCOUVER WA""","""MALAYSIA PT KELANG"""
"""1703_74825""","""Savannah — Abidjan""","""US Port of SAVANNAH""","""IVORY COAST ABIDJAN"""


In [4]:
#get drewery_lanes_df: each unique route plus its origin and destination parts
#split "<origin> to <dest>" into a two-field struct
split_route = pl.col('route').str.split_exact(by=' to ', n=1).alias('split')
#names for the two struct fields once unnested
port_cols = {'field_0': 'drewery_origin', 'field_1': 'drewery_dest'}

unique_routes = drewery_df.select('route').unique()
drewery_lanes_df = (
    unique_routes
    .with_columns(split_route)
    #unnest into separate cols and rename
    .unnest('split')
    .rename(port_cols)
    #drop routes that did not split into both parts
    .drop_nulls()
)

#get drewery_ports
#convert origin col to series
drewery_ports = (
    drewery_lanes_df
    .select('drewery_origin')
    .rename({'drewery_origin':'drewery_port'})
    .drop_nulls()
    .unique()
    .to_series()
)
#append dest col
drewery_ports_df = (
    pl.DataFrame(
        drewery_ports.append(
            drewery_lanes_df
            .select('drewery_dest')
            .drop_nulls()
            .unique()
            .to_series()
        )
    )
    #a port can be both an origin and a destination (e.g. a US port on export and import routes);
    #keep one row per port so it is geocoded once and does not duplicate rows in later joins
    .unique(maintain_order=True)
    #drop non-coast ports
    .filter(~pl.col('drewery_port').str.contains(' via '))
    #convert to pandas
    .to_pandas()
)

## Geocode

In [5]:
#NOTE(security): the default API key in the signature below is committed in source;
#it should be rotated and passed in by the caller (or read from configuration) instead.

#rate-limited Bing geocode callables, one per API key. The RateLimiter has to be shared
#between lookups: a freshly created limiter has no previous call to wait after, so building
#one per location means min_delay_seconds is never enforced.
_BING_GEOCODERS = {}


def _bing_geocode_for(api_key):
    '''returns the shared rate-limited geocode callable for api_key, creating it on first use'''
    if api_key not in _BING_GEOCODERS:
        _BING_GEOCODERS[api_key] = RateLimiter(Bing(api_key).geocode, min_delay_seconds=0.5)
    return _BING_GEOCODERS[api_key]


def geocoder_trg(locations, bing_rest_api_key='Am19ZYf8qoO0j2DJGJDu6oZJkhtyvG9v9-8zJ-RDowSZ8QIKLMbjDIq0w7qAzSv1', 
                 df_export=False):
    '''
    Converts location inputs to geographic coordinates (decimal degrees format, datum WGS-84) using the Bing REST Services geocoder API 
    INPUTS:
        locations - str or array-like - the address/es or place name/s to be geocoded.
        bing_rest_api_key - an API key issued by Bing Rest Services. Uses Adam Wilson's by default.
        df_export - boolean - default=False - when True, returns a pandas dataframe containing the 'locations' inputs in the first column, 
                    the latitude in the second column, and the longitude in the third column.  
    RETURNS:
        when df_export = False (default), returns a list of (lat, long) tuples corresponding to the 'locations' input list. Uninterpretable
                    inputs are listed as np.nan. A single input returns its one result directly (not wrapped in a list);
                    empty input returns an empty list.
        when df_export = True, returns a pandas dataframe containing the 'locations' inputs in the first column, 
                    the latitude in the second column, and the longitude in the third column.
    RELIES ON:
        pandas
        numpy
        geopy
        Bing from geopy.geocoders
        RateLimiter from geopy.extra.rate_limiter
    '''
    #define geocoder function
    def geocoder_latlong(loc):
        '''returns latitude and longitude of given location if interpretable by Bing, else (NaN, NaN)'''
        #geocode location with the shared, rate-limited client
        geoloc = _bing_geocode_for(bing_rest_api_key)(loc)
        #return latitude and longitude results
        if isinstance(geoloc, geopy.Location):
            return geoloc.latitude, geoloc.longitude
        return np.nan, np.nan
    #coerce locations input to pd.Series
    locations = pd.Series(locations)
    #init df
    df = pd.DataFrame({'locations': locations})
    #geocode each location value (the string itself, not the enclosing row)
    coords = [geocoder_latlong(loc) for loc in df['locations']]
    df['lat'] = [lat for lat, _ in coords]
    df['long'] = [long for _, long in coords]
    #create coordinate list, collapsing failed lookups to a single NaN
    coord_list = [(lat, long) if not np.isnan(lat) else np.nan for lat, long in coords]
    #return results
    if df_export:
        return df
    if len(df) == 1:
        return coord_list[0]
    return coord_list

In [6]:
%%script echo skipping #api calls are limited; only execute when necessary

#geocode every drewery port name and store the result next to it
drewery_ports_df['drewery_port_loc'] = drewery_ports_df['drewery_port'].apply(geocoder_trg).dropna()

#geocode every piers port name and store the result next to it
piers_ports_df['piers_port_loc'] = piers_ports_df['piers_ports'].apply(geocoder_trg).dropna()

#save geolocations so later runs can reload them instead of calling the API again
drewery_ports_df.to_parquet('../data/misc/drewery_port_geolocations.parquet')
piers_ports_df.to_parquet('../data/misc/piers_port_geolocations.parquet')

skipping #api calls are limited; only execute when necessary


## Match on Haversine Distance

In [7]:
#%%script echo skipping
#load previously geocoded data (the parquet files written by the geocoding cell)
#read with polars, then converted to pandas
drewery_ports_df = pl.read_parquet('../data/misc/drewery_port_geolocations.parquet').to_pandas()
piers_ports_df = pl.read_parquet('../data/misc/piers_port_geolocations.parquet').to_pandas()

In [8]:
#merge geocoded locations back to the drewery lanes

#polars copy of the geocoded drewery ports, used for both joins
drewery_locs = pl.DataFrame(drewery_ports_df)

drewery_loc_lanes_df = (
    drewery_lanes_df
    #origin locs
    .join(drewery_locs, left_on='drewery_origin', right_on='drewery_port')
    .rename({'drewery_port_loc': 'drewery_origin_loc'})
    #dest locs
    .join(drewery_locs, left_on='drewery_dest', right_on='drewery_port')
    .rename({'drewery_port_loc': 'drewery_dest_loc'})
    #drop duplicate rows
    .unique()
)

#merge geocoded locations back to the piers lanes

#polars copy of the geocoded piers ports; the key is cast to categorical like the lane columns
piers_locs = pl.DataFrame(piers_ports_df).cast({'piers_ports': pl.Categorical})

piers_loc_lanes_df = (
    piers_lanes_df
    #piers origin locs
    .join(piers_locs, left_on='piers_origin', right_on='piers_ports')
    .rename({'piers_port_loc': 'piers_origin_loc'})
    #piers dest locs
    .join(piers_locs, left_on='piers_dest', right_on='piers_ports')
    .rename({'piers_port_loc': 'piers_dest_loc'})
    #drop duplicate rows
    .unique()
)

#inspect
display(drewery_loc_lanes_df.head())
piers_loc_lanes_df.head()

route,drewery_origin,drewery_dest,drewery_origin_loc,drewery_dest_loc
str,str,str,list[f64],list[f64]
"""US East Coast (New York) to Ru…","""US East Coast (New York)""","""Russia (St Petersburg)""","[40.713047, -74.007233]","[59.938732, 30.316229]"
"""India (Chennai) to US East Coa…","""India (Chennai)""","""US East Coast (New York)""","[13.072092, 80.201859]","[40.713047, -74.007233]"
"""US Gulf Coast (Houston) to Arg…","""US Gulf Coast (Houston)""","""Argentina (Buenos Aires)""","[29.760803, -95.369507]","[-34.607338, -58.443287]"
"""North China (Tianjin) to US We…","""North China (Tianjin)""","""US West Coast (Los Angeles)""","[39.294743, 117.335098]","[34.052238, -118.243347]"
"""US Gulf Coast (Houston) to Aus…","""US Gulf Coast (Houston)""","""Australia (Melbourne)""","[29.760803, -95.369507]","[-37.81546, 144.967163]"


lane_id,lane_name,piers_origin,piers_dest,piers_origin_loc,piers_dest_loc
cat,cat,cat,cat,list[f64],list[f64]
"""2904_30151""","""Portland Or — Buenaventura""","""US Port of PORTLAND OR""","""COLOMBIA BUENAVENTURA""","[45.516018, -122.681427]","[3.888193, -77.07383]"
"""1901_27700""","""Mobile — St Eustatius""","""US Port of MOBILE""","""BONAIRE BONAIRE""","[39.503571, -99.018341]","[12.184975, -68.290291]"
"""4909_20100""","""San Juan — Altamira""","""US Port of SAN JUAN""","""MEXICO ALTAMIRA""","[18.466303, -66.105232]","[22.389076, -97.940643]"
"""5301_41685""","""Houston — Nigg Bay""","""US Port of HOUSTON""","""UNITED KINGDOM NIGG BAY""","[29.748505, -95.292221]","[57.71833, -4.061262]"
"""4904_21531""","""Yabucoa — Pto Cortes""","""US Port of YABUCOA""","""HONDURAS PTO CORTES""","[39.503571, -99.018341]","[14.822356, -86.5979]"


In [9]:
#pair every PIERS lane with every Drewery lane so a distance can be computed for each combination
matched_df = (
    #cross join piers and drewery tables
    piers_loc_lanes_df.join(drewery_loc_lanes_df, how='cross')
    #convert to pandas
    .to_pandas()
)


In [10]:
#match on summed haversine distance
def haversine(row, col1, col2):
    '''returns the great-circle distance in km between the coordinates stored in row[col1] and row[col2]'''
    start, end = row[col1], row[col2]
    return geopy.distance.great_circle(start, end).km

#df is the same object as matched_df, so the new columns are added to it in place
df = matched_df
#distance between the PIERS and Drewery port at each end of the lane
for out_col, piers_col, drewery_col in (
    ('origin_dist', 'piers_origin_loc', 'drewery_origin_loc'),
    ('dest_dist', 'piers_dest_loc', 'drewery_dest_loc'),
):
    df[out_col] = df.apply(haversine, axis=1, col1=piers_col, col2=drewery_col)
#total mismatch of a PIERS/Drewery pairing = origin distance + destination distance
df['dist'] = df['origin_dist'] + df['dest_dist']
df.head()

Unnamed: 0,lane_id,lane_name,piers_origin,piers_dest,piers_origin_loc,piers_dest_loc,route,drewery_origin,drewery_dest,drewery_origin_loc,drewery_dest_loc,origin_dist,dest_dist,dist
0,2904_30151,Portland Or — Buenaventura,US Port of PORTLAND OR,COLOMBIA BUENAVENTURA,"[45.51601791, -122.681427]","[3.88819289, -77.07382965]",US East Coast (New York) to Russia (St Petersb...,US East Coast (New York),Russia (St Petersburg),"[40.71304703, -74.00723267]","[59.93873215, 30.31622887]",3925.760476,10586.082178,14511.842655
1,2904_30151,Portland Or — Buenaventura,US Port of PORTLAND OR,COLOMBIA BUENAVENTURA,"[45.51601791, -122.681427]","[3.88819289, -77.07382965]",India (Chennai) to US East Coast (New York),India (Chennai),US East Coast (New York),"[13.07209206, 80.20185852]","[40.71304703, -74.00723267]",13106.579995,4106.23974,17212.819735
2,2904_30151,Portland Or — Buenaventura,US Port of PORTLAND OR,COLOMBIA BUENAVENTURA,"[45.51601791, -122.681427]","[3.88819289, -77.07382965]",US Gulf Coast (Houston) to Argentina (Buenos A...,US Gulf Coast (Houston),Argentina (Buenos Aires),"[29.76080322, -95.36950684]","[-34.60733795, -58.4432869]",2951.162805,4703.600199,7654.763004
3,2904_30151,Portland Or — Buenaventura,US Port of PORTLAND OR,COLOMBIA BUENAVENTURA,"[45.51601791, -122.681427]","[3.88819289, -77.07382965]",North China (Tianjin) to US West Coast (Los An...,North China (Tianjin),US West Coast (Los Angeles),"[39.29474258, 117.33509827]","[34.05223846, -118.24334717]",8849.121545,5413.340293,14262.461838
4,2904_30151,Portland Or — Buenaventura,US Port of PORTLAND OR,COLOMBIA BUENAVENTURA,"[45.51601791, -122.681427]","[3.88819289, -77.07382965]",US Gulf Coast (Houston) to Australia (Melbourne),US Gulf Coast (Houston),Australia (Melbourne),"[29.76080322, -95.36950684]","[-37.81546021, 144.96716309]",2951.162805,14324.539946,17275.702751


In [11]:
#for each PIERS lane keep the closest Drewery route and its summed distance
#'route' takes the first row per lane, which relies on rows staying in dist order within each group
closest_route = pl.col('route').first()
shortest_dist = pl.col('dist').min()
matched_df = (
    pl.DataFrame(df)
    .sort(by='dist')
    .group_by('lane_id')
    .agg(closest_route, shortest_dist)
)

In [12]:
#preview the lane -> closest Drewery route matches
matched_df.head()

lane_id,route,dist
cat,str,f64
"""5301_28319""","""US Gulf Coast (Houston) to Col…",1648.57357
"""2101_24865""","""US Gulf Coast (Houston) to Col…",2759.834358
"""5203_76283""","""US Gulf Coast (Houston) to Ken…",4517.955113
"""0401_47571""","""US Gulf Coast (Houston) to Wes…",1510.075682
"""5203_53309""","""US Gulf Coast (Houston) to Ind…",1799.740808


In [13]:
#save the lane matches (lane_id, route, dist) to CSV
matched_df.write_csv('../data/misc/matched_lanes_dist.csv')

## Merge back to main lf

In [14]:
#attach each lane's matched Drewery route and match distance to the exports;
#lanes without a match keep nulls (left join). The route column is renamed to drewery_lane.
lane_matches_lf = matched_df.lazy()
exports_lf = (
    exports_lf
    .join(lane_matches_lf, on='lane_id', how='left')
    .rename({'route': 'drewery_lane'})
)

In [15]:
#drop old rate cols
# NOTE(review): presumably left over from an earlier rate merge in the source parquet files -
# confirm that 'rate_20', 'rate_40' and 'route_right' actually exist in exports_lf at this point
exports_lf = exports_lf.drop('rate_20', 'rate_40', 'route_right')

In [16]:
#collect a five-row sample of the merged exports to check the new columns
exports_lf.limit(5).collect()

teus,date_raw,origin_territory,origin_region,arrival_port_code,arrival_port_name,departure_port_code,departure_port_name,coast_region,hs_code,carrier_name,carrier_scac,vessel_name,voyage_number,vessel_id,direction,bol_id,year,month,lane_id,lane_name,dest_territory,dest_region,unified_carrier_name,unified_carrier_scac,vessel_owner,primary_cargo,shared_teus,us_port,vessel_port_pair,date,alliance,alliance_member,pc_alliance,cargo_source,vessel_capacity,drewery_lane,dist
f64,datetime[μs],cat,cat,cat,cat,cat,cat,cat,str,cat,cat,str,str,i32,cat,str,i32,str,cat,cat,cat,cat,cat,cat,cat,bool,f64,cat,cat,datetime[μs],str,bool,str,str,f64,cat,f64
2.533158,2007-06-08 00:00:00,,,"""58201""","""HONG KONG""","""2709""","""LONG BEACH""","""WEST""","""391190""","""HANJIN SHIPPING COMPANY LTD""","""HJSC""","""PEKING SENATOR""","""55""",9141273,"""export""","""HJSC_LGBA12558801""",2007,"""200706""","""2709_58201""","""Long Beach — Hong Kong""","""HONG KONG""","""NORTH EAST ASIA""","""HANJIN SHIPPING COMPANY LTD""","""HJSC""","""HJSC""",True,0.0,"""2709""","""9141273_2709""",2007-06-08 00:00:00,"""CYKH""",True,"""CYKH""","""ally""",2265.882353,"""US West Coast (Los Angeles) to…",49.518882
2.533158,2007-02-17 00:00:00,,,"""35525""","""MONTEVIDEO""","""4601""","""NEW YORK""","""EAST""","""842959""","""MEDITERRANEAN SHIPPING COMPANY""","""MDSC""","""MSC VIENNA""","""4""",9253296,"""export""","""MDSC_MSCUNW606072""",2007,"""200702""","""4601_35525""","""New York — Montevideo""","""URUGUAY""","""EAST COAST SOUTH AMERICA""","""MEDITERRANEAN SHIPPING COMPANY""","""MSCU""","""MSCU""",True,0.0,"""4601""","""9253296_4601""",2007-02-17 00:00:00,"""Non-alliance Carriers""",False,"""Non-alliance Carriers""","""non-ally""",1764.485294,"""US East Coast (New York) to Ch…",214.317611
2.533158,2007-05-11 00:00:00,,,"""58309""","""KAOHSIUNG""","""2709""","""LONG BEACH""","""WEST""","""720449""","""HANJIN SHIPPING COMPANY LTD""","""HJSC""","""PENANG SENATOR""","""48""",9139490,"""export""","""HJSC_ESEA00210704""",2007,"""200705""","""2709_58309""","""Long Beach — Kaohsiung""","""TAIWAN""","""NORTH EAST ASIA""","""HANJIN SHIPPING COMPANY LTD""","""HJSC""","""HJSC""",True,0.0,"""2709""","""9139490_2709""",2007-05-11 00:00:00,"""CYKH""",True,"""CYKH""","""ally""",2265.882353,"""US West Coast (Los Angeles) to…",33.234866
2.533158,2007-02-09 00:00:00,,,"""22556""","""PUNTA MANZANI""","""2709""","""LONG BEACH""","""WEST""","""841182""","""WALLENIUS WILHELMEN LINES""","""WWLN""","""TALISMAN""","""703""",9191319,"""export""","""WWLN_US566709""",2007,"""200702""","""2709_22556""","""Long Beach — Punta Manzani""","""PANAMA""","""CENTRAL AMERICA""","""WALLENIUS WILHELMEN LINES""","""WLWH""","""WLWH""",True,0.0,"""2709""","""9191319_2709""",2007-02-09 00:00:00,"""Non-alliance Carriers""",False,"""Non-alliance Carriers""","""non-ally""",1740.735294,"""US West Coast (Los Angeles) to…",578.195262
2.533158,2007-05-01 00:00:00,,,"""50805""","""ASHDOD""","""1703""","""SAVANNAH""","""EAST""","""4805""","""ZIM CONTAINER""","""ZIML""","""ZIM NEW YORK""","""24""",9231810,"""export""","""ZIML_195871""",2007,"""200705""","""1703_50805""","""Savannah — Ashdod""","""ISRAEL""","""MEDITERRANEAN""","""ZIM CONTAINER""","""ZIMU""","""ZIMU""",True,0.0,"""1703""","""9231810_1703""",2007-05-01 00:00:00,"""Non-alliance Carriers""",False,"""Non-alliance Carriers""","""non-ally""",2470.882353,"""US (Baltimore) to Israel (Ashd…",1380.783547


In [17]:
#prep drewery df for merge: one row per route/date with the 20ft and 40ft rates side by side
rate_key = ['route', 'container_type', 'date']
rate_cols = {'40ft Dry': 'rate_40', '20ft Dry': 'rate_20'}
parsed_date = pl.col('date').str.to_date(format='%Y-%m')

df = (
    pl.DataFrame(drewery_df)
    #choose cols
    .select(*rate_key, 'rate')
    #keep one rate per route / container type / date
    .unique(subset=rate_key)
    #pivot container type into columns and rename them
    .pivot('container_type', values='rate')
    .rename(rate_cols)
    #convert date strings to dates
    .with_columns(parsed_date)
    #drop rows with missing prices
    .drop_nulls(subset='rate_20')
)

In [18]:
#preview the pivoted rate table
df.head()

route,date,rate_20,rate_40
str,date,f64,f64
"""US East Coast (New York) to Ho…",2019-08-01,810.0,1010.0
"""US West Coast (Los Angeles) to…",2020-03-01,1760.0,2090.0
"""West Med (Genoa) to US East Co…",2023-01-01,6280.0,7440.0
"""North Continent Europe (Rotter…",2017-02-01,1260.0,1440.0
"""US West Coast (Los Angeles) to…",2022-09-01,3920.0,7080.0


In [19]:
#join asof date with exports: attach Drewery rates to each export by matched lane and date

#rates sorted by date, with route cast to categorical to match drewery_lane
rates_lf = df.lazy().sort(by='date').cast({'route': pl.Categorical})
#exports sorted by date, with date recast from datetime to enable join_asof against the rate dates
dated_exports_lf = exports_lf.sort(by='date').cast({'date': pl.Date})

lf = dated_exports_lf.join_asof(
    rates_lf,
    on='date',
    by_left='drewery_lane',
    by_right='route',
)

In [20]:
#collect a five-row sample of the exports with rates attached
lf.limit(5).collect()

teus,date_raw,origin_territory,origin_region,arrival_port_code,arrival_port_name,departure_port_code,departure_port_name,coast_region,hs_code,carrier_name,carrier_scac,vessel_name,voyage_number,vessel_id,direction,bol_id,year,month,lane_id,lane_name,dest_territory,dest_region,unified_carrier_name,unified_carrier_scac,vessel_owner,primary_cargo,shared_teus,us_port,vessel_port_pair,date,alliance,alliance_member,pc_alliance,cargo_source,vessel_capacity,drewery_lane,dist,rate_20,rate_40
f64,datetime[μs],cat,cat,cat,cat,cat,cat,cat,str,cat,cat,str,str,i32,cat,str,i32,str,cat,cat,cat,cat,cat,cat,cat,bool,f64,cat,cat,date,str,bool,str,str,f64,cat,f64,f64,f64
1.0,2013-11-08 00:00:00,,,"""23645""","""FREEPORT""","""5204""","""W PALM BCH""","""EAST""","""190590""","""MEDITERRANEAN SHIPPING COMPANY""","""MDSC""","""MAERSK WISMAR""","""1348""",9550371,"""export""","""MDSC_MSCUPE052597""",2013,"""201311""","""5204_23645""","""W Palm Bch — Freeport""","""BAHAMAS""","""CARIBBEAN""","""MEDITERRANEAN SHIPPING COMPANY""","""MSCU""","""MAEU""",False,1.0,"""5204""","""9550371_5204""",,"""Non-alliance Carriers""",False,"""Non-alliance Carriers""","""non-ally""",574.558824,"""US (Baltimore) to US (Baltimor…",2958.63524,,
1.0,2014-06-13 00:00:00,,,"""35705""","""BUENOS AIRES""","""5203""","""PT EVERGLADES""","""EAST""","""007985""","""HAMBURG SUD""","""SUDU""","""CSAV HOUSTON""","""423S""",9208021,"""export""","""SUDU_240018571C44""",2014,"""201406""","""5203_35705""","""Pt Everglades — Buenos Aires""","""ARGENTINA""","""EAST COAST SOUTH AMERICA""","""HAMBURG SUD""","""SUDU""","""HLCU""",False,1.0,"""5203""","""9208021_5203""",,"""Non-alliance Carriers""",False,"""Non-alliance Carriers""","""non-ally""",2443.75,"""US Gulf Coast (Houston) to Chi…",1551.629431,,
1.0,2014-03-14 00:00:00,,,"""58023""","""BUSAN""","""5301""","""HOUSTON""","""GULF""","""842699""","""AMERICAN PRESIDENT LINES""","""APLU""","""JULIE B""","""007""",9384849,"""export""","""APLU_APL086143631""",2014,"""201403""","""5301_58023""","""Houston — Busan""","""REPUBLIC OF KOREA""","""NORTH EAST ASIA""","""AMERICAN PRESIDENT LINES""","""APLU""","""APLU""",True,0.0,"""5301""","""9384849_5301""",,"""Non-alliance Carriers""",False,"""Non-alliance Carriers""","""non-ally""",,"""US Gulf Coast (Houston) to Kor…",7.585076,,
1.0,2015-07-09 00:00:00,,,"""58023""","""BUSAN""","""2704""","""LOS ANGELES""","""WEST""","""320415""","""RICKMERS LINE""","""RCKI""","""RICKMERS TIANJIN""","""2014""",9480136,"""export""","""RCKI_CKI2014LAX01""",2015,"""201507""","""2704_58023""","""Los Angeles — Busan""","""REPUBLIC OF KOREA""","""NORTH EAST ASIA""","""RICKMERS LINE""","""RCKI""","""RCKI""",True,0.0,"""2704""","""9480136_2704""",,"""Non-alliance Carriers""",False,"""Non-alliance Carriers""","""non-ally""",414.779412,"""US West Coast (Los Angeles) to…",35.963042,,
1.0,2015-01-19 00:00:00,,,"""35159""","""PARANAGUA""","""1303""","""BALTIMORE""","""EAST""","""760320""","""HAMBURG SUD""","""SUDU""","""MONTE ALEGRE""","""060S""",9348065,"""export""","""SUDU_250011297PKG""",2015,"""201501""","""1303_35159""","""Baltimore — Paranagua""","""BRAZIL""","""EAST COAST SOUTH AMERICA""","""HAMBURG SUD""","""SUDU""","""SUDU""",True,0.0,"""1303""","""9348065_1303""",,"""Non-alliance Carriers""",False,"""Non-alliance Carriers""","""non-ally""",2560.514706,"""US (Baltimore) to Brazil (Sant…",554.463623,,


In [24]:
#summary statistics for the joined frame, with the 'route' column dropped first
lf.drop('route').describe()

statistic,teus,date_raw,origin_territory,origin_region,arrival_port_code,arrival_port_name,departure_port_code,departure_port_name,coast_region,hs_code,carrier_name,carrier_scac,vessel_name,voyage_number,vessel_id,direction,bol_id,year,month,lane_id,lane_name,dest_territory,dest_region,unified_carrier_name,unified_carrier_scac,vessel_owner,primary_cargo,shared_teus,us_port,vessel_port_pair,date,alliance,alliance_member,pc_alliance,cargo_source,vessel_capacity,drewery_lane,dist,rate_20,rate_40
str,f64,str,str,str,str,str,str,str,str,str,str,str,str,str,f64,str,str,f64,str,str,str,str,str,str,str,str,f64,f64,str,str,str,str,f64,str,str,f64,str,f64,f64,f64
"""count""",63737455.0,"""63737455""","""0""","""0""","""63737455""","""63737455""","""63737455""","""63737455""","""63737173""","""63736118""","""63612885""","""63737455""","""63737455""","""59354969""",63737455.0,"""63737455""","""63737454""",63737455.0,"""63737455""","""63737455""","""63737455""","""63735019""","""63735019""","""63696852""","""63737455""","""63737455""",63737455.0,63737455.0,"""63737455""","""63737455""","""63737380""","""63737455""",63737455.0,"""63737455""","""63737455""",59237960.0,"""63724294""",63735015.0,20050312.0,20050312.0
"""null_count""",0.0,"""0""","""63737455""","""63737455""","""0""","""0""","""0""","""0""","""282""","""1337""","""124570""","""0""","""0""","""4382486""",0.0,"""0""","""1""",0.0,"""0""","""0""","""0""","""2436""","""2436""","""40603""","""0""","""0""",0.0,0.0,"""0""","""0""","""75""","""0""",0.0,"""0""","""0""",4499495.0,"""13161""",2440.0,43687143.0,43687143.0
"""mean""",3.220728,"""2015-12-20 09:20:55.450497""",,,,,,,,,,,,,9231900.0,,,2015.465131,,,,,,,,,0.702033,1.040767,,,"""2015-12-19 23:55:16.226000""",,0.301566,,,2160.482803,,1436.83395,1240.723823,1618.603076
"""std""",5.982657,,,,,,,,,,,,,,474506.524554,,,4.741281,,,,,,,,,,3.692469,,,,,,,,1499.188471,,1194.214466,587.260645,863.504854
"""min""",0.01,"""2007-01-01 00:00:00""",,,,,,,,"""-1""",,,"""26 AGUSTOS""","""'44S""",196.0,,"""079A_26004878070""",2007.0,"""200701""",,,,,,,,0.0,0.0,,,"""2007-01-01""","""2M Alliance""",0.0,"""2M Alliance""","""ally""",0.0,,5.93014,310.0,400.0
"""25%""",2.0,"""2012-02-28 00:00:00""",,,,,,,,,,,,,9218686.0,,,2012.0,,,,,,,,,,0.0,,,"""2012-02-27""",,,,,905.147059,,425.931559,790.0,980.0
"""50%""",2.533158,"""2016-02-21 00:00:00""",,,,,,,,,,,,,9315202.0,,,2016.0,,,,,,,,,,0.0,,,"""2016-02-20""",,,,,2036.911765,,1261.373246,1150.0,1470.0
"""75%""",2.533158,"""2019-12-14 00:00:00""",,,,,,,,,,,,,9430868.0,,,2019.0,,,,,,,,,,2.0,,,"""2019-12-13""",,,,,3253.161765,,2377.521062,1540.0,2020.0
"""max""",3729.25,"""2023-12-31 00:00:00""",,,,,,,,"""ddedo""",,,"""ZUMA""","""|SAL5""",9979125.0,,"""zzzz_ZZZZ""",2023.0,"""202312""",,,,,,,,1.0,1123.25,,,"""2023-12-31""","""The Alliance""",1.0,"""The Alliance""","""non-ally""",17889.705882,,8769.910108,11480.0,14000.0
