# Port Loading

One of the features to be derived from the AIS data is the port loading. This is defined as the number of ships within the port at a given timestamp. As port loading will not change significantly each second the feature is calculated on a minute by minute basis. 

Port loading is also derived by ship type and segment.

In [1]:
# base libraries
import numpy as np
import pandas as pd
import datetime
from random import randint
import os
import json
import math
#import geopy.distance3

In [2]:
# set variable from config file
config_path = os.path.abspath('..')

with open(config_path + '/config.json', 'r') as f:
    config = json.load(f)

processing_path = config['DEFAULT']['processing_path']
shipping_rot_filename = config['DEFAULT']['shipping_rot_filename']
segment_filename = config['DEFAULT']['segment_filename']
port_loading_filename = config['DEFAULT']['port_loading_filename']

In [3]:
# import data
dtype_dic = {'MMSI':int,'dt':'str', 'lat':'float', 'long':'float','SOG':'float', 'rot':'float', 
             'Type':'str', 'gross_tonnage':'float','vessel_name':'str', 'ETA':'str', 'POC_LOCODE':'str',
             'last_port_LOCODE':'str', 'next_port_LOCODE':'str', 'status':'str','voyage_id':'float','tripid':int,
            'in_hazmat':'str','out_hazmat':'str'}
parse_dates = ['dt', 'ETA']

shipping_data = pd.read_csv(processing_path + shipping_rot_filename,header = 0,delimiter = ',',dtype = dtype_dic, parse_dates=parse_dates)
segment_data = pd.read_csv(processing_path + segment_filename,header = 0,delimiter = ',')
segment_data['dt'] = pd.to_datetime(segment_data["dt"])

### High level port loading

Derive the distributions of ship type on a minute by minute basis

In [4]:
# drop unwanted fields
port_loading = shipping_data[['MMSI','dt']].copy(deep = True)

# extract start window and end windows - this is the date rounded down to the nearest minute
port_loading['start_window'] = port_loading['dt'].dt.floor('1min')
port_loading['end_window'] = port_loading['dt'].dt.ceil('1min')

# dedup across al the rows
port_loading.drop_duplicates(['MMSI','start_window','end_window'],inplace = True)

# aggregate to 1 row per time window
port_loading = port_loading.groupby(['start_window','end_window'])['MMSI'].count().to_frame(name = 'count').reset_index()

# sort by time window
port_loading.sort_values(['start_window','end_window'],ascending = True,inplace = True)

### Port loading by ship type

Derive the distributions of ship type on a minute by minute basis by ship type

In [5]:
port_loading_by_class = shipping_data[['MMSI','dt','Type']].copy(deep = True)

# extract start window and end windows - this is the date rounded down to the nearest minute
port_loading_by_class['start_window'] = shipping_data['dt'].dt.floor('1min')
port_loading_by_class['end_window'] = shipping_data['dt'].dt.ceil('1min')

# dedup across all the rows, so we have 1 ship per window
port_loading_by_class.drop_duplicates(['MMSI','start_window','end_window'], inplace = True)

# reset index
port_loading_by_class = port_loading_by_class.reset_index(drop = True)

In [6]:
port_loading_by_class['Type'] = port_loading_by_class['Type'].map({'tug':'tug', 'container ship':'container', 
                                                                   'pilot':'pilot', 'ro-ro cargo ship':'cargo_ship', 
                                                                   'hopper dredger':'dredger', 
                                                                   'general cargo ship':'general_cargo', 
                                                                   'passenger/ro-ro cargo ship':'passenger', 
                                                                   'work/repair vessel':'work_vessel', 'sar':'sar', 
                                                                   'unknown type':'unknown'})

# create one hot encoding for each ship type
one_hot = pd.get_dummies(port_loading_by_class['Type'],prefix = 'pl_type')

# merge dummy fields back on
port_loading_by_class = port_loading_by_class[['start_window','end_window']].merge(one_hot, left_index = True,
                                 right_index = True, how = 'inner')

# create a variable list
var_list = list(port_loading_by_class.columns.values)
var_list.remove('start_window')
var_list.remove('end_window')

# aggregate to 1 row per time window
port_loading_by_class = port_loading_by_class.groupby(['start_window','end_window'])[var_list].sum().reset_index()

# sort by time window
port_loading_by_class.sort_values(['start_window','end_window'], ascending = True, inplace = True)

### Port Loading by segment

Derive the distributions of ship type on a minute by minute basis by segment

In [7]:
port_loading_by_seg = segment_data[['MMSI','dt','segment']].copy(deep = True)

# extract start window and end windows - this is the date rounded down to the nearest minute
port_loading_by_seg['start_window'] = port_loading_by_seg['dt'].dt.floor('1min')
port_loading_by_seg['end_window'] = port_loading_by_seg['dt'].dt.ceil('1min')

In [8]:
port_loading_by_seg['seg_desc'] = segment_data['segment'].map({0:'Docked', 1:'Trans_general',
                                                               2:'Docking_inital', 3:'Docking_terminal',
                                                               4:'Docking_mid', 5:'Trans_border'})

# create one hot encoding for each segment
one_hot_seg = pd.get_dummies(port_loading_by_seg['seg_desc'],prefix = 'pl_seg')

# merge dummy fields back on
port_loading_by_seg = port_loading_by_seg[['start_window','end_window']].merge(one_hot_seg, left_index = True,
                               right_index = True, how = 'inner')

# create a variable list
var_list = list(port_loading_by_seg.columns.values)
var_list.remove('start_window')
var_list.remove('end_window')

# aggregate to 1 row per time window
port_loading_by_seg = port_loading_by_seg.groupby(['start_window','end_window'])[var_list].sum().reset_index()

# sort by time window
port_loading_by_seg.sort_values(['start_window','end_window'], ascending = True, inplace = True)

Merge all port loading variables together

In [9]:
port_loading = port_loading.merge(port_loading_by_class, on = ['start_window','end_window'], how = 'inner')
port_loading = port_loading.merge(port_loading_by_seg, on = ['start_window','end_window'], how = 'left')

## Export data

In [10]:
port_loading.to_csv(processing_path + port_loading_filename,header=True,index=False,sep=',')

In [11]:
port_loading.describe()

Unnamed: 0,count,pl_type_cargo_ship,pl_type_container,pl_type_dredger,pl_type_general_cargo,pl_type_passenger,pl_type_pilot,pl_type_sar,pl_type_tug,pl_type_unknown,pl_type_work_vessel,pl_seg_Docked,pl_seg_Docking_inital,pl_seg_Docking_mid,pl_seg_Docking_terminal,pl_seg_Trans_border,pl_seg_Trans_general
count,602419.0,602419.0,602419.0,602419.0,602419.0,602419.0,602419.0,602419.0,602419.0,602419.0,602419.0,400120.0,400120.0,400120.0,400120.0,400120.0,400120.0
mean,2.384653,0.294712,0.770837,0.092331,0.231211,0.117954,0.500401,0.007307,0.119551,0.035248,0.003149,2.177387,0.217357,3.285187,1.579104,0.162814,0.824143
std,1.519289,0.599757,0.834798,0.292374,0.507387,0.322554,0.568532,0.088364,0.39457,0.185787,0.056027,3.319475,1.028202,4.858679,2.724825,0.896873,1.984772
min,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,3.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,5.0,0.0,6.0,3.0,0.0,0.0
max,22.0,4.0,5.0,2.0,5.0,1.0,3.0,2.0,5.0,2.0,1.0,30.0,18.0,46.0,33.0,15.0,29.0
