# Ship Dynamics
Calculate ship acceleration and distance between ports.

In [1]:
# base libraries
import pandas as pd
import os
import json

In [2]:
# set variable from config file
config_path = os.path.abspath('..')

with open(config_path + '/config.json', 'r') as f:
    config = json.load(f)

processing_path = config['DEFAULT']['processing_path']
shipping_rot_filename = config['DEFAULT']['shipping_rot_filename']
port_data1 = config['DEFAULT']['port_data1']
port_data2 = config['DEFAULT']['port_data2']
port_data3 = config['DEFAULT']['port_data3']
port_distance_prep_filename = config['DEFAULT']['port_distance_prep_filename']
walking_distance_filename = config['DEFAULT']['walking_distance_filename']
ship_dynamics_filename = config['DEFAULT']['ship_dynamics_filename']

In [3]:
# import data
dtype_dic = {'MMSI':int,'dt':'str', 'lat':'float', 'long':'float','SOG':'float', 'rot':'float', 
             'Type':'str', 'gross_tonnage':'float','vessel_name':'str', 'ETA':'str', 'POC_LOCODE':'str',
             'last_port_LOCODE':'str', 'next_port_LOCODE':'str', 'status':'str','voyage_id':'float','tripid':int,
            'in_hazmat':'str','out_hazmat':'str'}
parse_dates = ['dt', 'ETA']

shipping_data = pd.read_csv(processing_path + shipping_rot_filename,header = 0,delimiter = ',',dtype = dtype_dic, parse_dates=parse_dates)

# keep only necessary columns
CERS_data = shipping_data[['MMSI','dt','tripid','POC_LOCODE', 'last_port_LOCODE', 'next_port_LOCODE']]
CERS_data = CERS_data.dropna(subset=['POC_LOCODE','last_port_LOCODE'])
CERS_data.drop_duplicates(subset=['MMSI','tripid'],keep = 'first', inplace = True)

Import worldwide port locations and names and keep only necessary features

Downloaded from https://www.unece.org/cefact/codesfortrade/codes_index.html 

In [4]:
# import port data
data_ports1 = pd.read_csv(processing_path + port_data1, header=None, sep = ',', encoding='latin1')
data_ports2 = pd.read_csv(processing_path + port_data2, header=None, sep = ',', encoding='latin1')
data_ports3 = pd.read_csv(processing_path + port_data3, header=None, sep = ',', encoding='latin1')

In [5]:
# append port data and tidy up
data_ports = data_ports1.append(data_ports2)
data_ports = data_ports.append(data_ports3)
# drop missing port codes
data_ports = data_ports.dropna(subset=[2])
# only keep necessary columns
data_ports = data_ports[[1,2,4,10]]
data_ports['LOCODE'] = data_ports[1] + data_ports[2]
data_ports.rename(columns={4:'last_port_name', 10:'Coordinates'}, inplace=True)
data_ports.drop([1,2],inplace=True)

In [6]:
# rename port names so that can be found from the mapdist function later
orig_port_names = ['Dunkirk','Liverpool','Montrose','Teesport','Piraeus','Colombo','Leixoes','Southampton']
new_port_names = {'Dunkirk':'Dunkerque','Liverpool':'Liverpool Port','Montrose':'Montrose Port','Teesport':'Tees Sport',
                  'Piraeus':'Pireas','Colombo':'Colombo Sri Lanka','Leixoes':'Porto','Southampton':'Southampton Port'}

for n in orig_port_names:
    data_ports['last_port_name'].str.replace(n,new_port_names[n])

In [7]:
# merge port names to shipping data
CERS_data = CERS_data.merge(data_ports[['LOCODE','last_port_name']],how='inner',left_on='last_port_LOCODE',
                                   right_on = 'LOCODE')
CERS_data = CERS_data.merge(data_ports[['LOCODE','last_port_name']],how='inner',left_on='POC_LOCODE',
                                    right_on = 'LOCODE')

CERS_data.drop(['LOCODE_x','LOCODE_y'], inplace=True, axis=1)
CERS_data.rename(columns={'last_port_name_x':'last_port_name','last_port_name_y':'port_name'},inplace=True)

In [8]:
# find unique pair combinations between ports
unique_trips = CERS_data[['port_name','last_port_name']].copy()

unique_trips.drop_duplicates(keep = 'first', inplace = True)
unique_trips = unique_trips.reset_index(drop=True)

Calculate distances between ports using walking mode. Driving mode would not work for long distances or ports in Asia

In [9]:
unique_trips.to_csv(processing_path + port_distance_prep_filename,header=True,index=False,sep=',')

As R has a nicer libary for calculating walking distance the next step is performed in 3a_port_distance(R)

In [11]:
walking_distance = pd.read_csv(processing_path + walking_distance_filename,header = 0,delimiter = ',')
walking_distance.drop('Unnamed: 0',axis=1,inplace=True)

In [12]:
CERS_data = CERS_data.merge(walking_distance, how = 'inner', on = ['port_name','last_port_name'])

## Acceleration

In [13]:
accel = shipping_data[['MMSI','dt','tripid','SOG']].copy(deep = True)

accel.sort_values(['MMSI','dt'],inplace = True)

accel['lag_dt'] = accel.groupby(['MMSI','tripid'])['dt'].shift(1)
accel['lag_SOG'] = accel.groupby(['MMSI','tripid'])['SOG'].shift(1)
accel['dt_delta'] = (accel['dt'] - accel['lag_dt']).dt.total_seconds()
accel['accel'] = (accel['SOG'] - accel['lag_SOG'])/accel['dt_delta']

accel.drop(['lag_dt','lag_SOG','dt_delta'], axis=1, inplace=True)

## Export Data

In [14]:
ship_dynamics = accel.merge(CERS_data[['MMSI','tripid','port_dist']], how = 'left', on = ['MMSI','tripid'])

In [15]:
ship_dynamics.to_csv(processing_path + ship_dynamics_filename,header=True,index=False,sep=',')

In [17]:
ship_dynamics.describe().astype(int)

Unnamed: 0,MMSI,tripid,SOG,accel,port_dist
count,10765119,10765119,10765119,10751531,3945602
mean,277389392,107,14,0,1161
std,108990305,119,8,0,2152
min,41491304,1,0,-100,0
25%,235078345,7,8,0,239
50%,235099364,60,15,0,319
75%,246171000,186,23,0,1334
max,970013941,693,102,97,13367
