Notebook to convert AIS points into trade routes / journeys

In [1]:
%load_ext autoreload
%autoreload 2

In [287]:
import os
from os.path import join
from glob import glob
import pandas as pd
import geopandas as gpd
import folium
from shapely.geometry import Point
import folium.plugins as plugins
import seaborn as sns
from matplotlib import pyplot as plt

import numpy as np
import datetime
from datetime import timedelta
from port_call import create_port_calls
import boto3

In [3]:
pd.options.display.max_columns = None

In [4]:
ais_dir = join(os.path.expanduser("~"), 'data', 'AIS')
data_dir = join(ais_dir, 'Syria')

In [5]:
# Sort the matches: glob returns paths in arbitrary, filesystem-dependent order,
# which would make the row order of the concatenated frame vary between runs.
data_files = sorted(glob(join(data_dir, "*.csv")))

In [6]:
dfs = [pd.read_csv(f, index_col=0) for f in data_files]

In [7]:
df = pd.concat(dfs)

In [8]:
df.polygon_name.unique()

array(['AL LADHIQIYAH', 'TARTUS', 'BANIYAS'], dtype=object)

In [9]:
# One independent copy of the AIS rows per port polygon, in the order
# Latakia (AL LADHIQIYAH), Tartus, Baniyas.
df_latakia, df_tartus, df_baniyas = (
    df.loc[df.polygon_name == polygon].copy()
    for polygon in ("AL LADHIQIYAH", "TARTUS", "BANIYAS")
)

In [10]:
start = '2018-12-01'
end = '2022-08-31'
country = "Syria"

#### 1.2 Data Prep

Run port call algorithm

In [11]:
trips = create_port_calls(df_latakia, start, end, "Latakia", "Syria")

In [12]:
len(trips)

793

In [13]:
# Label port calls that should be excluded later; unlabelled rows keep a missing `passing2`.
short_stay = trips.turn_around_time < 10
steady_heading = trips['heading-diff'].abs() < 45
trips.loc[short_stay & steady_heading, "passing2"] = "passing by"

# Applied second, so any stay under 5 ends up as "maintenance" even if it was
# labelled "passing by" just above.
very_short_stay = trips.turn_around_time < 5
trips.loc[very_short_stay, "passing2"] = "maintenance"

In [14]:
trips.passing2.value_counts()

passing by     70
maintenance    48
Name: passing2, dtype: int64

In [51]:
data = trips.loc[trips.passing2.isna()].copy()

In [52]:
len(data)

675

In [53]:
data.vessel_type.unique()

array(['Cargo', 'UNAVAILABLE', 'Passenger', 'Unknown', 'Reserved'],
      dtype=object)

In [54]:
data.vessel_type_main.unique()

array(['Container Ship', 'General Cargo Ship', nan,
       'Oil And Chemical Tanker', 'Bulk Carrier', 'Ro Ro Cargo Ship',
       'Specialized Cargo Ship', 'Other', 'Offshore Vessel'], dtype=object)

In [55]:
data.loc[data.vessel_type=="Reserved", "vessel_type_main"].unique()

array([nan], dtype=object)

In [56]:
# AIS vessel types that are excluded from the analysis.
drop_types = ["Reserved", "Passenger"]
is_dropped = data.vessel_type.isin(drop_types)
data = data.loc[~is_dropped].copy()

In [57]:
len(data)

667

In [58]:
data.vessel_type_sub.unique()

array([nan, 'Oil Products Tanker', 'Vehicles Carrier',
       'Livestock Carrier', 'Crewboat', 'Chemical Tanker',
       'Offshore Tug Supply Ship'], dtype=object)

#### 1.3 Predict DWT

In [59]:
# Assign the column directly: `data.loc[:, "mmsi"] = ...` writes into the existing
# column in place on recent pandas, which can keep the old dtype and undo the cast.
# int64 is spelled out so the merge key has the same width on every platform.
data["mmsi"] = data["mmsi"].astype("int64")

In [60]:
vessels = pd.read_excel(join(ais_dir, "Pacific_vessel_database_2021_new.xlsx"), 0)

In [61]:
vessels.head(2)

Unnamed: 0,imo,mmsi,vessel_type,sub_vessel_type,flag,dwt,gt,length,width,year_built,Draft,Engine_KW_Total,TEU,TEU14
0,9572496,209483000.0,cargo,Anchor handling supply tug,Cyprus,840.0,1290.0,53.0,14.0,2011.0,4.8,6120.0,,0
1,9388962,257322000.0,cargo,Anchor handling supply tug,Norway,2350.0,3070.0,78.0,17.0,2008.0,7.0,10812.0,,0


In [62]:
vessels = vessels.loc[~(vessels.mmsi.isna())].copy()

In [63]:
# Assign the column directly: `vessels.loc[:, "mmsi"] = ...` writes into the existing
# float column in place on recent pandas, so the values would stay float.
# Rows with a missing mmsi must already be removed, otherwise this cast raises.
vessels["mmsi"] = vessels["mmsi"].astype("int64")

In [80]:
vessels_filt = vessels[['sub_vessel_type', 'vessel_type', 'mmsi', 'dwt', 'gt', 'length', 'width', 'Draft', 'flag']].copy()

In [81]:
vessels_filt.loc[:, 'vessel_info'] = 1

In [82]:
data_join = data.merge(vessels_filt, on='mmsi', how='left', suffixes=['_ais', '_vessel'])

In [83]:
data_join.loc[data_join.vessel_info.isna(), "vessel_info"] = 0

In [84]:
data_join.vessel_info.value_counts()

1.0    538
0.0    129
Name: vessel_info, dtype: int64

In [86]:
data_join.vessel_type_vessel.unique()

array(['cargo', nan], dtype=object)

In [88]:
data_join.sub_vessel_type.unique()

array(['Container ship', nan, 'General cargo vessel', 'Cargo ship',
       'Bulk carrier', 'Reefer', 'Vehicle carrier', 'RoRo ship',
       'Livestock carrier', 'Forest-product carrier'], dtype=object)

In [223]:
# Block coefficient used for each hull category; the keys are the labels
# stored in the `block_cat` column.
block_coefficients = dict(bulk=0.79, container=0.73, tanker=0.83, LNG=0.79)

In [24]:
# Reporting group -> `sub_vessel_type` labels from the vessel database that belong to it.
# Labels are matched exactly (case included), so they must be spelled the way
# they appear in `data_join.sub_vessel_type.unique()`.
groups = {
    'Dry Bulk': ['Bulk carrier'],
    'Container': ['Container ship'],
    'General Cargo': ['General cargo vessel', 'Cargo ship'],
    'Reefer': ['Reefer'],
    'Vehicle carrier': ['Vehicle carrier'],
    'Ro Ro Cargo Ship': ['RoRo ship'],
    'Animal products': ['Livestock carrier'],   # was 'livestock carrier' (wrong case)
    'Forest': ['Forest-product carrier'],       # was 'Forest-producer carrier' (typo)
}

In [97]:
data_join.to_csv(join(ais_dir, "port_calls_latakia.csv"))

In [11]:
data_join = pd.read_csv(join(ais_dir, "port_calls_latakia.csv"), index_col=0)

In [12]:
# Indicator column: 0 when the draught did not change during the port call, 1 otherwise.
# A missing 'draught-diff' compares as "not equal to 0" and is therefore flagged 1.
draught_changed = data_join['draught-diff'] != 0
data_join.loc[:, "draught_delta"] = draught_changed.astype("int64")

In [13]:
# Redundant: rows with a zero 'draught-diff' were already set to 0 when
# `draught_delta` was created, so re-running this changes nothing.
data_join.loc[data_join['draught-diff']==0, "draught_delta"] = 0

In [14]:
data_join.draught_delta.value_counts()/len(data_join)

0    0.629685
1    0.370315
Name: draught_delta, dtype: float64

Only about 37% of port calls (247 of 667) have a non-zero draught delta

In [15]:
data_join.draught_delta.value_counts()

0    420
1    247
Name: draught_delta, dtype: int64

In [16]:
data_join = data_join.loc[data_join.draught_delta==1].copy()

In [17]:
data_join = data_join.loc[data_join.vessel_info==1].copy()

In [18]:
data_join['draught-diff'].describe()

count    204.000000
mean      -0.929412
std        0.997740
min       -6.500000
25%       -1.500000
50%       -0.800000
75%       -0.200000
max        1.000000
Name: draught-diff, dtype: float64

In [20]:
data_join.loc[:, 'block_cat'] = ""

In [25]:
block_coefficients.keys()

dict_keys(['bulk', 'container', 'tanker', 'LNG'])

In [26]:
# Assign a block-coefficient category from keywords in the vessel sub-type.
# Later entries win when more than one keyword matches, as in the original
# statement order.
# na=False: a missing sub_vessel_type would otherwise give NaN in the mask,
# which .loc refuses to use as a boolean indexer.
sub_type_lower = data_join.sub_vessel_type.str.lower()
for keyword, category in (
    ('container', 'container'),
    ('bulk', 'bulk'),
    ('tanker', 'tanker'),
    ('lng', 'LNG'),
):
    matches = sub_type_lower.str.contains(keyword, regex=False, na=False)
    data_join.loc[matches, "block_cat"] = category

In [227]:
data_join.loc[data_join.block_cat=='', "sub_vessel_type"].unique()

array(['Vehicle carrier', 'Cargo ship', 'RoRo ship',
       'General cargo vessel'], dtype=object)

In [233]:
data_join.loc[data_join.block_cat=='bulk', "sub_vessel_type"].unique()

array(['Bulk carrier'], dtype=object)

In [234]:
data_join.loc[data_join.block_cat=='container', "sub_vessel_type"].unique()

array(['Container ship'], dtype=object)

In [235]:
# Sub-types with no block coefficient of their own (general cargo, RoRo, vehicle
# carriers) are given the 'container' category.
# na=False: a missing sub_vessel_type would otherwise give NaN in the mask,
# which .loc refuses to use as a boolean indexer.
# NOTE: 'vehicle ' keeps its trailing space, exactly as in the original pattern.
sub_type_lower = data_join.sub_vessel_type.str.lower()
for keyword in ('cargo', 'roro', 'vehicle '):
    matches = sub_type_lower.str.contains(keyword, regex=False, na=False)
    data_join.loc[matches, "block_cat"] = 'container'

In [236]:
data_join.block_cat.unique()

array(['container', 'bulk'], dtype=object)

In [280]:
row = data_join.iloc[0]

In [281]:
row

date-leave                        2019-01-27
mmsi                               477552700
turn_around_time                   21.858056
datetime-leave           2019-01-27 08:41:36
draught-out                              9.7
heading-out                              0.0
seconds                              78689.0
datetime-entry           2019-01-26 10:50:07
date-entry                        2019-01-26
vessel_type_ais                        Cargo
dtg                      2019-01-26 10:20:01
length_ais                             211.0
width_ais                               30.0
draught-in                              10.0
heading-in                               0.0
vessel_type_main              Container Ship
vessel_type_sub                          NaN
draught-diff                            -0.3
heading-diff                             0.0
passing                                    3
port-name                            Latakia
country                                Syria
passing2  

In [239]:
# Scratch values taken from a single port call (`row`) to try the payload formula by hand.
Din = row['draught-in']     # draught reported on entry
Dout = row['draught-out']   # draught reported on leaving
L = row['length_vessel']
W = row['width_vessel']
Dd = row['Draft']           # NOTE: calculate_payload reads 'draft_max' here instead of 'Draft'
DWT = row['dwt']
Cb = block_coefficients[row['block_cat']]
pw = 1.025 # water density in tons/m3, i.e. 1025 kg/m3

In [240]:
Dr = Din # or Dout
Din, Dd

(10.0, 11.4)

In [216]:
Cbr = 1 - ((1 - Cb)*((Dr / Dd)**(1/3)))
Cb, Cbr

(0.73, 0.7415387261941899)

In [219]:
payload = ((((Cbr*Dr) - (Cb*Dd)) * (L*W*pw)) + DWT ) / DWT
payload

0.83279809847061

In [282]:
def calculate_payload(row, direction, coefficients=None):
    """Estimate the payload rate of a port call from a reported draught.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide 'draught-in', 'draught-out', 'length_vessel',
        'width_vessel', 'draft_max', 'dwt' and 'block_cat'.
    direction : str
        'in' to use the draught reported on entry, 'out' to use the draught
        reported on leaving.
    coefficients : mapping, optional
        Block coefficient per 'block_cat' label. Defaults to the notebook-level
        `block_coefficients` dictionary.

    Returns
    -------
    float
        ((Cbr*Dr - Cb*Dd) * L*W*pw + DWT) / DWT, where Dr is the reported
        draught, Dd is 'draft_max' and Cbr is the block coefficient adjusted
        to the reported draught. Equals 1.0 when Dr == Dd.

    Raises
    ------
    ValueError
        If `direction` is neither 'in' nor 'out'.
    """
    # Pick the reported draught first so a bad `direction` fails with a clear
    # message instead of an unbound-variable error further down.
    if direction == "in":
        Dr = row['draught-in']
    elif direction == "out":
        Dr = row['draught-out']
    else:
        raise ValueError(f"direction must be 'in' or 'out', got {direction!r}")

    if coefficients is None:
        coefficients = block_coefficients

    # get parameters
    L = row['length_vessel']
    W = row['width_vessel']
    Dd = row['draft_max']
    DWT = row['dwt']
    Cb = coefficients[row['block_cat']]
    pw = 1.025  # water density in tons/m3, i.e. 1025 kg/m3

    # calculate block coefficient for the reported draught
    Cbr = 1 - ((1 - Cb) * ((Dr / Dd) ** (1 / 3)))

    # calculate payload rate
    payload = ((((Cbr * Dr) - (Cb * Dd)) * (L * W * pw)) + DWT) / DWT
    return payload

In [283]:
calculate_payload(row, 'in')

0.83279809847061

In [284]:
data_join.loc[:, 'draft_max'] = data_join[['Draft', 'draught-in', 'draught-out']].max(axis=1)

In [285]:
data_join.loc[:, "payload_in"] = data_join.apply(lambda x: calculate_payload(x, 'in'), axis=1)
data_join.loc[:, "payload_out"] = data_join.apply(lambda x: calculate_payload(x, 'out'), axis=1)

In [286]:
data_join

Unnamed: 0,date-leave,mmsi,turn_around_time,datetime-leave,draught-out,heading-out,seconds,datetime-entry,date-entry,vessel_type_ais,dtg,length_ais,width_ais,draught-in,heading-in,vessel_type_main,vessel_type_sub,draught-diff,heading-diff,passing,port-name,country,passing2,sub_vessel_type,vessel_type_vessel,dwt,gt,length_vessel,width_vessel,Draft,flag,vessel_info,draught_delta,block_cat,payload_in,payload_out,draft_max
3,2019-01-27,477552700,21.858056,2019-01-27 08:41:36,9.7,0.0,78689.0,2019-01-26 10:50:07,2019-01-26,Cargo,2019-01-26 10:20:01,211.0,30.0,10.0,0.0,Container Ship,,-0.3,0.0,3,Latakia,Syria,,Container ship,cargo,35181.0,25904.0,211.0,30.0,11.400,Hong Kong SAR of China,1.0,1,container,0.832798,0.796441,11.4
4,2019-02-02,271044398,10.985000,2019-02-02 02:49:03,6.2,0.0,39546.0,2019-02-01 15:49:57,2019-02-01,Cargo,2019-02-01 13:43:30,152.0,24.0,9.2,0.0,Container Ship,,-3.0,0.0,3,Latakia,Syria,,Container ship,cargo,13623.0,10925.0,151.0,24.0,8.250,Turkey,1.0,1,container,1.000000,0.459115,9.2
15,2019-03-13,622122208,21.396111,2019-03-13 12:31:21,8.3,289.0,77026.0,2019-03-12 15:07:35,2019-03-12,Cargo,2019-03-12 09:49:35,155.0,25.0,9.7,228.0,Container Ship,,-1.4,61.0,3,Latakia,Syria,,Container ship,cargo,17728.0,14557.0,155.0,24.0,9.515,Egypt,1.0,1,container,1.000000,0.804588,9.7
17,2019-03-15,563044300,44.134167,2019-03-15 09:45:17,10.2,0.0,158883.0,2019-03-13 13:37:14,2019-03-13,Cargo,2019-03-13 11:54:57,194.0,30.0,11.1,93.0,,,-0.9,-93.0,3,Latakia,Syria,,Container ship,cargo,35220.0,26050.0,211.0,30.0,11.400,Singapore,1.0,1,container,0.964542,0.857088,11.4
19,2019-03-21,636092669,24.461667,2019-03-21 19:06:33,8.2,301.0,88062.0,2019-03-20 18:38:51,2019-03-20,Cargo,2019-03-20 14:51:09,175.0,27.0,11.0,228.0,Container Ship,,-2.8,73.0,3,Latakia,Syria,,Container ship,cargo,23580.0,18334.0,175.0,27.0,10.900,Liberia,1.0,1,container,1.000000,0.622598,11.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
644,2022-07-19,477552700,28.031389,2022-07-19 05:34:38,9.9,0.0,100913.0,2022-07-18 01:32:45,2022-07-18,Cargo,2022-07-18 00:29:40,211.0,30.0,11.0,0.0,Container Ship,,-1.1,0.0,3,Latakia,Syria,,Container ship,cargo,35181.0,25904.0,211.0,30.0,11.400,Hong Kong SAR of China,1.0,1,container,0.952631,0.820701,11.4
650,2022-07-28,271044633,17.705556,2022-07-28 03:28:13,5.7,271.0,63740.0,2022-07-27 09:45:53,2022-07-27,Cargo,2022-07-27 09:21:53,151.0,24.0,8.4,95.0,Container Ship,,-2.7,176.0,3,Latakia,Syria,,Container ship,cargo,13700.0,10917.0,151.0,24.0,8.300,Turkey,1.0,1,container,1.000000,0.516181,8.4
654,2022-08-04,622112801,21.228333,2022-08-04 09:18:39,6.5,272.0,76422.0,2022-08-03 12:04:57,2022-08-03,Cargo,2022-08-03 06:16:36,135.0,20.0,7.6,18.0,General Cargo Ship,,-1.1,254.0,3,Latakia,Syria,,Cargo ship,cargo,9389.0,8443.0,135.0,20.0,7.450,Egypt,1.0,1,container,1.000000,0.789577,7.6
659,2022-08-17,477552700,36.240556,2022-08-17 19:37:56,9.7,267.0,130466.0,2022-08-16 07:23:30,2022-08-16,Cargo,2022-08-16 06:59:30,211.0,30.0,10.3,0.0,Container Ship,,-0.6,267.0,3,Latakia,Syria,,Container ship,cargo,35181.0,25904.0,211.0,30.0,11.400,Hong Kong SAR of China,1.0,1,container,0.868964,0.796441,11.4


Get draught differences from the next recorded draught (loaded below from `Latakia_next_draught.csv`)

In [174]:
aws_bucket = "wbgdecinternal-ntl"
path = "Andres_Temp/AIS"

In [175]:
client = boto3.client('s3')

In [176]:
# Walk every result page: one list_objects_v2 call returns at most 1,000 keys
# whatever MaxKeys asks for, and a page with no matches has no 'Contents' entry.
paginator = client.get_paginator('list_objects_v2')
bucket_files = [
    f"s3://{aws_bucket}/{content['Key']}"
    for page in paginator.paginate(Bucket=aws_bucket, Prefix=path)
    for content in page.get('Contents', [])
]
bucket_files

['s3://wbgdecinternal-ntl/Andres_Temp/AIS/',
 's3://wbgdecinternal-ntl/Andres_Temp/AIS/AIS_MMSI_2019-01-01_2019-06-30.csv',
 's3://wbgdecinternal-ntl/Andres_Temp/AIS/AIS_MMSI_2019-01-01_2019-06-30_port.csv',
 's3://wbgdecinternal-ntl/Andres_Temp/AIS/Latakia_next_draught.csv',
 's3://wbgdecinternal-ntl/Andres_Temp/AIS/Port_mapping.csv',
 's3://wbgdecinternal-ntl/Andres_Temp/AIS/books.csv',
 's3://wbgdecinternal-ntl/Andres_Temp/AIS/port_calls_latakia.csv',
 's3://wbgdecinternal-ntl/Andres_Temp/AIS/port_calls_test.csv']

In [172]:
df = pd.read_csv('s3://wbgdecinternal-ntl/Andres_Temp/AIS/Latakia_next_draught.csv', index_col=0)

In [186]:
len(df)

308

In [185]:
data_join.draught_delta.value_counts()

0    420
1    247
Name: draught_delta, dtype: int64