In [2]:
import pandas as pd
import numpy as np
import json
import csv
import ast
import h3 as h3_module
import h3.api.basic_int as h3
from haversine import haversine, haversine_vector, Unit
from collections import defaultdict, namedtuple, Counter
import itertools
from functools import partial
import operator
import string
import re
import sys
import datetime
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
import os
import time
from functools import partial

In [3]:
pd.set_option('display.max_columns', 10)
pd.set_option('display.max_rows', 50)
pd.set_option('display.min_rows', 25)
pd.set_option('display.expand_frame_repr', True)
!jt -t grade3 -T -N
!jt -t monokai -f fira -fs 13 -nf ptsans -nfs 11 -N -kl -cursw 5 -cursc r -cellw 95% -T
VESSEL_MOVEMENTS_FILE = "vessel_movements_with_hexes.feather"

In [4]:
# Returns (run_starts, run_lengths, run_vals, [run_nums]) and does not use
# any equality tolerance for floats. Does not work for NaN values (they will
# not form a run).  If arr is actually a tuple, it should consist of multiple
# sequences of the same length, and all values for a position must match.
def np_runlengths(seq, return_run_numbers=False, as_frame=False):
    if isinstance(seq, tuple):
        multi_match = True
        all_arrs = [np.asarray(x) for x in seq]
        arr, *other_arrs = all_arrs
    else:
        multi_match = False
        arr = np.asarray(seq)
    arr_len = len(arr)
    if arr_len == 0:
        no_ints = np.zeros(0, dtype=int)
        nums_ret = [no_ints] if return_run_numbers else []
        if multi_match:
            return (no_ints, no_ints, tuple(arr2[no_ints] for arr2 in all_arrs),
                    *nums_ret)
        else:
            return (no_ints, no_ints, arr[no_ints], *nums_ret)
    unequal = (arr[1:] != arr[:-1])
    if multi_match:
        for arr2 in other_arrs:
            unequal |= (arr2[1:] != arr2[:-1])
    run_starts = np.r_[0, np.flatnonzero(unequal) + 1]
    run_lengths = np.diff(np.r_[run_starts, arr_len])
    if multi_match:
        vals = tuple(arr2[run_starts] for arr2 in all_arrs)
    else:
        vals = arr[run_starts]
    if return_run_numbers:
        run_nums = np.empty(len(arr), dtype=int)
        for i, runstart, runlen in zip(itertools.count(), run_starts, run_lengths):
            run_nums[runstart:runstart+runlen] = i
    if as_frame:
        if multi_match:
            coldict = { f"value{i+1}" : v for i, v in enumerate(vals) }
            coldict['run_start'] = run_starts
            coldict['run_len'] = run_lengths
            df = pd.DataFrame(coldict)
        else:
            df = pd.DataFrame(dict(
                value=vals,
                run_start=run_starts,
                run_len=run_lengths))
        if return_run_numbers:
            return (df, run_nums)
        else:
            return df
    elif return_run_numbers:
        return (run_starts, run_lengths, vals, run_nums)
    else:
        return (run_starts, run_lengths, vals)

In [5]:
np_runlengths(list("abbcbbb"))

(array([0, 1, 3, 4]),
 array([1, 2, 1, 3]),
 array(['a', 'b', 'c', 'b'], dtype='<U1'))

In [6]:
np_runlengths(list("abbcbbb"), return_run_numbers=True)

(array([0, 1, 3, 4]),
 array([1, 2, 1, 3]),
 array(['a', 'b', 'c', 'b'], dtype='<U1'),
 array([0, 1, 1, 2, 3, 3, 3]))

In [7]:
np_runlengths( (list("abbbcbbb"), [1, 1, 1, 2, 2, 2, 3, 3]) )

(array([0, 1, 3, 4, 5, 6]),
 array([1, 2, 1, 1, 1, 2]),
 (array(['a', 'b', 'b', 'c', 'b', 'b'], dtype='<U1'),
  array([1, 1, 2, 2, 2, 3])))

In [8]:
np_runlengths( (list("abbbbcbbb"), [1, 1, 1, 2, 2, 2, 2, 3, 3]), as_frame=True)

Unnamed: 0,value1,value2,run_start,run_len
0,a,1,0,1
1,b,1,1,2
2,b,2,3,2
3,c,2,5,1
4,b,2,6,1
5,b,3,7,2


In [9]:
np_runlengths( (list("abbbbcbbb"), [1, 1, 1, 2, 2, 2, 2, 3, 3]), as_frame=True, return_run_numbers=True)

(  value1  value2  run_start  run_len
 0      a       1          0        1
 1      b       1          1        2
 2      b       2          3        2
 3      c       2          5        1
 4      b       2          6        1
 5      b       3          7        2,
 array([0, 1, 1, 2, 2, 3, 4, 5, 5]))

# Loading Sai's edited ports file

This CSV ports file  was posted to Teams by Sai on 27-Feb-2021 7:49pm.  Earlier versions of this notebook used a different ports file (JSON format) that Sai found earlier on GitHub.

In [10]:
edited_ports_csv = pd.read_csv("ports_trimmed_modified.csv")
edited_ports_csv

Unnamed: 0,locode,lat,lon,country,location,Name,name_alternative,h3_4,h3_5,mapped_locode
0,ITNAP,40.837990,14.276230,IT,NAP,Napoli,Napoli,595011935825559551,599515530084220927,ITNAP
1,AEJEA,25.003280,55.052065,AE,JEA,Jebel Ali,Jebel Ali,595664899703570431,600168492888489983,AEJEA
2,USSAV,32.100710,-81.101700,US,SAV,Savannah,Savannah,595686116842012671,600189710026932223,USSAV
3,NZAKL,-36.500000,174.480000,NZ,AKL,NZL_Auckland_PORT,NZL_Auckland_PORT,597770404341219327,602273996452397055,NZAKL
4,MYKCH,1.608000,110.379000,MY,KCH,MYS_Kuching_PORT,MYS_Kuching_PORT,596332938916790271,601451985882841087,MYKCH
5,BEANR,51.302490,4.311460,BE,ANR,Antwerp Port,Antwerp Port,595031838704009215,599535431888928767,BEANR
6,MACAS,33.350000,-7.360000,MA,CAS,MAR_Casablanca_PORT,MAR_Casablanca_PORT,595489604908351487,599993201314496511,MACAS
7,SRPBM,5.852000,-55.204000,SR,PBM,SUR_Paramaribo_PORT,SUR_Paramaribo_PORT,596148349812342783,600651942997262335,SRPBM
8,JMKIN,18.018000,-76.810000,JM,KIN,JAM_Kingston_PORT,JAM_Kingston_PORT,596290624898990079,600794223452618751,JMKIN
9,USMSY,29.935600,-90.073100,US,MSY,USA_New Orleans_PORT,USA_New Orleans_PORT,595676255597101055,600179847708278783,USMSY


In [11]:
edited_ports_csv.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 908 entries, 0 to 907
Data columns (total 10 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   locode            908 non-null    object 
 1   lat               908 non-null    float64
 2   lon               908 non-null    float64
 3   country           906 non-null    object 
 4   location          907 non-null    object 
 5   Name              908 non-null    object 
 6   name_alternative  908 non-null    object 
 7   h3_4              908 non-null    int64  
 8   h3_5              908 non-null    int64  
 9   mapped_locode     908 non-null    object 
dtypes: float64(2), int64(2), object(6)
memory usage: 71.1+ KB


In [12]:
port_to_latlon = dict(zip(edited_ports_csv['locode'], zip(edited_ports_csv['lat'], edited_ports_csv['lon'])))
len(port_to_latlon)

908

In [13]:
port_to_mapped_port = dict(zip(edited_ports_csv['locode'], edited_ports_csv['mapped_locode']))

## Loading the ODs for which port sequences are extracted 

In [14]:
odlist_df = pd.read_csv("od_routes_v2.csv")
odlist_df['mapped_origin'] = odlist_df['origin'].map(port_to_mapped_port)
odlist_df['mapped_destination'] = odlist_df['destination'].map(port_to_mapped_port)
odlist_df.head(10)

Unnamed: 0,route,origin,destination,mapped_origin,mapped_destination
0,KRBUK-CNQDG,KRBUK,CNQDG,KRPUS,CNQDG
1,KRBUK-CNSHG,KRBUK,CNSHG,KRPUS,CNSHA
2,KRBUK-CNSZX,KRBUK,CNSZX,KRPUS,CNSZX
3,KRBUK-SGSIN,KRBUK,SGSIN,KRPUS,SGSIN
4,KRBUK-MYOKG,KRBUK,MYOKG,KRPUS,MYPKG
5,KRBUK-INMAA,KRBUK,INMAA,KRPUS,INMAA
6,KRBUK-INVTZ,KRBUK,INVTZ,KRPUS,INVTZ
7,KRBUK-PHMNL,KRBUK,PHMNL,KRPUS,PHMNL
8,KRBUK-PHSFS,KRBUK,PHSFS,KRPUS,PHSFS
9,KRBUK-TWKEL,KRBUK,TWKEL,KRPUS,TWKEL


# Precomputing a superset of H3 hexes that are close to a port

A port might be close to the boundary of hex, so that lat/lon points in adjacent hexes might still be close even though they do not map to the same hex as the port.  Mark the ring of adjacent hexes as possible hexes for being close to the port, and we'll check with a distance computation.  Actually, we may need to go further than the immediately adjacent ring.

We are using the integer representation for H3 hexes.

In [15]:
# returns map hex : portlist
def mark_hexes_near_ports(resolution=5, rings=2):
    hexports = defaultdict(list)
    for port, latlon in port_to_latlon.items():
        hex1 = h3.geo_to_h3(*latlon, 5)
        for hex2 in h3.k_ring(hex1, rings):
            hexports[hex2].append(port)
    return dict(hexports)

In [16]:
hex5_to_possible_ports = mark_hexes_near_ports()

In [17]:
hex5_to_possible_ports[h3.geo_to_h3(*port_to_latlon['USNYC'], 5)]

['USNYC']

In [18]:
port_to_mapped_port['USNYC']

'USNYC'

# Loading the procured vessel-movement history

It's already been saved in feather format with hex columns added.

In [19]:
%%time
vessel_movements_df = pd.read_feather(VESSEL_MOVEMENTS_FILE)

CPU times: user 2.61 s, sys: 3.76 s, total: 6.37 s
Wall time: 1.13 s


In [20]:
vessel_movements_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32333557 entries, 0 to 32333556
Data columns (total 12 columns):
 #   Column        Dtype              
---  ------        -----              
 0   MMSI          int64              
 1   TimePosition  datetime64[ns, UTC]
 2   Latitude      float32            
 3   Longitude     float32            
 4   Speed         float32            
 5   NavStatus     category           
 6   IMO           int64              
 7   Name          category           
 8   TimeETA       datetime64[ns, UTC]
 9   Destination   category           
 10  h3_5          int64              
 11  h3_4          int64              
dtypes: category(3), datetime64[ns, UTC](2), float32(3), int64(4)
memory usage: 2.0 GB


In [21]:
vessel_movements_df.head()

Unnamed: 0,MMSI,TimePosition,Latitude,Longitude,Speed,...,Name,TimeETA,Destination,h3_5,h3_4
0,310133000,2020-02-01 00:00:00+00:00,51.352272,4.257915,0.0,...,VALENCIA EXPRESS,2020-01-31 05:00:00+00:00,BEANR,599535436183896063,595031838704009215
1,211553000,2020-02-01 00:00:00+00:00,37.886166,-0.226667,13.8,...,BENEDIKT,2020-02-01 10:00:00+00:00,ESVLC,599987177622863871,595483583364202495
2,255805588,2020-02-01 00:00:00+00:00,-2.19615,145.746414,15.5,...,MOL GENESIS,2020-02-05 08:30:00+00:00,AUBNE,600998056158035967,596494464046858239
3,566760000,2020-02-01 00:00:00+00:00,25.301809,121.393227,15.4,...,WAN HAI 105,2020-02-01 10:00:00+00:00,KEL,600309536393265151,595805929249701887
4,477178200,2020-02-01 00:00:00+00:00,29.321198,122.645439,20.700001,...,MAERSK TAIKUNG,2020-02-05 04:00:00+00:00,CN NGB SG SIN,600301587482542079,595797992150138879


In [22]:
vessel_movements_df['IMO'].value_counts().head()

9247871    9064
9745550    8792
9584865    8775
9775775    8766
9349796    8766
Name: IMO, dtype: int64

In [23]:
vessel_movements_df['NavStatus'].value_counts()

under way using engine                                          19910673
moored                                                           8106282
at anchor                                                        3852708
underway sailing                                                  154525
not under command                                                 141891
not available                                                     127418
restricted manoeuvrability                                         15178
constrained by draught                                             12225
aground                                                             9205
reserved for future use                                              940
engaged in fishing                                                   648
reserved for future amendment of navigational status for HSC         210
reserved for future amendment of navigational status for WIG         146
Name: NavStatus, dtype: int64

# Finding closest port within threshold for locations of a single vessel

Doing this operation for points of a single vessel reduces the number of distances we need to calculate.  Before we save out the result, we will also sort the dataframe by IMO and location-update time.

In [24]:
# Elements with no port within threshold are not included in the
# output series.  Because an indexed Series is returned, the values
# can still be aligned with the original.  Also, it works for 
# vessel_df to be already a subset, and then the returned Series
# can be aligned with the original.
def closest_port_ser(vessel_df, threshold=15, stopped_only=True):
    if stopped_only:
        sub_df = vessel_df[ vessel_df['NavStatus'].isin(['moored', 'at anchor', 'aground'])
                           & (vessel_df['Speed']< 0.5) ]
    else:
        sub_df = vessel_df
    port_set = set()
    for hex1 in sub_df['h3_5'].unique():
        port_set.update(hex5_to_possible_ports.get(hex1, []))
    candidate_ports = np.array(list(port_set), dtype=object)
    if len(candidate_ports) > 0 and len(sub_df.index) > 0:
        distances = haversine_vector(
            [port_to_latlon[p] for p in candidate_ports],
            list(zip(sub_df['Latitude'], sub_df['Longitude'])),
            Unit.NAUTICAL_MILES,
            comb=True)
        # result has one row per movement
        closest = np.argmin(distances, axis=1)
        closest_dist = distances[range(len(closest)), closest]
        full_series = pd.Series(candidate_ports[closest], index=sub_df.index)
        return full_series[closest_dist <= threshold]
    else:
        return pd.Series(np.nan, index=sub_df.index)

In [25]:
%%time
vessel_movements_df['stopped_closest_port'] = vessel_movements_df.groupby('IMO', group_keys=False).apply(closest_port_ser)

CPU times: user 57.9 s, sys: 6.2 s, total: 1min 4s
Wall time: 1min 4s


In [26]:
vessel_movements_df.head(20)

Unnamed: 0,MMSI,TimePosition,Latitude,Longitude,Speed,...,TimeETA,Destination,h3_5,h3_4,stopped_closest_port
0,310133000,2020-02-01 00:00:00+00:00,51.352272,4.257915,0.0,...,2020-01-31 05:00:00+00:00,BEANR,599535436183896063,595031838704009215,BEANR
1,211553000,2020-02-01 00:00:00+00:00,37.886166,-0.226667,13.8,...,2020-02-01 10:00:00+00:00,ESVLC,599987177622863871,595483583364202495,
2,255805588,2020-02-01 00:00:00+00:00,-2.19615,145.746414,15.5,...,2020-02-05 08:30:00+00:00,AUBNE,600998056158035967,596494464046858239,
3,566760000,2020-02-01 00:00:00+00:00,25.301809,121.393227,15.4,...,2020-02-01 10:00:00+00:00,KEL,600309536393265151,595805929249701887,
4,477178200,2020-02-01 00:00:00+00:00,29.321198,122.645439,20.700001,...,2020-02-05 04:00:00+00:00,CN NGB SG SIN,600301587482542079,595797992150138879,
5,477174700,2020-02-01 00:00:01+00:00,6.246488,3.183412,0.1,...,2020-01-23 11:30:00+00:00,NA WVB NG APP,600537642106355711,596034043552727039,
6,563549000,2020-02-01 00:00:01+00:00,54.814388,18.906809,10.6,...,2020-02-05 00:00:00+00:00,BE ANR,599531949744193535,595028351190564863,
7,477415200,2020-02-01 00:00:01+00:00,35.488316,139.753784,0.0,...,2020-02-01 07:45:00+00:00,JP KWS XX,599811813034426367,595308219849506815,
8,477346500,2020-02-01 00:00:01+00:00,28.630568,-89.614967,12.2,...,2020-02-01 01:30:00+00:00,USMSY,600182743589978111,595679150405058559,
9,636014557,2020-02-01 00:00:01+00:00,42.445576,144.743469,15.0,...,2020-02-05 23:00:00+00:00,USOAK TWKEL,599789347134242815,595285748580614143,


In [27]:
# Also add the mapped ports
vessel_movements_df['mapped_stopped_closest_port'] = vessel_movements_df['stopped_closest_port'].map(port_to_mapped_port, na_action='ignore')

In [28]:
# Sort the main dataframe.  We depend on both levels of this sorting!  We are also
# resetting the index values.
vessel_movements_df.sort_values(['IMO', 'TimePosition'], inplace=True, ignore_index=True)

In [29]:
vessel_movements_df.head(10)

Unnamed: 0,MMSI,TimePosition,Latitude,Longitude,Speed,...,Destination,h3_5,h3_4,stopped_closest_port,mapped_stopped_closest_port
0,366365000,2020-04-06 15:02:23+00:00,37.771606,-122.304024,0.0,...,OAK 63,599685771850416127,595182179739238399,USOAK,USOAK
1,366365000,2020-04-06 16:02:23+00:00,37.771549,-122.303978,0.0,...,OAK 63,599685771850416127,595182179739238399,USOAK,USOAK
2,366365000,2020-04-06 17:02:24+00:00,37.7715,-122.303886,0.1,...,OAK 63,599685771850416127,595182179739238399,USOAK,USOAK
3,366365000,2020-04-06 18:08:25+00:00,37.77441,-122.308411,0.1,...,OAK 63,599685771850416127,595182179739238399,USOAK,USOAK
4,366365000,2020-04-06 19:02:21+00:00,37.77459,-122.308624,0.1,...,OAK 63,599685771850416127,595182179739238399,USOAK,USOAK
5,366799000,2020-02-01 00:02:54+00:00,33.759998,-118.276138,0.0,...,LOS ANGELES,599711395625304063,595207803514126335,USLAX,USLAX
6,366799000,2020-02-01 01:08:54+00:00,33.760002,-118.276138,0.0,...,LOS ANGELES,599711395625304063,595207803514126335,USLAX,USLAX
7,366799000,2020-02-01 02:08:55+00:00,33.760006,-118.276138,0.0,...,LOS ANGELES,599711395625304063,595207803514126335,USLAX,USLAX
8,366799000,2020-02-01 03:02:55+00:00,33.760002,-118.276138,0.0,...,LOS ANGELES,599711395625304063,595207803514126335,USLAX,USLAX
9,366799000,2020-02-01 04:05:55+00:00,33.760006,-118.276138,0.0,...,LOS ANGELES,599711395625304063,595207803514126335,USLAX,USLAX


In [30]:
imo_range_data = np_runlengths(vessel_movements_df['IMO'])
imo_range_data

(array([       0,        5,     8737, ..., 32332147, 32333014, 32333062]),
 array([   5, 8732, 8718, ...,  867,   48,  495]),
 array([7334204, 7617890, 7617905, ..., 9893591, 9893852, 9894648]))

In [31]:
imo_range_df = pd.DataFrame(dict(zip(['range_start', 'range_len', 'IMO'], imo_range_data)))
imo_range_df

Unnamed: 0,range_start,range_len,IMO
0,0,5,7334204
1,5,8732,7617890
2,8737,8718,7617905
3,17455,7433,7729459
4,24888,7608,7729461
5,32496,5980,7907984
6,38476,8556,7907996
7,47032,1607,7908005
8,48639,1048,8107610
9,49687,49,8117299


In [32]:
imo_to_main_range_start = { imo: start for imo, start in zip(imo_range_df['IMO'], imo_range_df['range_start']) }

In [33]:
# Save out so we can reload
vessel_movements_df.to_feather("vessel_movements_sorted_IMO_with_hexes_and_ports.feather")

In [34]:
# Save out the IMO ranges too
imo_range_df[['IMO', 'range_start', 'range_len']].to_csv("sorted_IMO_ranges.csv", index=False)

In [35]:
vessel_movements_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32333557 entries, 0 to 32333556
Data columns (total 14 columns):
 #   Column                       Dtype              
---  ------                       -----              
 0   MMSI                         int64              
 1   TimePosition                 datetime64[ns, UTC]
 2   Latitude                     float32            
 3   Longitude                    float32            
 4   Speed                        float32            
 5   NavStatus                    category           
 6   IMO                          int64              
 7   Name                         category           
 8   TimeETA                      datetime64[ns, UTC]
 9   Destination                  category           
 10  h3_5                         int64              
 11  h3_4                         int64              
 12  stopped_closest_port         object             
 13  mapped_stopped_closest_port  object             
dtypes: category(3), 

# Assigning letters to the historical ports of a vessel

In [36]:
# When I thought of this idea, I had no idea that some IMO numbers might have
# over 50 values in stopped_closest_port.  This is a trouble sign
# for the method and for the data.  Pull in some Greek letters too, but
# we shouldn't need them right away.
PORT_LETTER_CHARS = ( 
    string.ascii_letters 
    + string.digits
    + ''.join(map(chr, range(ord('\N{GREEK SMALL LETTER ALPHA}'), 
                             ord('\N{GREEK SMALL LETTER OMEGA}')+1)))
    + ''.join(map(chr, range(ord('\N{GREEK CAPITAL LETTER ALPHA}'), 
                             ord('\N{GREEK CAPITAL LETTER OMEGA}')+1)))
)
len(PORT_LETTER_CHARS)

112

In [37]:
PORT_LETTER_CHARS

'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789αβγδεζηθικλμνξοπρςστυφχψωΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡ\u03a2ΣΤΥΦΧΨΩ'

In [38]:
JOURNEY_BREAKER_LETTER = "\N{CROSS MARK}"
JOURNEY_BREAKER_LETTER

'❌'

In [39]:
# If we are executing just this section and some earlier defs,
# we will need to reload.
#vessel_movements_df = pd.read_feather(f"{DATA_DIR}/prj731_sorted_IMO_with_journey_breaker.feather")

In [40]:
vessel_movements_df.groupby('IMO')['stopped_closest_port'].nunique().sort_values(ascending=False)

IMO
9222285    42
9341964    42
9226918    39
9706310    39
9304447    38
9469572    38
9756731    37
9437050    37
9702106    37
9335197    37
9629445    37
9289922    36
           ..
9183893     0
9693654     0
9658848     0
9216743     0
8907931     0
9044607     0
9603594     0
9599236     0
9566382     0
9232395     0
9545015     0
9894648     0
Name: stopped_closest_port, Length: 5186, dtype: int64

In [41]:
VesselPortSequence = namedtuple("VesselPortSequence", ['portstr', 'portmap', 'rowpos'])
VesselPortSequence.EMPTY = VesselPortSequence("", {}, np.array([], dtype=int))

def create_vessel_port_sequence(vessel_df, 
                                port_col='mapped_stopped_closest_port',
                                journey_breaker_col='journey_breaker'):
    if len(vessel_df.index) == 0:
        return VesselPortSequence.EMPTY
    assert vessel_df['TimePosition'].is_monotonic_increasing
    assert (vessel_df['IMO'] == vessel_df['IMO'].iat[0]).all()
    # Reset the index before dropping some rows, so the index values
    # will represent row positions within vessel_df.
    if journey_breaker_col is None:
        ports = vessel_df[port_col].reset_index(drop=True).dropna()
    else:
        # Artificially include the journey breaker positions in the ports
        # string, even if no port was identified; but make those positions
        # contain NaN, which in this code will unambiguiously mark a
        # journey breaker.
        vdf2 = vessel_df[[port_col, journey_breaker_col]].reset_index(drop=True)
        jbreak = vdf2[journey_breaker_col]
        ports = vdf2[port_col].mask(jbreak, other=np.nan)
        ports = ports[ports.notna() | jbreak]
    if len(ports) == 0:
        return VesselPortSequence.EMPTY
    unique_ports = ports.unique()
    letter_limit = len(PORT_LETTER_CHARS)
    # note: this will now skip a letter if there are any journey breakers
    letter_map = { p: PORT_LETTER_CHARS[i] for i, p in enumerate(unique_ports)
                   if i < letter_limit and type(p) is str }
    port_chars = [ ( JOURNEY_BREAKER_LETTER if type(p) is not str
                     else letter_map.get(p, '?') )
                   for p in ports ]
    # In the current calling sequence, we require that vessel_df must be
    # a contiguous subrange of the main DF, so we record row positions within
    # vessel_df and we will later offset them to get row positions within 
    # the main DF.  For large datasets, it is much faster to deal with row
    # positions rather than index labels.
    return VesselPortSequence(
        portstr=''.join(port_chars),
        portmap=letter_map,
        # the index values of ports are row positions within vessel_df,
        # because we reset the index before taking the notna subset
        rowpos=ports.index.to_numpy())

In [42]:
# reconstruct this in case we've reloaded and didn't execute all
vessel_movements_grouped_IMO = vessel_movements_df.groupby('IMO', group_keys=False)

In [43]:
%%time
IMO_to_digested_port_sequence = { 
    imo: create_vessel_port_sequence(vessel_movements_df.iloc[startrow:startrow+nrows],'mapped_stopped_closest_port',None)
    for imo, startrow, nrows in zip(imo_range_df['IMO'],
                                    imo_range_df['range_start'],
                                    imo_range_df['range_len']) }

CPU times: user 12.7 s, sys: 47.1 ms, total: 12.7 s
Wall time: 12.7 s


In [44]:
IMO_to_digested_port_sequence

{7334204: VesselPortSequence(portstr='aaaaa', portmap={'USOAK': 'a'}, rowpos=array([0, 1, 2, 3, 4])),
 7617890: VesselPortSequence(portstr='aaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbcccccccccccaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbcccccccccccaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbcccccccccccaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbcccccccccccaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbb

In [45]:
[ vp for vp in IMO_to_digested_port_sequence.values() 
  if JOURNEY_BREAKER_LETTER in vp.portstr ]

[]

In [46]:
def remove_consec_dup_chars(str):
    return re.sub(r'(.)\1+', r'\1', str)

In [47]:
def str_has_dup_chars(str):
    return len(str) != len(set(str))

In [48]:
def intermed_port_chars_admissible(str):
    return not str_has_dup_chars(remove_consec_dup_chars(str))

In [49]:
# doesn't work for NaN in list, because NaN != NaN and we haven't made this
# more complicated... see also earlier function np_runlengths that
# returns a similar result in its third value
def remove_consec_dup_elts(portlist):
    return [x for x, prev in zip(portlist, itertools.chain([None], portlist))
            if x != prev]

In [50]:
remove_consec_dup_chars("abcaabbccc")

'abcabc'

In [51]:
remove_consec_dup_elts(list("abcaabbccc"))

['a', 'b', 'c', 'a', 'b', 'c']

In [52]:
def expand_iloc_slice_list(slices):
    if slices:
        return np.r_.__getitem__(tuple(slices))
    else:
        return np.array([], dtype=int)

In [53]:
def get_slice_len(slx):
    if isinstance(slx, slice):
        return len(range(*slx.indices(sys.maxsize)))
    elif isinstance(slx, np.ndarray) and slx.ndim == 1:
        # OK, that's not a slice, but it's accepted by
        # expand_iloc_slice_list so let's allow it
        return len(slx)
    else:
        raise ValueError(f"value type {type(slx).__name__} is not supported")

In [54]:
# works for scalars or series
def days_between_ts(t1, t2):
    tdiff = t2 - t1
    if isinstance(tdiff, pd.Series):
        return tdiff.dt.total_seconds() / (24*3600)
    else:
        try:
            return tdiff.total_seconds() / (24*3600)
        except:
            return np.nan

In [55]:
def add_lead_time_cols(df2, timechunks, jdurs):
    slicelens = list(map(len, timechunks))
    starts = pd.Series(
        np.repeat([tchunk.iat[0] for tchunk in timechunks],
                  slicelens),
        index=df2.index)
    durations = np.repeat(jdurs, slicelens)
    times = pd.Series(
        np.concatenate(timechunks),
        index=df2.index)
    sofar = days_between_ts(starts, times)
    # division by zero here should be impossible unless the timestamps
    # are duplicated, but let's protect
    fracs = np.where(np.isclose(durations, 0), 0., sofar/durations)
    df2['remaining_lead_time'] = durations - sofar
    df2['journey_percent'] = fracs

In [56]:
# main_df must match the earlier construction of IMO_to_digested_port_sequence
# (same index and rows content, but could have new columns)... returns None
# if no matches
def get_vessel_od_subframe(main_df, vessel_imo, imo_range_start,
                           orig_port, dest_port,
                           return_slices_only=False,
                           add_lead_times=False,
                           # it's not a great convention, but if 
                           # return_journey_starts_only is passed as 2,
                           # it means return the start and end of each
                           # journey
                           return_journey_starts_only=False):
    vp = IMO_to_digested_port_sequence.get(vessel_imo)
    if not vp or len(vp.portstr) == 0:
        return None
    p1 = port_to_mapped_port.get(orig_port)
    p2 = port_to_mapped_port.get(dest_port)
    od = orig_port+'-'+dest_port
    if p1 is None or p2 is None or p1 == p2:
        # we can't handle journeys where we have mapped origin and destination
        # to the same port
        return None
    ##### verify with Ed ################
    c1 = vp.portmap.get(p1)
    c2 = vp.portmap.get(p2)
    if c1 is None or c2 is None:
        return None
    # We let the pattern stop with the first occurrence of the destination
    # after the origin.
    pat = re.compile(f"{c1}(?P<intermed>[^{c1}{c2}{JOURNEY_BREAKER_LETTER}]*){c2}")
    matches = list(pat.finditer(vp.portstr))
    if not matches:
        return None
    # The rows that we select will be a contiguous range in the main DF.
    # We depend on having the main DF sorted by both vessel and time,
    # and the caller now tells us where the IMO starts within the
    # rows of the main DF.
    mainslices = []
    odlist = []
    if add_lead_times:
        tcolpos = main_df.columns.get_loc('TimePosition')
        timechunks = []
        jdurs = []
    for m in matches:
        if intermed_port_chars_admissible(m.group('intermed')):
            i1 = imo_range_start+vp.rowpos[m.start()]
            i2 = imo_range_start+vp.rowpos[m.end()-1]  # inclusive
            if i2 == i1:
                # options about returning less than the full journey do not apply...
                # this case should no longer occur because the mapped ports
                # must be distinct
                mainslices.append(slice(i1, i1+1))
                if add_lead_times:
                    jdurs.append(0.)
            elif return_journey_starts_only:
                if return_journey_starts_only == 2:
                    # we can express first and last as an unusual slice, 
                    # and the code that is calling this has been adjusted
                    # to call get_slice_len instead of just doing end-start...
                    # might also have worked to use a numpy array instead of
                    # a slice, because of the way the calling code is using
                    # the slice
                    mainslices.append(slice(i1, i2+1, (i2-i1)))
                    odlist.append(od)
                    # and we will still have both the first and last times
                    # available in tchunk (below)
                else:
                    mainslices.append(slice(i1, i1+1))
                    odlist.append(od)
                    if add_lead_times:
                        # can't use the normal case (below) because we will have
                        # only the first time available in tchunk
                        jdurs.append(days_between_ts(main_df.iat[i1, tcolpos],
                                                     main_df.iat[i2, tcolpos]))
            else:
                mainslices.append(slice(i1, i2+1))
                odlist.append(od)
            if add_lead_times:
                timechunks.append(main_df.iloc[mainslices[-1], tcolpos])
                if len(jdurs) < len(mainslices):
                    # not a special case that was already handled
                    tchunk = timechunks[-1]
                    jdurs.append(days_between_ts(tchunk.iat[0], tchunk.iat[-1]))
    if not mainslices:
        return None
    elif return_slices_only:
        if add_lead_times:
            return mainslices, timechunks, jdurs, odlist
        else:
            return mainslices,odlist
    else:
        if len(mainslices) == 1:
            df2 = main_df.iloc[mainslices[0]].assign(route_ID=1)
        else:
            df2 = (
                main_df.iloc[expand_iloc_slice_list(mainslices)]
                .assign(
                    route_ID=np.repeat(
                        np.arange(1, len(mainslices)+1),
                        list(map(get_slice_len, mainslices)))) )
        if add_lead_times:
            add_lead_time_cols(df2, timechunks, jdurs)
        return df2

In [57]:
# the subframes for different vessels are all combined
def get_all_od_subframes(main_df, orig_arg, dest_arg, 
                         return_journey_starts_only=False,
                         add_lead_times=True,
                         unique_route_IDs=False):
    # allow orig and dest to be passed as parallel arrays/lists
    if type(orig_arg) is str and type(dest_arg) is str:
        od_list = [(orig_arg, dest_arg)]
    else:
        assert type(orig_arg) is not str
        assert type(dest_arg) is not str
        assert len(orig_arg) == len(dest_arg)
        od_list = list(zip(orig_arg, dest_arg))
        # can't just use zip directly, because we want to iterate twice
    if all( port_to_mapped_port.get(orig, orig) == port_to_mapped_port.get(dest, dest)
            for orig, dest in od_list ):
        # all of the journey tracing is by mapped OD, so it's useless
        return None
    slicelist = []
    routeidlist = []
    odlist=[]
    if add_lead_times:
        timechunklist = []
        jdurlist = []
    routebase = 1
    for orig, dest in od_list:
        for vessel_imo, range_start in imo_to_main_range_start.items():
            ret1 = get_vessel_od_subframe(main_df, vessel_imo, range_start, orig, dest,
                                          return_slices_only=True,
                                          return_journey_starts_only=return_journey_starts_only,
                                          add_lead_times=add_lead_times)
            if ret1:
                if add_lead_times:
                    slices, tchunks, jdurs, od = ret1
                else:
                    slices,od = ret1
                slicelist.extend(slices)
                odlist.append(od)
                routeidlist.extend(range(routebase, routebase+len(slices)))
                if unique_route_IDs:
                    routebase += len(slices)
                if add_lead_times:
                    timechunklist.extend(tchunks)
                    jdurlist.extend(jdurs)
    if len(slicelist) == 1:
        df2 = main_df.iloc[slicelist[0]].assign(OD = odlist[0],route_ID=routeidlist[0])
    elif len(slicelist) > 1:
        flattened_odlist =  list(itertools.chain(*odlist))
        df2 = (vessel_movements_df.iloc[expand_iloc_slice_list(slicelist)]
             .assign(OD=np.repeat(
                flattened_odlist,
                list(map(get_slice_len, slicelist))),
                route_ID=np.repeat(
                routeidlist,
                list(map(get_slice_len, slicelist)))) )
    else:
        return None
    if add_lead_times:
        add_lead_time_cols(df2, timechunklist, jdurlist)
    
    if len(od_list) > 1:
        # index values may have been duplicated
        #flattened_odlist =  list(itertools.chain(*odlist))
        return df2.reset_index(drop=True)
    else:
        
        return df2

In [68]:
# the subframes for different vessels are all combined
def write_all_od_subframes(main_df, orig_arg, dest_arg,route_threshold_od = 3 ):
    cwd = os.getcwd()
    if type(orig_arg) is str and type(dest_arg) is str:
        od_list = [(orig_arg, dest_arg)]
    else:
        assert type(orig_arg) is not str
        assert type(dest_arg) is not str
        assert len(orig_arg) == len(dest_arg)
        od_list = list(zip(orig_arg, dest_arg))
        # can't just use zip directly, because we want to iterate twice
    if all( port_to_mapped_port.get(orig, orig) == port_to_mapped_port.get(dest, dest)
            for orig, dest in od_list ):
        # all of the journey tracing is by mapped OD, so it's useless
        return None
    success_odlist=[]
    failed_odlist=[]
    for orig, dest in od_list:
        slicelist = []
        routeidlist = []
        odlist=[]
        timechunklist = []
        jdurlist = []
        routebase = 1
        print("The movement extraction process started for: ",orig+'-'+dest );
        for vessel_imo, range_start in imo_to_main_range_start.items():
            ret1 = get_vessel_od_subframe(main_df, vessel_imo, range_start, orig, dest,
                                          return_slices_only=True,
                                          return_journey_starts_only=False,
                                          add_lead_times=True)
            if ret1:
                slices, tchunks, jdurs, od = ret1
                slicelist.extend(slices)
                odlist.append(od)
                routeidlist.extend(range(routebase, routebase+len(slices)))
                timechunklist.extend(tchunks)
                jdurlist.extend(jdurs)
        
        if slicelist:
            num_slices = len(slicelist)
        else:
            num_slices = 0
        print("The number of routes stitched for this OD are:", num_slices)
        if num_slices > route_threshold_od:
            flattened_odlist =  list(itertools.chain(*odlist))
            od_df = (vessel_movements_df.iloc[expand_iloc_slice_list(slicelist)]
                        .assign(OD=np.repeat(
                        flattened_odlist,
                        list(map(get_slice_len, slicelist))),
                        route_ID=np.repeat(
                        routeidlist,
                        list(map(get_slice_len, slicelist)))))
            add_lead_time_cols(od_df, timechunklist, jdurlist)            
        else:
            od_df = pd.DataFrame()
        if len(od_df) > 1:
            print("The movement extraction sucessfully completed for: ",orig+'-'+dest );
            cleansed_od_df,routeID_stats, portsequence_stats = cleanse_port_sequence(od_df)
            route_rank = cleansed_od_df.groupby(['IMO','route_ID'])['TimePosition'].min().reset_index().sort_values(by = 'TimePosition').reset_index(drop = True)
            route_rank['unique_route_ID'] = route_rank.index + 1
            cleansed_od_df['week'] = cleansed_od_df['TimePosition'].dt.week
            cleansed_od_df = cleansed_od_df.merge(route_rank[['IMO','route_ID','unique_route_ID']], how = 'inner', on = ['IMO','route_ID'])
            cleansed_od_df = cleansed_od_df.merge(routeID_stats[['IMO','route_ID','journey_time']], how = 'inner', on = ['IMO','route_ID'])
            cleansed_od_df['elapsed_time'] = cleansed_od_df['journey_time'] - cleansed_od_df['remaining_lead_time']
            filename = cwd + '/od_extracts/' + orig + dest + '.feather'
            routeID_stats_filename = cwd + '/od_stats/' + 'routeID_' + orig + dest + '.csv'
            portsequence_stats_filename = cwd + '/od_stats/'+ 'portsequence_'+orig + dest + '.csv'
            if cleansed_od_df.unique_route_ID.max() >= route_threshold_od:
                print("The port sequence cleansing generated training file for: ",orig+'-'+dest );
                print("The number of cleansed routes for this OD are: ", cleansed_od_df.unique_route_ID.max())
                cleansed_od_df.to_feather(filename)
                routeID_stats.to_csv(routeID_stats_filename)
                portsequence_stats.to_csv(portsequence_stats_filename)
                success_odlist.extend([orig+'-'+dest])
            else:
                print("The port sequence cleansing resulted in no training file for: ",orig+'-'+dest );
                failed_odlist.extend([orig+'-'+dest])
        else:
            print("The movement extraction resulted in no training file for: ", orig+'-'+dest );
            failed_odlist.extend([orig+'-'+dest])
    # opening the csv file in 'w+' mode
    success_df = pd.DataFrame(success_odlist,columns=['OD'])
    failed_df = pd.DataFrame(failed_odlist,columns=['OD'])
    success_df.to_csv('ods_sucessfully_processed.csv', index = False)
    failed_df.to_csv('ods_insuccessfully_processed.csv', index = False)

In [69]:
def q95(x):
    return x.quantile(0.95)

def get_port_sequence(port_ser):
    runs = np_runlengths(port_ser.fillna(method='ffill'))
    all_ports = runs[2]
    return all_ports

def match_valid_port_sequence(df,od_port_sequence_valid,od_valid_stats,port_sequences_valid):
    invalid_port_sequence = df['port_sequence']
    od_invalid = df['OD']
    journey_time_mean = df['journey_time_mean']
    result = process.extractOne(invalid_port_sequence, od_port_sequence_valid[od_port_sequence_valid.OD == od_invalid]['port_sequence'].tolist(), scorer=fuzz.ratio)
    matched_valid_port_sequence = result[0]
    score = result[1]
    return pd.Series([od_invalid, invalid_port_sequence,matched_valid_port_sequence,score, journey_time_mean], index=['OD','invalid_port_sequence','matched_valid_port_sequence', 'score','journey_time_mean'])

def cleanse_port_sequence(od_df):
    port_sequences = (
        od_df
        .groupby(['OD','IMO', 'route_ID'], group_keys=False)
        ['mapped_stopped_closest_port'].apply(get_port_sequence).reset_index()
        )
    journey_times = (od_df
            .groupby(['OD','IMO', 'route_ID'], group_keys=False).agg(
            journey_time=('remaining_lead_time', 'max'))).reset_index()
    port_sequences.columns = ['OD','IMO','route_ID','port_sequence_list']
    port_sequences['port_sequence'] = port_sequences.apply(lambda row :  '-'.join(row['port_sequence_list']), axis = 1)
    port_sequences['num_intermediate_ports'] =  port_sequences.apply(lambda row :  len(row['port_sequence_list'])-2, axis = 1)
    port_sequences = port_sequences.merge(journey_times, how = 'inner', on =['OD','IMO','route_ID'])
    od_port_sequence = port_sequences[['OD', 'IMO','port_sequence','num_intermediate_ports','journey_time']].groupby(['OD','port_sequence']).agg(
        num_routes = ('IMO', 'count'),
        num_intermediate_ports=('num_intermediate_ports', 'mean'),
        journey_time_max= ('journey_time','max'),
        journey_time_min= ('journey_time','min'),
        journey_time_mean = ('journey_time','mean'),
        journey_time_median = ('journey_time','median'),
        journey_time_95perc = ('journey_time',q95)
        ).reset_index().sort_values(by = ['OD','num_routes','num_intermediate_ports'], ascending = False)
    threshold_port_sequences = 2
    od_port_sequence_valid = od_port_sequence[od_port_sequence.num_routes >= threshold_port_sequences].copy()
    od_port_sequence_invalid = od_port_sequence[od_port_sequence.num_routes < threshold_port_sequences].copy()
    #print('Number of valid port sequences after threshold cut-off are: ',len(od_port_sequence_valid))
    #print('Number of invalid port sequences after threshold cut-off are: ',len(od_port_sequence_invalid))
    port_sequences_valid = port_sequences[port_sequences.port_sequence.isin(od_port_sequence_valid.port_sequence)]
    od_valid_stats = port_sequences_valid[['OD', 'IMO','port_sequence','num_intermediate_ports','journey_time']].groupby(['OD']).agg(
        num_routes = ('OD', 'count'),
        num_intermediate_ports=('num_intermediate_ports', 'mean'),
        journey_time_max= ('journey_time','max'),
        journey_time_min= ('journey_time','min'),
        journey_time_mean = ('journey_time','mean'),
        journey_time_median = ('journey_time','median'),
        journey_time_95perc = ('journey_time',q95)
        ).reset_index().sort_values(by = ['OD','num_routes','num_intermediate_ports'], ascending = False)
    od_valid_stats['journey_time_filter'] = od_valid_stats['journey_time_95perc']
    if ((len(od_port_sequence_invalid) > 0) & (len(od_port_sequence_valid) > 0)):
        matched_df = pd.DataFrame(od_port_sequence_invalid.apply(match_valid_port_sequence ,axis=1,args=[od_port_sequence_valid,od_valid_stats,port_sequences_valid])).sort_values(by = 'score', ascending = False)
        matched_df = matched_df.merge(od_valid_stats[['OD','journey_time_filter']], how = 'inner', on = 'OD')
        matched_df['journey_time_outlier_filter'] = 1.25 * matched_df['journey_time_filter']
        journey_time_filter = (matched_df.journey_time_mean <= matched_df.journey_time_filter)
        good_match_filter = (matched_df.journey_time_mean <= matched_df.journey_time_outlier_filter) &  (matched_df.score >= 85)
        filtered_matched_df = matched_df[(journey_time_filter | good_match_filter)]
        valid_to_invalid = od_port_sequence_invalid[od_port_sequence_invalid.port_sequence.isin(filtered_matched_df.invalid_port_sequence)]
        od_port_sequence_valid = pd.concat([od_port_sequence_valid,valid_to_invalid])
        od_port_sequence_invalid = od_port_sequence_invalid.drop(valid_to_invalid.index)
        #print('Number of valid port sequences after recovering routes with slight variation are: ',len(od_port_sequence_valid));
        #print('Number of invalid port sequences after recovering routes with slight variation are: ',len(od_port_sequence_invalid));
    valid_port_sequences = port_sequences[port_sequences.port_sequence.isin(od_port_sequence_valid.port_sequence)]
    od_df_valid = od_df.merge(valid_port_sequences[['OD','IMO','route_ID','num_intermediate_ports','port_sequence']], how = 'inner', on = ['OD','IMO','route_ID']).reset_index(drop = True)
    return od_df_valid,valid_port_sequences, od_port_sequence_valid

In [70]:
%time write_all_od_subframes(vessel_movements_df,['USSAV','USSAV'],['BEANR','NLRTM'])

The movement extraction process started for:  USSAV-BEANR
The number of routes stitched for this OD are: 185
The movement extraction sucessfully completed for:  USSAV-BEANR
The port sequence cleansing generated training file for:  USSAV-BEANR
The number of cleansed routes for this OD are:  175
The movement extraction process started for:  USSAV-NLRTM
The number of routes stitched for this OD are: 189
The movement extraction sucessfully completed for:  USSAV-NLRTM
The port sequence cleansing generated training file for:  USSAV-NLRTM
The number of cleansed routes for this OD are:  174
CPU times: user 1.32 s, sys: 50.5 ms, total: 1.37 s
Wall time: 1.32 s


In [71]:
%time write_all_od_subframes(vessel_movements_df,['USNYC','USSAV','USORF','USNYC','USSAV','USORF','USNYC','USSAV','USORF'], ['BEANR','BEANR','BEANR','NLRTM','NLRTM','NLRTM','GBSOU','GBSOU','GBSOU'])

The movement extraction process started for:  USNYC-BEANR
The number of routes stitched for this OD are: 179
The movement extraction sucessfully completed for:  USNYC-BEANR
The port sequence cleansing generated training file for:  USNYC-BEANR
The number of cleansed routes for this OD are:  163
The movement extraction process started for:  USSAV-BEANR
The number of routes stitched for this OD are: 185
The movement extraction sucessfully completed for:  USSAV-BEANR
The port sequence cleansing generated training file for:  USSAV-BEANR
The number of cleansed routes for this OD are:  175
The movement extraction process started for:  USORF-BEANR
The number of routes stitched for this OD are: 221
The movement extraction sucessfully completed for:  USORF-BEANR
The port sequence cleansing generated training file for:  USORF-BEANR
The number of cleansed routes for this OD are:  213
The movement extraction process started for:  USNYC-NLRTM
The number of routes stitched for this OD are: 174
The mo

### Running for all ODs in OD file

In [None]:
%time write_all_od_subframes(vessel_movements_df, odlist_df.origin.tolist(),odlist_df.destination.tolist())

The movement extraction process started for:  KRBUK-CNQDG
The number of routes stitched for this OD are: 665
The movement extraction sucessfully completed for:  KRBUK-CNQDG
The port sequence cleansing generated training file for:  KRBUK-CNQDG
The number of cleansed routes for this OD are:  635
The movement extraction process started for:  KRBUK-CNSHG
The number of routes stitched for this OD are: 1409
The movement extraction sucessfully completed for:  KRBUK-CNSHG
The port sequence cleansing generated training file for:  KRBUK-CNSHG
The number of cleansed routes for this OD are:  1385
The movement extraction process started for:  KRBUK-CNSZX
The number of routes stitched for this OD are: 1259
The movement extraction sucessfully completed for:  KRBUK-CNSZX
The port sequence cleansing generated training file for:  KRBUK-CNSZX
The number of cleansed routes for this OD are:  1225
The movement extraction process started for:  KRBUK-SGSIN
The number of routes stitched for this OD are: 380
Th

The movement extraction sucessfully completed for:  CNQDG-IDJKT
The port sequence cleansing generated training file for:  CNQDG-IDJKT
The number of cleansed routes for this OD are:  154
The movement extraction process started for:  CNQDG-USSAV
The number of routes stitched for this OD are: 140
The movement extraction sucessfully completed for:  CNQDG-USSAV
The port sequence cleansing generated training file for:  CNQDG-USSAV
The number of cleansed routes for this OD are:  129
The movement extraction process started for:  CNSHG-CNSZX
The number of routes stitched for this OD are: 2670
The movement extraction sucessfully completed for:  CNSHG-CNSZX
The port sequence cleansing generated training file for:  CNSHG-CNSZX
The number of cleansed routes for this OD are:  2584
The movement extraction process started for:  CNSHG-SGSIN
The number of routes stitched for this OD are: 1457
The movement extraction sucessfully completed for:  CNSHG-SGSIN
The port sequence cleansing generated training f

The number of routes stitched for this OD are: 531
The movement extraction sucessfully completed for:  CNSZX-CNTXG
The port sequence cleansing generated training file for:  CNSZX-CNTXG
The number of cleansed routes for this OD are:  505
The movement extraction process started for:  CNSZX-CNLYG
The number of routes stitched for this OD are: 301
The movement extraction sucessfully completed for:  CNSZX-CNLYG
The port sequence cleansing generated training file for:  CNSZX-CNLYG
The number of cleansed routes for this OD are:  285
The movement extraction process started for:  CNSZX-IDJKT
The number of routes stitched for this OD are: 641
The movement extraction sucessfully completed for:  CNSZX-IDJKT
The port sequence cleansing generated training file for:  CNSZX-IDJKT
The number of cleansed routes for this OD are:  628
The movement extraction process started for:  CNSZX-USSAV
The number of routes stitched for this OD are: 266
The movement extraction sucessfully completed for:  CNSZX-USSAV


The port sequence cleansing generated training file for:  MYOKG-TWKEL
The number of cleansed routes for this OD are:  52
The movement extraction process started for:  MYOKG-KRINC
The number of routes stitched for this OD are: 105
The movement extraction sucessfully completed for:  MYOKG-KRINC
The port sequence cleansing generated training file for:  MYOKG-KRINC
The number of cleansed routes for this OD are:  94
The movement extraction process started for:  MYOKG-CNTXG
The number of routes stitched for this OD are: 208
The movement extraction sucessfully completed for:  MYOKG-CNTXG
The port sequence cleansing generated training file for:  MYOKG-CNTXG
The number of cleansed routes for this OD are:  185
The movement extraction process started for:  MYOKG-CNLYG
The number of routes stitched for this OD are: 34
The movement extraction sucessfully completed for:  MYOKG-CNLYG
The port sequence cleansing generated training file for:  MYOKG-CNLYG
The number of cleansed routes for this OD are:  

The movement extraction sucessfully completed for:  PHMNL-KRBUK
The port sequence cleansing generated training file for:  PHMNL-KRBUK
The number of cleansed routes for this OD are:  207
The movement extraction process started for:  PHMNL-CNQDG
The number of routes stitched for this OD are: 286
The movement extraction sucessfully completed for:  PHMNL-CNQDG
The port sequence cleansing generated training file for:  PHMNL-CNQDG
The number of cleansed routes for this OD are:  277
The movement extraction process started for:  PHMNL-CNSZX
The number of routes stitched for this OD are: 633
The movement extraction sucessfully completed for:  PHMNL-CNSZX
The port sequence cleansing generated training file for:  PHMNL-CNSZX
The number of cleansed routes for this OD are:  586
The movement extraction process started for:  PHMNL-MYOKG
The number of routes stitched for this OD are: 93
The movement extraction sucessfully completed for:  PHMNL-MYOKG
The port sequence cleansing generated training file 

The port sequence cleansing generated training file for:  TWKEL-CNTXG
The number of cleansed routes for this OD are:  144
The movement extraction process started for:  TWKEL-CNQDG
The number of routes stitched for this OD are: 175
The movement extraction sucessfully completed for:  TWKEL-CNQDG
The port sequence cleansing generated training file for:  TWKEL-CNQDG
The number of cleansed routes for this OD are:  169
The movement extraction process started for:  TWKEL-CNLYG
The number of routes stitched for this OD are: 169
The movement extraction sucessfully completed for:  TWKEL-CNLYG
The port sequence cleansing generated training file for:  TWKEL-CNLYG
The number of cleansed routes for this OD are:  167
The movement extraction process started for:  TWKEL-SGSIN
The number of routes stitched for this OD are: 102
The movement extraction sucessfully completed for:  TWKEL-SGSIN
The port sequence cleansing generated training file for:  TWKEL-SGSIN
The number of cleansed routes for this OD are

The movement extraction sucessfully completed for:  CNLYG-CNQDG
The port sequence cleansing generated training file for:  CNLYG-CNQDG
The number of cleansed routes for this OD are:  721
The movement extraction process started for:  CNLYG-USSAV
The number of routes stitched for this OD are: 2
The movement extraction resulted in no training file for:  CNLYG-USSAV
The movement extraction process started for:  IDJKT-SGSIN
The number of routes stitched for this OD are: 1136
The movement extraction sucessfully completed for:  IDJKT-SGSIN
The port sequence cleansing generated training file for:  IDJKT-SGSIN
The number of cleansed routes for this OD are:  1121
The movement extraction process started for:  IDJKT-CNSZX
The number of routes stitched for this OD are: 667
The movement extraction sucessfully completed for:  IDJKT-CNSZX
The port sequence cleansing generated training file for:  IDJKT-CNSZX
The number of cleansed routes for this OD are:  645
The movement extraction process started for:

The port sequence cleansing generated training file for:  USPEF-NLRTM
The number of cleansed routes for this OD are:  59
The movement extraction process started for:  USPEF-DEWVN
The number of routes stitched for this OD are: 2
The movement extraction resulted in no training file for:  USPEF-DEWVN
The movement extraction process started for:  USPEF-GBLON
The number of routes stitched for this OD are: 7
The movement extraction sucessfully completed for:  USPEF-GBLON
The port sequence cleansing generated training file for:  USPEF-GBLON
The number of cleansed routes for this OD are:  7
The movement extraction process started for:  USPEF-USNYC
The number of routes stitched for this OD are: 146
The movement extraction sucessfully completed for:  USPEF-USNYC
The port sequence cleansing generated training file for:  USPEF-USNYC
The number of cleansed routes for this OD are:  140
The movement extraction process started for:  USPEF-USSAV
The number of routes stitched for this OD are: 200
The mo

The movement extraction sucessfully completed for:  USCHS-USORF
The port sequence cleansing generated training file for:  USCHS-USORF
The number of cleansed routes for this OD are:  278
The movement extraction process started for:  USCHS-BEZEE
The number of routes stitched for this OD are: 1
The movement extraction resulted in no training file for:  USCHS-BEZEE
The movement extraction process started for:  BEANR-NLRTM
The number of routes stitched for this OD are: 1335
The movement extraction sucessfully completed for:  BEANR-NLRTM
The port sequence cleansing generated training file for:  BEANR-NLRTM
The number of cleansed routes for this OD are:  1291
The movement extraction process started for:  BEANR-DEWVN
The number of routes stitched for this OD are: 37
The movement extraction sucessfully completed for:  BEANR-DEWVN
The port sequence cleansing generated training file for:  BEANR-DEWVN
The number of cleansed routes for this OD are:  28
The movement extraction process started for:  

The port sequence cleansing generated training file for:  NLRTM-EGPSD
The number of cleansed routes for this OD are:  766
The movement extraction process started for:  NLRTM-LBBEY
The number of routes stitched for this OD are: 36
The movement extraction sucessfully completed for:  NLRTM-LBBEY
The port sequence cleansing generated training file for:  NLRTM-LBBEY
The number of cleansed routes for this OD are:  32
The movement extraction process started for:  NLRTM-ESALG
The number of routes stitched for this OD are: 432
The movement extraction sucessfully completed for:  NLRTM-ESALG
The port sequence cleansing generated training file for:  NLRTM-ESALG
The number of cleansed routes for this OD are:  410
The movement extraction process started for:  NLRTM-GBFXT
The number of routes stitched for this OD are: 458
The movement extraction sucessfully completed for:  NLRTM-GBFXT
The port sequence cleansing generated training file for:  NLRTM-GBFXT
The number of cleansed routes for this OD are: 

The port sequence cleansing generated training file for:  GBLON-BEANR
The number of cleansed routes for this OD are:  109
The movement extraction process started for:  GBLON-DEWVN
The number of routes stitched for this OD are: 1
The movement extraction resulted in no training file for:  GBLON-DEWVN
The movement extraction process started for:  GBLON-NLRTM
The number of routes stitched for this OD are: 125
The movement extraction sucessfully completed for:  GBLON-NLRTM
The port sequence cleansing generated training file for:  GBLON-NLRTM
The number of cleansed routes for this OD are:  109
The movement extraction process started for:  GBLON-MATNG
The number of routes stitched for this OD are: 0
The movement extraction resulted in no training file for:  GBLON-MATNG
The movement extraction process started for:  GBLON-EGPSD
The number of routes stitched for this OD are: 1
The movement extraction resulted in no training file for:  GBLON-EGPSD
The movement extraction process started for:  GBL

The movement extraction process started for:  MATNG-ESALG
The number of routes stitched for this OD are: 364
The movement extraction sucessfully completed for:  MATNG-ESALG
The port sequence cleansing generated training file for:  MATNG-ESALG
The number of cleansed routes for this OD are:  358
The movement extraction process started for:  MATNG-GBFXT
The number of routes stitched for this OD are: 10
The movement extraction sucessfully completed for:  MATNG-GBFXT
The port sequence cleansing generated training file for:  MATNG-GBFXT
The number of cleansed routes for this OD are:  6
The movement extraction process started for:  MATNG-BEANR
The number of routes stitched for this OD are: 30
The movement extraction sucessfully completed for:  MATNG-BEANR
The port sequence cleansing generated training file for:  MATNG-BEANR
The number of cleansed routes for this OD are:  25
The movement extraction process started for:  MATNG-DEHAM
The number of routes stitched for this OD are: 18
The movement

The movement extraction sucessfully completed for:  ESALG-GBFXT
The port sequence cleansing generated training file for:  ESALG-GBFXT
The number of cleansed routes for this OD are:  180
The movement extraction process started for:  ESALG-BEANR
The number of routes stitched for this OD are: 528
The movement extraction sucessfully completed for:  ESALG-BEANR
The port sequence cleansing generated training file for:  ESALG-BEANR
The number of cleansed routes for this OD are:  502
The movement extraction process started for:  ESALG-DEHAM
The number of routes stitched for this OD are: 329
The movement extraction sucessfully completed for:  ESALG-DEHAM
The port sequence cleansing generated training file for:  ESALG-DEHAM
The number of cleansed routes for this OD are:  297
The movement extraction process started for:  ESALG-DEBRV
The number of routes stitched for this OD are: 242
The movement extraction sucessfully completed for:  ESALG-DEBRV
The port sequence cleansing generated training file

The port sequence cleansing generated training file for:  DEBRV-GBFXT
The number of cleansed routes for this OD are:  228
The movement extraction process started for:  DEBRV-FRLEH
The number of routes stitched for this OD are: 319
The movement extraction sucessfully completed for:  DEBRV-FRLEH
The port sequence cleansing generated training file for:  DEBRV-FRLEH
The number of cleansed routes for this OD are:  317
The movement extraction process started for:  DEBRV-USNYC
The number of routes stitched for this OD are: 153
The movement extraction sucessfully completed for:  DEBRV-USNYC
The port sequence cleansing generated training file for:  DEBRV-USNYC
The number of cleansed routes for this OD are:  149
The movement extraction process started for:  DEBRV-USBOS
The number of routes stitched for this OD are: 47
The movement extraction sucessfully completed for:  DEBRV-USBOS
The port sequence cleansing generated training file for:  DEBRV-USBOS
The number of cleansed routes for this OD are:

The port sequence cleansing generated training file for:  BEZEE-FRLEH
The number of cleansed routes for this OD are:  76
The movement extraction process started for:  BEZEE-USNYC
The number of routes stitched for this OD are: 18
The movement extraction sucessfully completed for:  BEZEE-USNYC
The port sequence cleansing generated training file for:  BEZEE-USNYC
The number of cleansed routes for this OD are:  18
The movement extraction process started for:  BEZEE-USBOS
The number of routes stitched for this OD are: 0
The movement extraction resulted in no training file for:  BEZEE-USBOS
The movement extraction process started for:  BEZEE-USORF
The number of routes stitched for this OD are: 0
The movement extraction resulted in no training file for:  BEZEE-USORF
The movement extraction process started for:  BEZEE-USSAV
The number of routes stitched for this OD are: 17
The movement extraction sucessfully completed for:  BEZEE-USSAV
The port sequence cleansing generated training file for:  

The movement extraction sucessfully completed for:  CNSHG-PKKHI
The port sequence cleansing generated training file for:  CNSHG-PKKHI
The number of cleansed routes for this OD are:  176
The movement extraction process started for:  CNSHG-LKCMB
The number of routes stitched for this OD are: 276
The movement extraction sucessfully completed for:  CNSHG-LKCMB
The port sequence cleansing generated training file for:  CNSHG-LKCMB
The number of cleansed routes for this OD are:  243
The movement extraction process started for:  CNSHG-AUSYD
The number of routes stitched for this OD are: 206
The movement extraction sucessfully completed for:  CNSHG-AUSYD
The port sequence cleansing generated training file for:  CNSHG-AUSYD
The number of cleansed routes for this OD are:  197
The movement extraction process started for:  CNSHG-TWKHH
The number of routes stitched for this OD are: 1276
The movement extraction sucessfully completed for:  CNSHG-TWKHH
The port sequence cleansing generated training fil

The movement extraction sucessfully completed for:  MYOKG-AUSYD
The port sequence cleansing generated training file for:  MYOKG-AUSYD
The number of cleansed routes for this OD are:  140
The movement extraction process started for:  MYOKG-TWKHH
The number of routes stitched for this OD are: 647
The movement extraction sucessfully completed for:  MYOKG-TWKHH
The port sequence cleansing generated training file for:  MYOKG-TWKHH
The number of cleansed routes for this OD are:  606
The movement extraction process started for:  MYOKG-CNXMG
The number of routes stitched for this OD are: 340
The movement extraction sucessfully completed for:  MYOKG-CNXMG
The port sequence cleansing generated training file for:  MYOKG-CNXMG
The number of cleansed routes for this OD are:  331
The movement extraction process started for:  MYOKG-CNNGB
The number of routes stitched for this OD are: 545
The movement extraction sucessfully completed for:  MYOKG-CNNGB
The port sequence cleansing generated training file

The number of routes stitched for this OD are: 3267
The movement extraction sucessfully completed for:  TWKHH-CNSZX
The port sequence cleansing generated training file for:  TWKHH-CNSZX
The number of cleansed routes for this OD are:  3196
The movement extraction process started for:  TWKHH-CNNGB
The number of routes stitched for this OD are: 686
The movement extraction sucessfully completed for:  TWKHH-CNNGB
The port sequence cleansing generated training file for:  TWKHH-CNNGB
The number of cleansed routes for this OD are:  642
The movement extraction process started for:  CNXMG-CNSHG
The number of routes stitched for this OD are: 1037
The movement extraction sucessfully completed for:  CNXMG-CNSHG
The port sequence cleansing generated training file for:  CNXMG-CNSHG
The number of cleansed routes for this OD are:  1000
The movement extraction process started for:  CNXMG-AUSYD
The number of routes stitched for this OD are: 81
The movement extraction sucessfully completed for:  CNXMG-AUS

The port sequence cleansing generated training file for:  USCHS-GBSOU
The number of cleansed routes for this OD are:  37
The movement extraction process started for:  MXZLO-SGSIN
The number of routes stitched for this OD are: 56
The movement extraction sucessfully completed for:  MXZLO-SGSIN
The port sequence cleansing generated training file for:  MXZLO-SGSIN
The number of cleansed routes for this OD are:  50
The movement extraction process started for:  PRSJU-USJAX
The number of routes stitched for this OD are: 96
The movement extraction sucessfully completed for:  PRSJU-USJAX
The port sequence cleansing generated training file for:  PRSJU-USJAX
The number of cleansed routes for this OD are:  96
The movement extraction process started for:  USJAX-PRSJU
The number of routes stitched for this OD are: 96
The movement extraction sucessfully completed for:  USJAX-PRSJU
The port sequence cleansing generated training file for:  USJAX-PRSJU
The number of cleansed routes for this OD are:  96


The number of routes stitched for this OD are: 371
The movement extraction sucessfully completed for:  SGSIN-BEANR
The port sequence cleansing generated training file for:  SGSIN-BEANR
The number of cleansed routes for this OD are:  342
The movement extraction process started for:  SGSIN-KRPUS
The number of routes stitched for this OD are: 377
The movement extraction sucessfully completed for:  SGSIN-KRPUS
The port sequence cleansing generated training file for:  SGSIN-KRPUS
The number of cleansed routes for this OD are:  349
The movement extraction process started for:  USLGB-SGSIN
The number of routes stitched for this OD are: 240
The movement extraction sucessfully completed for:  USLGB-SGSIN
The port sequence cleansing generated training file for:  USLGB-SGSIN
The number of cleansed routes for this OD are:  216
The movement extraction process started for:  USLGB-NZAKL
The number of routes stitched for this OD are: 0
The movement extraction resulted in no training file for:  USLGB-N

The movement extraction sucessfully completed for:  BEANR-CLSAI
The port sequence cleansing generated training file for:  BEANR-CLSAI
The number of cleansed routes for this OD are:  84
The movement extraction process started for:  DEBRV-USCHI
The number of routes stitched for this OD are: 0
The movement extraction resulted in no training file for:  DEBRV-USCHI
The movement extraction process started for:  SGSIN-USNYC
The number of routes stitched for this OD are: 316
The movement extraction sucessfully completed for:  SGSIN-USNYC
The port sequence cleansing generated training file for:  SGSIN-USNYC
The number of cleansed routes for this OD are:  290
The movement extraction process started for:  USLAX-HKHKG
The number of routes stitched for this OD are: 1
The movement extraction resulted in no training file for:  USLAX-HKHKG
The movement extraction process started for:  USORF-BRSAO
The number of routes stitched for this OD are: 0
The movement extraction resulted in no training file for:

The port sequence cleansing generated training file for:  ITGOA-AUSYD
The number of cleansed routes for this OD are:  14
The movement extraction process started for:  THLCH-AUMEL
The number of routes stitched for this OD are: 26
The movement extraction sucessfully completed for:  THLCH-AUMEL
The port sequence cleansing generated training file for:  THLCH-AUMEL
The number of cleansed routes for this OD are:  23
The movement extraction process started for:  HKHKG-AUSYD
The number of routes stitched for this OD are: 0
The movement extraction resulted in no training file for:  HKHKG-AUSYD
The movement extraction process started for:  ITGOA-AUBNE
The number of routes stitched for this OD are: 1
The movement extraction resulted in no training file for:  ITGOA-AUBNE
The movement extraction process started for:  CNSHA-AUMEL
The number of routes stitched for this OD are: 199
The movement extraction sucessfully completed for:  CNSHA-AUMEL
The port sequence cleansing generated training file for: 

The movement extraction sucessfully completed for:  CNSHG-SUILK
The port sequence cleansing generated training file for:  CNSHG-SUILK
The number of cleansed routes for this OD are:  190
The movement extraction process started for:  CNSHK-CLSAI
The number of routes stitched for this OD are: 95
The movement extraction sucessfully completed for:  CNSHK-CLSAI
The port sequence cleansing generated training file for:  CNSHK-CLSAI
The number of cleansed routes for this OD are:  88
The movement extraction process started for:  CNYTN-HRRJK
The number of routes stitched for this OD are: 2
The movement extraction resulted in no training file for:  CNYTN-HRRJK
The movement extraction process started for:  CNBJO-FRLEH
The number of routes stitched for this OD are: 0
The movement extraction resulted in no training file for:  CNBJO-FRLEH
The movement extraction process started for:  DEHAM-AUFRE
The number of routes stitched for this OD are: 29
The movement extraction sucessfully completed for:  DEHAM

The movement extraction sucessfully completed for:  CNSHK-AUMEL
The port sequence cleansing generated training file for:  CNSHK-AUMEL
The number of cleansed routes for this OD are:  164
The movement extraction process started for:  CNSHG-DEBRV
The number of routes stitched for this OD are: 3
The movement extraction resulted in no training file for:  CNSHG-DEBRV
The movement extraction process started for:  THLCH-AUSYD
The number of routes stitched for this OD are: 28
The movement extraction sucessfully completed for:  THLCH-AUSYD
The port sequence cleansing generated training file for:  THLCH-AUSYD
The number of cleansed routes for this OD are:  24
The movement extraction process started for:  CNYTN-ITVCE
The number of routes stitched for this OD are: 0
The movement extraction resulted in no training file for:  CNYTN-ITVCE
The movement extraction process started for:  CNZSN-SEGOT
The number of routes stitched for this OD are: 0
The movement extraction resulted in no training file for: 

The port sequence cleansing generated training file for:  DEHAM-AUSYD
The number of cleansed routes for this OD are:  33
The movement extraction process started for:  TWKHH-AUBNE
The number of routes stitched for this OD are: 162
The movement extraction sucessfully completed for:  TWKHH-AUBNE
The port sequence cleansing generated training file for:  TWKHH-AUBNE
The number of cleansed routes for this OD are:  155
The movement extraction process started for:  KRPUS-ITGOA
The number of routes stitched for this OD are: 2
The movement extraction resulted in no training file for:  KRPUS-ITGOA
The movement extraction process started for:  CNWHI-DEHAM
The number of routes stitched for this OD are: 0
The movement extraction resulted in no training file for:  CNWHI-DEHAM
The movement extraction process started for:  ITTRS-AUSYD
The number of routes stitched for this OD are: 0
The movement extraction resulted in no training file for:  ITTRS-AUSYD
The movement extraction process started for:  CNBJ

The number of routes stitched for this OD are: 523
The movement extraction sucessfully completed for:  CNSHG-THLCH
The port sequence cleansing generated training file for:  CNSHG-THLCH
The number of cleansed routes for this OD are:  505
The movement extraction process started for:  CNSHG-VNHPH
The number of routes stitched for this OD are: 365
The movement extraction sucessfully completed for:  CNSHG-VNHPH
The port sequence cleansing generated training file for:  CNSHG-VNHPH
The number of cleansed routes for this OD are:  363
The movement extraction process started for:  CNSHG-SESTO
The number of routes stitched for this OD are: 0
The movement extraction resulted in no training file for:  CNSHG-SESTO
The movement extraction process started for:  CNNGB-CLVAP
The number of routes stitched for this OD are: 0
The movement extraction resulted in no training file for:  CNNGB-CLVAP
The movement extraction process started for:  CNBJO-MYPKG
The number of routes stitched for this OD are: 0
The m

The port sequence cleansing resulted in no training file for:  KRPUS-CLSAI
The movement extraction process started for:  CNYTN-RULED
The number of routes stitched for this OD are: 0
The movement extraction resulted in no training file for:  CNYTN-RULED
The movement extraction process started for:  HKHKG-NLRTM
The number of routes stitched for this OD are: 0
The movement extraction resulted in no training file for:  HKHKG-NLRTM
The movement extraction process started for:  CNZSN-FIHEL
The number of routes stitched for this OD are: 0
The movement extraction resulted in no training file for:  CNZSN-FIHEL
The movement extraction process started for:  HKHKG-PLGDN
The number of routes stitched for this OD are: 0
The movement extraction resulted in no training file for:  HKHKG-PLGDN
The movement extraction process started for:  CNNGB-HRRJK
The number of routes stitched for this OD are: 58
The movement extraction sucessfully completed for:  CNNGB-HRRJK
The port sequence cleansing generated tra

In [63]:
vessel_movements_df.to_feather("vessel_movements_with_hexes_sorted_mapped_ports.feather")