## Extract Congestion data via pythonsdk

This notebook will show how to extract VLCC congestion data via VoyageSearchEnriched endpoint, and then aggregate the data to show congestion level in different regions.

### 1. Import Libraries

In [1]:
import vortexasdk as v
import pandas as pd
import numpy as np
from datetime import datetime
from datetime import timedelta
from dateutil.relativedelta import relativedelta
import time
import plotly.express as px
import matplotlib.pyplot as plt

  from pandas.core import (


### 2. Extract Global congestion event via VoyageSearchEnriched (excl bunkering tanker)

In [2]:
start_date = datetime.today() - relativedelta(months = 1)
end_date = datetime.today()

In [3]:
port_df = v.Geographies().search(filter_layer = 'port').to_df(columns = 'all')
port_df.head()

You should consider upgrading via the 'pip install vortexasdk --upgrade' command.


Unnamed: 0,id,name,layer,leaf,parent,exclusion_rule,ref_type,hierarchy,pos,aliases,tags
0,d38a8f7bf8ed422b439ad5270be65b60b964bed9568936...,A Pobra Do Caraminal [ES],[port],True,"[{'name': 'Northwest Europe (NWE)', 'layer': [...","[{'name': 'A Pobra Do Caraminal [ES]', 'layer'...",geography,"[{'label': 'A Pobra Do Caraminal [ES]', 'layer...","[-8.9387512207, 42.6076514714]",[],"{'importProductTags': [], 'exportProductTags':..."
1,00723089c249c618c5a2c720875eb358b1e884b1e55016...,ABOT Al Basrah Oil Terminal [IQ],[port],False,"[{'name': 'Iraq', 'layer': ['country'], 'id': ...","[{'name': 'ABOT Al Basrah Oil Terminal [IQ]', ...",geography,"[{'label': 'ABOT Al Basrah Oil Terminal [IQ]',...","[48.8202900013, 29.6744388004]",[],"{'importProductTags': [], 'exportProductTags':..."
2,4880a5b66e255014dc4be726ed34f7299f282481dbd063...,Aabenraa [DK],[port],False,"[{'name': 'Denmark', 'layer': ['country'], 'id...","[{'name': 'Aabenraa [DK]', 'layer': ['port'], ...",geography,"[{'label': 'Aabenraa [DK]', 'layer': 'port', '...","[9.4387066106, 55.0324657404]",[],"{'importProductTags': ['Chemicals', 'Oil'], 'e..."
3,5df67f654c8ec7f87376f806ca3fbe748ce572e9bdc211...,Aalborg [DK],[port],False,"[{'name': 'UK Cont', 'layer': ['alternative_re...","[{'name': 'Aalborg [DK]', 'layer': ['port'], '...",geography,"[{'label': 'Aalborg [DK]', 'layer': 'port', 'i...","[9.976109057, 57.0631051971]",[],"{'importProductTags': ['Chemicals', 'Oil'], 'e..."
4,ea11a733224f84345b6d12ca8bb219b48e55e52a97debd...,Aappilattoq [GL],[port],False,"[{'name': 'Greenland', 'layer': ['country'], '...","[{'name': 'Aappilattoq [GL]', 'layer': ['port'...",geography,"[{'label': 'Aappilattoq [GL]', 'layer': 'port'...","[-55.6093936026, 72.8825631558]",[],"{'importProductTags': [], 'exportProductTags':..."


In [4]:
## Extract voyages that have congestion status
df = v.VoyagesSearchEnriched().search(
        time_min = start_date,
        time_max = end_date,
        vessels = 'oil_vlcc',
        movement_status = "congestion",
    ).to_list()

### 3. Helper Function

In [6]:
def extract_element_from_list(l1,layer = None):
    if l1 == None:
        return None
    if len(l1)>0:
        if layer == None:
            return l1[0]
        elif layer == layer:
            return l1[0].label
        else:
            pass
    else:
        return None
    
def extract_element_from_list(l1,layer = None):
    if l1 == None:
        return None
    if len(l1)==1:
        if layer == None:
            return l1[0]
        elif layer == l1[0].layer:
            return l1[0].label
    elif len(l1)>1:
        if layer == None:
            return [i.label for i in l1][0]
        else:
            return [i.label for i in l1 if i.layer == layer][0]
    else:
        return None
        
def calculating_congestion_time(list_of_voyage):
    voyage_rows = []
    idx = 0
    for voyage in list_of_voyage:
        cargo_events = pd.DataFrame(i.__dict__ for i in voyage.events)
        cargo_events = cargo_events[cargo_events['event_type']=='cargo']
        cargo_events['end_timestamp'].fillna((datetime.now()+timedelta(days = 1)).strftime("%Y-%m-%dT%H:%M:%S.%fZ"), inplace = True)
        
        if len(voyage.latest_product_details) == 0:
            latest_product_details = 'None'
        else: 
            latest_product_details = [i.label for i in voyage.latest_product_details[0] if i.layer == 'group'][0]
        record_row = {}
        for event in voyage.events:
            if event.activity == 'congestion':
                record_row = {
                  #'voyage_id': voyage.voyage_id,
                  #'event_id': event.event_id,
                  'vessel_id': voyage.vessel.id,
                  'vessel name': voyage.vessel.name,
                  'vessel_imo': voyage.vessel.imo,
                  'vessel_class': voyage.vessel.vessel_class,
                  #'cargo_movement_id': event.cargo_movement_id,
                  'entry_timestamp': event.start_timestamp,
                  'exit_timestamp': event.end_timestamp,
                  'location_id': event.location_id,
                  'location_details':event.waiting_event_target_geography_details,
                  'location': extract_element_from_list(event.waiting_event_target_geography_details, layer = 'port'),
                    'voyage_status':voyage.voyage_status,
                    'latest_products_details':latest_product_details
                }
                if record_row['exit_timestamp'] == None:
                    record_row['exit_timestamp'] = datetime.now().strftime("%Y-%m-%dT%H:%M:%S.%fZ")
                    print(f"{record_row['vessel name']} currently in congestion")
                filtered_cargo_event = cargo_events[(cargo_events['start_timestamp'] < record_row['entry_timestamp'])
                                                   &(cargo_events['end_timestamp'] > record_row['exit_timestamp'])].reset_index(drop = True)
                if 'quantity_barrels' not in filtered_cargo_event.columns:
                    cargo_type = 'None'
                    #cargo_category = 'None'
                    quantity_sum = 0
                elif len(filtered_cargo_event)==0:
                    cargo_type = 'None'
                    cargo_category = 'None'
                    quantity_sum = 0
                else:
                    object_list = filtered_cargo_event.loc[0,'product_details']
                    cargo_type = [obj.label for obj in object_list if obj.layer =='group'][0]
                    #cargo_category = [obj.label for obj in object_list if obj.layer =='category'][0]
                    
                    # To-change unit - quantity_barrels, quantity_tonnes
                    quantity_sum = filtered_cargo_event['quantity_barrels'].sum()
                
                # cargo origin
                if len(filtered_cargo_event)>0:
                    object_list = filtered_cargo_event.loc[0,'cargo_origin_details']
                    origin_port = [obj.label for obj in object_list if obj.layer =='port']
                    origin_country = [obj.label for obj in object_list if obj.layer =='country']

                    # cargo dest
                    object_list = filtered_cargo_event.loc[0,'cargo_destination_details']
                    dest_port = [obj.label for obj in object_list if obj.layer =='port']
                    dest_country = [obj.label for obj in object_list if obj.layer =='country']
                else:
                    origin_port,origin_country,dest_port,dest_country = None,None,None,None
                
                record_row['origin_port'] = extract_element_from_list(origin_port)
                record_row['origin_country'] = extract_element_from_list(origin_country)
                record_row['dest_port'] = extract_element_from_list(dest_port)
                record_row['dest_country'] = extract_element_from_list(dest_country)
                record_row['quantity'] = quantity_sum
                record_row['product'] = cargo_type
                #record_row['category'] = cargo_category
                voyage_rows.append(record_row)
        idx+=1
    voyage_df = pd.DataFrame(voyage_rows)
    return voyage_df

def waiting_time_by_location(df, start_date, end_date, ENTRY_TIME_COL, EXIT_TIME_COL, method, location_col):
    """
    Compute the average/maximum/count waiting time per day, split by location.
    
    Parameters
    ----------
    df : DataFrame
        A DataFrame containing the congestion events
    start_date : str
        The start date of the period
    end_date : str
        The end date of the period 
    ENTRY_TIME_COL : str
        The column name of the entry time
    EXIT_TIME_COL : str
        The column name of the exit time
    method : str
        The method to compute the waiting time. It can be either 'average', 'maximum', or 'count'
    location_col : str
        The column name of the location
    
    Returns
    -------
    result_df: DataFrame
        A DataFrame containing the average/maximum/count waiting time per day for each location
    output_df: DataFrame
        A DataFrame containing the detailed waiting times for each location
    """
    
    grouped = df.groupby(location_col)
    result_list = []
    output_list = []
    
    for location, group in grouped:
        idx = pd.date_range(start=start_date, end=end_date, freq="D")
        ts = pd.DataFrame(idx, columns=['Date'])
        ts[f'{method}_waiting_time'] = 0
        output_df = []
        
        for i in range(len(ts)):
            date = ts['Date'][i]
            waiting_time_list = []
            for j in range(len(group)):
                if (date >= group[ENTRY_TIME_COL].iloc[j]) & (date <= group[EXIT_TIME_COL].iloc[j]):
                    waiting_time = (date - group[ENTRY_TIME_COL].iloc[j]).total_seconds() / (3600 * 24)
                    waiting_time_list.append(waiting_time)
                    output = group.iloc[j].copy()
                    output['current_date'] = date
                    output['current_waiting_time'] = waiting_time
                    output_df.append(output)
            
            if len(waiting_time_list) > 0:
                if method == 'average':
                    ts.at[i, f'{method}_waiting_time'] = np.mean(waiting_time_list)
                elif method == 'maximum':
                    ts.at[i, f'{method}_waiting_time'] = np.max(waiting_time_list)
                elif method == 'count':
                    ts.at[i, f'{method}_waiting_time'] = len(waiting_time_list)
            else:
                ts.at[i, f'{method}_waiting_time'] = 0
        
        ts['Location'] = location
        result_list.append(ts)
        output_list.append(pd.DataFrame(output_df))
    
    result_df = pd.concat(result_list)
    output_df = pd.concat(output_list)
    
    return result_df, output_df

### 4. Data Processing

In [7]:
example = calculating_congestion_time(df)
example['entry_timestamp'] = pd.to_datetime(example['entry_timestamp']).dt.tz_localize(None)
example['exit_timestamp'] = pd.to_datetime(example['exit_timestamp']).dt.tz_localize(None)
example['waiting_time (hrs)'] = (example['exit_timestamp'] - example['entry_timestamp']).dt.total_seconds()/3600
example['waiting_time (days)'] = (example['exit_timestamp'] - example['entry_timestamp']).dt.total_seconds()/(3600*24)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  cargo_events['end_timestamp'].fillna((datetime.now()+timedelta(days = 1)).strftime("%Y-%m-%dT%H:%M:%S.%fZ"), inplace = True)


YUAN KUN YANG currently in congestion
V. PROSPERITY currently in congestion
UNIVERSAL HONOR currently in congestion
TRIKWONG VENTURE currently in congestion
SHADEN currently in congestion
SEAWAYS LIBERTY currently in congestion
SEARACER currently in congestion
SEA MAJESTY currently in congestion
SANAM currently in congestion
PHOENIX JAMNAGAR currently in congestion
OCEANIC FORTUNE currently in congestion
NEW RENOWN currently in congestion
NEW ENTERPRISE currently in congestion
NEW CHAMPION currently in congestion
MARAN ARIADNE currently in congestion
MARAN ARETE currently in congestion
KITSOS currently in congestion
JAHAM currently in congestion
ILMA currently in congestion
HAKKAISAN currently in congestion
FRONT DEE currently in congestion
EVGENIA I currently in congestion
EAGLE TRADER currently in congestion
DHT LION currently in congestion
DESH VAIBHAV currently in congestion
CITY OF TOKYO currently in congestion
BRIGHT PIONEER currently in congestion
ALMI ATLAS currently in congest

### 5. Compute current congestion level (per location) in each day

In [8]:
result_df, output_df = waiting_time_by_location(example, start_date, end_date, 'entry_timestamp', 'exit_timestamp', 'average', 'location')

  ts.at[i, f'{method}_waiting_time'] = np.mean(waiting_time_list)
  ts.at[i, f'{method}_waiting_time'] = np.mean(waiting_time_list)
  ts.at[i, f'{method}_waiting_time'] = np.mean(waiting_time_list)
  ts.at[i, f'{method}_waiting_time'] = np.mean(waiting_time_list)
  ts.at[i, f'{method}_waiting_time'] = np.mean(waiting_time_list)
  ts.at[i, f'{method}_waiting_time'] = np.mean(waiting_time_list)
  ts.at[i, f'{method}_waiting_time'] = np.mean(waiting_time_list)
  ts.at[i, f'{method}_waiting_time'] = np.mean(waiting_time_list)
  ts.at[i, f'{method}_waiting_time'] = np.mean(waiting_time_list)
  ts.at[i, f'{method}_waiting_time'] = np.mean(waiting_time_list)
  ts.at[i, f'{method}_waiting_time'] = np.mean(waiting_time_list)
  ts.at[i, f'{method}_waiting_time'] = np.mean(waiting_time_list)
  ts.at[i, f'{method}_waiting_time'] = np.mean(waiting_time_list)
  ts.at[i, f'{method}_waiting_time'] = np.mean(waiting_time_list)
  ts.at[i, f'{method}_waiting_time'] = np.mean(waiting_time_list)
  ts.at[i,

In [9]:
result_df[result_df['Location'] == 'Zhoushan [CN]'].tail(20)

Unnamed: 0,Date,average_waiting_time,Location
12,2024-08-29 15:34:03.739079,10.228735,Zhoushan [CN]
13,2024-08-30 15:34:03.739079,5.728487,Zhoushan [CN]
14,2024-08-31 15:34:03.739079,0.488064,Zhoushan [CN]
15,2024-09-01 15:34:03.739079,0.912445,Zhoushan [CN]
16,2024-09-02 15:34:03.739079,1.443238,Zhoushan [CN]
17,2024-09-03 15:34:03.739079,1.824025,Zhoushan [CN]
18,2024-09-04 15:34:03.739079,1.800594,Zhoushan [CN]
19,2024-09-05 15:34:03.739079,1.417506,Zhoushan [CN]
20,2024-09-06 15:34:03.739079,2.686392,Zhoushan [CN]
21,2024-09-07 15:34:03.739079,4.073994,Zhoushan [CN]


### 6. [Optional] Data Visualization

### Normalize the output and visualize the congestion level using heatmap

In [10]:
# normalise the waiting time by  in each location per location and date

def normalise_waiting_time(df, method, location_col):
    '''
    Normalise the waiting time by the maximum waiting time in each location per location and date
    
    Parameters
    ----------
    df : DataFrame
        A DataFrame containing the congestion events
    method : str
        The method to compute the waiting time. It can be either 'average', 'maximum', or 'count'
    location_col : str
        The column name of the location
        
    Returns
    -------
    df : DataFrame
        A DataFrame containing the normalised waiting time
    '''
    max_waiting_time = df.groupby([location_col])[f'{method}_waiting_time'].transform('max')
    mean_waiting_time = df.groupby([location_col])[f'{method}_waiting_time'].transform('mean')
    std_waiting_time = df.groupby([location_col])[f'{method}_waiting_time'].transform('std')
    df['mean_waiting_time'] = mean_waiting_time
    df[f'normalised_{method}_waiting_time'] = (df[f'{method}_waiting_time']) / max_waiting_time
    return df
    

In [11]:
normalised_df = normalise_waiting_time(result_df, 'average', 'Location')

In [12]:
normalised_df

Unnamed: 0,Date,average_waiting_time,Location,mean_waiting_time,normalised_average_waiting_time
0,2024-08-17 15:34:03.739079,20.375662,ABOT Al Basrah Oil Terminal [IQ],25.449300,0.313803
1,2024-08-18 15:34:03.739079,20.651131,ABOT Al Basrah Oil Terminal [IQ],25.449300,0.318046
2,2024-08-19 15:34:03.739079,13.986178,ABOT Al Basrah Oil Terminal [IQ],25.449300,0.215399
3,2024-08-20 15:34:03.739079,21.566183,ABOT Al Basrah Oil Terminal [IQ],25.449300,0.332138
4,2024-08-21 15:34:03.739079,22.003637,ABOT Al Basrah Oil Terminal [IQ],25.449300,0.338875
...,...,...,...,...,...
27,2024-09-13 15:34:03.739079,0.000000,Zirku Island [AE],0.159252,0.000000
28,2024-09-14 15:34:03.739079,0.000000,Zirku Island [AE],0.159252,0.000000
29,2024-09-15 15:34:03.739079,0.000000,Zirku Island [AE],0.159252,0.000000
30,2024-09-16 15:34:03.739079,0.479928,Zirku Island [AE],0.159252,0.263653


In [13]:
# Merge with the port_df to get the coordinates of the locations

merged_df = pd.merge(result_df, port_df, left_on='Location', right_on='name', how='left')

# Split pos into latitude and longitude
merged_df[['longitude','latitude']] = pd.DataFrame(merged_df['pos'].tolist(), index=merged_df.index)

# Select desired output columns
cols = ['Date', 'Location', 'average_waiting_time','normalised_average_waiting_time','mean_waiting_time', 'latitude', 'longitude']
merged_df = merged_df[cols]

In [16]:
# plot animated chart with plotly on the map of the world with the normalised average waiting time and date
min_value = merged_df['normalised_average_waiting_time'].min()
max_value = merged_df['normalised_average_waiting_time'].max()
color_scale = [[0, 'green'], [1, 'red']]
fig = px.scatter_geo(merged_df, lat='latitude', lon='longitude', color='normalised_average_waiting_time', animation_frame='Date', projection='equirectangular',
                     color_continuous_scale=color_scale,  # You can choose any color scale you prefer
                    range_color=[min_value, max_value])

# Add title
fig.update_layout(
    title_text = 'Global congestion level by location (VLCC only)'
)
fig.show()