In [1]:
import pandas as pd
from pathlib import Path

# Data directory: the "data" folder that sits beside the current working directory
PATH = Path.cwd().parent / 'data'

In [2]:
# Load the cleaned vessel overview from the 'vessel_check' sheet of the summary workbook
vessels = pd.read_excel(
    PATH / 'processed' / 'vessels_summary.xlsx',
    sheet_name='vessel_check',
)

# Keep only the rows flagged with a final check in/out, as an independent copy
has_final_check = vessels['final check in/out'].eq(True)
selection = vessels.loc[has_final_check].copy()

# Parse the three date columns into datetimes
date_columns = ['start_date', 'end_date_equasis', 'end_date_checked']
selection[date_columns] = selection[date_columns].apply(pd.to_datetime)

In [18]:
# Load the latest KSE workbook; both sheets live in the same file
kse_workbook = PATH / 'processed' / 'kse_shadowfleetv2.xlsx'

# Default sheet (first row skipped), tagged as oil tankers
kse1 = pd.read_excel(kse_workbook, skiprows=1)
kse1['imo'] = kse1['imo'].astype('int64')
kse1['tanker_type'] = 'oil tanker'

# 'Oli product.All' sheet (first four rows skipped), tagged as oil product tankers.
# Its columns are renamed positionally to the labels of the first sheet.
kse2 = pd.read_excel(kse_workbook, sheet_name='Oli product.All', skiprows=4)
kse2['tanker_type'] = 'oil product tanker'
kse2.columns = kse1.columns

# Stack both sheets, drop rows without an IMO number and store imo/build as integers
kse = (
    pd.concat([kse1, kse2])
    .dropna(subset=['imo'])
    .astype({'imo': 'int64', 'build': 'int64'})
)
len(kse)

798

In [19]:
# Save the combined KSE table as CSV, leaving out the index column
kse_csv_path = PATH / 'processed' / 'kse_shadowfleetv2.csv'
kse.to_csv(kse_csv_path, index=False)

## Get company flow

In [6]:
# Get imos from checked vessels
imos = selection.imo.unique()

# Import company data
companies = pd.read_excel(PATH.joinpath('processed', 'vessels_summary.xlsx'), sheet_name='companies')

# Parse both date columns before they are used: the cutoff filter below compares
# start_date as a date, and the self-merge further down joins end_date against
# start_date, so the two columns must share the datetime dtype.
ownership_dates = ['start_date', 'end_date']
companies[ownership_dates] = companies[ownership_dates].apply(pd.to_datetime)

# Filter relevant companies: registered owners of checked vessels whose
# ownership started after the cutoff date
companies = companies[
    companies.imo.isin(imos)
    & (companies.role == 'Registered owner')
    & (companies.start_date > pd.Timestamp('2022-03-01'))
].copy()

# Merge with checked vessel data
df = pd.merge(companies,
              selection[['imo', 'jurisdiction', 'ubo_jurisdiction', 'end_date_checked']],
              on='imo',
              how='left')

# Create columns with next jurisdiction

# Merge the dataframe with itself to find, per vessel, the ownership record that
# starts on the day the current record ends.
# NOTE(review): jurisdiction / ubo_jurisdiction were joined from `selection` by imo
# alone, so a matched successor row (same imo) carries the same values as the
# current row unless `selection` holds several rows per imo. Confirm whether the
# next_* columns were meant to use the successor's `country` instead.
successors = df[['imo', 'start_date', 'jurisdiction', 'ubo_jurisdiction']]
df = pd.merge(df, successors,
              left_on=['imo', 'end_date'],
              right_on=['imo', 'start_date'],
              suffixes=('', '_next'),
              how='left')

# Fill the next_jurisdiction and next_ubo_jurisdiction with the country if there is no end_date match
df['next_jurisdiction'] = df['jurisdiction_next'].fillna(df['country'])
df['next_ubo_jurisdiction'] = df['ubo_jurisdiction_next'].fillna(df['country'])

# Drop the helper columns brought in by the self-merge
df = df.drop(columns=['start_date_next', 'jurisdiction_next', 'ubo_jurisdiction_next'])

df.head()

Unnamed: 0,imo,company,role,start_date,end_date,company_imo,address,country,last_update,jurisdiction,ubo_jurisdiction,end_date_checked,next_jurisdiction,next_ubo_jurisdiction
0,9224283,STRATEGIC ALLIANCE LTD,Registered owner,2023-05-14,2024-04-11,6390100.0,"Trust Company Complex, Ajeltake Road, Ajeltake...",Marshall Islands,2024-10-08,Norway,Bermuda,2022-07-19,Norway,Bermuda
1,9224283,WORTHALIANCE LTD,Registered owner,2024-04-11,NaT,6478345.0,"Trust Company Complex, Ajeltake Road, Ajeltake...",Marshall Islands,2024-10-08,Norway,Bermuda,2022-07-19,Marshall Islands,Marshall Islands
2,9231468,CRODO LTD,Registered owner,2024-04-17,NaT,6471745.0,"Unit 2508A, 25th Floor, Bank of America Tower,...",China,2024-10-08,Liberia,Greece,2023-02-17,China,China
3,9232888,ALVIDO CHARTERING LTD,Registered owner,2023-02-03,NaT,6376415.0,"OST Shipmanagement' Srl Apartment 36, Hincesti...",Moldova,2024-10-01,Liberia,Greece,2023-02-03,Moldova,Moldova
4,9232931,TAIHONG SHIPPING LTD,Registered owner,2023-05-15,NaT,6399616.0,"Room 1602, 16th Floor, Lucky Centre, 165-171, ...",China,2024-10-08,Marshall Islands,Greece,2023-05-15,China,China


In [7]:
# Count every (ubo_jurisdiction, next_ubo_jurisdiction) pair and save the tally as CSV
ubo_flow_counts = df[['ubo_jurisdiction', 'next_ubo_jurisdiction']].value_counts().reset_index()
ubo_flow_counts.to_csv(PATH / 'processed' / 'ubo_jurisdiction_changes.csv', index=False)

## Compare vessels with Lloyd's List data

In [8]:
# Load the processed Lloyd's List shadow-fleet table
lloyds_csv_path = PATH / 'processed' / 'lloydslist_shadowfleet.csv'
lloyds = pd.read_csv(lloyds_csv_path)

In [11]:
# Number of distinct Lloyd's List IMO values that do not occur in the vessels table
len(set(lloyds.IMO) - set(vessels.imo))

325

In [28]:
# Display the column labels of the combined KSE frame
kse.columns

Index(['earliest_sanction_date', 'imo', 'vessel_name', 'size', 'build',
       'flag_01-12-24', 'actual_insurance_01-12-24', 'ship_manager',
       'shadow_fleet', '1-2023', '2-2023', '3-2023', '4-2023', '5-2023',
       '6-2023', '7-2023', '8-2023', '9-2023', '10-2023', '11-2023', '12-2023',
       '1-2024', '2-2024', '3-2024', '4-2024', '5-2024', '6-2024', '7-2024',
       '8-2024', '9-2024', '10-2024', 'total_mln_barrels', 'ship_manager_1',
       'ship_manager_2', 'ship_manager_3', 'ship_manager_4', 'Sanctions time',
       'IMO', 'Vessel's name', 'Tanker's size', 'Build',
       'Actual Flag (25.11.2024)', 'Actual 'IG' P&I insurance (03.12.2024)',
       'Actual Ship manager (03.12.2024)', 'Shadow Fleet?', 'Jan 23', 'Feb 23',
       'Mar 23', 'Apr 23', 'May 23', 'Jun 23', 'Jul 23', 'Aug 23', 'Sep 23',
       'Oct 23', 'Nov 23', 'Dec 23', 'Jan 24', 'Feb 24', 'Mar 24', 'Apr 24',
       'May 24', 'Jun 24', 'Jul 24', 'Aug 24', 'Sep 24', 'Oct 24',
       'Total, mln barrels', 'Ship man

In [32]:
# Row count per build value for the KSE vessels whose IMO is absent from the Lloyd's List data
lloyds_imos = set(lloyds.IMO)
missing_from_lloyds = ~kse.imo.isin(lloyds_imos)
kse.loc[missing_from_lloyds].groupby('build').size()

build
2002     1
2003     6
2004     7
2005    15
2006    13
2007     8
2008     7
2009    13
2010    11
2011     2
2012     2
2013     2
2016     1
2017     2
2018     1
2019     3
2020     2
2022     4
2023     2
dtype: int64

In [66]:
# Monthly columns are labelled '<month>-<year>' (e.g. '1-2023'); select them by the '202' year prefix
cols = [col for col in kse1.columns if '202' in col]

# The first three monthly columns are excluded from the date ranges.
# NOTE(review): given the column order these are presumably 1-2023 .. 3-2023 — confirm why they are skipped.
FIRST_MONTH_OFFSET = 3
month_cols = cols[FIRST_MONTH_OFFSET:]

# One row per (imo, month) whose cell is not null. A single vectorised reshape
# replaces the row-by-row loop and also yields a well-formed (empty) frame when
# no cell is populated, instead of failing on a missing column.
active_months = (
    kse1.melt(id_vars='imo',
              value_vars=month_cols,
              var_name='month',
              value_name='monthly_value')
    .dropna(subset=['monthly_value'])
)

# start_date is the first day of the labelled month, end_date the last day of that month
uninsured = pd.DataFrame({
    'imo': active_months['imo'],
    'start_date': pd.to_datetime(active_months['month'], format='%m-%Y'),
})
uninsured['end_date'] = uninsured['start_date'] + pd.tseries.offsets.MonthEnd(0)

# Create date ranges: earliest start and latest end per vessel
uninsured = uninsured.groupby('imo').agg({'start_date': 'min',
                                          'end_date': 'max'}).reset_index()

# And add sanction date and total barrels to the uninsured dataframe
uninsured = pd.merge(uninsured,
                     kse1[['imo', 'earliest_sanction_date', 'total_mln_barrels']],
                     on='imo',
                     how='left')


In [None]:
# Attach the per-vessel date range from `uninsured` to every checked vessel (left join on imo)
merged_df = selection[['imo', 'end_date_equasis']].merge(uninsured, on='imo', how='left')

# Keep the rows whose start_date lies after end_date_equasis;
# rows with a missing date on either side never satisfy the comparison
starts_after_equasis_end = merged_df['start_date'].gt(merged_df['end_date_equasis'])
filtered_selection = merged_df.loc[starts_after_equasis_end]

# Display the filtered dataframe
filtered_selection

In [None]:
# Number of rows that passed the filter
filtered_selection.shape[0]