In [1]:
import pandas as pd
from pathlib import Path

# Data directory: the 'data' folder next to the current working directory's parent
PATH = Path.cwd().parent / 'data'

In [2]:
# Load the 'vessel_check' sheet of the processed vessel summary workbook
vessels = pd.read_excel(PATH / 'processed' / 'vessels_summary.xlsx', sheet_name='vessel_check')

# Keep only the rows flagged True in 'final check in/out' (copy so later
# column assignments do not touch `vessels`)
has_final_check = vessels['final check in/out'].eq(True)
selection = vessels.loc[has_final_check].copy()

# Parse the three date columns into datetimes
date_columns = ['start_date', 'end_date_equasis', 'end_date_checked']
selection[date_columns] = selection[date_columns].apply(pd.to_datetime)

In [None]:
# Load both sheets of the latest KSE workbook and stack them into one frame

kse_workbook = PATH.joinpath('processed', 'kse_shadowfleetv2.xlsx')

# Default (first) sheet, skipping the first row above the header
kse1 = pd.read_excel(kse_workbook, skiprows=1).assign(
    imo=lambda frame: frame['imo'].astype('int64'),
    tanker_type='oil tanker',
)

# Second sheet, skipping four rows; its columns are renamed positionally
# to kse1's labels so the two frames line up for the concat
kse2 = pd.read_excel(kse_workbook, sheet_name='Oli product.All', skiprows=4).assign(
    tanker_type='oil product tanker',
)
kse2.columns = kse1.columns

# Stack, discard rows without an imo, then cast the id/year columns to integers
kse = (
    pd.concat([kse1, kse2])
    .dropna(subset=['imo'])
    .astype({'imo': 'int64', 'build': 'int64'})
)
len(kse)

In [19]:
# Persist the combined KSE frame as CSV (row index omitted)
kse.to_csv(PATH / 'processed' / 'kse_shadowfleetv2.csv', index=False)

## Get company flow

In [None]:
# Get imos from checked vessels
imos = selection.imo.unique()

# Import company data
companies = pd.read_excel(PATH.joinpath('processed', 'vessels_summary.xlsx'), sheet_name='companies')

# Parse start_date BEFORE filtering on it, so the cut-off below is a genuine
# datetime comparison and not a string comparison that depends on how the
# Excel column happened to be typed.
companies['start_date'] = pd.to_datetime(companies['start_date'])

# Filter relevant companies: registered owners of the checked vessels whose
# ownership started after 1 March 2022
is_relevant = (
    companies['imo'].isin(imos)
    & companies['role'].eq('Registered owner')
    & companies['start_date'].gt(pd.Timestamp('2022-03-01'))
)
companies = companies[is_relevant].copy()

# Merge with the checked vessel data (one lookup per imo)
df = pd.merge(companies,
              selection[['imo', 'jurisdiction', 'ubo_jurisdiction', 'end_date_checked']],
              on='imo',
              how='left')

# Create columns with next jurisdiction

# Merge the dataframe with itself: a row's "next" record is the row of the same
# imo whose start_date equals this row's end_date. Right-hand columns that
# clash get the '_next' suffix.
df = pd.merge(df, df[['imo', 'start_date', 'jurisdiction', 'ubo_jurisdiction']],
              left_on=['imo', 'end_date'],
              right_on=['imo', 'start_date'],
              suffixes=('', '_next'),
              how='left')

# Fill the next_jurisdiction and next_ubo_jurisdiction with the country if there is no end_date match
df['next_jurisdiction'] = df['jurisdiction_next'].fillna(df['country'])
df['next_ubo_jurisdiction'] = df['ubo_jurisdiction_next'].fillna(df['country'])

# Drop the helper columns produced by the self-merge
df = df.drop(columns=['start_date_next', 'jurisdiction_next', 'ubo_jurisdiction_next'])

df.head()

In [7]:
# Tally every (ubo_jurisdiction, next_ubo_jurisdiction) pair and export the counts
ubo_changes = df[['ubo_jurisdiction', 'next_ubo_jurisdiction']].value_counts().reset_index()
ubo_changes.to_csv(PATH / 'processed' / 'ubo_jurisdiction_changes.csv', index=False)

## Compare vessels with Lloyd's List data

In [8]:
# Load the processed Lloyd's List shadow-fleet CSV
lloyds = pd.read_csv(PATH / 'processed' / 'lloydslist_shadowfleet.csv')

In [None]:
# How many IMO values in `lloyds` do not occur in `vessels`
len(set(lloyds.IMO) - set(vessels.imo))

In [None]:
# Show the column labels of the combined KSE frame
kse.columns

In [None]:
# KSE rows whose imo is absent from the Lloyd's data, counted per 'build' value
absent_from_lloyds = ~kse['imo'].isin(lloyds['IMO'].unique())
kse.loc[absent_from_lloyds].groupby('build').size()

In [66]:
# Columns of kse1 whose label contains '202'
cols = [col for col in kse1.columns if '202' in col]

# Reshape wide -> long instead of looping with iterrows(): one row per
# (imo, month column) pair, keeping only the cells that are populated.
# The first three matching columns are skipped, exactly as before.
# NOTE(review): why the first three are skipped is not visible here - confirm.
populated = (
    kse1.melt(id_vars='imo', value_vars=cols[3:], var_name='_month', value_name='_flag')
    .dropna(subset=['_flag'])
)

# Each label is '<month><one separator char><4-digit year>'; rebuild it as the
# first day of that month. An explicit format makes `dayfirst` unnecessary.
month_start = pd.to_datetime(
    '01-' + populated['_month'].str[:-5] + '-' + populated['_month'].str[-4:],
    format='%d-%m-%Y',
)

# start_date is already a datetime, so no re-parsing is needed for end_date:
# MonthEnd(0) rolls each date forward to the last day of its month.
uninsured = pd.DataFrame({
    'imo': populated['imo'],
    'start_date': month_start,
    'end_date': month_start + pd.tseries.offsets.MonthEnd(0),
})

# Create date ranges: earliest start and latest end per imo
uninsured = (
    uninsured.groupby('imo')
    .agg({'start_date': 'min', 'end_date': 'max'})
    .reset_index()
)

# And add sanction date (plus total_mln_barrels) to the uninsured dataframe
uninsured = pd.merge(uninsured,
                     kse1[['imo', 'earliest_sanction_date', 'total_mln_barrels']],
                     on='imo',
                     how='left')


In [None]:
# Left-join the `uninsured` date ranges onto each selected vessel's Equasis end date
merged_df = selection[['imo', 'end_date_equasis']].merge(uninsured, how='left', on='imo')

# Keep the rows whose start_date falls after end_date_equasis
starts_after_equasis = merged_df['start_date'].gt(merged_df['end_date_equasis'])
filtered_selection = merged_df.loc[starts_after_equasis]

# Show the result
filtered_selection

In [None]:
# Number of rows that passed the filter
filtered_selection.shape[0]