In [1]:
import pandas as pd
from pathlib import Path

# Root data directory: the `data` folder next to the current working directory's parent.
PATH = Path.cwd().parent / 'data'

### Create vessel table

In [119]:
# Vessel table: one row per IMO number, exported to db/vessels.csv.
cols = ['imo', 'vessel_name', 'tanker_size', 'buildyear']

vessels = (
    pd.read_csv(PATH / 'processed' / 'kse_shadowfleet.csv')
    .astype({'imo': int})                   # cast happens on the full raw column, before any filtering
    .loc[:, cols]
    .drop_duplicates(subset='imo')          # default keep='first': the first row per IMO is retained
    .rename(columns={'imo': 'vessel_imo'})
)
vessels.to_csv(PATH / 'db' / 'vessels.csv', index=False)

In [None]:
# Preview the first five rows of `vessels`.
vessels.head(n=5)

### Create vessel-company relationship table

In [64]:
# Vessel-company relationship table, exported to db/vessel_company.csv.
com = pd.read_csv(PATH / 'processed' / 'company_vessels_final.csv')
cols = ['imo', 'company_imo', 'role', 'start_date', 'end_date']

vessel_com = (
    com[cols]
    .dropna(subset=['company_imo'])         # rows without a company IMO are excluded
    .astype({'company_imo': int})
    .rename(columns={'imo': 'vessel_imo'})
)

vessel_com.to_csv(PATH / 'db' / 'vessel_company.csv', index=False)

In [None]:
# Number of rows per value of the `role` column.
vessel_com['role'].value_counts()

### Create company table

In [67]:
# Company table: one row per company IMO, exported to db/companies.csv.
com = pd.read_csv(PATH / 'processed' / 'company_vessels_final.csv')
cols = ['company_imo', 'company', 'address', 'country']

companies = (
    com[cols]
    .drop_duplicates(subset='company_imo')  # first row per company IMO is retained
    .dropna(subset=['company_imo'])         # then rows lacking a company IMO are removed
    .astype({'company_imo': int})
    .rename(columns={'company': 'company_name', 'country': 'jurisdiction'})
)
companies.to_csv(PATH / 'db' / 'companies.csv', index=False)

### Create port visits table

In [72]:
# Port visits table, exported to db/port_visits.csv.
cols = ['imo', 'port_visit_startAnchorage_id', 'start', 'end', 'port_visit_durationHrs', 'lat', 'lon']

port_visits = (
    pd.read_csv(PATH / 'processed' / 'port_visits_2022.csv')
    .loc[:, cols]
    .rename(columns={
        'imo': 'vessel_imo',
        'port_visit_startAnchorage_id': 'port_id',
        'start': 'start_date',
        'end': 'end_date',
        'port_visit_durationHrs': 'duration_hrs',
    })
)
port_visits.to_csv(PATH / 'db' / 'port_visits.csv', index=False)

In [None]:
# Show the column labels of `port_visits`.
port_visits.columns

### Create loitering table

In [74]:
# Loitering events table, exported to db/loitering.csv.
cols = ['imo', 'start', 'end', 'loitering_totalTimeHours', 'lat', 'lon']

loitering = (
    pd.read_csv(PATH / 'processed' / 'loitering_2022.csv')
    .loc[:, cols]
    .rename(columns={
        'imo': 'vessel_imo',
        'loitering_totalTimeHours': 'duration_hrs',
        'start': 'start_date',
        'end': 'end_date',
    })
)
loitering.to_csv(PATH / 'db' / 'loitering.csv', index=False)

### Create AIS gaps table

In [90]:
# AIS gaps table restricted to gaps whose start year is 2022 or later, exported to db/ais.csv.
cols = ['imo', 'start', 'end', 'gap_durationhours',
        'gap_offposition_lat', 'gap_offposition_lon',
        'gap_onposition_lat', 'gap_onposition_lon']

ais = (
    pd.read_parquet(PATH / 'processed' / 'ais.parquet')
    # Parse `start` and drop any timezone information so the values are naive datetimes.
    .assign(start=lambda df: pd.to_datetime(df['start']).dt.tz_localize(None))
    .loc[lambda df: df['start'].dt.year >= 2022]
    # `end` is parsed only for the rows that survived the year filter.
    .assign(end=lambda df: pd.to_datetime(df['end']).dt.tz_localize(None))
    .loc[:, cols]
    .rename(columns={
        'imo': 'vessel_imo',
        'gap_durationhours': 'duration_hrs',
        'gap_offposition_lat': 'lat_off',
        'gap_offposition_lon': 'lon_off',
        'gap_onposition_lat': 'lat_on',
        'gap_onposition_lon': 'lon_on',
    })
)
ais.to_csv(PATH / 'db' / 'ais.csv', index=False)

### Create insurance table

In [92]:
# Insurance table: processed/uninsured.csv without its `earliest_sanction_date` column,
# exported to db/uninsured.csv.
ins = (
    pd.read_csv(PATH / 'processed' / 'uninsured.csv')
    .drop(columns='earliest_sanction_date')
    .rename(columns={'imo': 'vessel_imo'})
)
ins.to_csv(PATH / 'db' / 'uninsured.csv', index=False)

### Create sanctioned table

In [130]:
# Sanctions table: rows of kse_shadowfleet.csv that have an earliest sanction date,
# exported to db/sanctions.csv.
# NOTE: this rebinds `ins`, the same name the insurance cell uses for a different table.
ins = (
    pd.read_csv(PATH / 'processed' / 'kse_shadowfleet.csv')
    .loc[:, ['imo', 'earliest_sanction_date', 'sanction_country']]
    .dropna(subset=['earliest_sanction_date'])
    # Cast the key to int, as the vessel-table cell does for this same file, so the IMO
    # is serialized identically in both CSVs (e.g. "1234567" rather than "1234567.0"
    # if pandas inferred a float column).
    .astype({'imo': int})
    .rename(columns={'imo': 'vessel_imo'})
)
# format='mixed' infers the format of each value individually; dayfirst=True makes
# ambiguous dates parse as day/month rather than month/day.
ins['earliest_sanction_date'] = pd.to_datetime(
    ins['earliest_sanction_date'], format='mixed', dayfirst=True
)
ins.to_csv(PATH / 'db' / 'sanctions.csv', index=False)

In [None]:
# Preview the first five rows of `ins` (the sanctions table when cells run in order).
ins.head(n=5)

### Create name changes table

In [102]:
# Vessel name history table, exported to db/vessel_names.csv.
cols = ['imo', 'vessel_name', 'start_date', 'end_date']

names = (
    pd.read_csv(PATH / 'processed' / 'owners_names.csv')
    .loc[:, cols]
    .rename(columns={'imo': 'vessel_imo'})
)
names.to_csv(PATH / 'db' / 'vessel_names.csv', index=False)

### Create vessel flag table

In [106]:
# Vessel flag history table, exported to db/vessel_flags.csv.
cols = ['imo', 'flag', 'start_date', 'end_date']

flags = (
    pd.read_csv(PATH / 'processed' / 'owners_flags.csv')
    .loc[:, cols]
    .rename(columns={'imo': 'vessel_imo'})
)
flags.to_csv(PATH / 'db' / 'vessel_flags.csv', index=False)

### Create inspections table

In [112]:
# Vessel inspections table, exported to db/vessel_inspections.csv.
cols = ['vessel_imo', 'country', 'port', 'date', 'inspection_type', 'number_of_deficiencies', 'detention']

inspections = (
    pd.read_csv(PATH / 'processed' / 'owners_inspections.csv')
    # Dropped explicitly even though the column selection below would discard them anyway;
    # this step raises if either column is absent from the input.
    .drop(columns=['duration', 'PSC_organisation'])
    .rename(columns={'imo': 'vessel_imo', 'authority': 'country'})
    .loc[:, cols]
    # Missing deficiency counts are treated as zero so the column can be stored as int.
    .assign(number_of_deficiencies=lambda df: df['number_of_deficiencies'].fillna(0).astype(int))
)
inspections.to_csv(PATH / 'db' / 'vessel_inspections.csv', index=False)

### Create MMSI table

In [117]:
# Vessel MMSI history table (`ssvid` is exported under the name `mmsi`), written to db/vessel_mmsi.csv.
cols = ['imo', 'ssvid', 'start_date', 'end_date']

mmsi = (
    pd.read_csv(PATH / 'processed' / 'shadowfleet_vessels.csv')
    .loc[:, cols]
    .rename(columns={'imo': 'vessel_imo', 'ssvid': 'mmsi'})
)
mmsi.to_csv(PATH / 'db' / 'vessel_mmsi.csv', index=False)