In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

In [9]:
# Read the Excel workbook at full_data_path into the working DataFrame `df`.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# not run on another machine without editing it.
full_data_path = 'c:/Users/Abhedaya/CO2Prediction/Datasets/Norbulk_2025.xlsx'
df = pd.read_excel(full_data_path)

In [10]:
# Keep only rows that have a ship type. The explicit .copy() makes the filtered
# frame independent of the one it was sliced from, so the column assignments
# that follow (here and in later cells) do not raise SettingWithCopyWarning.
df = df[df['SHIPTYPEID_CII'].notna()].copy()

# Normalize the ship-type ID to an integer. The conversion goes through float
# first, presumably because the sheet stores the IDs as floats or numeric
# strings (TODO confirm).
# NOTE(review): astype(int) truncates any fractional part silently.
df['SHIPTYPEID_CII'] = df['SHIPTYPEID_CII'].astype(float).astype(int)

# Distinct ship-type IDs, in order of first appearance in df.
ship_types = df['SHIPTYPEID_CII'].unique()

In [11]:
# Fuel columns that are reduced to 0/1 flags below.
fuel_columns = [
    'ME_MDO/MGO',
    'ME_HFO',
    'ME_LFO',
    'AE_Boiler_MDO/MGO',
    'AE_Boiler_HFO',
    'AE_Boiler_LFO',
]

# Binarize in place: a strictly positive value becomes 1, anything else
# (zero, negative, missing) becomes 0.
df[fuel_columns] = (df[fuel_columns] > 0).astype(int)

# Quick visual check of the first 30 encoded rows.
print(df[fuel_columns].head(30))

    ME_MDO/MGO  ME_HFO  ME_LFO  AE_Boiler_MDO/MGO  AE_Boiler_HFO  \
0            0       0       0                  1              0   
1            0       0       0                  1              0   
2            0       0       0                  1              0   
3            1       0       0                  1              0   
4            0       0       0                  1              0   
5            0       0       0                  1              0   
6            0       0       0                  1              0   
7            0       0       0                  1              0   
8            0       0       0                  1              0   
9            0       0       0                  1              0   
10           0       0       0                  1              0   
11           0       0       0                  1              0   
12           0       0       0                  1              0   
13           0       0       0                  

In [12]:
# Directory used as the target for the files this notebook writes.
# NOTE(review): this is an absolute, machine-specific path; adjust it before
# running on another machine.
output_dir = 'c:/Users/Abhedaya/CO2Prediction/Data_2025'
# exist_ok=True keeps this cell re-runnable when the directory already exists.
os.makedirs(output_dir, exist_ok=True)

In [13]:
# Split df by ship type: keep each subset in memory (keyed by ship-type ID)
# and write it to its own workbook inside output_dir.
ship_type_dfs = {}
for ship_type in ship_types:
    # Rows whose ship-type ID equals the current one.
    ship_df = df.loc[df['SHIPTYPEID_CII'].eq(ship_type)]

    # One workbook per ship type, written without the DataFrame index.
    output_file = os.path.join(output_dir, f'ShipType{ship_type}.xlsx')
    ship_df.to_excel(output_file, index=False)

    ship_type_dfs[ship_type] = ship_df
    print(f"Saved Ship Type {ship_type}: {len(ship_df)} records → {output_file}")

Saved Ship Type 6: 591 records → c:/Users/Abhedaya/CO2Prediction/Data_2025\ShipType6.xlsx
Saved Ship Type 4: 1580 records → c:/Users/Abhedaya/CO2Prediction/Data_2025\ShipType4.xlsx
Saved Ship Type 2: 623 records → c:/Users/Abhedaya/CO2Prediction/Data_2025\ShipType2.xlsx
Saved Ship Type 3: 3142 records → c:/Users/Abhedaya/CO2Prediction/Data_2025\ShipType3.xlsx
Saved Ship Type 1: 1862 records → c:/Users/Abhedaya/CO2Prediction/Data_2025\ShipType1.xlsx
Saved Ship Type 9: 109 records → c:/Users/Abhedaya/CO2Prediction/Data_2025\ShipType9.xlsx


In [14]:
# Round-trip check: re-read every exported workbook and confirm that its row
# count matches the number of rows of that ship type currently in df.
print("\n--- Verification after saving ---")
for ship_type in ship_types:
    file_path = os.path.join(output_dir, f'ShipType{ship_type}.xlsx')
    loaded_df = pd.read_excel(file_path)
    expected_count = int((df['SHIPTYPEID_CII'] == ship_type).sum())
    print(f"Ship Type {ship_type}: {len(loaded_df)} records")
    # Fail loudly on a mismatch instead of relying on someone comparing the
    # printed numbers by eye.
    assert len(loaded_df) == expected_count, (
        f"Ship Type {ship_type}: file has {len(loaded_df)} records, "
        f"expected {expected_count} ({file_path})"
    )


--- Verification after saving ---
Ship Type 6: 591 records
Ship Type 4: 1580 records
Ship Type 2: 623 records
Ship Type 3: 3142 records
Ship Type 1: 1862 records
Ship Type 9: 109 records
