In [1]:
# imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import itertools
from tqdm.auto import tqdm

from geopy.geocoders import Nominatim
from geopy import distance

import ipywidgets as widgets

import warnings
warnings.filterwarnings('ignore')

np.random.seed(42)

In [2]:
# Reference for approx. road distance calculation: https://pubmed.ncbi.nlm.nih.gov/12609652/#:~:text=There%20was%20a%20strong%20linear,%3D%201.3%20(air%20miles).
def distance_calculator(source: np.ndarray, destination: np.ndarray, mode: str) -> list:
    """Calculate the air (geodesic) distance or an approximate road/rail distance.

    Every source/destination pair is geocoded with Nominatim and the distance
    between the two coordinates is computed with ``geopy.distance.distance``.
    For the ground modes a linear model ``ground_miles = a + b * air_miles`` is
    applied to the air distance and the result is converted back to kilometres.

    Args:
        source (np.ndarray): numpy array containing source locations
        destination (np.ndarray): numpy array containing destination locations,
            paired element-wise with ``source``
        mode (str): mode of transportation (case-insensitive): "air", "road" or "rail"

    Returns:
        (list): calculated distances in km, rounded to 2 decimals, one per pair

    Raises:
        ValueError: if ``mode`` is not supported, if ``source`` and
            ``destination`` differ in length, or if a location cannot be geocoded.
    """
    # (intercept, slope) of the air-miles -> ground-miles linear model per mode.
    ground_models = {"road": (0.94, 1.25), "rail": (0.63, 1.04)}

    # Validate up front: an unknown mode used to fall through every branch and
    # silently produce an empty list.
    mode_key = mode.lower()
    if mode_key != "air" and mode_key not in ground_models:
        raise ValueError(
            f"Unsupported mode {mode!r}; expected one of 'air', 'road', 'rail'"
        )

    loc1 = source.tolist()
    loc2 = destination.tolist()
    if len(loc1) != len(loc2):
        raise ValueError(
            f"source and destination must have the same length "
            f"({len(loc1)} != {len(loc2)})"
        )
    if not loc1:
        return []

    geocoder = Nominatim(user_agent="geobot-splchn")

    # Each distinct place is looked up once; the same names recur across pairs,
    # and every lookup is a request to the geocoding service.
    coord_cache = {}

    def _coordinates(place):
        """Return (latitude, longitude) for ``place``, geocoding it at most once."""
        if place not in coord_cache:
            geo = geocoder.geocode(place)
            if geo is None:
                # geocode() returns None when the service finds no match.
                raise ValueError(f"Could not geocode location: {place!r}")
            coord_cache[place] = (geo.latitude, geo.longitude)
        return coord_cache[place]

    dist_list = []
    for place1, place2 in tqdm(zip(loc1, loc2), total=len(loc1)):
        # .km gives the numeric value directly (no string parsing of "<x> km").
        air_dist_in_km = distance.distance(_coordinates(place1), _coordinates(place2)).km

        if mode_key == "air":
            dist_in_km = air_dist_in_km
        else:
            intercept, slope = ground_models[mode_key]
            air_dist_in_mile = 0.621371 * air_dist_in_km
            ground_dist_in_mile = intercept + slope * air_dist_in_mile
            dist_in_km = 1.60934 * ground_dist_in_mile

        dist_list.append(round(dist_in_km, 2))

    return dist_list

In [3]:
# --- Road: commercial fleet holding workbook ---
# Reference: https://environment.data.gov.uk/dataset/6012d166-1f4d-4cda-af20-6e5b5759a466
trucks = pd.read_excel(
    "../data/Reference_data/Open Data - Commercial Fleet Holding (08Mar16).xlsx"
)

# --- Air: airplanes table ---
# Reference: https://www.kaggle.com/datasets/thedevastator/global-air-transportation-network-mapping-the-wo
aircrafts = pd.read_csv(
    "../data/Reference_data/airplanes.csv"
)

# --- Rail: locomotive tables scraped from Wikipedia ---
# Reference: https://en.wikipedia.org
# pd.read_html returns a list of DataFrames (one per HTML table on the page),
# so both names below are lists that later cells index by position.
rail_diesel = pd.read_html(
    'https://en.wikipedia.org/wiki/List_of_diesel_locomotives_of_India'
)
rail_elec = pd.read_html(
    "https://en.wikipedia.org/wiki/List_of_electric_locomotives_of_India"
)

# --- Supply chain orders ---
# Reference: https://www.kaggle.com/datasets/harshsingh2209/supply-chain-analysis
supply_chain_data = pd.read_csv(
    "../data/Reference_data/supply_chain_data.csv"
)

In [4]:
########## Road ##########
# Vehicle types that are kept; the labels (including the 'PICK- UP' variant)
# must match the values of the VEHICLE_TYPE column exactly.
road_vehicle_types = [
    'CAR', 'CAR DERIVED VAN', 'SMALL VAN', 'VAN', 'PANEL VAN', '4x4',
    'MEDIUM VAN', 'LARGE VAN', 'TIPPER', 'PICK-UP', 'DROPSIDE LORRY',
    'MOTOR CARAVAN', 'VAN/SIDE WINDOWS', 'ELECTRIC',
    'LIGHT 4*4 UTILITY', 'ESTATE', '4WD PICK-UP', 'PICK- UP',
    'TIPPER (HGV)', 'HGV >3.5 <=7.5T', 'HGV FLAT LORRY', 'HGV',
    'HGV DROPSIDE LORRY', 'BEAVER TAIL', 'HGV LOADER',
    'HGV > 18 TONNES',
]

# Distinct, non-missing vehicle types. The index is renumbered before the
# missing value is dropped, exactly as in the step-by-step version.
final_trucks_data = (
    trucks[['VEHICLE_TYPE']]
    .drop_duplicates()
    .reset_index(drop=True)
    .dropna()
)

# Masking with a boolean frame blanks out the non-listed types (NaN);
# dropna() then removes those rows.
trucks_df = final_trucks_data[final_trucks_data.isin(road_vehicle_types)].dropna()

In [5]:
########## Air ##########
# Tag every aircraft row with a single vehicle type so the frame has the same
# VEHICLE_TYPE column as the road and rail frames (this adds the column to
# the loaded `aircrafts` frame itself).
aircrafts["VEHICLE_TYPE"] = "CARGO PLANE"

# Drop the identifying columns, then de-duplicate, renumber and remove
# rows with missing values.
aircrafts_df = (
    aircrafts
    .drop(columns=["index", "Name", "IATA code", "ICAO code"])
    .drop_duplicates()
    .reset_index(drop=True)
    .dropna()
)

In [6]:
########## Rail ##########
# Position of each used table in the list returned by pd.read_html, mapped to
# the vehicle type assigned to its rows. Keeping this in one place avoids
# repeating the indices in the tagging step and again in the concat step.
# NOTE(review): the positions depend on the live page layout -- confirm they
# still point at the intended tables when the notebook is re-run.
diesel_table_types = {
    0: 'MIXED RAIL',
    2: 'GOODS RAIL',
    4: 'MIXED RAIL',
    5: 'MIXED RAIL',
    6: 'MIXED RAIL',
}
elec_table_types = {
    0: 'MIXED RAIL',
    2: 'GOODS RAIL',
    3: 'MIXED RAIL',
    6: 'MIXED RAIL',
    7: 'GOODS RAIL',
}

# Diesel rail types
for table_idx, vehicle_type in diesel_table_types.items():
    rail_diesel[table_idx]['VEHICLE_TYPE'] = vehicle_type

rail_diesel_df = pd.concat(
    [rail_diesel[table_idx] for table_idx in diesel_table_types], axis=0
).reset_index(drop=True)
# .str.lower() leaves missing statuses as NaN (compares False) instead of
# raising AttributeError the way calling .lower() on a float NaN does.
rail_diesel_final = rail_diesel_df[rail_diesel_df['Current Status'].str.lower() == 'in service']

# Electric rail types
for table_idx, vehicle_type in elec_table_types.items():
    rail_elec[table_idx]['VEHICLE_TYPE'] = vehicle_type

rail_elec_df = pd.concat(
    [rail_elec[table_idx] for table_idx in elec_table_types], axis=0
).reset_index(drop=True)
rail_elec_final = rail_elec_df[rail_elec_df['Current Status'].str.lower() == 'in service']

# Concatenate and feature selection
rail_final = pd.concat([rail_diesel_final, rail_elec_final], axis=0).reset_index(drop=True)
rails_df = (
    rail_final[['VEHICLE_TYPE']]
    .drop_duplicates()
    .reset_index(drop=True)
    .dropna()
)

Reference: [Truck CO2 Emissions Per Km Calculator: Find Semi Truck Carbon Footprint (8billiontrees.com)](https://8billiontrees.com/carbon-offsets-credits/carbon-ecological-footprint-calculators/truck-co2-emissions-per-km-calculator/#:~:text=A%20study%20done%20in%202021,vary%20to%20a%20greater%20degree%3A&text=Urban%20delivery%20truck%20CO2,emissions%20%E2%80%93%2057g%20per%20t%2Dkm)

| Mode of Transportation | CO2 Emitted (g/km) |
| :--------------------- | -----------------: |
| Train                  |  65                |
| Truck                  | 105                |
| Air Cargo              | 500                |

In [7]:
# One emission value is drawn per vehicle row.
n_rail = len(rails_df)
n_road = len(trucks_df)
n_air = len(aircrafts_df)

# Each array is a normal draw per row plus ONE uniform scalar added to every
# element. The draws are made in the order rail -> road -> air (normal first,
# then uniform) so the seeded random stream is consumed in the same sequence.
train_CO2 = np.random.normal(loc=65, scale=15, size=n_rail) + np.random.uniform(low=20, high=40)
trucks_CO2 = np.random.normal(loc=105, scale=34, size=n_road) + np.random.uniform(low=50, high=100)
air_cargo_CO2 = np.random.normal(loc=500, scale=10, size=n_air) + np.random.uniform(low=50, high=100)

# Attach the simulated emissions to the matching vehicle frames.
for vehicle_frame, emissions in (
    (trucks_df, trucks_CO2),
    (rails_df, train_CO2),
    (aircrafts_df, air_cargo_CO2),
):
    vehicle_frame['CO2e (g/km)'] = emissions

In [8]:
# Feature selection: keep every order that is not shipped by sea, and only the
# three columns used downstream.
supply_chain_data_2 = supply_chain_data[supply_chain_data["Transportation modes"] != "Sea"]
supply_chain_df = supply_chain_data_2[['Order quantities', 'Location', 'Transportation modes']]

# regex=True is required to strip newline characters found *inside* string
# values; without it DataFrame.replace only matches cells whose entire value is
# '\n', so embedded newlines were left untouched. Non-string columns are not
# affected by a regex replacement.
supply_chain = supply_chain_df.replace('\n', '', regex=True)

In [9]:
# Orders split by transportation mode
order_modes = supply_chain['Transportation modes']
road_orders = supply_chain[order_modes == 'Road']
air_orders = supply_chain[order_modes == 'Air']
rail_orders = supply_chain[order_modes == 'Rail']

# Number of entries for each transportation mode
road_nums = len(road_orders)
air_nums = len(air_orders)
rail_nums = len(rail_orders)

# For every order one vehicle row is drawn with its own .sample() call, and
# the modes are processed in the order road -> rail -> air, so the sequence of
# random draws is unchanged. reset_index() (without drop) keeps the old index
# as an 'index' column on both sides of each side-by-side concat.

# Complete road data
road_list = [trucks_df.sample() for _ in range(road_nums)]
road_vehicle = pd.concat(road_list, axis=0).reset_index()
road_data = pd.concat([road_orders.reset_index(), road_vehicle], axis=1)

# Complete rail data
rail_list = [rails_df.sample() for _ in range(rail_nums)]
rail_vehicle = pd.concat(rail_list, axis=0).reset_index()
rail_data = pd.concat([rail_orders.reset_index(), rail_vehicle], axis=1)

# Complete air data
air_list = [aircrafts_df.sample() for _ in range(air_nums)]
air_vehicle = pd.concat(air_list, axis=0).reset_index()
air_data = pd.concat([air_orders.reset_index(), air_vehicle], axis=1)

In [10]:
# Final concatenation: stack the three per-mode frames, shuffle the rows,
# drop the leftover 'index' columns, capitalize the column names and remove
# rows with missing values.
final_data = (
    pd.concat([road_data, rail_data, air_data], axis=0)
    .sample(frac=1)                  # frac=1 returns every row in random order
    .drop(columns='index')
    .reset_index(drop=True)
    .rename(columns=str.capitalize)  # e.g. 'VEHICLE_TYPE' -> 'Vehicle_type'
    .dropna()
    .reset_index(drop=True)
)

In [11]:
# One row per distinct (mode, vehicle type, emission) combination, ordered by
# transportation mode.
vehicle_master = (
    final_data[['Transportation modes', 'Vehicle_type', 'Co2e (g/km)']]
    .drop_duplicates()
    .sort_values(by='Transportation modes')
    .assign(**{
        'Count': 10000,
        # Assigned by position: the list must have exactly one entry per row
        # (19 here), otherwise pandas raises a length-mismatch error.
        # NOTE(review): which vehicle receives which capacity follows the row
        # order produced above -- confirm the pairing is the intended one.
        'Capacity (metric tons)': [100, 5000, 8000, 24, 18, 12, 10, 36, 2, 7, 11, 3, 9, 17, 15, 8, 13, 4, 2],
    })
)

# Final column order for the exported table.
master_columns = ['Transportation modes', 'Vehicle_type', 'Capacity (metric tons)', 'Co2e (g/km)', 'Count']
vehicle_master_data = vehicle_master[master_columns].reset_index(drop=True)

In [12]:
# Distinct order locations; missing values are dropped so they are never sent
# to the geocoder.
loc_list = supply_chain_data['Location'].dropna().unique().tolist()

# Every ordered (source, destination) pair of distinct locations. The pairs are
# used directly rather than being joined with ' - ' and split again, which
# would break for a location name that itself contains ' - '.
location_pairs = list(itertools.permutations(loc_list, 2))

# reshape(-1, 2) keeps a two-column array even when there are no pairs.
mappings = np.array(location_pairs).astype('str').reshape(-1, 2)

distance_master_data = pd.DataFrame(mappings, columns=['Source', 'Destination'])
distance_master_data['Air Distance (km)'] = distance_calculator(mappings[:, 0], mappings[:, 1], "Air")
distance_master_data['Rail Distance (km)'] = distance_calculator(mappings[:, 0], mappings[:, 1], "Rail")
distance_master_data['Road Distance (km)'] = distance_calculator(mappings[:, 0], mappings[:, 1], "Road")

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

In [13]:
# Write both master tables as CSV files without the index column.
for master_frame, csv_path in (
    (vehicle_master_data, "../data/Master_data/vehicle_master_data.csv"),
    (distance_master_data, "../data/Master_data/distance_master_data.csv"),
):
    master_frame.to_csv(csv_path, index=False)

In [14]:
# Display the vehicle master table (mode, vehicle type, capacity, emissions, count).
vehicle_master_data

Unnamed: 0,Transportation modes,Vehicle_type,Capacity (metric tons),Co2e (g/km),Count
0,Air,CARGO PLANE,100,551.584454,10000
1,Rail,GOODS RAIL,5000,97.565914,10000
2,Rail,MIXED RAIL,8000,107.090591,10000
3,Road,HGV DROPSIDE LORRY,24,156.598994,10000
4,Road,4WD PICK-UP,18,155.605476,10000
5,Road,VAN,12,211.365144,10000
6,Road,PICK-UP,10,209.320194,10000
7,Road,HGV > 18 TONNES,36,155.398594,10000
8,Road,CAR DERIVED VAN,2,187.850299,10000
9,Road,ELECTRIC,7,151.765792,10000


In [15]:
# Display the distance master table (one row per source/destination pair).
distance_master_data

Unnamed: 0,Source,Destination,Air Distance (km),Rail Distance (km),Road Distance (km)
0,Mumbai,Kolkata,1655.79,1723.03,2071.24
1,Mumbai,Delhi,1143.52,1190.28,1430.91
2,Mumbai,Bangalore,842.28,876.98,1054.36
3,Mumbai,Chennai,1031.41,1073.68,1290.77
4,Kolkata,Mumbai,1655.79,1723.03,2071.24
5,Kolkata,Delhi,1308.14,1361.48,1636.69
6,Kolkata,Bangalore,1558.47,1621.81,1949.59
7,Kolkata,Chennai,1355.3,1410.52,1695.63
8,Delhi,Mumbai,1143.52,1190.28,1430.91
9,Delhi,Kolkata,1308.14,1361.48,1636.69
