Write a function to load your dataset, preprocess it
(e.g., handle missing values, scale features), and split it into training and testing sets.

In [29]:
import os
import pandas as pandas
import numpy as np

#### I. Load the dataset

In [30]:
def load_data(file_path):
    """
    Read a CSV file into a DataFrame.

    Parameters:
    file_path (str): Location of the CSV file on disk.

    Returns:
    pandas.DataFrame: The parsed contents of the file.

    Raises:
    FileNotFoundError: If nothing exists at file_path.
    """
    path_is_present = os.path.exists(file_path)
    if path_is_present:
        return pandas.read_csv(file_path)

    # Fail early with a readable message instead of letting the parser raise.
    raise FileNotFoundError(f"The file {file_path} does not exist.")

In [None]:
# Resolve the project folders relative to the current working directory:
# the project root is its parent, and the data layers live under <root>/data.
src_dir = os.getcwd()
root_dir = os.path.split(src_dir)[0]
data_dir = os.path.join(root_dir, 'data')
bronze_dir, silver_dir = (
    os.path.join(data_dir, layer) for layer in ('bronze', 'silver')
)
# Raw input file read by this notebook.
data_file = os.path.join(bronze_dir, 'electric_vehicles_spec_2025.csv.csv')

In [32]:
# Read the raw dataset into memory.
electric_vehicles = load_data(file_path=data_file)

In [33]:
# Number of rows loaded from the raw file.
electric_vehicles.shape[0]

478

In [34]:
# Preview the first ten rows.
electric_vehicles.iloc[:10]

Unnamed: 0,brand,model,top_speed_kmh,battery_capacity_kWh,battery_type,number_of_cells,torque_nm,efficiency_wh_per_km,range_km,acceleration_0_100_s,...,towing_capacity_kg,cargo_volume_l,seats,drivetrain,segment,length_mm,width_mm,height_mm,car_body_type,source_url
0,Abarth,500e Convertible,155,37.8,Lithium-ion,192.0,235.0,156,225,7.0,...,0.0,185,4,FWD,B - Compact,3673,1683,1518,Hatchback,https://ev-database.org/car/1904/Abarth-500e-C...
1,Abarth,500e Hatchback,155,37.8,Lithium-ion,192.0,235.0,149,225,7.0,...,0.0,185,4,FWD,B - Compact,3673,1683,1518,Hatchback,https://ev-database.org/car/1903/Abarth-500e-H...
2,Abarth,600e Scorpionissima,200,50.8,Lithium-ion,102.0,345.0,158,280,5.9,...,0.0,360,5,FWD,JB - Compact,4187,1779,1557,SUV,https://ev-database.org/car/3057/Abarth-600e-S...
3,Abarth,600e Turismo,200,50.8,Lithium-ion,102.0,345.0,158,280,6.2,...,0.0,360,5,FWD,JB - Compact,4187,1779,1557,SUV,https://ev-database.org/car/3056/Abarth-600e-T...
4,Aiways,U5,150,60.0,Lithium-ion,,310.0,156,315,7.5,...,,496,5,FWD,JC - Medium,4680,1865,1700,SUV,https://ev-database.org/car/1678/Aiways-U5
5,Aiways,U6,160,60.0,Lithium-ion,,315.0,150,350,7.0,...,,472,5,FWD,JC - Medium,4805,1880,1641,SUV,https://ev-database.org/car/1766/Aiways-U6
6,Alfa,Romeo Junior Elettrica 54 kWh,150,50.8,Lithium-ion,102.0,260.0,128,320,9.0,...,0.0,400,5,FWD,JB - Compact,4173,1781,1532,SUV,https://ev-database.org/car/2184/Alfa-Romeo-Ju...
7,Alfa,Romeo Junior Elettrica 54 kWh Veloce,200,50.8,Lithium-ion,102.0,345.0,164,310,6.0,...,0.0,400,5,FWD,JB - Compact,4173,1781,1505,SUV,https://ev-database.org/car/2185/Alfa-Romeo-Ju...
8,Alpine,A290 Electric 180 hp,160,52.0,Lithium-ion,184.0,285.0,138,310,7.4,...,500.0,326,5,FWD,B - Compact,3997,1823,1512,Hatchback,https://ev-database.org/car/2268/Alpine-A290-E...
9,Alpine,A290 Electric 220 hp,170,52.0,Lithium-ion,184.0,300.0,144,305,6.4,...,500.0,326,5,FWD,B - Compact,3997,1823,1512,Hatchback,https://ev-database.org/car/2269/Alpine-A290-E...


#### II. Check for null values

In [35]:
# Count the missing entries in every column.
missing_mask = electric_vehicles.isna()
missing_mask.sum()

brand                          0
model                          1
top_speed_kmh                  0
battery_capacity_kWh           0
battery_type                   0
number_of_cells              202
torque_nm                      7
efficiency_wh_per_km           0
range_km                       0
acceleration_0_100_s           0
fast_charging_power_kw_dc      1
fast_charge_port               1
towing_capacity_kg            26
cargo_volume_l                 1
seats                          0
drivetrain                     0
segment                        0
length_mm                      0
width_mm                       0
height_mm                      0
car_body_type                  0
source_url                     0
dtype: int64

##### A. Model

Drop the row with a null model.

In [36]:
# Inspect the rows that have no model name before deciding how to handle them.
model_is_missing = electric_vehicles['model'].isna()
electric_vehicles.loc[model_is_missing]

Unnamed: 0,brand,model,top_speed_kmh,battery_capacity_kWh,battery_type,number_of_cells,torque_nm,efficiency_wh_per_km,range_km,acceleration_0_100_s,...,towing_capacity_kg,cargo_volume_l,seats,drivetrain,segment,length_mm,width_mm,height_mm,car_body_type,source_url
477,firefly,,150,41.2,Lithium-ion,112.0,200.0,125,250,8.1,...,0.0,404,5,RWD,B - Compact,4003,1885,1557,Hatchback,https://ev-database.org/car/3178/firefly-firefly


In [37]:
# Discard every row that has no model name.
required_columns = ['model']
electric_vehicles = electric_vehicles.dropna(axis='index', subset=required_columns)

##### B. Number of Cells


A large share of the dataset (202 of 478 rows) has no number_of_cells information, so we drop this column altogether.

In [38]:
# Remove the sparsely populated number_of_cells column.
electric_vehicles = electric_vehicles.drop('number_of_cells', axis='columns')

##### C. Fast Charging

For the missing data in fast_charge_port and fast_charging_power_kw_dc: since there are only a few occurrences and fast charging is a key feature of EVs, the missing values were filled in using additional web scraping and research.

In [39]:
# Show the vehicles that lack a DC fast-charging power figure.
no_dc_power = electric_vehicles['fast_charging_power_kw_dc'].isna()
electric_vehicles.loc[no_dc_power]

Unnamed: 0,brand,model,top_speed_kmh,battery_capacity_kWh,battery_type,torque_nm,efficiency_wh_per_km,range_km,acceleration_0_100_s,fast_charging_power_kw_dc,...,towing_capacity_kg,cargo_volume_l,seats,drivetrain,segment,length_mm,width_mm,height_mm,car_body_type,source_url
356,Renault,5 E-Tech 40kWh 95hp,130,40.0,Lithium-ion,215.0,129,255,12.0,,...,500.0,326,5,FWD,B - Compact,3922,1808,1489,Hatchback,https://ev-database.org/car/2133/Renault-5-E-T...


In [40]:
# Fill the remaining gaps in the two fast-charging columns with fixed values;
# the accompanying note says these came from additional research.
fast_charging_defaults = {
    'fast_charging_power_kw_dc': 80,
    'fast_charge_port': 'CCS',
}
electric_vehicles = electric_vehicles.fillna(fast_charging_defaults)

##### D. Towing Capacity

In [41]:
# Average towing capacity per brand, kept as a one-column DataFrame.
brand_means = electric_vehicles.groupby('brand')[['towing_capacity_kg']].mean()

In [42]:
# A brand with no towing figures at all has a NaN mean; use 0 for those.
brand_means = brand_means.fillna(0)

In [43]:
# Brand -> mean towing capacity lookup used to fill missing values.
brand_means_dict = brand_means.loc[:, 'towing_capacity_kg'].to_dict()

In [44]:
# Replace each missing towing capacity with the mean of the vehicle's brand.
brand_level_fill = electric_vehicles['brand'].map(brand_means_dict)
electric_vehicles['towing_capacity_kg'] = (
    electric_vehicles['towing_capacity_kg'].fillna(brand_level_fill)
)

##### E. Cargo Volume

In [None]:
# List the vehicles that have no cargo volume recorded.
cargo_is_missing = electric_vehicles['cargo_volume_l'].isna()
electric_vehicles.loc[cargo_is_missing]

Unnamed: 0,brand,model,top_speed_kmh,battery_capacity_kWh,battery_type,torque_nm,efficiency_wh_per_km,range_km,acceleration_0_100_s,fast_charging_power_kw_dc,...,towing_capacity_kg,cargo_volume_l,seats,drivetrain,segment,length_mm,width_mm,height_mm,car_body_type,source_url
128,Ford,e-Tourneo Courier,145,43.6,Lithium-ion,290.0,164,200,11.0,80.0,...,750.0,,5,FWD,N - Passenger Van,4337,1876,1817,Small Passenger Van,https://ev-database.org/car/3166/Ford-e-Tourne...


In [49]:
# The only row missing a cargo volume is the Ford e-Tourneo Courier, so limit
# the fill to Ford rows whose value is actually null. Selecting on brand alone
# would overwrite the recorded cargo volume of every Ford in the dataset.
# NOTE(review): 571 presumably comes from a manual lookup - confirm the source.
ford_missing_cargo = (
    (electric_vehicles['brand'] == 'Ford')
    & electric_vehicles['cargo_volume_l'].isna()
)
electric_vehicles.loc[ford_missing_cargo, 'cargo_volume_l'] = 571

In [52]:
# Persist the cleaned dataset without the index column.
# NOTE(review): this writes into bronze_dir while silver_dir is defined but
# never used - confirm which folder the preprocessed file belongs in.
preprocessed_path = os.path.join(bronze_dir, 'electric_vehicles_preprocessed.csv')
electric_vehicles.to_csv(preprocessed_path, index=False)