In [None]:
# import packages for data exploration and cleaning
import pandas as pd

In [None]:
# load the raw dataset from its csv file on the local drive
raw_csv_path = './Documents/DataAnalytics/ElectricCarData_Norm.csv'
ev_df = pd.read_csv(raw_csv_path)

# display the loaded dataframe
ev_df

In [None]:
# count the missing entries in every column
ev_df.isna().sum()

In [None]:
# select every row that has a missing value in at least one column
has_null = ev_df.isna().any(axis='columns')
rows_with_nulls = ev_df[has_null]
rows_with_nulls

# observation: these values are null because 'Rapid charging is not possible'

In [None]:
# Replace missing fast-charge speeds with '0 km/h' instead of removing the rows.
# Only the 'FastCharge' column is filled: a frame-wide fillna would also write the
# string '0 km/h' into any other column that happened to contain a null.
# NOTE(review): assumes every null reported by ev_df.isnull().sum() is in
# 'FastCharge' - confirm against that output before relying on this.
ev_df['FastCharge'] = ev_df['FastCharge'].fillna('0 km/h')

# list the rows whose RapidCharge value is 'Rapid charging not possible' so the
# filled FastCharge entries can be inspected
updated_values = ev_df[ev_df['RapidCharge'] == 'Rapid charging not possible']
updated_values

In [None]:
# count the rows that are exact copies of an earlier row
duplicate_mask = ev_df.duplicated()
duplicate_mask.sum()

In [None]:
# add the unit of measurement to the names of the measured columns
unit_column_names = {
    'Accel': 'Accel_Sec',
    'TopSpeed': 'TopSpeed_KmH',
    'Range': 'Range_Km',
    'Efficiency': 'Efficiency_WhKm',
    'FastCharge': 'FastCharge_KmH',
}
ev_df = ev_df.rename(columns=unit_column_names)

# preview the first rows to confirm the headers were updated
ev_df.head()

In [None]:
# Strip the unit text from the values of Accel_Sec, TopSpeed_KmH, Range_Km,
# Efficiency_WhKm, and FastCharge_KmH so that only the number text remains.
# The two lists are parallel: replacements[i] is the unit removed from
# columns_to_replace[i].
columns_to_replace = ['Accel_Sec', 'TopSpeed_KmH', 'Range_Km', 'Efficiency_WhKm', 'FastCharge_KmH']
replacements = [' sec', ' km/h', ' km', ' Wh/km', ' km/h']

# zip() silently stops at the shorter list, so fail loudly if the lists drift apart
assert len(columns_to_replace) == len(replacements), "each column needs exactly one unit string"

# regex=False: the unit strings are literals, and being explicit keeps the result
# independent of the pandas version (the default for `regex` changed in pandas 2.0)
for column, replacement in zip(columns_to_replace, replacements):
    ev_df[column] = ev_df[column].str.replace(replacement, '', regex=False)

In [None]:
# 1/2 - changing RapidCharge values to Yes or No for simplicity

# list each distinct RapidCharge value together with how often it occurs
rapid_charge_values = ev_df['RapidCharge']
rapid_charge_values.value_counts()

In [None]:
# 2/2 - changing RapidCharge values to Yes or No for simplicity
# original label -> simplified label
replacement_mapping = {
    'Rapid charging possible': 'Yes',
    'Rapid charging not possible': 'No',
}

# swap every mapped label in one pass; values not in the mapping are left as they are
rapid_charge_labels = ev_df['RapidCharge'].replace(to_replace=replacement_mapping)
ev_df['RapidCharge'] = rapid_charge_labels

In [None]:
# inspect the data type of every column before converting
dtypes_before_cast = ev_df.dtypes
dtypes_before_cast

In [None]:
# convert the measurement columns to numeric types:
# Accel_Sec becomes float, the other four columns become int
numeric_dtypes = {
    'Accel_Sec': 'float',
    'TopSpeed_KmH': 'int',
    'Range_Km': 'int',
    'Efficiency_WhKm': 'int',
    'FastCharge_KmH': 'int',
}
ev_df = ev_df.astype(numeric_dtypes)

In [None]:
# inspect the column data types again to confirm the conversion
dtypes_after_cast = ev_df.dtypes
dtypes_after_cast

In [None]:
# write the cleaned dataset to a new csv file in the same folder, without the
# index column (ready to be used for data analysis)
clean_csv_path = './Documents/DataAnalytics/ElectricCarData_Clean.csv'
ev_df.to_csv(clean_csv_path, index=False)