In [256]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
import warnings
warnings.filterwarnings("ignore")

In [155]:
# view the data
df = pd.read_csv('ElectricCarData_Clean.csv')
df.head()

Unnamed: 0,Brand,Model,AccelSec,TopSpeed_KmH,Range_Km,Efficiency_WhKm,FastCharge_KmH,RapidCharge,PowerTrain,PlugType,BodyStyle,Segment,Seats,PriceEuro
0,Tesla,Model 3 Long Range Dual Motor,4.6,233,450,161,940,Yes,AWD,Type 2 CCS,Sedan,D,5,55480
1,Volkswagen,ID.3 Pure,10.0,160,270,167,250,Yes,RWD,Type 2 CCS,Hatchback,C,5,30000
2,Polestar,2,4.7,210,400,181,620,Yes,AWD,Type 2 CCS,Liftback,D,5,56440
3,BMW,iX3,6.8,180,360,206,560,Yes,RWD,Type 2 CCS,SUV,D,5,68040
4,Honda,e,9.5,145,170,168,190,Yes,RWD,Type 2 CCS,Hatchback,B,4,32997


In [156]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 103 entries, 0 to 102
Data columns (total 14 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Brand            103 non-null    object 
 1   Model            103 non-null    object 
 2   AccelSec         103 non-null    float64
 3   TopSpeed_KmH     103 non-null    int64  
 4   Range_Km         103 non-null    int64  
 5   Efficiency_WhKm  103 non-null    int64  
 6   FastCharge_KmH   103 non-null    object 
 7   RapidCharge      103 non-null    object 
 8   PowerTrain       103 non-null    object 
 9   PlugType         103 non-null    object 
 10  BodyStyle        103 non-null    object 
 11  Segment          103 non-null    object 
 12  Seats            103 non-null    int64  
 13  PriceEuro        103 non-null    int64  
dtypes: float64(1), int64(5), object(8)
memory usage: 11.4+ KB


In [157]:
df.describe()

Unnamed: 0,AccelSec,TopSpeed_KmH,Range_Km,Efficiency_WhKm,Seats,PriceEuro
count,103.0,103.0,103.0,103.0,103.0,103.0
mean,7.396117,179.194175,338.786408,189.165049,4.883495,55811.563107
std,3.01743,43.57303,126.014444,29.566839,0.795834,34134.66528
min,2.1,123.0,95.0,104.0,2.0,20129.0
25%,5.1,150.0,250.0,168.0,5.0,34429.5
50%,7.3,160.0,340.0,180.0,5.0,45000.0
75%,9.0,200.0,400.0,203.0,5.0,65000.0
max,22.4,410.0,970.0,273.0,7.0,215000.0


In [158]:
# Viewing the different values of items present in the categorical columns
for cols in df.select_dtypes('object'):
  print(f"{cols} has unique values = {df[cols].unique()}")
  print("\n")
  # print(cols)

Brand has unique values = ['Tesla ' 'Volkswagen ' 'Polestar ' 'BMW ' 'Honda ' 'Lucid ' 'Peugeot '
 'Audi ' 'Mercedes ' 'Nissan ' 'Hyundai ' 'Porsche ' 'MG ' 'Mini ' 'Opel '
 'Skoda ' 'Volvo ' 'Kia ' 'Renault ' 'Mazda ' 'Lexus ' 'CUPRA ' 'SEAT '
 'Lightyear ' 'Aiways ' 'DS ' 'Citroen ' 'Jaguar ' 'Ford ' 'Byton '
 'Sono ' 'Smart ' 'Fiat ']


Model has unique values = ['Model 3 Long Range Dual Motor' 'ID.3 Pure' '2' 'iX3 ' 'e ' 'Air '
 'e-Golf ' 'e-208 ' 'Model 3 Standard Range Plus' 'Q4 e-tron '
 'EQC 400 4MATIC' 'Leaf ' 'Kona Electric 64 kWh' 'i4 ' 'IONIQ Electric'
 'ID.3 Pro S' 'Taycan Turbo S' 'e-Up! ' 'ZS EV' 'Cooper SE ' 'Corsa-e '
 'Model Y Long Range Dual Motor' 'Enyaq iV 50' 'e-tron GT '
 'Model 3 Long Range Performance' 'ID.4 ' 'ID.3 Pro'
 'XC40 P8 AWD Recharge' 'i3 120 Ah' 'e-2008 SUV ' 'e-tron 50 quattro'
 'e-Niro 64 kWh' 'Zoe ZE50 R110' 'Cybertruck Tri Motor' 'MX-30 ' 'Leaf e+'
 'UX 300e' 'el-Born ' 'Zoe ZE50 R135' 'EQA ' 'Model S Long Range'
 'Kona Electric 39 kWh' 'e-tron S

In [159]:
# Stripping of the extra trailing space from the Brand category
df.Brand = df.Brand.str.strip()

In [160]:
Segment_map = df.groupby('Segment').agg({'PriceEuro':'count'}).reset_index()

In [161]:
BodyStyle_map = (df.groupby('BodyStyle').agg({'PriceEuro':'mean'})//1000).reset_index()

In [162]:
Brand_map = (df.groupby('Brand').agg({'PriceEuro':'mean'})//100).reset_index()

In [163]:
df.PlugType = df.PlugType.map({'Type 1 CHAdeMO':1, 'Type 2 CHAdeMO':2, 'Type 2':3, 'Type 2 CCS':4})

In [164]:
# Since FastCharge_KmH has anonymous value - replacing that with the mean and checking the other values
df[df.FastCharge_KmH=="-"]

Unnamed: 0,Brand,Model,AccelSec,TopSpeed_KmH,Range_Km,Efficiency_WhKm,FastCharge_KmH,RapidCharge,PowerTrain,PlugType,BodyStyle,Segment,Seats,PriceEuro
57,Renault,Twingo ZE,12.6,135,130,164,-,No,RWD,3,Hatchback,A,4,24790
68,Renault,Kangoo Maxi ZE 33,22.4,130,160,194,-,No,FWD,3,SPV,N,5,38000
77,Smart,EQ forfour,12.7,130,95,176,-,No,RWD,3,Hatchback,A,4,22030
82,Smart,EQ fortwo coupe,11.6,130,100,167,-,No,RWD,3,Hatchback,A,2,21387
91,Smart,EQ fortwo cabrio,11.9,130,95,176,-,No,RWD,3,Cabrio,A,2,24565


There are two Brands that have the anomaly. 'Renault' and "Smart" Checking for the Renault Brand

In [165]:
df[df.Brand.isin(['Renault'])]

Unnamed: 0,Brand,Model,AccelSec,TopSpeed_KmH,Range_Km,Efficiency_WhKm,FastCharge_KmH,RapidCharge,PowerTrain,PlugType,BodyStyle,Segment,Seats,PriceEuro
32,Renault,Zoe ZE50 R110,11.4,135,315,165,230,Yes,FWD,4,Hatchback,B,5,31184
38,Renault,Zoe ZE50 R135,9.5,140,310,168,230,Yes,FWD,4,Hatchback,B,5,33133
57,Renault,Twingo ZE,12.6,135,130,164,-,No,RWD,3,Hatchback,A,4,24790
60,Renault,Zoe ZE40 R110,11.4,135,255,161,230,Yes,FWD,4,Hatchback,B,5,29234
68,Renault,Kangoo Maxi ZE 33,22.4,130,160,194,-,No,FWD,3,SPV,N,5,38000


In [166]:
# Get the row index of the Brand = Renault
renault_idx = df[(df.Brand.isin(['Renault']))].index

In [167]:
# Replacing the FastCharge_KmH with the mode
df.loc[renault_idx,'FastCharge_KmH'] = df[df.Brand.isin(['Renault'])].FastCharge_KmH.value_counts().index[0]

In [168]:
# Assigning the Smart brand vehicle the mode value
smart_idx = df[df.Brand.isin(['Smart'])].index
# Replacing the FastCharge_KmH with the mode of all the values
df.loc[smart_idx, 'FastCharge_KmH'] = df.FastCharge_KmH.value_counts().index[0]

In [169]:
# Converting the FastCharge_KmH into int type
df.FastCharge_KmH = df.FastCharge_KmH.astype('int64')

In [170]:
# Converting Rapid charge
df.RapidCharge = df.RapidCharge.map({'Yes':1, 'No':0})

In [171]:
# Converting PowerTrain ['AWD' 'RWD' 'FWD']
df.PowerTrain = df.PowerTrain.map({'AWD':3, 'RWD': 1, 'FWD':2})

In [172]:
# Droping the model name for now
df = df.drop(columns=['Model'], axis=1)

In [173]:
df.columns

Index(['Brand', 'AccelSec', 'TopSpeed_KmH', 'Range_Km', 'Efficiency_WhKm',
       'FastCharge_KmH', 'RapidCharge', 'PowerTrain', 'PlugType', 'BodyStyle',
       'Segment', 'Seats', 'PriceEuro'],
      dtype='object')

In [175]:
df = pd.merge(df,Brand_map, left_on="Brand", right_on="Brand", suffixes=(None,'_Brand'))

In [176]:
df = pd.merge(df,BodyStyle_map, left_on="BodyStyle", right_on="BodyStyle", suffixes=(None,'_BodyStyle'))

In [177]:
df = pd.merge(df,Segment_map, left_on="Segment", right_on="Segment", suffixes=(None,'_Segment'))

In [179]:
# Rename the columns: 'Brand', 'BodyStyle', 'Segment'
df = df.drop(columns=['Brand', 'BodyStyle', 'Segment'], axis=1)
df = df.rename(columns={'PriceEuro_Brand':'Brand', 'PriceEuro_BodyStyle':'BodyStyle', 'PriceEuro_Segment':'Segment'})

In [184]:
# Get the features
X = df.drop(columns=['PriceEuro'], axis=1)
y = df['PriceEuro']

In [185]:
X.shape, y.shape

((103, 12), (103,))

In [186]:
# Split the data into train test
X_train, X_test, y_train, y_test = train_test_split(X,y, train_size=0.8)

In [189]:
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((82, 12), (82,), (21, 12), (21,))

In [191]:
# Instantiating the linear regressor and fitting the data
linreg = LinearRegression()
linreg.fit(X_train, y_train)
y_pred = linreg.predict(X_test)

In [194]:
round(r2_score(y_test, y_pred),2)

0.56

In [195]:
round(mean_squared_error(y_test, y_pred),2)

534537754.17

In [207]:
# Fitting a lasso regressor
lasso_reg = Lasso(alpha=0.1)
lasso_reg.fit(X_train, y_train)

In [208]:
y_pred = lasso_reg.predict(X_test)
round(r2_score(y_test, y_pred),2)

0.56

In [212]:
# Fitting Ridge regressor
ridge_reg = Ridge(alpha=0.1)
ridge_reg.fit(X_train, y_train)

In [213]:
y_pred = ridge_reg.predict(X_test)
round(r2_score(y_test, y_pred),2)

0.56

In [214]:
# Since even after applying the Lasso. and Ridge regression there is no change in the r2_scores
# We can conclude that the best model with the normal hyperparameters will be 0.56

In [215]:
df.head()

Unnamed: 0,AccelSec,TopSpeed_KmH,Range_Km,Efficiency_WhKm,FastCharge_KmH,RapidCharge,PowerTrain,PlugType,Seats,PriceEuro,Brand,BodyStyle,Segment
0,4.6,233,450,161,940,1,3,4,5,55480,802.0,99.0,15
1,5.6,225,310,153,650,1,1,4,5,46380,802.0,99.0,15
2,3.4,261,435,167,910,1,3,4,5,61480,802.0,99.0,15
3,4.0,200,450,178,650,1,1,4,5,65000,531.0,99.0,15
4,5.1,217,425,171,930,1,3,4,7,58620,802.0,53.0,15


In [216]:
X.head()

Unnamed: 0,AccelSec,TopSpeed_KmH,Range_Km,Efficiency_WhKm,FastCharge_KmH,RapidCharge,PowerTrain,PlugType,Seats,Brand,BodyStyle,Segment
0,4.6,233,450,161,940,1,3,4,5,802.0,99.0,15
1,5.6,225,310,153,650,1,1,4,5,802.0,99.0,15
2,3.4,261,435,167,910,1,3,4,5,802.0,99.0,15
3,4.0,200,450,178,650,1,1,4,5,531.0,99.0,15
4,5.1,217,425,171,930,1,3,4,7,802.0,53.0,15


In [233]:
BodyStyle_map[BodyStyle_map.BodyStyle.isin(['Sedan'])].PriceEuro.values[0]

99.0

In [253]:
# Function for encoding of the values for model building
def encode_vals(AccelSec, TopSpeed_KmH, Range_Km, Efficiency_WhKm, FastCharge_KmH, RapidCharge, PowerTrain, PlugType, Seats, Brand, BodyStyle, Segment):
  RapidCharge_dict = {'Yes':1, 'No':2}
  RapidCharge = RapidCharge_dict[RapidCharge]
  PowerTrain_dict = {'AWD':3, 'RWD': 1, 'FWD':2}
  PowerTrain = PowerTrain_dict[PowerTrain]
  PlugType_dict = {'Type 1 CHAdeMO':1, 'Type 2 CHAdeMO':2, 'Type 2':3, 'Type 2 CCS':4}
  PlugType = PlugType_dict[PlugType]
  Brand = Brand_map[Brand_map.Brand.isin([Brand])].PriceEuro.values[0]
  BodyStyle = BodyStyle_map[BodyStyle_map.BodyStyle.isin([BodyStyle])].PriceEuro.values[0]
  Segment = Segment_map[Segment_map.Segment.isin([Segment])].PriceEuro.values[0]
  input_data = [[AccelSec, TopSpeed_KmH, Range_Km, Efficiency_WhKm, FastCharge_KmH, RapidCharge, PowerTrain, PlugType, Seats, Brand, BodyStyle, Segment]]
  return input_data


In [235]:
# Prompt to the User
print("The input to the model should be provided in the specified manner")

The input to the model should be provided in the specified manner


In [239]:
AccelSec = float(input("Enter the Acceleration Second : "))
TopSpeed_KmH = int(input("Enter the TopSpeed_KmH : "))
Range_Km = int(input("Enter the Range in Km : "))
Efficiency_WhKm = int(input("Enter the Efficiency in WhKm : "))
FastCharge_KmH = int(input("Enter the FastCharge in Km per H : "))
RapidCharge = input("Enter the RapidCharge: Yes or No : ")
PowerTrain = input("Enter the PowerTrain for the vehicle: 'AWD' or 'RWD' or 'FWD' : ")
PlugType = input("Enter the PlugType for the vehicle: 'Type 2 CCS' or 'Type 2 CHAdeMO' or 'Type 2' or 'Type 1 CHAdeMO' : ")
Seats = int(input("Enter the number of seats : "))
Brand = input("""Enter the Brand for the vehicle: 'Aiways', 'Audi', 'BMW', 'Byton', 'CUPRA', 'Citroen', 'DS', 'Fiat',
       'Ford', 'Honda', 'Hyundai', 'Jaguar', 'Kia', 'Lexus', 'Lightyear',
       'Lucid', 'MG', 'Mazda', 'Mercedes', 'Mini', 'Nissan', 'Opel',
       'Peugeot', 'Polestar', 'Porsche', 'Renault', 'SEAT', 'Skoda',
       'Smart', 'Sono', 'Tesla', 'Volkswagen', 'Volvo' : """)
BodyStyle = input("""Enter the BodyStyle for the vehicle: 'Cabrio', 'Hatchback', 'Liftback', 'MPV', 'Pickup', 'SPV', 'SUV',
       'Sedan', 'Station' : """)
Segment = input("""Enter the Segment for the vehicle: 'A', 'B', 'C', 'D', 'E', 'F', 'N', 'S' : """)

Enter the Acceleration Second4.5
Enter the TopSpeed_KmH300
Enter the Range in Km3000
Enter the Efficiency in WhKm250
Enter the FastCharge in Km per H250
Enter the RapidCharge: Yes or NoYes
Enter the PowerTrain for the vehicle: 'AWD' or 'RWD' or 'FWD' AWD
Enter the PlugType for the vehicle: 'Type 2 CCS' or 'Type 2 CHAdeMO' or 'Type 2' or 'Type 1 CHAdeMO' Type 2 CCS
Enter the number of seats4
Enter the Brand for the vehicle: 'Aiways', 'Audi', 'BMW', 'Byton', 'CUPRA', 'Citroen', 'DS', 'Fiat',
       'Ford', 'Honda', 'Hyundai', 'Jaguar', 'Kia', 'Lexus', 'Lightyear',
       'Lucid', 'MG', 'Mazda', 'Mercedes', 'Mini', 'Nissan', 'Opel',
       'Peugeot', 'Polestar', 'Porsche', 'Renault', 'SEAT', 'Skoda',
       'Smart', 'Sono', 'Tesla', 'Volkswagen', 'Volvo' Skoda
Enter the BodyStyle for the vehicle: 'Cabrio', 'Hatchback', 'Liftback', 'MPV', 'Pickup', 'SPV', 'SUV',
       'Sedan', 'Station' Sedan
Enter the Segment for the vehicle: 'A', 'B', 'C', 'D', 'E', 'F', 'N', 'S' A


In [257]:
# The price prediction in Euro
print(f"The price in euro for the above model = {np.round(linreg.predict(encode_vals(AccelSec, TopSpeed_KmH, Range_Km, Efficiency_WhKm, FastCharge_KmH, RapidCharge, PowerTrain, PlugType, Seats, Brand, BodyStyle, Segment))[0],2)}")


The price in euro for the above model = 195479.87
