In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import random
# Seed both the stdlib and the NumPy global generators from one constant so a
# fresh "Restart & Run All" starts from the same RNG state every time.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)


# Data Gathering

In [None]:
# Read the fuel-consumption table (path is relative to the working directory)
# and preview its first rows.
CSV_PATH = "FuelConsumption.csv"
data = pd.read_csv(CSV_PATH)
data.head()

# Data Pre-Processing

In [None]:
# Columns that none of the models in this notebook use.
UNUSED_COLUMNS = [
    'MODELYEAR',
    'MODEL',
    'VEHICLECLASS',
    'MAKE',
    'TRANSMISSION',
    'FUELCONSUMPTION_COMB_MPG',
]

# Rebind instead of mutating in place, and ignore columns that are already
# gone, so re-running this cell is a no-op rather than a KeyError.
data = data.drop(columns=UNUSED_COLUMNS, errors='ignore')
data.head()

In [None]:
# Integer code assigned to each fuel-type letter. Values that are not keys of
# this mapping are left untouched by `replace`.
FUEL_TYPE_CODES = {"X": 1, "Z": 2, "E": 3, "D": 4}

# Build the encoded, sorted frame in one chain and rebind `data`, rather than
# assigning a frame onto a frame slice and sorting in place.
data = (
    data
    .assign(FUELTYPE=data["FUELTYPE"].replace(FUEL_TYPE_CODES))
    .sort_values(by=['FUELCONSUMPTION_COMB'])
)
data.head()

# Split Data

In [None]:
from sklearn.model_selection import train_test_split

# Predictors and target, both kept as DataFrames (the target has one column).
FEATURE_COLUMNS = ['ENGINESIZE', 'FUELCONSUMPTION_COMB']
TARGET_COLUMNS = ['CO2EMISSIONS']

X = data[FEATURE_COLUMNS]
Y = data[TARGET_COLUMNS]

# Hold out 20% of the rows for testing; random_state pins the shuffle.
xtrain, xtest, ytrain, ytest = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=1,
)

# Model Init

In [None]:
from sklearn.svm import SVR

# One unfitted SVR per kernel; every other hyperparameter stays at the
# scikit-learn default.
svrR = SVR(kernel='rbf')  # 'rbf' is SVR's default kernel, spelled out for symmetry
svrL = SVR(kernel='linear')
svrP = SVR(kernel='poly')

# Training Model

In [None]:
# SVR expects a 1-D target. ytrain is a single-column DataFrame, so flatten it
# once (the single-feature cells further down already do this) instead of
# letting scikit-learn emit a DataConversionWarning on every fit.
ytrain_1d = ytrain.values.ravel()

svrR.fit(xtrain, ytrain_1d)
svrL.fit(xtrain, ytrain_1d)
svrP.fit(xtrain, ytrain_1d)

# Test Model

In [None]:
# Predict the held-out rows with each fitted kernel, in RBF / linear /
# polynomial order.
yhatR, yhatL, yhatP = (
    fitted_model.predict(xtest) for fitted_model in (svrR, svrL, svrP)
)

# Model Evaluation

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error, r2_score

# scikit-learn metrics take (y_true, y_pred) in that order. MSE is symmetric,
# but MAPE and R2 are not, so the ground truth has to be the first argument.
for kernel_name, y_pred in (("RBF", yhatR), ("Linear", yhatL), ("Polynomial", yhatP)):
    print(f"=================== {kernel_name}  ===========================")
    # 100 - MAPE% is an accuracy-style percentage, not a mean absolute error,
    # so label it as what it is.
    print("Accuracy (100 - MAPE %): ", 100 - mean_absolute_percentage_error(ytest, y_pred) * 100)
    print("MSE: ", mean_squared_error(ytest, y_pred))
    print("R2 Score: ", r2_score(ytest, y_pred))

# Hyperparameter Tuning (Polynomial Degree)

In [None]:
from sklearn.svm import SVR

# Repeat the three-kernel comparison, this time with the polynomial kernel
# restricted to degree 1.
svrR = SVR()
svrL = SVR(kernel="linear")
svrP = SVR(kernel='poly', degree=1)

# SVR expects a 1-D target; ytrain is a single-column DataFrame, so flatten it
# to avoid scikit-learn's DataConversionWarning.
ytrain_1d = ytrain.values.ravel()
for model in (svrR, svrL, svrP):
    model.fit(xtrain, ytrain_1d)

yhatR = svrR.predict(xtest)
yhatL = svrL.predict(xtest)
yhatP = svrP.predict(xtest)

# scikit-learn metrics take (y_true, y_pred) in that order. MSE is symmetric,
# but MAPE and R2 are not, so the ground truth has to be the first argument.
for kernel_name, y_pred in (("RBF", yhatR), ("Linear", yhatL), ("Polynomial", yhatP)):
    print(f"=================== {kernel_name}  ===========================")
    # 100 - MAPE% is an accuracy-style percentage, not a mean absolute error,
    # so label it as what it is.
    print("Accuracy (100 - MAPE %): ", 100 - mean_absolute_percentage_error(ytest, y_pred) * 100)
    print("MSE: ", mean_squared_error(ytest, y_pred))
    print("R2 Score: ", r2_score(ytest, y_pred))

# Model (SVR) Analysis

In [None]:
# Smallest and largest engine size in the full feature table.
engine_sizes = X['ENGINESIZE'].to_numpy()
print(engine_sizes.min())
print(engine_sizes.max())

In [None]:
# Smallest and largest combined fuel consumption in the full feature table.
fuel_consumption = X['FUELCONSUMPTION_COMB'].to_numpy()
print(fuel_consumption.min())
print(fuel_consumption.max())

In [None]:
# Refit the three kernels on ENGINESIZE alone so each fitted curve can be drawn
# over a scatter of engine size against CO2 emissions.
svrR = SVR()
svrL = SVR(kernel='linear')
svrP = SVR(kernel='poly')

engine_train = xtrain[['ENGINESIZE']]
engine_test = xtest[['ENGINESIZE']]
co2_train = ytrain.values.ravel()  # SVR wants a 1-D target

for model in (svrR, svrL, svrP):
    model.fit(engine_train, co2_train)

yhatR = svrR.predict(engine_test)
yhatL = svrL.predict(engine_test)
yhatP = svrP.predict(engine_test)

# Evenly spaced 500-point grids, one per feature.
# NOTE(review): the bounds are hard-coded and presumably match the min/max
# values printed by the two cells above -- confirm if the dataset changes.
lineE = np.linspace(1, 8.4, 500).reshape(-1, 1)
lineF = np.linspace(4.7, 25.8, 500).reshape(-1, 1)

# Use the whole grids as columns. Indexing with [0] selected only the first
# grid point, i.e. a single value rather than the 500-point grid.
feature = pd.DataFrame({"E": lineE.ravel(), "F": lineF.ravel()})

# The models were fitted on a DataFrame, so predict on a frame with the same
# column name; a bare ndarray triggers scikit-learn's feature-name warning.
engine_grid = pd.DataFrame(lineE, columns=['ENGINESIZE'])

fig, axes = plt.subplots(1, 1, figsize=(10, 6))
axes.scatter(X[['ENGINESIZE']].values, Y.values, c='hotpink', label='Data')
axes.plot(lineE, svrL.predict(engine_grid), c='darkorange', label='Linear Prediction', linewidth=4)
axes.plot(lineE, svrR.predict(engine_grid), c='purple', label='RBF Prediction', linewidth=4)
axes.plot(lineE, svrP.predict(engine_grid), c='brown', label='Polynomial Prediction', linewidth=4)
# Label the figure so it can be read without the surrounding code.
axes.set_xlabel('ENGINESIZE')
axes.set_ylabel('CO2EMISSIONS')
axes.set_title('SVR fits on engine size (default hyperparameters)')
axes.legend()
plt.show()

# Hyperparameter Tuning (Grid Search)

In [None]:
## GridSearchCv
from sklearn.model_selection import GridSearchCV
# Exhaustive 5-fold search over C and epsilon for the linear-kernel SVR,
# scored by negative mean squared error.
svrL = SVR(kernel='linear')
grid = GridSearchCV(
    svrL,
    param_grid={
        'C': [1.1, 5.4, 160, 180, 200, 1000],
        'epsilon': [0.0003, 0.007, 0.0109, 0.019, 0.14, 0.05, 8, 0.2, 3, 7, 11, 13],
        # gamma is deliberately not searched: it is the kernel coefficient for
        # the rbf/poly/sigmoid kernels and has no effect on a linear kernel, so
        # listing three gamma values only tripled the number of fits.
    },
    scoring="neg_mean_squared_error",
    refit=True,  # refit the best candidate on the whole training set
    verbose=1,
    cv=5
)

# Fit on the single ENGINESIZE feature with a flattened 1-D target.
grid.fit(xtrain[['ENGINESIZE']], ytrain.values.ravel())

# Best hyperparameter combination found by the search.
grid.best_params_

In [None]:
# Hyperparameters shared by all three kernels in this cell.
# NOTE(review): these are presumably copied from the grid-search output above;
# that search tuned the linear kernel only, yet the values are reused unchanged
# for the RBF and polynomial kernels -- confirm this is intended.
TUNED_PARAMS = dict(C=160, epsilon=13, gamma='auto')

svrR = SVR(**TUNED_PARAMS)
svrL = SVR(kernel='linear', **TUNED_PARAMS)
svrP = SVR(kernel='poly', **TUNED_PARAMS)

engine_train = xtrain[['ENGINESIZE']]
engine_test = xtest[['ENGINESIZE']]
co2_train = ytrain.values.ravel()  # SVR wants a 1-D target

for model in (svrR, svrL, svrP):
    model.fit(engine_train, co2_train)

yhatR = svrR.predict(engine_test)
yhatL = svrL.predict(engine_test)
yhatP = svrP.predict(engine_test)

# Evenly spaced 500-point grids, one per feature.
# NOTE(review): the bounds are hard-coded and presumably match the min/max
# values printed earlier in the notebook -- confirm if the dataset changes.
lineE = np.linspace(1, 8.4, 500).reshape(-1, 1)
lineF = np.linspace(4.7, 25.8, 500).reshape(-1, 1)

# Use the whole grids as columns. Indexing with [0] selected only the first
# grid point, i.e. a single value rather than the 500-point grid.
feature = pd.DataFrame({"E": lineE.ravel(), "F": lineF.ravel()})

# The models were fitted on a DataFrame, so predict on a frame with the same
# column name; a bare ndarray triggers scikit-learn's feature-name warning.
engine_grid = pd.DataFrame(lineE, columns=['ENGINESIZE'])

fig, axes = plt.subplots(1, 1, figsize=(10, 6))
axes.scatter(X[['ENGINESIZE']].values, Y.values, c='hotpink', label='Data')
axes.plot(lineE, svrL.predict(engine_grid), c='darkorange', label='Linear Prediction', linewidth=4)
axes.plot(lineE, svrR.predict(engine_grid), c='purple', label='RBF Prediction', linewidth=4)
axes.plot(lineE, svrP.predict(engine_grid), c='brown', label='Polynomial Prediction', linewidth=4)
# Label the figure so it can be read without the surrounding code.
axes.set_xlabel('ENGINESIZE')
axes.set_ylabel('CO2EMISSIONS')
axes.set_title("SVR fits on engine size (C=160, epsilon=13, gamma='auto')")
axes.legend()
plt.show()