In [87]:
# Import the libraries
import numpy as np
import pandas as pd
from sklearn.svm import LinearSVR, SVR
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [88]:
# Load the dataset
dataset = pd.read_csv('Fuel Consumption.csv')

# Seperate x, y of dataset
x = dataset.loc[:, ['ENGINESIZE', 'CYLINDERS', 'FUELCONSUMPTION_CITY', 'FUELCONSUMPTION_HWY', 'FUELCONSUMPTION_COMB', 'FUELCONSUMPTION_COMB_MPG', 'CO2EMISSIONS']] 
y = dataset.iloc[:, -1].values # Target

dataset.head()

Unnamed: 0,MODELYEAR,MAKE,MODEL,VEHICLECLASS,ENGINESIZE,CYLINDERS,TRANSMISSION,FUELTYPE,FUELCONSUMPTION_CITY,FUELCONSUMPTION_HWY,FUELCONSUMPTION_COMB,FUELCONSUMPTION_COMB_MPG,CO2EMISSIONS
0,2014,ACURA,ILX,COMPACT,2.0,4,AS5,Z,9.9,6.7,8.5,33,196
1,2014,ACURA,ILX,COMPACT,2.4,4,M6,Z,11.2,7.7,9.6,29,221
2,2014,ACURA,ILX HYBRID,COMPACT,1.5,4,AV7,Z,6.0,5.8,5.9,48,136
3,2014,ACURA,MDX 4WD,SUV - SMALL,3.5,6,AS6,Z,12.7,9.1,11.1,25,255
4,2014,ACURA,RDX AWD,SUV - SMALL,3.5,6,AS6,Z,12.1,8.7,10.6,27,244


In [89]:
# Split the dataset into train, test
X_train, x_test, Y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 42) # 80% train, 20% test

# Scale the features and target variables
scaler_x = StandardScaler()
scaler_y = StandardScaler()
X_train = scaler_x.fit_transform(X_train)
x_test = scaler_x.transform(x_test)
Y_train = scaler_y.fit_transform(Y_train.reshape(-1, 1)).ravel()
y_test = scaler_y.transform(y_test.reshape(-1, 1)).ravel()

In [90]:
# Create and fit the linear SVR model
model = LinearSVR(epsilon = 0.1, C = 1.0, loss = 'epsilon_insensitive', random_state = 42, max_iter = 10000)

# Create the KFold object
k = 4 # %25 of train set for validation set
kf = KFold(n_splits=k, shuffle=True, random_state=42)

# Initialize the lists to store the metrics
mse_list = []
mae_list = []
r2_list = []

# Loop over the train and valid indices
for train_index, valid_index in kf.split(X_train):
    # Split the data into train and valid sets
    x_train, x_valid = X_train[train_index], X_train[valid_index]
    y_train, y_valid = Y_train[train_index], Y_train[valid_index]

    # Fit the model on the train set
    model.fit(x_train, y_train)

    # Predict the target values for the valid set
    y_valid_pred = model.predict(x_valid)

    # Compute the accuracy metrics for the valid set
    mse_valid = mean_squared_error(y_valid, y_valid_pred)
    mae_valid = mean_absolute_error(y_valid, y_valid_pred)
    r2_valid = r2_score(y_valid, y_valid_pred)

    # Append the metrics to the lists
    mse_list.append(mse_valid)
    mae_list.append(mae_valid)
    r2_list.append(r2_valid)

# Calculate the average of the metrics
mse_mean = np.mean(mse_valid)
mae_mean = np.mean(mae_valid)
r2_mean = np.mean(r2_valid)

# Predict the target values for the train set
y_train_pred = model.predict(x_train)

# Compute the accuracy metrics for the train set
mse_train = mean_squared_error(y_train, y_train_pred)
mae_train = mean_absolute_error(y_train, y_train_pred)
r2_train = r2_score(y_train, y_train_pred)

# Predict the target values for the test set
y_test_pred = model.predict(x_test)
# Compute the accuracy metrics for the test set
mse_test = mean_squared_error(y_test, y_test_pred)
mae_test = mean_absolute_error(y_test, y_test_pred)
r2_test = r2_score(y_test, y_test_pred)

# Print the results
print('Train set:')
print(f'MSE: {mse_train:.4f}')
print(f'MAE: {mae_train:.4f}')
print(f'R2: {r2_train:.4f}')

print('\nValidation set:')
print(f'Average MSE: {mse_mean:.4f}')
print(f'Average MAE: {mae_mean:.4f}')
print(f'Average R2: {r2_mean:.4f}')

print('\nTest set:')
print(f'MSE: {mse_test:.4f}')
print(f'MAE: {mae_test:.4f}')
print(f'R2: {r2_test:.4f}')

Train set:
MSE: 0.0018
MAE: 0.0331
R2: 0.9982

Validation set:
Average MSE: 0.0016
Average MAE: 0.0305
Average R2: 0.9984

Test set:
MSE: 0.0018
MAE: 0.0331
R2: 0.9983


In [91]:
# Create and fit the Non-linear SVR model with polynomial kernel
model = SVR(kernel = 'poly', epsilon = 0.1, C = 1.0, max_iter = 100000)

# Create the KFold object
k = 4 # %25 of train set for validation set
kf = KFold(n_splits=k, shuffle=True, random_state=42)

# Initialize the lists to store the metrics
mse_list = []
mae_list = []
r2_list = []

# Loop over the train and valid indices
for train_index, valid_index in kf.split(X_train):
    # Split the data into train and test sets
    x_train, x_valid = X_train[train_index], X_train[valid_index]
    y_train, y_valid = Y_train[train_index], Y_train[valid_index]

    # Fit the model on the train set
    model.fit(x_train, y_train)

    # Predict the target values for the valid set
    y_valid_pred = model.predict(x_valid)

    # Compute the accuracy metrics for the valid set
    mse_valid = mean_squared_error(y_valid, y_valid_pred)
    mae_valid = mean_absolute_error(y_valid, y_valid_pred)
    r2_valid = r2_score(y_valid, y_valid_pred)

    # Append the metrics to the lists
    mse_list.append(mse_valid)
    mae_list.append(mae_valid)
    r2_list.append(r2_valid)

# Calculate the average of the metrics
mse_mean = np.mean(mse_valid)
mae_mean = np.mean(mae_valid)
r2_mean = np.mean(r2_valid)

# Predict the target values for the train set
y_train_pred = model.predict(x_train)

# Compute the accuracy metrics for the train set
mse_train = mean_squared_error(y_train, y_train_pred)
mae_train = mean_absolute_error(y_train, y_train_pred)
r2_train = r2_score(y_train, y_train_pred)

# Predict the target values for the test set
y_test_pred = model.predict(x_test)
# Compute the accuracy metrics for the test set
mse_test = mean_squared_error(y_test, y_test_pred)
mae_test = mean_absolute_error(y_test, y_test_pred)
r2_test = r2_score(y_test, y_test_pred)

# Print the results
print('Train set:')
print(f'MSE: {mse_train:.4f}')
print(f'MAE: {mae_train:.4f}')
print(f'R2: {r2_train:.4f}')

print('\nValidation set:')
print(f'Average MSE: {mse_mean:.4f}')
print(f'Average MAE: {mae_mean:.4f}')
print(f'Average R2: {r2_mean:.4f}')

print('\nTest set:')
print(f'MSE: {mse_test:.4f}')
print(f'MAE: {mae_test:.4f}')
print(f'R2: {r2_test:.4f}')

Train set:
MSE: 0.1899
MAE: 0.2911
R2: 0.8105

Validation set:
Average MSE: 0.1554
Average MAE: 0.2814
Average R2: 0.8417

Test set:
MSE: 0.2735
MAE: 0.3190
R2: 0.7370


In [92]:
# Create and fit the Non-linear SVR model with RBF kernel
model = SVR(kernel = 'rbf', epsilon = 0.1, C = 1.0, max_iter = 10000)

# Create the KFold object
k = 4 # %25 of train set for validation set
kf = KFold(n_splits=k, shuffle=True, random_state=42)

# Initialize the lists to store the metrics
mse_list = []
mae_list = []
r2_list = []

# Loop over the train and valid indices
for train_index, valid_index in kf.split(X_train):
    # Split the data into train and test sets
    x_train, x_valid = X_train[train_index], X_train[valid_index]
    y_train, y_valid = Y_train[train_index], Y_train[valid_index]

    # Fit the model on the train set
    model.fit(x_train, y_train)

    # Predict the target values for the valid set
    y_valid_pred = model.predict(x_valid)

    # Compute the accuracy metrics for the valid set
    mse_valid = mean_squared_error(y_valid, y_valid_pred)
    mae_valid = mean_absolute_error(y_valid, y_valid_pred)
    r2_valid = r2_score(y_valid, y_valid_pred)

    # Append the metrics to the lists
    mse_list.append(mse_valid)
    mae_list.append(mae_valid)
    r2_list.append(r2_valid)

# Calculate the average of the metrics
mse_mean = np.mean(mse_valid)
mae_mean = np.mean(mae_valid)
r2_mean = np.mean(r2_valid)

# Predict the target values for the train set
y_train_pred = model.predict(x_train)

# Compute the accuracy metrics for the train set
mse_train = mean_squared_error(y_train, y_train_pred)
mae_train = mean_absolute_error(y_train, y_train_pred)
r2_train = r2_score(y_train, y_train_pred)

# Predict the target values for the test set
y_test_pred = model.predict(x_test)
# Compute the accuracy metrics for the test set
mse_test = mean_squared_error(y_test, y_test_pred)
mae_test = mean_absolute_error(y_test, y_test_pred)
r2_test = r2_score(y_test, y_test_pred)

# Print the results
print('Train set:')
print(f'MSE: {mse_train:.4f}')
print(f'MAE: {mae_train:.4f}')
print(f'R2: {r2_train:.4f}')

print('\nValidation set:')
print(f'Average MSE: {mse_mean:.4f}')
print(f'Average MAE: {mae_mean:.4f}')
print(f'Average R2: {r2_mean:.4f}')

print('\nTest set:')
print(f'MSE: {mse_test:.4f}')
print(f'MAE: {mae_test:.4f}')
print(f'R2: {r2_test:.4f}')

Train set:
MSE: 0.0048
MAE: 0.0526
R2: 0.9952

Validation set:
Average MSE: 0.0068
Average MAE: 0.0519
Average R2: 0.9930

Test set:
MSE: 0.0109
MAE: 0.0556
R2: 0.9895
