# *Importing Modules*

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LinearRegression

# *Reading DataFrame*

In [2]:
data_frame = pd.read_csv(r'H:\FuelConsumptionCo2.csv')
data_frame.head()

Unnamed: 0,MODELYEAR,MAKE,MODEL,VEHICLECLASS,ENGINESIZE,CYLINDERS,TRANSMISSION,FUELTYPE,FUELCONSUMPTION_CITY,FUELCONSUMPTION_HWY,FUELCONSUMPTION_COMB,FUELCONSUMPTION_COMB_MPG,CO2EMISSIONS
0,2014,ACURA,ILX,COMPACT,2.0,4,AS5,Z,9.9,6.7,8.5,33,196
1,2014,ACURA,ILX,COMPACT,2.4,4,M6,Z,11.2,7.7,9.6,29,221
2,2014,ACURA,ILX HYBRID,COMPACT,1.5,4,AV7,Z,6.0,5.8,5.9,48,136
3,2014,ACURA,MDX 4WD,SUV - SMALL,3.5,6,AS6,Z,12.7,9.1,11.1,25,255
4,2014,ACURA,RDX AWD,SUV - SMALL,3.5,6,AS6,Z,12.1,8.7,10.6,27,244


In [3]:
x = data_frame[['ENGINESIZE', 'CYLINDERS', 'FUELCONSUMPTION_COMB']]
y = data_frame[['CO2EMISSIONS']]
x

Unnamed: 0,ENGINESIZE,CYLINDERS,FUELCONSUMPTION_COMB
0,2.0,4,8.5
1,2.4,4,9.6
2,1.5,4,5.9
3,3.5,6,11.1
4,3.5,6,10.6
...,...,...,...
1062,3.0,6,11.8
1063,3.2,6,11.5
1064,3.0,6,11.8
1065,3.2,6,11.3


In [4]:
y

Unnamed: 0,CO2EMISSIONS
0,196
1,221
2,136
3,255
4,244
...,...
1062,271
1063,264
1064,271
1065,260


In [5]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
x_train

Unnamed: 0,ENGINESIZE,CYLINDERS,FUELCONSUMPTION_COMB
333,1.4,4,7.8
106,4.4,8,12.7
585,3.0,6,9.9
55,3.0,6,11.4
213,5.3,8,16.5
...,...,...,...
330,3.6,6,12.0
466,2.4,4,10.0
121,4.4,8,13.8
1044,1.8,4,8.6


# *Get The Best Parameters*

In [7]:
param_grid = {
    'fit_intercept': [True, False],
    'copy_X': [True, False],
    'positive': [True, False],
}
# fit intercept to adjust if there are any theta or not 
# positive restrict if the theta must be positive or not 

model = LinearRegression()

grid_search = GridSearchCV(model, param_grid, cv=5)
grid_search.fit(x_train, y_train)
print("Best Parameters:", grid_search.best_params_)
print("Best Score:", grid_search.best_score_)

Best Parameters: {'copy_X': True, 'fit_intercept': True, 'positive': True}
Best Score: 0.8581423930096699


In [10]:
# Get the best estimator
best_model = grid_search.best_estimator_

# Get the coefficients (theta values) of the best model
theta = best_model.coef_

print("Best Parameters:", best_model)
print("Best Theta values:", theta)

Best Parameters: LinearRegression(positive=True)
Best Theta values: [[11.2094395   7.15561381  9.5208118 ]]


# *Creating Model*

In [11]:
best_parametrs=grid_search.best_params_
model = LinearRegression(**best_parametrs)
model.fit(x_train,y_train)

In [12]:
model.score(x_test,y_test)

0.8759705206914068

In [13]:
model.score(x_train,y_train)

0.860596431041164

In [14]:
mean_squared_error(y_train,model.predict(x_train))

554.3023826742204

In [15]:
mean_absolute_error(y_test,model.predict(x_test))

16.721593983516524