## Multiple Linear Regression

#### Importing necessary libraries

In [25]:
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


### About our dataset
We have used a fuel consumption dataset, which contains model-specific fuel consumption ratings and estimated carbon dioxide emissions for new light-duty vehicles for retail sale.

MODEL YEAR  e.g. 2014  
MAKE  e.g. Acura  
MODEL  e.g. ILX   
VEHICLE CLASS  e.g. SUV   
ENGINE SIZE  e.g. 4.7  
CYLINDERS  e.g. 6  
TRANSMISSION  e.g. A6  
FUEL CONSUMPTION IN CITY(L/100 km)  e.g. 9  


#### Loading the Dataset

In [26]:
# Read the fuel-consumption ratings from the CSV located in the working directory.
df = pd.read_csv(filepath_or_buffer="carco2.csv")
# Show the first five rows as a quick sanity check of the parse.
df.head(5)

Unnamed: 0.1,Unnamed: 0,MODELYEAR,MAKE,MODEL,VEHICLECLASS,ENGINESIZE,CYLINDERS,TRANSMISSION,FUELTYPE,FUELCONSUMPTION_CITY,FUELCONSUMPTION_HWY,FUELCONSUMPTION_COMB,FUELCONSUMPTION_COMB_MPG,CO2EMISSIONS
0,0,2014,ACURA,ILX,COMPACT,2.0,4,AS5,Z,9.9,6.7,8.5,33,196
1,1,2014,ACURA,ILX,COMPACT,2.4,4,M6,Z,11.2,7.7,9.6,29,221
2,2,2014,ACURA,ILX HYBRID,COMPACT,1.5,4,AV7,Z,6.0,5.8,5.9,48,136
3,3,2014,ACURA,MDX 4WD,SUV - SMALL,3.5,6,AS6,Z,12.7,9.1,11.1,25,255
4,4,2014,ACURA,RDX AWD,SUV - SMALL,3.5,6,AS6,Z,12.1,8.7,10.6,27,244


In [28]:
# Display the (rows, columns) dimensions of the loaded frame.
df.shape

(1067, 14)

#### Dropping unnecessary columns

In [4]:
# Restrict the frame to the three predictor columns plus the CO2 target column.
selected_columns = ['ENGINESIZE', 'CYLINDERS', 'FUELCONSUMPTION_COMB', 'CO2EMISSIONS']
df = df.loc[:, selected_columns]
# Preview the reduced frame.
df.head()

Unnamed: 0,ENGINESIZE,CYLINDERS,FUELCONSUMPTION_COMB,CO2EMISSIONS
0,2.0,4,8.5,196
1,2.4,4,9.6,221
2,1.5,4,5.9,136
3,3.5,6,11.1,255
4,3.5,6,10.6,244


#### Checking for null values

In [5]:
# Per-column flag: True if that column contains at least one missing value.
df.isnull().any(axis=0)

ENGINESIZE              False
CYLINDERS               False
FUELCONSUMPTION_COMB    False
CO2EMISSIONS            False
dtype: bool

In [6]:
# Predictor columns fed to the regression.
feature_columns = ['ENGINESIZE', 'CYLINDERS', 'FUELCONSUMPTION_COMB']
# Target column; selected with a list so `y` stays a one-column DataFrame.
target_columns = ['CO2EMISSIONS']

x = df[feature_columns]
y = df[target_columns]

### Training the Model

In [7]:
# Use 80% of the rows for training and keep the remainder for evaluation;
# the fixed random_state makes the shuffle (and therefore the split) repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    train_size=0.8,
    random_state=2,
)

In [8]:
# Ordinary least-squares linear model with scikit-learn's default settings.
regressor = LinearRegression()
# Fit on the training split; left as the last expression so the estimator is displayed.
regressor.fit(X=x_train, y=y_train)

In [9]:
# Score the model on the same rows it was fitted on (in-sample fit quality).
train_predictions = regressor.predict(x_train)
train_r2 = r2_score(y_train, train_predictions)
print("R² value on training data:", train_r2)

R² value on training data: 0.8553923670789212


In [10]:
# Score the model on the held-out rows it has not seen during fitting.
test_predictions = regressor.predict(x_test)
test_r2 = r2_score(y_test, test_predictions)
print("R² value on testing data:", test_r2)


R² value on testing data: 0.8911902906366301


### Saving the model

In [11]:
# Persist the fitted regressor to disk so it can be reloaded without retraining.
# `pickle` is imported in the notebook's top import cell.
# Writing inside a `with` block guarantees the file is flushed and closed,
# even if pickling raises part-way through.
# NOTE: only unpickle this file from a trusted source; loading a pickle can
# execute arbitrary code.
filename = 'trained_model.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(regressor, model_file)

In [24]:

# Predict CO2 emissions for a single vehicle: 2.4 L engine, 4 cylinders,
# 9.6 L/100 km combined fuel consumption.
# The model was fitted on a DataFrame, so the query is passed as a DataFrame
# with the same column names and order; a bare nested list would make
# scikit-learn (1.0+) warn that the input lacks valid feature names.
sample = pd.DataFrame(
    [[2.4, 4, 9.6]],
    columns=['ENGINESIZE', 'CYLINDERS', 'FUELCONSUMPTION_COMB'],
)
res = regressor.predict(sample)
res



array([[213.92195489]])