# Predicting Carbon Dioxide Emission of a New Car Model based on its Various Features like Engine Size, Number of Cylinders, etc. using Multiple Linear Regression

### Importing Required Libraries

In [1]:
%matplotlib notebook
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

### Downloading the Data

In [2]:
df = pd.read_csv(r'C:\Users\user\Desktop\Data Science\FuelConsumptionCo2.csv')
df.head()

Unnamed: 0,MODELYEAR,MAKE,MODEL,VEHICLECLASS,ENGINESIZE,CYLINDERS,TRANSMISSION,FUELTYPE,FUELCONSUMPTION_CITY,FUELCONSUMPTION_HWY,FUELCONSUMPTION_COMB,FUELCONSUMPTION_COMB_MPG,CO2EMISSIONS
0,2014,ACURA,ILX,COMPACT,2.0,4,AS5,Z,9.9,6.7,8.5,33,196
1,2014,ACURA,ILX,COMPACT,2.4,4,M6,Z,11.2,7.7,9.6,29,221
2,2014,ACURA,ILX HYBRID,COMPACT,1.5,4,AV7,Z,6.0,5.8,5.9,48,136
3,2014,ACURA,MDX 4WD,SUV - SMALL,3.5,6,AS6,Z,12.7,9.1,11.1,25,255
4,2014,ACURA,RDX AWD,SUV - SMALL,3.5,6,AS6,Z,12.1,8.7,10.6,27,244


### Data Exploration

In [3]:
df.describe()

Unnamed: 0,MODELYEAR,ENGINESIZE,CYLINDERS,FUELCONSUMPTION_CITY,FUELCONSUMPTION_HWY,FUELCONSUMPTION_COMB,FUELCONSUMPTION_COMB_MPG,CO2EMISSIONS
count,1067.0,1067.0,1067.0,1067.0,1067.0,1067.0,1067.0,1067.0
mean,2014.0,3.346298,5.794752,13.296532,9.474602,11.580881,26.441425,256.228679
std,0.0,1.415895,1.797447,4.101253,2.79451,3.485595,7.468702,63.372304
min,2014.0,1.0,3.0,4.6,4.9,4.7,11.0,108.0
25%,2014.0,2.0,4.0,10.25,7.5,9.0,21.0,207.0
50%,2014.0,3.4,6.0,12.6,8.8,10.9,26.0,251.0
75%,2014.0,4.3,8.0,15.55,10.85,13.35,31.0,294.0
max,2014.0,8.4,12.0,30.2,20.5,25.8,60.0,488.0


### Extracting Necessary Columns

In [4]:
cdf = df[['MODEL', 'ENGINESIZE', 'CYLINDERS', 'FUELCONSUMPTION_COMB', 'CO2EMISSIONS']]
cdf = cdf.set_index('MODEL')
cdf.head()

Unnamed: 0_level_0,ENGINESIZE,CYLINDERS,FUELCONSUMPTION_COMB,CO2EMISSIONS
MODEL,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ILX,2.0,4,8.5,196
ILX,2.4,4,9.6,221
ILX HYBRID,1.5,4,5.9,136
MDX 4WD,3.5,6,11.1,255
RDX AWD,3.5,6,10.6,244


### Plotting Various Features

In [5]:
cdf.hist()

<IPython.core.display.Javascript object>

array([[<matplotlib.axes._subplots.AxesSubplot object at 0x0000002B6852CA48>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x0000002B687F9E08>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x0000002B68838988>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x0000002B68870AC8>]],
      dtype=object)

### Plotting CO2 Emissions vs Other Features

In [6]:
plt.figure()
plt.scatter(cdf.ENGINESIZE, cdf.CO2EMISSIONS, label = 'ENGINE SIZE')
plt.scatter(cdf.CYLINDERS, cdf.CO2EMISSIONS, label = 'CYLINDERS')
plt.scatter(cdf.FUELCONSUMPTION_COMB, cdf.CO2EMISSIONS, label = 'FUELCONSUMPTION_COMB')
plt.legend()
plt.xlabel('Features')
plt.ylabel('CO2 EMISSIONS')
plt.title('Relationship Scatter Plot')

<IPython.core.display.Javascript object>

Text(0.5, 1.0, 'Relationship Scatter Plot')

### Splitting Data into Train and Test

In [7]:
from sklearn.model_selection import train_test_split
X = cdf[['ENGINESIZE', 'CYLINDERS', 'FUELCONSUMPTION_COMB']]
y = cdf[['CO2EMISSIONS']]
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size = 0.8, random_state=0)

### Multiple Linear Regression Model

In [8]:
from sklearn import linear_model
reg = linear_model.LinearRegression()
reg.fit(X_train, y_train)
print("Coefficient = ", reg.coef_)
print("Intercept = ", reg.intercept_)

Coefficient =  [[11.98993058  7.54469599  8.98695981]]
Intercept =  [69.05220993]


### Model Accuracy

In [9]:
reg.score(X_test, y_test)

0.8604199405627129

### Predicting CO2 Emission for given Features

In [10]:
reg.predict([[7.5, 9, 20.3]])

array([[409.3142373]])

### Evaluation Metrics

In [11]:
from sklearn.metrics import r2_score
y_hat_test = reg.predict(X_test)
print("MAE = ", np.mean(np.absolute(y_test - y_hat_test)))
print("RMSE = ", np.sqrt(np.mean((y_test - y_hat_test)**2)))
print("R2 Score = ", r2_score(y_test, y_hat_test))

MAE =  CO2EMISSIONS    18.01627
dtype: float64
RMSE =  CO2EMISSIONS    24.274167
dtype: float64
R2 Score =  0.8604199405627128


In [12]:
from mpl_toolkits.mplot3d import Axes3D 
fig = plt.figure()
plt.clf()
ax = Axes3D(fig)
plt.cla()
ax.scatter(X_train.ENGINESIZE, X_train.CYLINDERS, X_train.FUELCONSUMPTION_COMB)
ax.plot(reg.intercept_[0] + reg.coef_[0][0]*X_train.ENGINESIZE, reg.coef_[0][1]*X_train.CYLINDERS, reg.coef_[0][2]*)

<IPython.core.display.Javascript object>

<mpl_toolkits.mplot3d.art3d.Path3DCollection at 0x2b6bd4b648>