In [2]:
# Mount Google Drive inside the Colab runtime at /content/drive so that files
# stored in Drive can be opened by path from this notebook

from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# Importing required packages and libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
import warnings
warnings.filterwarnings(action = 'ignore', category = UserWarning)

In [5]:
# Load the 'MPG_Car_Data.csv' file from the mounted Drive folder into a DataFrame
# and display it

csv_path = '/content/drive/MyDrive/Datasets/MPG_Car_Data.csv'
data = pd.read_csv(csv_path)
data

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model year,origin,car name
0,18.0,8,307.0,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140,3449,10.5,70,1,ford torino
...,...,...,...,...,...,...,...,...,...
387,27.0,4,140.0,86,2790,15.6,82,1,ford mustang gl
388,44.0,4,97.0,52,2130,24.6,82,2,vw pickup
389,32.0,4,135.0,84,2295,11.6,82,1,dodge rampage
390,28.0,4,120.0,79,2625,18.6,82,1,ford ranger


In [7]:
# Inspect the dtype pandas assigned to each column of 'data'

data.dtypes

Unnamed: 0,0
mpg,float64
cylinders,int64
displacement,float64
horsepower,int64
weight,int64
acceleration,float64
model year,int64
origin,int64
car name,object


In [9]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numerical columns

data.describe()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model year,origin
count,392.0,392.0,392.0,392.0,392.0,392.0,392.0,392.0
mean,23.445918,5.471939,194.41199,104.469388,2977.584184,15.541327,75.979592,1.576531
std,7.805007,1.705783,104.644004,38.49116,849.40256,2.758864,3.683737,0.805518
min,9.0,3.0,68.0,46.0,1613.0,8.0,70.0,1.0
25%,17.0,4.0,105.0,75.0,2225.25,13.775,73.0,1.0
50%,22.75,4.0,151.0,93.5,2803.5,15.5,76.0,1.0
75%,29.0,8.0,275.75,126.0,3614.75,17.025,79.0,2.0
max,46.6,8.0,455.0,230.0,5140.0,24.8,82.0,3.0


In [18]:
# Define two functions, cat_dis and cat_wt, that convert the displacement and weight values into categorical codes

def cat_dis(entry):
  """Bin a displacement value into an ordinal category code.

  Parameters
  ----------
  entry : number
      Displacement value to categorise.

  Returns
  -------
  int
      0 for values up to and including 168, 1 for (168, 233],
      2 for (233, 369] and 3 for anything above 369.

  Notes
  -----
  A NaN compares false against every bound and therefore ends up in
  category 3.
  """
  # The lowest bin has no lower bound, so a displacement below 68 is grouped
  # with the smallest engines instead of falling through to the highest category.
  if entry <= 168:
    return 0
  elif entry <= 233:
    return 1
  elif entry <= 369:
    return 2
  else:
    return 3


def cat_wt(entry):
  """Encode a weight value as a binary category.

  Returns 0 when ``entry`` is at most 2974 and 1 otherwise.
  """
  # Kept as a ``<=`` test (not ``>``) so that a NaN input still maps to 1.
  return 0 if entry <= 2974 else 1

# Apply these functions to the data frame 'data' to create the two categorical columns

displacement_codes = data['displacement'].apply(cat_dis)
weight_codes = data['weight'].apply(cat_wt)
data['discat'], data['wtcat'] = displacement_codes, weight_codes

# Check the column types now that the new columns have been added

data.dtypes

Unnamed: 0,0
mpg,float64
cylinders,int64
displacement,float64
horsepower,int64
weight,int64
acceleration,float64
model year,int64
origin,int64
car name,object
discat,int64


In [20]:
# Cast the 'discat' and 'wtcat' columns to object dtype, then re-check the dtypes

for categorical_col in ('discat', 'wtcat'):
  data[categorical_col] = data[categorical_col].astype(object)
data.dtypes

Unnamed: 0,0
mpg,float64
cylinders,int64
displacement,float64
horsepower,int64
weight,int64
acceleration,float64
model year,int64
origin,int64
car name,object
discat,object


In [15]:
# List the distinct values present in the 'discat' column.
# NOTE(review): the saved output of this cell shows string labels, whereas
# cat_dis as defined in this notebook returns the integers 0-3, and this cell's
# execution count (15) is lower than that of the cells before it — re-run the
# notebook top to bottom to refresh the output.
data['discat'].unique()

array(['Moderate', 'Severe', 'Low', 'Mild'], dtype=object)

#### Now, in this notebook, a model called 'mixlr_model' is created where the 'mpg' column is the output as before, but as inputs we will consider the variables 'acceleration', 'cylinders', 'horsepower', 'discat' and 'wtcat'.

In [21]:
# Fit a linear regression of 'mpg' on the five predictor columns.
# NOTE(review): the coefficient output further down has five entries, one per
# input column, so 'discat' (codes 0-3) is fitted as a single numeric predictor
# rather than as separate per-category indicators — confirm that is intended.

feature_columns = ['cylinders', 'horsepower', 'acceleration', 'discat', 'wtcat']
mixlr_model = LinearRegression().fit(data[feature_columns], data['mpg'])
mixlr_model

In [22]:
# Report the fitted slope of each predictor and the intercept of the model

slopes, intercept = mixlr_model.coef_, mixlr_model.intercept_
print('The slopes for the variables are:', slopes)
print('Intecept value of the model is:', intercept)

The slopes for the variables are: [-0.42537822 -0.10599821 -0.23219685 -0.3710792  -4.45419367]
Intecept value of the model is: 42.696990816101675
