## Predict the mileage of a car

### Fetch the data

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
# Load the car dataset; "mpg.csv" is resolved relative to the working directory.
df = pd.read_csv("mpg.csv")

In [3]:
# Preview the first rows to see the available columns and typical values.
df.head()

Unnamed: 0,MPG,Cylinders,Engine Disp,Horsepower,Weight,Accelerate,Year,Origin
0,15.0,8,390.0,190,3850,8.5,70,American
1,21.0,6,199.0,90,2648,15.0,70,American
2,18.0,6,199.0,97,2774,15.5,70,American
3,16.0,8,304.0,150,3433,12.0,70,American
4,14.0,8,455.0,225,3086,10.0,70,American


### Data cleaning

In [4]:
# (number of rows, number of columns)
df.shape

(392, 8)

In [5]:
# Column dtypes and non-null counts, to spot missing values and non-numeric columns.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 392 entries, 0 to 391
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   MPG          392 non-null    float64
 1   Cylinders    392 non-null    int64  
 2   Engine Disp  392 non-null    float64
 3   Horsepower   392 non-null    int64  
 4   Weight       392 non-null    int64  
 5   Accelerate   392 non-null    float64
 6   Year         392 non-null    int64  
 7   Origin       392 non-null    object 
dtypes: float64(3), int64(4), object(1)
memory usage: 24.6+ KB


### Data Analysis

In [6]:
# Pairwise correlation between the numeric columns; numeric_only=True leaves
# out the text column 'Origin'. The MPG row/column shows how strongly each
# candidate input moves with the target.
df.corr(numeric_only=True)

Unnamed: 0,MPG,Cylinders,Engine Disp,Horsepower,Weight,Accelerate,Year
MPG,1.0,-0.777618,-0.805127,-0.778427,-0.832244,0.423329,0.580541
Cylinders,-0.777618,1.0,0.950823,0.842983,0.897527,-0.504683,-0.345647
Engine Disp,-0.805127,0.950823,1.0,0.897257,0.932994,-0.5438,-0.369855
Horsepower,-0.778427,0.842983,0.897257,1.0,0.864538,-0.689196,-0.416361
Weight,-0.832244,0.897527,0.932994,0.864538,1.0,-0.416839,-0.30912
Accelerate,0.423329,-0.504683,-0.5438,-0.689196,-0.416839,1.0,0.290316
Year,0.580541,-0.345647,-0.369855,-0.416361,-0.30912,0.290316,1.0


### Identify the input and output

In [7]:
# Target: the value the model should predict.
y = df["MPG"]

# Inputs: pick the numeric predictors by name instead of dropping columns.
# Later cells add 'predicted_MPG', 'error' and 'abserror' to df; with
# df.drop(columns=["MPG", "Origin"]) re-running this cell afterwards would
# silently feed those target-derived columns to the model.
# 'Origin' is excluded because it is a non-numeric (object) column.
feature_columns = [
    "Cylinders",
    "Engine Disp",
    "Horsepower",
    "Weight",
    "Accelerate",
    "Year",
]
x = df[feature_columns]

### Choose the algorithm

In [8]:
from sklearn.linear_model import LinearRegression

In [9]:
# Ordinary least-squares linear regression, created with scikit-learn's default settings.
model = LinearRegression()

### Train the model

In [10]:
# Fit on every row of the dataset (no train/test split is made in this notebook).
model.fit(x, y)

### Check the accuracy (R² on the training data)

In [11]:
# For a regressor, score() is the R² coefficient of determination. It is
# evaluated here on the same rows the model was fitted on, so it measures
# goodness of fit, not performance on unseen cars.
model.score(x, y)

0.8092552890383932

### Metrics

In [12]:
from sklearn.metrics import mean_absolute_percentage_error as mape
from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import median_absolute_error as mde
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import r2_score as r2

In [13]:
# Model output and ground-truth MPG for the rows the model was fitted on.
predicted = model.predict(x)
original = y

In [14]:
# Report each regression metric for the same (original, predicted) pair.
# The labels are kept verbatim so the printed text is unchanged.
metric_table = (
    ("mape = ", mape),  # mean absolute percentage error
    ("mae = ", mae),    # mean absolute error
    ("mde = ", mde),    # median absolute error
    ("mse = ", mse),    # mean squared error
    ("r2 = ", r2),      # coefficient of determination
)
for label, metric in metric_table:
    print(label, metric(original, predicted))

mape =  0.12116157119745288
mae =  2.6182640467289575
mde =  2.216294833001358
mse =  11.590170981415227
r2 =  0.8092552890383932


### Manually compute the mean_absolute_error

In [15]:
# Look at the first rows again before adding the prediction and error columns.
df.head()

Unnamed: 0,MPG,Cylinders,Engine Disp,Horsepower,Weight,Accelerate,Year,Origin
0,15.0,8,390.0,190,3850,8.5,70,American
1,21.0,6,199.0,90,2648,15.0,70,American
2,18.0,6,199.0,97,2774,15.5,70,American
3,16.0,8,304.0,150,3433,12.0,70,American
4,14.0,8,455.0,225,3086,10.0,70,American


In [16]:
# Store the model's prediction next to the actual MPG of each row.
# Note: this adds a column to df in place.
df["predicted_MPG"] = model.predict(x)

In [17]:
# Check that the new 'predicted_MPG' column sits alongside the actual MPG.
df.head()

Unnamed: 0,MPG,Cylinders,Engine Disp,Horsepower,Weight,Accelerate,Year,Origin,predicted_MPG
0,15.0,8,390.0,190,3850,8.5,70,American,13.047353
1,21.0,6,199.0,90,2648,15.0,70,American,21.001034
2,18.0,6,199.0,97,2774,15.5,70,American,20.184809
3,16.0,8,304.0,150,3433,12.0,70,American,15.534475
4,14.0,8,455.0,225,3086,10.0,70,American,18.851752


In [18]:
# Signed residual per row: positive when the model under-predicts,
# negative when it over-predicts.
df["error"] = df["MPG"] - df["predicted_MPG"]

In [19]:
# Check the signed 'error' column (actual minus predicted).
df.head()

Unnamed: 0,MPG,Cylinders,Engine Disp,Horsepower,Weight,Accelerate,Year,Origin,predicted_MPG,error
0,15.0,8,390.0,190,3850,8.5,70,American,13.047353,1.952647
1,21.0,6,199.0,90,2648,15.0,70,American,21.001034,-0.001034
2,18.0,6,199.0,97,2774,15.5,70,American,20.184809,-2.184809
3,16.0,8,304.0,150,3433,12.0,70,American,15.534475,0.465525
4,14.0,8,455.0,225,3086,10.0,70,American,18.851752,-4.851752


In [20]:
# Magnitude of each residual. Series.abs() is vectorized, whereas
# .apply(abs) calls Python's abs() once per element; the values are the same.
df["abserror"] = df["error"].abs()

In [21]:
# Check that 'abserror' is the non-negative version of 'error'.
df.head()

Unnamed: 0,MPG,Cylinders,Engine Disp,Horsepower,Weight,Accelerate,Year,Origin,predicted_MPG,error,abserror
0,15.0,8,390.0,190,3850,8.5,70,American,13.047353,1.952647,1.952647
1,21.0,6,199.0,90,2648,15.0,70,American,21.001034,-0.001034,0.001034
2,18.0,6,199.0,97,2774,15.5,70,American,20.184809,-2.184809,2.184809
3,16.0,8,304.0,150,3433,12.0,70,American,15.534475,0.465525,0.465525
4,14.0,8,455.0,225,3086,10.0,70,American,18.851752,-4.851752,4.851752


In [22]:
# Numerator of the mean absolute error: sum of |actual - predicted| over all rows.
df["abserror"].sum()

1026.3595063177513

In [23]:
# Mean absolute error by hand: total absolute error divided by the number of
# rows. This should match the value printed by sklearn's mean_absolute_error.
df["abserror"].sum() / len(df)

2.6182640467289575