In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

In [2]:
file_path = r'D:\User\Desktop\GT\jupyter\auto+mpg\auto-mpg.data'

column_names = ['mpg', 'cylinders', 'displacement', 'horsepower',
                'weight', 'acceleration', 'model_year', 'origin', 'car_name']

df = pd.read_csv(file_path, sep=r'\s+', names=column_names, na_values='?')
print(df.head())

    mpg  cylinders  displacement  horsepower  weight  acceleration  \
0  18.0          8         307.0       130.0  3504.0          12.0   
1  15.0          8         350.0       165.0  3693.0          11.5   
2  18.0          8         318.0       150.0  3436.0          11.0   
3  16.0          8         304.0       150.0  3433.0          12.0   
4  17.0          8         302.0       140.0  3449.0          10.5   

   model_year  origin                   car_name  
0          70       1  chevrolet chevelle malibu  
1          70       1          buick skylark 320  
2          70       1         plymouth satellite  
3          70       1              amc rebel sst  
4          70       1                ford torino  


In [3]:
info = df.drop(['origin','car_name'],axis=1)
info = info[["cylinders","displacement","horsepower","weight","acceleration","model_year","mpg"]]
info = info.dropna()
info

Unnamed: 0,cylinders,displacement,horsepower,weight,acceleration,model_year,mpg
0,8,307.0,130.0,3504.0,12.0,70,18.0
1,8,350.0,165.0,3693.0,11.5,70,15.0
2,8,318.0,150.0,3436.0,11.0,70,18.0
3,8,304.0,150.0,3433.0,12.0,70,16.0
4,8,302.0,140.0,3449.0,10.5,70,17.0
...,...,...,...,...,...,...,...
393,4,140.0,86.0,2790.0,15.6,82,27.0
394,4,97.0,52.0,2130.0,24.6,82,44.0
395,4,135.0,84.0,2295.0,11.6,82,32.0
396,4,120.0,79.0,2625.0,18.6,82,28.0


In [9]:
x = info.drop("mpg",axis = 1).values.tolist()
x = np.array(x)
x = (x - np.mean(x, axis=0)) / np.std(x, axis=0)
print(x)

[[ 1.48394702  1.07728956  0.66413273  0.62054034 -1.285258   -1.62531533]
 [ 1.48394702  1.48873169  1.57459447  0.84333403 -1.46672362 -1.62531533]
 [ 1.48394702  1.1825422   1.18439658  0.54038176 -1.64818924 -1.62531533]
 ...
 [-0.86401356 -0.56847897 -0.53247413 -0.80463202 -1.4304305   1.63640964]
 [-0.86401356 -0.7120053  -0.66254009 -0.41562716  1.11008813  1.63640964]
 [-0.86401356 -0.72157372 -0.58450051 -0.30364091  1.40043312  1.63640964]]


In [11]:
y = info['mpg'].values.tolist()
y = np.array(y)
print(y)

[18.  15.  18.  16.  17.  15.  14.  14.  14.  15.  15.  14.  15.  14.
 24.  22.  18.  21.  27.  26.  25.  24.  25.  26.  21.  10.  10.  11.
  9.  27.  28.  25.  19.  16.  17.  19.  18.  14.  14.  14.  14.  12.
 13.  13.  18.  22.  19.  18.  23.  28.  30.  30.  31.  35.  27.  26.
 24.  25.  23.  20.  21.  13.  14.  15.  14.  17.  11.  13.  12.  13.
 19.  15.  13.  13.  14.  18.  22.  21.  26.  22.  28.  23.  28.  27.
 13.  14.  13.  14.  15.  12.  13.  13.  14.  13.  12.  13.  18.  16.
 18.  18.  23.  26.  11.  12.  13.  12.  18.  20.  21.  22.  18.  19.
 21.  26.  15.  16.  29.  24.  20.  19.  15.  24.  20.  11.  20.  19.
 15.  31.  26.  32.  25.  16.  16.  18.  16.  13.  14.  14.  14.  29.
 26.  26.  31.  32.  28.  24.  26.  24.  26.  31.  19.  18.  15.  15.
 16.  15.  16.  14.  17.  16.  15.  18.  21.  20.  13.  29.  23.  20.
 23.  24.  25.  24.  18.  29.  19.  23.  23.  22.  25.  33.  28.  25.
 25.  26.  27.  17.5 16.  15.5 14.5 22.  22.  24.  22.5 29.  24.5 29.
 33.  20.  18.  18.5

In [12]:
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.2,random_state=42)

In [13]:
model = LinearRegression()
model.fit(x_train,y_train)

In [14]:
y_pred= model.predict(x_test)

In [16]:
mse = mean_squared_error(y_test,y_pred)
r2 = r2_score(y_test,y_pred)

print("Mean squared error: ",mse)
print("R2 score: ",r2)

Mean squared error:  10.502370329417305
R2 score:  0.794234907542859


In [17]:
print("Weights: ",model.intercept_)
print("Bias: ",model.coef_)

Weights:  23.482123668059664
Bias:  [-0.19791303  0.1059186  -0.08750727 -5.56582242  0.17010249  2.79840802]
