In [6]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

In [2]:
data = pd.read_csv('../DMW_DataSets/auto-mpg.csv')
data.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model year,origin,car name
0,18.0,8,307.0,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140,3449,10.5,70,1,ford torino


In [3]:
data.dropna()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model year,origin,car name
0,18.0,8,307.0,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140,3449,10.5,70,1,ford torino
...,...,...,...,...,...,...,...,...,...
393,27.0,4,140.0,86,2790,15.6,82,1,ford mustang gl
394,44.0,4,97.0,52,2130,24.6,82,2,vw pickup
395,32.0,4,135.0,84,2295,11.6,82,1,dodge rampage
396,28.0,4,120.0,79,2625,18.6,82,1,ford ranger


In [4]:
X = data.drop('mpg', axis=1)
y = data['mpg']

In [5]:
X = pd.get_dummies(X,drop_first=True)

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [8]:
model = LinearRegression()
model.fit(X_train, y_train)

In [9]:
y_pred = model.predict(X_test)

In [13]:
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f'RMSE: {rmse}')

r2 = model.score(X_test, y_test)
print(f'R2: {r2}')

print("\nModel Coefficients:\n")
for feature, coef in zip(X.columns[:10], model.coef_[:10]):
    print(f"{feature} : {coef}")

RMSE: 8.475148048678276
R2: -0.2515495290559093

Model Coefficients:

cylinders : -0.6933507098515248
displacement : -0.12012813030281133
weight : 0.011965677768793537
acceleration : -1.683354960838666
model year : 0.5707723551709878
origin : 1.7424995563817256
horsepower_100 : -11.091734528240988
horsepower_102 : -11.806832365286223
horsepower_103 : -9.738021361459113
horsepower_105 : -9.916327129234688


In [16]:
result = pd.DataFrame({'Actual Value':y_test, 'Predicted value':y_pred})
print("Actual values vs Predicted values\n", result.head())

Actual values vs Predicted values
      Actual Value  Predicted value
198          33.0        34.206966
396          28.0        22.853545
33           19.0        18.741608
208          13.0        12.985510
93           14.0        12.638638
