In [2]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import Lasso
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR

from sklearn.model_selection import GridSearchCV

import warnings
warnings.filterwarnings('ignore')

In [3]:
automobile_df = pd.read_csv('dataset/auto_mpg_processed.csv')
automobile_df.sample(5)

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,age
307,37.0,4,85.0,65,1975,19.4,40
116,37.0,4,119.0,92,2434,15.0,41
180,30.0,4,135.0,84,2385,12.9,40
186,32.8,4,78.0,52,1985,19.4,43
362,24.0,4,121.0,110,2660,14.0,48


In [4]:
x = automobile_df.drop(['mpg', 'age'], axis = 1)
y = automobile_df['mpg']

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2)

In [5]:
parameters = {'alpha' : [0.2, 0.4,0.6,0.8, 0.9, 1.0]}

grid_search = GridSearchCV(Lasso(), parameters,cv = 3, return_train_score = True)
grid_search.fit(x_train, y_train)

grid_search.best_params_

{'alpha': 1.0}

In [10]:
for i in range(len(parameters['alpha'])):
    print('parameters: ', grid_search.cv_results_['params'][i])
    
    print('Mean Test Score: ', grid_search.cv_results_['mean_test_score'][i])
    
    print('Rank: ', grid_search.cv_results_['rank_test_score'][i])
    print()

parameters:  {'alpha': 0.2}
Mean Test Score:  0.6820820473008601
Rank:  6

parameters:  {'alpha': 0.4}
Mean Test Score:  0.6826528808302821
Rank:  5

parameters:  {'alpha': 0.6}
Mean Test Score:  0.682883695900574
Rank:  4

parameters:  {'alpha': 0.8}
Mean Test Score:  0.682992566898788
Rank:  3

parameters:  {'alpha': 0.9}
Mean Test Score:  0.6830426135474408
Rank:  2

parameters:  {'alpha': 1.0}
Mean Test Score:  0.6830896521174944
Rank:  1



In [13]:
lasso_model = Lasso(alpha = grid_search.best_params_['alpha']).fit(x_train, y_train)

y_pred = lasso_model.predict(x_test)

print('Training Score: ', lasso_model.score(x_train, y_train))
print('Test Score: ', r2_score(y_test, y_pred))

Training Score:  0.6901828244553618
Test Score:  0.7726945581622706


In [15]:
parameters = {'n_neighbors' : [10, 12, 24, 38, 35, 50]}

grid_search = GridSearchCV(KNeighborsRegressor(), parameters, cv = 3, return_train_score = True)
grid_search.fit(x_train, y_train)

grid_search.best_params_

{'n_neighbors': 35}

In [16]:
for i in range(len(parameters['n_neighbors'])):
    print('parameters: ', grid_search.cv_results_['params'][i])
    
    print('Mean Test Score: ', grid_search.cv_results_['mean_test_score'][i])
    
    print('Rank: ', grid_search.cv_results_['rank_test_score'][i])
    print()

parameters:  {'n_neighbors': 10}
Mean Test Score:  0.6614863043469441
Rank:  6

parameters:  {'n_neighbors': 12}
Mean Test Score:  0.6686296089908869
Rank:  5

parameters:  {'n_neighbors': 24}
Mean Test Score:  0.6855948348954879
Rank:  4

parameters:  {'n_neighbors': 38}
Mean Test Score:  0.6940098188895302
Rank:  2

parameters:  {'n_neighbors': 35}
Mean Test Score:  0.694727845709787
Rank:  1

parameters:  {'n_neighbors': 50}
Mean Test Score:  0.6875253424701085
Rank:  3



In [17]:
k_neighbors_model = KNeighborsRegressor(n_neighbors = grid_search.best_params_['n_neighbors']).fit(x_train, y_train)

In [18]:
parameters = {'max_depth' : [1, 2, 3, 4, 5, 7, 8]}

grid_search = GridSearchCV(DecisionTreeRegressor(), parameters, cv = 3, return_train_score = True)
grid_search.fit(x_train, y_train)

grid_search.best_params_

{'max_depth': 2}

In [20]:
parameters = {'epsilon': [0.05, 0.1, 0.2, 0.3],
             'C' : [0.2, 0.3]}

grid_search = GridSearchCV(SVR(), parameters, cv = 3, return_train_score = True)
grid_search.fit(x_train, y_train)

grid_search.best_params_

{'C': 0.3, 'epsilon': 0.05}

In [21]:
svr_model = SVR(kernel= 'linear', epsilon = grid_search.best_params_['epsilon'],
                C = grid_search.best_params_['C']).fit(x_train, y_train)

In [23]:
y_pred = svr_model.predict(x_test)

print('Training Score: ', svr_model.score(x_train,y_train))
print('TestScore: ', r2_score(y_test, y_pred))

Training Score:  0.6714066154522125
TestScore:  0.7315245072019358
