In [45]:
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split


from sklearn.linear_model import Lasso
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor

from sklearn.model_selection import GridSearchCV

### Read data

In [46]:
auto_df = pd.read_csv("./auto-mpg-processed.csv")
auto_df.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,age
0,18.0,8,307.0,130,3504,12.0,49
1,15.0,8,350.0,165,3693,11.5,49
2,18.0,8,318.0,150,3436,11.0,49
3,16.0,8,304.0,150,3433,12.0,49
4,17.0,8,302.0,140,3449,10.5,49


### Compare results array

In [47]:
compare_results = []

### Split data

In [48]:
X = auto_df.drop(["mpg", "age"], axis=1)
Y= auto_df["mpg"]

x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)

## Lasso regression

### Tune alpha param

In [49]:
params = {"alpha":[0.2, 0.4, 0.6, 0.8, 0.9, 1.0]}
gridsearchA = GridSearchCV(Lasso(), params, cv=3, return_train_score=True)
gridsearchA.fit(x_train, y_train)
gridsearchA.best_params_

{'alpha': 1.0}

### results of each alpha value and their rank

In [50]:
for i in range(len(params["alpha"])):
    print("Param", gridsearchA.cv_results_["params"][i])
    print("Rank", gridsearchA.cv_results_["rank_test_score"][i])
    print("Test result", gridsearchA.cv_results_["mean_test_score"][i])

Param {'alpha': 0.2}
Rank 6
Test result 0.683468895940368
Param {'alpha': 0.4}
Rank 5
Test result 0.6876546425753021
Param {'alpha': 0.6}
Rank 4
Test result 0.6886645801362363
Param {'alpha': 0.8}
Rank 3
Test result 0.6889861145665007
Param {'alpha': 0.9}
Rank 2
Test result 0.6891428001624516
Param {'alpha': 1.0}
Rank 1
Test result 0.6892983085822174


### Train model

In [51]:
lasso_regression = Lasso(alpha = gridsearchA.best_params_["alpha"])
lasso_regression.fit(x_train, y_train)

y_pred = lasso_regression.predict(x_test)

result = {
    "lasso_train_score": lasso_regression.score(x_train, y_train),
    "lasso_test_score": r2_score(y_test, y_pred)
}

compare_results.append(result)

## K-NN model

### Tune n_neighbors param

In [52]:
params = {"n_neighbors":[5, 10, 12, 15, 17, 20, 25, 30, 40]}

gridSearchK = GridSearchCV(KNeighborsRegressor(), params, cv=3, return_train_score=True)
gridSearchK.fit(x_train, y_train)
gridSearchK.best_params_



{'n_neighbors': 40}

### Train model

In [53]:
knn_regression = KNeighborsRegressor(n_neighbors=gridSearchK.best_params_["n_neighbors"])
knn_regression.fit(x_train, y_train)

y_pred = knn_regression.predict(x_test)

result = {
    "knn_train_score": knn_regression.score(x_train, y_train),
    "knn_test_score": r2_score(y_test, y_pred)
}

compare_results.append(result)

## Decision tree model

### Tune depth param

In [54]:
params = {"max_depth":[1, 2, 3, 4, 5, 6]}
gridsearchD = GridSearchCV(DecisionTreeRegressor(), params, cv=3, return_train_score=True)
gridsearchD.fit(x_train, y_train)

gridsearchD.best_params_



{'max_depth': 2}

### Train model

In [55]:
d_tree = DecisionTreeRegressor(max_depth=gridsearchD.best_params_["max_depth"])
d_tree.fit(x_train, y_train)

y_pred = d_tree.predict(x_test)

result = {
    "decision_tree_train_score": d_tree.score(x_train, y_train),
    "decision_tree_test_score": r2_score(y_test, y_pred)
}

compare_results.append(result)

In [56]:
compare_results

[{'lasso_train_score': 0.7067193209777726,
  'lasso_test_score': 0.7017008167974965},
 {'knn_train_score': 0.7305230220980674, 'knn_test_score': 0.7028050471122513},
 {'decision_tree_train_score': 0.7325588560140855,
  'decision_tree_test_score': 0.7018519810488555}]