# Lecture 3 Regressions


In [1]:
%matplotlib inline 
# import necessary libraries and specify that graphs should be plotted inline
import numpy as np
import pandas as pd
import sklearn
import mglearn
import matplotlib.pyplot as plt

import warnings
# Install a global 'ignore' filter: no Python warning is shown after this line.
# NOTE(review): this also hides any convergence or deprecation warnings the
# models in later cells may emit -- confirm that blanket suppression is intended.
warnings.filterwarnings('ignore')

## Advanced Regression


In [2]:
from sklearn.model_selection import train_test_split
# Build mglearn's "wave" regression dataset, requesting 60 samples.
X, y = mglearn.datasets.make_wave(n_samples=60)

# Partition into train/test sets; the fixed random_state keeps the split
# repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42
)

### Polynomial Regression



In [3]:
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing  import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LinearRegression

# NOTE(review): `poly` is not passed to the pipeline below, which constructs
# its own PolynomialFeatures instance; it is kept in case other cells use it.
poly = PolynomialFeatures()

# Candidate polynomial degrees (1 through 4). The key addresses the
# 'polynomialfeatures' step that make_pipeline names automatically.
param_poly = {'polynomialfeatures__degree': range(1, 5)}

# Polynomial feature expansion followed by a linear regression.
pipe_poly = make_pipeline(PolynomialFeatures(), LinearRegression())

# 5-fold cross-validated search over the degree; n_jobs=-1 runs the fits in
# parallel on all available cores.
# NOTE(review): if the selected degree lands on the upper end of the grid,
# consider widening the range.
grid_poly = GridSearchCV(
    estimator=pipe_poly,
    param_grid=param_poly,
    cv=5,
    n_jobs=-1,
    return_train_score=True,
)
grid_poly.fit(X_train, y_train)

# Score the fitted search object on both partitions.
grid_poly_train_score = grid_poly.score(X_train, y_train)
grid_poly_test_score = grid_poly.score(X_test, y_test)

print('train score: ', grid_poly_train_score)
print('test score: ', grid_poly_test_score)

# Report the degree chosen by cross-validation.
print('Best parameters: ', grid_poly.best_params_)

train score:  0.7366477110211134
test score:  0.7475874000113646
Best parameters:  {'polynomialfeatures__degree': 4}


In [4]:
# Predictions from the fitted polynomial-regression grid search on both
# partitions.
y_poly_train_predict = grid_poly.predict(X_train)
y_poly_predict = grid_poly.predict(X_test)

# MSE and MAE
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import mean_absolute_error as mae

# scikit-learn metrics are declared as metric(y_true, y_pred). MSE and MAE
# happen to be symmetric, so the printed values are the same either way, but
# passing the ground truth first keeps this cell correct if an asymmetric
# metric (e.g. r2_score) is added later.
print('Training set MSE score', mse(y_train, y_poly_train_predict))
print('Test set MSE score', mse(y_test, y_poly_predict))

print('Training set MAE score', mae(y_train, y_poly_train_predict))
print('Test set MAE score', mae(y_test, y_poly_predict))

Training set MSE score 0.20428901086840767
Test set MSE score 0.2924564533054176
Training set MAE score 0.36374762075965594
Test set MAE score 0.42485671006472187


### Ridge Regression

In [5]:
from sklearn.linear_model import Ridge

# Load mglearn's extended Boston dataset. This rebinds X, y and the four
# train/test variables that held the wave data in the earlier cells.
X, y = mglearn.datasets.load_extended_boston()
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=0
)

# Baseline: Ridge with its default settings.
ridge = Ridge()
ridge.fit(X_train, y_train)

print("Training set score: {:.2f}".format(ridge.score(X_train, y_train)))
print("Test set score: {:.2f}".format(ridge.score(X_test, y_test)))

Training set score: 0.89
Test set score: 0.75


In [6]:
# Fresh, unfitted Ridge used as the template estimator for the search.
ridge = Ridge()

# Regularisation strengths to try, spaced by factors of ten.
param_ridge = {'alpha': [0.001, 0.01, 0.1, 1, 10, 100]}

# 5-fold cross-validated search over alpha.
grid_ridge = GridSearchCV(
    estimator=ridge,
    param_grid=param_ridge,
    cv=5,
    return_train_score=True,
)
grid_ridge.fit(X_train, y_train)

# Score the fitted search object on both partitions.
grid_ridge_train_score = grid_ridge.score(X_train, y_train)
grid_ridge_test_score = grid_ridge.score(X_test, y_test)

print('Training set score: ', grid_ridge_train_score)
print('Test set score: ', grid_ridge_test_score)

# Selected alpha and its mean cross-validation score.
print('best parameters:', grid_ridge.best_params_)
print('Best cross-validation score:', grid_ridge.best_score_)

Training set score:  0.928227368500198
Test set score:  0.772206793648016
best parameters: {'alpha': 0.1}
Best cross-validation score: 0.848452166752413


### Lasso

In [7]:
from sklearn.linear_model import Lasso

# Baseline: Lasso with its default settings, fitted on the current training
# split.
lasso = Lasso()
lasso.fit(X_train, y_train)

print("Training set score: {:.2f}".format(lasso.score(X_train, y_train)))
print("Test set score: {:.2f}".format(lasso.score(X_test, y_test)))

Training set score: 0.29
Test set score: 0.21


In [8]:
# Fresh, unfitted Lasso used as the template estimator for the search.
# NOTE(review): warnings are globally ignored in the first cell, so a
# convergence warning from Lasso at the smallest alphas would be invisible
# here -- consider verifying convergence (e.g. by raising max_iter).
lasso = Lasso()

# Regularisation strengths to try, spaced by factors of ten.
param_lasso = {'alpha': [0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10]}

# 5-fold cross-validated search over alpha.
grid_lasso = GridSearchCV(
    estimator=lasso,
    param_grid=param_lasso,
    cv=5,
    return_train_score=True,
)
grid_lasso.fit(X_train, y_train)

# Score the fitted search object on both partitions.
grid_lasso_train_score = grid_lasso.score(X_train, y_train)
grid_lasso_test_score = grid_lasso.score(X_test, y_test)

print('Training set score: ', grid_lasso_train_score)
print('Test score: ', grid_lasso_test_score)

# Selected alpha and its mean cross-validation score.
print('Best parameters: ', grid_lasso.best_params_)
print('Best cross-validation score:', grid_lasso.best_score_)

Training set score:  0.9354593864534091
Test score:  0.7547974360305012
Best parameters:  {'alpha': 0.001}
Best cross-validation score: 0.8248531922293164
