In [1]:
import warnings
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt

from sklearn import datasets
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet, SGDRegressor
from sklearn.metrics import mean_squared_error, r2_score

In [2]:
# Load data
boston = datasets.load_boston()
print(boston.data.shape, boston.target.shape)
print(boston.feature_names)

(506, 13) (506,)
['CRIM' 'ZN' 'INDUS' 'CHAS' 'NOX' 'RM' 'AGE' 'DIS' 'RAD' 'TAX' 'PTRATIO'
 'B' 'LSTAT']


In [3]:
df = pd.DataFrame(boston.data,columns=boston.feature_names)
df = pd.concat([df,pd.Series(boston.target,name='MEDV')],axis=1)

In [5]:
import numpy as np

# It contains all attributes (all features and the target)
allData = df


# Use the following code to select ALL features
X = df.drop(columns='MEDV')  # Data Matrix containing all features excluding the target


y = df['MEDV'] # 1D targer vector

print(X.shape)
print(y.shape)

(506, 13)
(506,)


In [6]:

scaler = StandardScaler().fit(X)
X = scaler.transform(X)

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
# SGD Regression

# Create SGDRegressor linear regression object
lin_reg_sgd = SGDRegressor(max_iter=1000, eta0=0.01, penalty="elasticnet", l1_ratio=0.0, alpha=0.01)

# Train the model
lin_reg_sgd.fit(X_train, y_train)



# The intercept
print("Intercept: \n", lin_reg_sgd.intercept_)

# The coefficients
print("Coefficients: \n", lin_reg_sgd.coef_)

# The number of iterations
print("Number of Iterations: \n", lin_reg_sgd.n_iter_)


print("\n----------------------------- Model Evaluation -----------------------------")

# Make prediction 
y_train_predicted_sgd = lin_reg_sgd.predict(X_train)
y_test_predicted_sgd = lin_reg_sgd.predict(X_test)


print("Training: Mean squared error: %.2f"
      % mean_squared_error(y_train, y_train_predicted_sgd))

print("Test: Mean squared error: %.2f"
      % mean_squared_error(y_test, y_test_predicted_sgd))


# Explained variance score: 1 is perfect prediction
print("\nTraining: Coefficient of determination r^2 variance score [1 is perfect prediction]: %.2f" % 
      r2_score(y_train, y_train_predicted_sgd))




# Explained variance score: 1 is perfect prediction
print("Test: Coefficient of determination r^2 variance score [1 is perfect prediction]: %.2f" % 
      r2_score(y_test, y_test_predicted_sgd))

Intercept: 
 [22.49135076]
Coefficients: 
 [-0.88756887  0.51901873  0.03984921  0.74787209 -1.78124908  3.16828234
 -0.18834981 -2.84261664  1.52460556 -1.02564533 -1.92379304  1.1165961
 -3.54860949]
Number of Iterations: 
 37

----------------------------- Model Evaluation -----------------------------
Training: Mean squared error: 21.74
Test: Mean squared error: 24.74

Training: Coefficient of determination r^2 variance score [1 is perfect prediction]: 0.75
Test: Coefficient of determination r^2 variance score [1 is perfect prediction]: 0.66


In [9]:
%%time

warnings.filterwarnings('ignore')

# The param_grid tells Scikit-Learn to evaluate all combinations of the hyperparameter values
param_grid = {'alpha': [0.1, 0.01, 0.001], 'learning_rate': ["constant", "optimal", "invscaling"], 
              'l1_ratio': [1, 0.5, 0.2, 0], 'max_iter':[100, 400, 1000, 10000],'eta0': [0.01, 0.001]}



sgd = SGDRegressor()

sgd_cv = GridSearchCV(sgd, param_grid, scoring='neg_mean_squared_error', cv=10, verbose=2, n_jobs=-1)
sgd_cv.fit(X_train, y_train)


params_optimal_sgd = sgd_cv.best_params_

print("Best Score (negative mean squared error): %f" % sgd_cv.best_score_)
print("Optimal Hyperparameter Values: ", params_optimal_sgd)
print("\n")

Fitting 10 folds for each of 288 candidates, totalling 2880 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  38 tasks      | elapsed:    2.3s
[Parallel(n_jobs=-1)]: Done 876 tasks      | elapsed:   16.5s


Best Score (negative mean squared error): -23.754557
Optimal Hyperparameter Values:  {'alpha': 0.01, 'eta0': 0.01, 'l1_ratio': 0.5, 'learning_rate': 'constant', 'max_iter': 1000}


CPU times: user 2.72 s, sys: 155 ms, total: 2.88 s
Wall time: 25.1 s


[Parallel(n_jobs=-1)]: Done 2880 out of 2880 | elapsed:   25.1s finished


In [14]:

# SGD Regression

# Create SGDRegressor linear regression object using the optimal hyperparameter values
lin_reg_sgd = SGDRegressor(**params_optimal_sgd)

# Train the model
lin_reg_sgd.fit(X_train, y_train)


# The intercept
print("Intercept: \n", lin_reg_sgd.intercept_)

# The coefficients
print("Coefficients: \n", lin_reg_sgd.coef_)

# The number of iterations
print("Number of Iterations: \n", lin_reg_sgd.n_iter_)


print("\n----------------------------- Model Evaluation -----------------------------")

# Make prediction 
y_train_predicted_sgd = lin_reg_sgd.predict(X_train)


print("Mean squared error: %.2f"
      % mean_squared_error(y_train, y_train_predicted_sgd))


# Explained variance score: 1 is perfect prediction
print("Coefficient of determination r^2 variance score [1 is perfect prediction]: %.2f" % r2_score(y_train, y_train_predicted_sgd))

Intercept: 
 [22.74936386]
Coefficients: 
 [-0.78092811  0.74963022  0.19684641  0.94408979 -2.22259309  3.03928575
 -0.35755978 -2.83423286  2.22378395 -1.32822218 -1.9248228   1.09453204
 -3.91091028]
Number of Iterations: 
 20

----------------------------- Model Evaluation -----------------------------
Mean squared error: 22.14
Coefficient of determination r^2 variance score [1 is perfect prediction]: 0.75


In [15]:
# Scoring Parameter for Regression:
# https://scikit-learn.org/stable/modules/model_evaluation.html#scoring-parameter

scores = cross_val_score(lin_reg_sgd, X_train, y_train, scoring='neg_mean_squared_error', cv=10)
print(scores)

print("Negative Mean Squared Error: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

[-13.58023515 -20.25968689 -29.89696913 -47.70833555 -25.24162297
 -29.66762613 -20.132711   -22.37341366 -14.15770168 -35.65793057]
Negative Mean Squared Error: -25.87 (+/- 19.68)


In [16]:
# Make prediction using the test data
y_test_predicted = lin_reg_sgd.predict(X_test)


test_mse_linear = mean_squared_error(y_test, y_test_predicted)

print("Mean squared error: %.2f"
      % test_mse_linear)


# Explained variance score: 1 is perfect prediction
test_r2_linear = r2_score(y_test, y_test_predicted)
print("Coefficient of determination r^2 variance score [1 is perfect prediction]: %.2f" 
      % test_r2_linear)

Mean squared error: 24.00
Coefficient of determination r^2 variance score [1 is perfect prediction]: 0.67


In [17]:
# Variable that speficies the degree of the polynomial to be added to the feature vector
poly_degree = 2


# Add polynomial terms with the feature vector using the sklearn PolynomialFeatures class
poly_features = PolynomialFeatures(degree=poly_degree, include_bias=False)
X_train_poly = poly_features.fit_transform(X_train)


print("No. of Original Features: ", X_train.shape[1])
print("No. of Augmented Features: ", X_train_poly.shape[1])

No. of Original Features:  13
No. of Augmented Features:  104


In [18]:
%%time

warnings.filterwarnings('ignore')

# The param_grid tells Scikit-Learn to evaluate all combinations of the hyperparameter values
param_grid = {'alpha': [0.1, 0.01], 'learning_rate': ["invscaling"], 
              'l1_ratio': [1, 0.5, 0.2, 0], 'max_iter':[100, 400, 1000, 5000],'eta0': [0.01, 0.001, 0.0001]}



sgd = SGDRegressor()

sgd_cv = GridSearchCV(sgd, param_grid, scoring='neg_mean_squared_error', cv=10, verbose=2, n_jobs=-1)
sgd_cv.fit(X_train_poly, y_train)


params_optimal_sgd = sgd_cv.best_params_

print("Best Score (negative mean squared error): %f" % sgd_cv.best_score_)
print("Optimal Hyperparameter Values: ", params_optimal_sgd)
print("\n")

Fitting 10 folds for each of 96 candidates, totalling 960 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 136 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 836 tasks      | elapsed:   29.2s


Best Score (negative mean squared error): -14.925211
Optimal Hyperparameter Values:  {'alpha': 0.1, 'eta0': 0.001, 'l1_ratio': 0.5, 'learning_rate': 'invscaling', 'max_iter': 1000}


CPU times: user 1.25 s, sys: 66.4 ms, total: 1.32 s
Wall time: 38.5 s


[Parallel(n_jobs=-1)]: Done 960 out of 960 | elapsed:   38.3s finished


In [19]:
# SGD Regression

# Create SGDRegressor linear regression object using the optimal hyperparameter values
lin_reg_sgd = SGDRegressor(**params_optimal_sgd)

# Train the model
lin_reg_sgd.fit(X_train_poly, y_train)


# # The intercept
# print("Intercept: \n", lin_reg_sgd.intercept_)

# # The coefficients
# print("Coefficients: \n", lin_reg_sgd.coef_)

# The number of iterations
print("Number of Iterations: \n", lin_reg_sgd.n_iter_)


print("\n----------------------------- Model Evaluation -----------------------------")

# Make prediction 
y_train_predicted_sgd = lin_reg_sgd.predict(X_train_poly)


print("Mean squared error: %.2f"
      % mean_squared_error(y_train, y_train_predicted_sgd))


# Explained variance score: 1 is perfect prediction
print("Coefficient of determination r^2 variance score [1 is perfect prediction]: %.2f" % r2_score(y_train, y_train_predicted_sgd))

Number of Iterations: 
 783

----------------------------- Model Evaluation -----------------------------
Mean squared error: 8.85
Coefficient of determination r^2 variance score [1 is perfect prediction]: 0.90


In [20]:
poly_features = PolynomialFeatures(degree=poly_degree, include_bias=False)
X_test_poly = poly_features.fit_transform(X_test)

# Make prediction using the test data
y_test_predicted = lin_reg_sgd.predict(X_test_poly)

test_mse_polynomial = mean_squared_error(y_test, y_test_predicted)

print("Mean squared error: %.2f"
      % test_mse_polynomial)



# Explained variance score: 1 is perfect prediction

test_r2_polynomial = r2_score(y_test, y_test_predicted)
print("Coefficient of determination r^2 variance score [1 is perfect prediction]: %.2f" 
      % test_r2_polynomial)

Mean squared error: 14.54
Coefficient of determination r^2 variance score [1 is perfect prediction]: 0.80


In [21]:
data = [["MSE (test)", test_mse_linear, test_mse_polynomial], 
        ["R2 Score (test)", test_r2_linear, test_r2_polynomial]]
pd.DataFrame(data, columns=["Metric", "SGD Linear Regression", "SGD Polynomial Regression"])

Unnamed: 0,Metric,SGD Linear Regression,SGD Polynomial Regression
0,MSE (test),24.002255,14.544969
1,R2 Score (test),0.672699,0.801661
