# Linear Regression Using Scikit-Learn library

In [11]:
#use of open source libraray Scikit-Learn to implement for ML models

#use scikit-learn to run linear regression using gradient descent

import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import StandardScaler
from lab_utils_multi import load_house_data
from lab_utils_common import dlc
np.set_printoptions(precision = 2)
plt.style.use('./deeplearning.mplstyle')

In [12]:
#Gradient Descent 

#gradient descent regression model sklearn.linear_model.SGDRegressor

#z score normalization: sklearn.preprocessing.StandardScaler

X_train, y_train = load_house_data()
X_features = ['size(sqft)', 'bedrroms', 'floors', 'age']


In [13]:
#Scale normalize the training data

scaler = StandardScaler()
X_norm = scaler.fit_transform(X_train)

print(f"Peak tot Peak range by column in Raw. X:{np.ptp(X_train, axis = 0)}")
print(f"Peak tot Peak range by column in Raw. X:{np.ptp(X_norm, axis = 0)}")

Peak tot Peak range by column in Raw. X:[2.41e+03 4.00e+00 1.00e+00 9.50e+01]
Peak tot Peak range by column in Raw. X:[5.85 6.14 2.06 3.69]


In [17]:
#create and fit the regression model
sgdr = SGDRegressor( max_iter= 130)
sgdr.fit(X_norm, y_train)
print(sgdr)

print(f"number of iterations completed: {sgdr.n_iter_}, number of weight updates: {sgdr.t_}")


SGDRegressor(max_iter=130)
number of iterations completed: 124, number of weight updates: 12277.0


In [18]:
#view parameters

b_norm = sgdr.intercept_
w_norm = sgdr.coef_

print(f"model parameters: w: {w_norm}, b: {b_norm}")
print("Model parameters from previous lab: w:[110.56, -21.27, -32.71, -37.97], b: 363.16")

model parameters: w: [110.1  -21.06 -32.47 -38.05], b: [363.16]
Model parameters from previous lab: w:[110.56, -21.27, -32.71, -37.97], b: 363.16


In [19]:
#Make predictions using sgdr.predict() method

y_pred_sgd = sgdr.predict(X_norm)

#make predictions using w and b parameters
y_pred = np.dot(X_norm, w_norm) + b_norm

print(f"Prediction using np.dot() and sgdr.predict() matches: {(y_pred == y_pred_sgd).all()}")

print(f"Prediction on the training data set: {y_pred[:4]}")
print(f"Target values {y_train[:4]}")

Prediction using np.dot() and sgdr.predict() matches: True
Prediction on the training data set: [295.19 485.85 389.57 492.01]
Target values [300.  509.8 394.  540. ]


In [None]:
#plot the sgdr results 

fig, ax = plt.subplots(1,4, figsize = (12,4), sharey = True)

for i in range(len(ax)):
    ax[i].scatter(X_train[:,i], y_train, label = 'target')
    ax[i].set_xlabel(X_features[i])
    ax[i].scatter(X_train[:,i], y_pred, c = 'r', label = 'predict')
    
ax[0].set_ylabel("Price")
ax[0].legend()
