## Set up env and modules

In [None]:
# Mount Google Drive at /content/drive (requires the google.colab package).
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Change into the course folder on the mounted Drive; the relative paths used
# below (./deeplearning.mplstyle, ./data/...) are resolved from this directory.
%cd /content/drive/MyDrive/Notebooks/Course Supervised ML/

In [2]:
import copy
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Apply the matplotlib style sheet stored next to the notebook (relative path).
plt.style.use('./deeplearning.mplstyle')
# Print NumPy arrays with 3 decimals and suppress scientific notation for small values.
np.set_printoptions(precision=3, suppress=True)

In [16]:
from functions import *
from lab_utils_multi import run_gradient_descent, plot_cost_i_w

## Data

In [34]:
# Load the house-price workbook into a DataFrame and preview its first two rows.
dataset = pd.read_excel("./data/Foltz Regression House Price.xlsx")
dataset.head(2)

  warn(msg)


Unnamed: 0,price_1000s,sqft,exempHS,beds,bath
0,89,1097,0,1.0,2
1,95,1505,0,1.0,2


In [5]:
# Split by column position: column 0 is the target, all remaining columns are features.
# In the preview above column 0 is price_1000s, followed by sqft, exempHS, beds, bath.
X_train = dataset.iloc[:, 1:]
y_train = dataset.iloc[:, 0]

## Graphs

In [None]:
# Scatter plot helper (presumably provided by the star-imported `functions` module).
# The trailing 0 and 1 look like feature column indices - TODO confirm against the helper.
graph_scatter_hue_df(X_train, y_train, 0, 1)


In [None]:
# Scatter plots of the features against the target (helper presumably from `functions`).
# NOTE(review): the "Functions description" section lists this helper as
# graph_scatter_df(X_features, X, y), but only two arguments are passed here - confirm.
graph_scatter_df(X_train, y_train)

## Functions description

**Gradient descent**
1. compute_cost($X, y, w, b$): return total_cost
2. compute_gradient($X, y, w, b$): return dj_dw, dj_db
3. gradient_descent(X, y, w_in, b_in, alpha, num_iter): return w, b, J_hist

**Feature scaling**
1. zscore_normalization(X):
   return X_norm, mu, sigma

**Graphs**
1. graph_scatter_df(X_features, X, y)
2. graph_normalization(X, X_norm, mu, sigma, feature_x, feature_y)

## Normalization + Model calculation for normalized data

In [9]:
# Convert the pandas objects to NumPy arrays for the hand-written gradient descent.
# np.asarray accepts both pandas objects and ndarrays, so this cell can be re-run
# safely (calling .to_numpy() a second time fails because ndarrays have no such method).
X_train = np.asarray(X_train)
y_train = np.asarray(y_train)

In [None]:
# Normalize the features; mu and sigma are kept so that new samples can be
# scaled with the same training-set statistics later on.
X_norm, mu, sigma = zscore_normalization(X_train)

In [None]:
# Preview the first four normalized rows.
X_norm[:4, ]

In [12]:
# Starting point and settings passed to gradient_descent below.
w_in = np.ones(X_train.shape[1])  # one initial weight per feature column, all set to 1
b_in = 50.0                       # initial bias
alpha = 0.03                      # passed as `alpha` (learning rate)
num_iter = 200                    # passed as `num_iter` (number of iterations)

In [None]:
# Fit weights and bias on the normalized features; J_hist is the cost history plotted below.
w_norm, b_norm, J_hist = gradient_descent(X_norm, y_train, w_in, b_in, alpha, num_iter)

In [None]:
# Display the fitted weights and bias.
w_norm, b_norm

(array([40.523, 14.344, 24.887,  4.172]), 206.94430678912764)

### Graphs to check that gradient descent works

In [None]:
# Convergence check: full cost history on the left, the tail (entries 100+) on the right.
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(12, 4), constrained_layout=True)

# Left panel: every recorded cost value.
ax1.plot(J_hist)
ax1.set_title("Cost vs. iteration")
ax1.set_xlabel("iteration step")
ax1.set_ylabel("Cost")

# Right panel: skip the first 100 entries; x values start at 100 so they keep
# matching the position of each value in J_hist.
ax2.plot(np.arange(100, 100 + len(J_hist[100:])), J_hist[100:])
ax2.set_title("Cost vs. iteration (tail)")
ax2.set_xlabel("iteration step")
ax2.set_ylabel("Cost")

plt.show()

In [None]:
# Re-run gradient descent with the lab_utils_multi helper to obtain `hist` for the plot below.
# 300 is presumably the iteration count - TODO confirm against lab_utils_multi.
_, _, hist = run_gradient_descent(X_norm, y_train, 300, alpha = 0.03)

In [None]:
# NOTE(review): `hist` was produced from X_norm in the previous cell, but the raw
# X_train is passed here - confirm whether plot_cost_i_w should receive X_norm instead.
plot_cost_i_w(X_train, y_train, hist)

### Prediction using normalization parameters from training set for training data

In [None]:
# Predict the target for every training row with the model fitted on normalized features.
X_features = ['sqft', 'exempHS', 'beds', 'bath']
m = X_norm.shape[0]
# One matrix-vector product replaces the per-row np.dot loop.
yp = X_norm @ w_norm + b_norm

# Plot targets and predictions against each original (un-normalized) feature.
fig, ax = plt.subplots(1, 4, figsize=(12, 4), sharey=True)
for i in range(len(ax)):
    ax[i].scatter(X_train[:, i], y_train, label='target')
    ax[i].set_xlabel(X_features[i])
    ax[i].scatter(X_train[:, i], yp, color="orange", label='predict')
ax[0].set_ylabel("Price")
ax[0].legend()
fig.suptitle("target versus prediction using z-score normalized model")
plt.show()

### Prediction using normalization parameters from training set for new data
When predicting for new data, you must normalize it with the mean and standard deviation derived when the training data was normalized.

In [None]:
# First, normalize our example with the training-set statistics (mu, sigma).
# The values must follow the training column order: sqft, exempHS, beds, bath.
# The previous example vector and message described other features
# (bedrooms, floors, age), which do not exist in this dataset.
# exempHS is presumably a 0/1 flag (the previewed rows show 0) - verify against the data.
x_house = np.array([1200, 0, 3, 2])
x_house_norm = (x_house - mu) / sigma
print(x_house_norm)

# The target column is price_1000s, so multiply by 1000 to report dollars.
x_house_predict = np.dot(x_house_norm, w_norm) + b_norm
print(f"\npredicted price of a house with sqft = {x_house[0]}, exempHS = {x_house[1]}, beds = {x_house[2]}, bath = {x_house[3]}: ${x_house_predict*1000:0.0f}\n")

## Compare with non-normalized data
(coefficients taken from OLS normal equation)

In [17]:
# Hard-coded coefficients for the raw features (per the note above, taken from
# the OLS normal equation). Order presumably matches sqft, exempHS, beds, bath - verify.
w = np.array([0.073, 28.309, 35.133, 5.557])
b = -75.674

In [None]:
# Predict the target from the raw (non-normalized) features with the OLS coefficients.
X_features = ['sqft', 'exempHS', 'beds', 'bath']
m = X_train.shape[0]
# One matrix-vector product replaces the per-row np.dot loop.
yo = X_train @ w + b

# Plot both sets of predictions against each original feature:
# yo from the OLS coefficients, yp from the gradient-descent model fitted earlier.
fig, ax = plt.subplots(1, 4, figsize=(12, 4), sharey=True)
for i in range(len(ax)):
    ax[i].scatter(X_train[:, i], yo, label='pred by OLS')
    ax[i].set_xlabel(X_features[i])
    ax[i].scatter(X_train[:, i], yp, color="orange", label='pred by GD')
ax[0].set_ylabel("Price")
ax[0].legend()
# The title now describes what is plotted (two predictions, no targets).
fig.suptitle("OLS prediction versus gradient-descent prediction")
plt.show()

## Gradient descent Linear Regression using Scikit-Learn
(with normalization)

In [None]:
from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import StandardScaler

In [None]:
# X_train / y_train are already NumPy arrays at this point (converted in the
# normalization section), and ndarrays have no .to_numpy() method, so the
# original call raised AttributeError on a top-to-bottom run.
# np.asarray accepts both pandas objects and ndarrays.
X_train = np.asarray(X_train)
y_train = np.asarray(y_train)

In [None]:
# Standardize the features with scikit-learn and compare per-column value ranges
# (max - min) before and after scaling.
# NOTE: this rebinds X_norm, replacing the array produced by zscore_normalization above.
scaler = StandardScaler()
X_norm = scaler.fit_transform(X_train)
print(f"Peak to Peak range by column in Raw        X:{np.ptp(X_train,axis=0)}")
print(f"Peak to Peak range by column in Normalized X:{np.ptp(X_norm,axis=0)}")

In [None]:
# Fit a linear model with stochastic gradient descent on the standardized features.
# SGD shuffles the training data, so a fixed random_state is needed for the
# reported parameters to be reproducible across runs.
sgdr = SGDRegressor(max_iter=1000, random_state=42)
sgdr.fit(X_norm, y_train)

# NOTE: this rebinds w_norm / b_norm, replacing the values from the manual
# gradient descent above.
b_norm = sgdr.intercept_
w_norm = sgdr.coef_

print(f"number of iterations completed: {sgdr.n_iter_}, number of weight updates: {sgdr.t_}\n")
print(f"model parameters: w: {w_norm}, b:{b_norm}")

## Closed form Linear Regression using Scikit-Learn

In [19]:
from sklearn.linear_model import LinearRegression

In [None]:
# Fit scikit-learn's LinearRegression on X_train (the unscaled features; the
# scaled copy lives in X_norm).
linear_model = LinearRegression()
linear_model.fit(X_train, y_train)

In [None]:
# Read back the fitted intercept and coefficients and print them.
# NOTE: this rebinds w and b, which earlier held the hard-coded OLS coefficients.
b = linear_model.intercept_
w = linear_model.coef_
print(f"w = {w:}, b = {b:0.2f}")


## Feature engineering
