### Import all the necessary modules, packages, and libraries to complete this assignment.

In [9]:
%matplotlib inline
%config InlineBackend.print_figure_kwargs={'bbox_inches':None}
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
import pandas as pd
from time import time
from math import sqrt
from sklearn import linear_model, model_selection, preprocessing
from sklearn.exceptions import DataConversionWarning
import warnings
warnings.filterwarnings(action='ignore', category=DataConversionWarning)
import tensorflow as tf

# 1 (30pts) Polynomial regression / overfitting / regularization

### Read in first dataset

In [10]:
data = pd.read_csv('../hw1/ex1data2.csv', header=None)

### Initialize learning rates, data, and labels; define cost, derivative, and weight updating function

In [11]:
# Initialize learning rates
learning_rates = [0.1, 0.01, 1e-3]

# Initialize data and labels
X = np.stack([
        pd.Series([1 for _ in range(len(data))]),
        preprocessing.minmax_scale(data[0]),
        preprocessing.minmax_scale(data[1])],
        axis=1)
y = np.asmatrix(data.iloc[:, -1]).reshape(len(data), -1)

# Initialize weights
W_init = np.asmatrix(np.array([0., 0., 0.]).reshape(3, -1))
W = np.copy(W_init)

# Define cost, cost derivative, and weight updating functions
J = lambda: np.dot((np.dot(X, W) - y).T, np.dot(X, W) - y)
dJ_dW = lambda: (1/len(data)) * (np.dot(np.dot(X.T, X), W) - np.dot(X.T, y))

def update_weights(W, alpha):
    W -= alpha*dJ_dW()
    return W

Initial RMSE: 2483064.4494253467
Performing gradient descent with a learning rate of 0.1

	Final RMSE:      438256.04944477754 achieved after 8581 iterations

Performing gradient descent with a learning rate of 0.01

	Final RMSE:     438256.0494456521 achieved after 76933 iterations

Performing gradient descent with a learning rate of 0.001
	Cost: 192648518540.6467
	Cost: 192070539761.879
	Cost: 192068373353.45212
	Cost: 192068364908.2607

	Final RMSE:    438256.0494530507 achieved after 703110 iterations


The best model has:
	RMSE = 438256.04944477754
	learning rate = 0.1
	W = 199467.251253252
	    504777.6341366063
	    -34951.478695117934


In [None]:
print('Initial RMSE: {}'.format(sqrt(float(J()))))

# Train the model using different learning rates
results = []
for alpha in learning_rates:
    
    # Reinitialize weights and regenerate hypothesis
    W = np.copy(W_init)
    print('Performing gradient descent with a learning rate of {}'.format(alpha))

    # Actually train the model
    new_cost, old_cost, start, count = J(), float('inf'), time(), 0
    while new_cost < old_cost and abs(old_cost - new_cost) > 1e-5:
        old_cost = float(new_cost)
        W = update_weights(W, alpha)
        new_cost = float(J())
        if time() - start > 3:
            print('\tCost: {}'.format(new_cost))
            start = time()
        count += 1
            
    print('\n\tFinal RMSE: {:{align}{width}} achieved after {count} iterations\n'.format( 
        sqrt(J()), 
        align='>', 
        width=8-len(str(alpha))+len(str(sqrt(J()))),
        count=count))
    
    # Append tuple of variables pertaining to this training run to a list of results 
    results.append((sqrt(J()), alpha, W))

print('\nThe best model has:\n\tRMSE = {}\n\tlearning rate = {}\n\tW = {}'.format(min(results)[0], min(results)[1], '\n\t    '.join([str(float(r)) for r in min(results)[2]]))) 

In [None]:
# Enable embedded interactive figures
%matplotlib notebook

# Plot data as a scatter plot with the linear model fit to this data
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.set_title('Ex1data2 with Model Fit', formatting)
ax.set_xlabel('House Size (square feet)', formatting)
ax.set_ylabel('Bedrooms', formatting)
ax.set_zlabel('House Price', formatting)
ax.set_xlim(min(data[0]) - 1, max(data[0]) + 1)
ax.set_ylim(min(data[1]) - 1, max(data[1]) + 1)
ax.set_zlim(min(data[2]) - 1, max(data[2]) + 1)
ax.grid(True)
ax.scatter(data[0], data[1], data[2])

W0, W1, W2 = min(results)[-3:]
model = lambda x, y: W0 + W1*x + W2*y
Z_hat = pd.Series(list(map(model, X, Y)))
X_grid, Y_grid = np.meshgrid(data[0], data[1])
ax.plot_surface(X_grid, Y_grid, Z_hat.values.reshape(-1, len(Z_hat)), 
                cmap=plt.cm.cool, rstride=1, cstride=1, linewidth=0)
ax.view_init(10, 255)