#### Gradient Descent

Jay Urbain
12/10/2018

[Gradient descent](https://en.wikipedia.org/wiki/Gradient_descent) is an iterative optimization algorithm used to find a local minimum of a differentiable function by repeatedly stepping in the direction of the negative gradient. It is commonly used in many machine learning algorithms.

Complete the code for the following functions:   
- dLdw1() # partial derivative of the loss (cost) with respect to w1
- dLdw0() # partial derivative of the loss (cost) with respect to w0
- f() # linear regression prediction function
- cost() # cost function 

In [None]:
# loading necessary libraries and setting up plotting libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# generate plots within notebook
%matplotlib inline

In [None]:
# Read the dataset into a pandas DataFrame.
# You'll need to change 'data_path' to point at your copy of the file.
# NOTE: the path is deliberately not called `f`: `f` is also the name of the
# prediction function defined later in this notebook, and re-running this cell
# would otherwise replace that function with a string.
data_path = "ex1data1.txt"
df = pd.read_csv(data_path, header=None, names=["X", "Y"])

# verify
print(df.shape)
print(df.head())

# Convert the pandas columns for X and Y into numpy arrays for processing
X = df.iloc[:, 0].values
Y = df.iloc[:, 1].values

print(type(X))
print(X.shape)
print(X.shape[0])
print(X[:5])
print(Y[:5])

#### Hyperparameter settings

In [None]:
# Default settings; you need to determine the optimal convergence settings.
alpha = 0.01      # learning rate
tol = 1e-11       # tolerance used to decide convergence
maxiter = 1000    # maximum number of iterations (in case convergence is not reached)
dispiter = 10     # interval for displaying results during iterations

# iteration counter
iters = 0

# initial values of the model parameters (w0: bias, w1: slope)
w0, w1 = -0.01, 0.01

In [None]:
# Buffers with maxiter + 1 slots each, filled in by the main loop so that
# parameter and cost convergence can be plotted afterwards.
w0plot = [0.0 for _ in range(maxiter + 1)]
w1plot = [0.0 for _ in range(maxiter + 1)]
tplot = [0 for _ in range(maxiter + 1)]
cplot = [0 for _ in range(maxiter + 1)]

# peek at the first few slots
w0plot[:5]

#### Generate a scatter plot with labels and title to visualize the data

In [None]:
# Quick, unlabelled look at the raw data: one marker per (X, Y) pair.
plt.scatter(x=X, y=Y)

#### Generate _nice_ initial scatter plot with labels to visualize the data

In [None]:
# Labelled scatter plot of the raw data.
# plt.subplots creates the 8x6 figure and its single axes in one call.
fig, ax = plt.subplots(figsize=(8, 6))
# Draw the data as red stars.
ax.scatter(X, Y, marker="*", color="red")
# Title and axis labels.
ax.set_title("Data set")
ax.set_xlabel("X")
ax.set_ylabel("Y")
# To save the figure to disk, uncomment the next line.
#fig.savefig("scatterplot.png")

#### Functions you need to complete !!!

In [None]:
# linear regression predictive function
def f(x):
    """Linear regression prediction: bias w0 plus slope w1 times x.

    The parameters w0 and w1 are read from module scope on every call, so
    the prediction always reflects their current values.
    """
    return w1 * x + w0
    
# partial derivative of the cost(loss) with respect to w1 (slope)
def dLdw1():
    """Partial derivative of the cost (loss) with respect to the slope w1.

    Computed from the module-level arrays X and Y and the current
    predictions f(X): -2/n * sum((Y - f(X)) * X), with n = X.size.
    """
    residuals = Y - f(X)
    scale = -2 / X.size
    return scale * np.sum(residuals * X)
    
# partial derivative of the cost(loss) with respect to w0 (bias)
def dLdw0():
    """Partial derivative of the cost (loss) with respect to the bias w0.

    Computed from the module-level arrays X and Y and the current
    predictions f(X): -2/n * sum(Y - f(X)), with n = X.size.
    """
    residuals = Y - f(X)
    scale = -2 / X.size
    return scale * np.sum(residuals)
    
def cost():
    """Cost of the current fit: the mean of the squared residuals.

    Uses the module-level arrays X and Y and the current predictions f(X).
    """
    residuals = Y - f(X)
    squared_error = np.sum(residuals ** 2)
    return squared_error / X.size

#### Main loop of program

In [None]:
# Main gradient descent loop.
# Uses alpha, tol, maxiter, dispiter, w0 and w1 from the settings cell, and
# dLdw1(), dLdw0() and cost() from the functions cell.
iters = 0
converged = False

# Start with empty histories and append one entry per iteration. The plots
# then contain exactly the iterations that ran (no zero-filled tail after an
# early stop), and changing maxiter cannot cause an out-of-range index.
tplot, w0plot, w1plot, cplot = [], [], [], []

# Run at most maxiter iterations.
while iters < maxiter:
    # Step for each parameter: learning rate times the gradient evaluated at
    # the current (w0, w1). Both are computed before either parameter changes.
    delta1 = alpha * dLdw1()
    delta0 = alpha * dLdw0()

    # Store data for plotting convergence of parameters (state before update)
    tplot.append(iters)
    w0plot.append(w0)
    w1plot.append(w1)
    cplot.append(cost())

    iters += 1
    w1 = w1 - delta1
    w0 = w0 - delta0

    # display progress
    if iters % dispiter == 0:
        print(str(iters), ", w0=", str(w0), " delta0=", str(delta0), "w1=", str(w1), ", delta1=", str(delta1))

    # Converged only when BOTH parameter steps are within tolerance; stopping
    # when just one is small would leave the other parameter still moving.
    if abs(delta1) <= tol and abs(delta0) <= tol:
        converged = True
        break

if converged:
    print("\nConvergence after " + str(iters) + " iterations: w0=" + str(w0) + ", w1=" + str(w1))
else:
    # The iteration cap was reached first, so do not report convergence.
    print("\nStopped after " + str(iters) + " iterations without reaching tolerance: w0=" + str(w0) + ", w1=" + str(w1))

print(cplot)

#### Generate scatter plot with linear regression fit line

In [None]:
# Scatter plot of the data with the fitted regression line on top.
# plt.subplots creates the 8x6 figure and its single axes in one call.
fig, ax = plt.subplots(figsize=(8, 6))
# Raw data in blue.
ax.scatter(X, Y, color="blue")
# Title and axis labels.
ax.set_title("Data set with linear regression fit")
ax.set_xlabel("X")
ax.set_ylabel("Y")
# To save the figure to disk, uncomment the next line.
#fig.savefig("scatterplot.png")
# Fitted line w0 + w1*X, evaluated at the data's X values, in red.
ax.plot(X, w0 + w1 * X, color="red")

#### Plot convergence of parameters

In [None]:
# Line plot of the recorded w0 and w1 values against the iteration number.
# plt.subplots creates the 8x6 figure and its single axes in one call.
fig, ax = plt.subplots(figsize=(8, 6))
# One curve per parameter; the labels feed the legend below.
ax.plot(tplot, w0plot, label="w0", color="blue")
ax.plot(tplot, w1plot, label="w1", color="red")
# Title and axis labels.
ax.set_title("Convergence of w0 and w1")
ax.set_xlabel("Iterations")
ax.set_ylabel("Parameters")
# Legend on the right-hand side of the axes.
ax.legend(loc='right')

#### Plot convergence of cost function

In [None]:
# Line plot of the recorded cost values against the iteration number.
# plt.subplots creates the 8x6 figure and its single axes in one call.
fig, ax = plt.subplots(figsize=(8, 6))
# Cost curve; the label feeds the legend below.
ax.plot(tplot, cplot, label="Cost", color="blue")
# Title and axis labels.
ax.set_title("Cost function trend")
ax.set_xlabel("Iterations")
ax.set_ylabel("Cost")
# Legend on the right-hand side of the axes.
ax.legend(loc='right')

Notice how the parameters and the cost function converge at different rates. We could also try terminating our gradient descent algorithm based on the cost function.