# DATA1030 Gradient Descent Interactive Plot

In [None]:
# load libraries and packages
import pandas as pd
import numpy as np
from sklearn.datasets import make_regression
import plotly
from plotly.subplots import make_subplots
import plotly.graph_objs as go
import ipywidgets
from ipywidgets import interact

## Generate Regression Data

In [None]:
# fix the seed so the code is reproducible
np.random.seed(1)

# number of points to generate
n_samples = 30

# generate a single-feature regression data set; coef=True makes
# make_regression also return the coefficient of the underlying linear model
X, y, coef = make_regression(n_samples = n_samples, n_features = 1, coef = True, noise= 10, bias=0)
print("coefficent:", coef) # the coefficients of the underlying linear model, the bias is 0.
# NOTE(review): the value printed under the "added noise" label is np.dot(X,coef),
# i.e. the product of the features with the coefficient, not a noise term;
# presumably the noise itself would be y minus this value -- confirm against
# the make_regression documentation.
print("added noise:", np.dot(X,coef)[:,0]) # noise is added to the label
print("y:", y)
# collect the feature and the label in one data frame for the cells below
df = pd.DataFrame()
df['x1'] = X[:,0]
df['y'] = y
print(df.head())

# set intercept and slope ranges to explore (n_vals evenly spaced values each)
n_vals = 101
theta0 = np.linspace(-100,100,n_vals) # the intercept values to explore
theta1 = np.linspace(-100,100,n_vals) # the slope values to explore

coefficent: 28.777533858634875
added noise: [  9.18115839 -30.87739085  24.90429735  -4.96205858  32.94232532
  14.46054785 -66.23260778 -19.67600162 -11.0521372  -25.26260034
 -59.28576902 -31.67310564 -31.65215819  32.62708851  50.21137962
  -3.53647763 -26.92913658  46.7446537   42.07586066  25.94555749
 -21.90565736  15.2623224  -15.19948048  -7.70915828  16.77198455
   1.21480753  25.92441258  -7.17626442 -17.60484091  -9.27837201]
y: [  3.08130585 -48.2639338   25.93592732   1.87178372  45.4428258
   6.88380644 -72.44927625 -16.20740827 -12.15967691 -25.0337406
 -66.76530644 -35.83422144 -27.17695128  53.04737598  52.96856006
  10.13384942 -20.19197583  36.16208808  58.99412194  38.42855536
 -32.81240623   6.77988325 -15.82439642 -20.62478531  18.61129032
   9.78715027  31.80735422  -4.11214063 -11.04200012 -15.08634424]
         x1          y
0  0.319039   3.081306
1 -1.072969 -48.263934
2  0.865408  25.935927
3 -0.172428   1.871784
4  1.144724  45.442826


## Prediction and Cost Functions

In [None]:
def predict(X,theta):
    """
    Return the linear-model predictions for the rows of X.

    theta holds the intercept first, followed by one slope per column of X.
    It may be given flat (e.g. a list) or already as a single-row 2-D array.
    """
    is_row_matrix = np.ndim(theta) == 2
    if not is_row_matrix:
        # lift a flat parameter vector to shape (1, n) so that the
        # two-index lookups below work
        theta = np.array(theta)[np.newaxis,:]
    intercept = theta[0,0]
    slopes = theta[0,1:]
    # intercept + sum_i slope_i * x_i for every row of X
    return intercept + X.dot(slopes)

def cost_function(X,y_true,theta):
    """
    Return the mean squared error of the linear model defined by theta.

    X      : numpy feature matrix, one row per sample
    y_true : true labels, one per row of X
    theta  : model parameters, intercept first and then one slope per
             column of X; any form accepted by predict (flat or single-row 2-D)
    """
    # use the labels that were passed in; the notebook-level y may have a
    # different length or may not exist at all
    m = len(y_true)
    # predict adds the row axis to a flat theta itself, so no reshape is needed here
    y_pred = predict(X,theta)
    cost = (1/m) * np.sum(np.square(y_true-y_pred)) # this is MSE
    return cost

## Calculate Cost Function

In [None]:
# cost surface over the grid: rows follow the intercepts, columns the slopes
cost = np.zeros([len(theta0),len(theta1)])

# the feature column is the same for every grid point, so build it once
x1_column = df['x1'].values[:,np.newaxis]

# evaluate the cost function at every intercept-slope combination
for i, intercept in enumerate(theta0):
    for j, slope in enumerate(theta1):
        theta = [intercept,slope]
        cost[i,j] = cost_function(x1_column,df['y'],theta)

# report the grid point with the smallest cost
print('min(cost):',cost.min())
min_coords = np.unravel_index(cost.argmin(),cost.shape)
best_row, best_col = min_coords
print('best intercept:',theta0[best_row])
print('best slope:',theta1[best_col])

min(cost): 71.43643291686587
best intercept: 2.0
best slope: 32.0


## Gradient Descent Function

(For illustration purposes only; this is slower than the sklearn implementation.)

In [None]:
def gradient_descent(X,y_true,theta,learning_rate=0.01,iterations=100):
    '''
    Fit a linear model by batch gradient descent.

    X             = feature matrix, one row per sample and one column per slope;
                    no bias column is needed because predict adds the intercept
    y_true        = vector of true labels, one per row of X
    theta         = initial parameters, intercept first followed by the slopes
    learning_rate = factor multiplying the gradient in every update
    iterations    = no of iterations

    Returns the final theta vector, the array of cost history and the array of
    theta history (one row per iteration); both histories are recorded after
    the update of the respective iteration
    '''
    m = len(y_true)
    theta = np.array(theta)[np.newaxis,:]
    # work on a plain array so the reductions below stay inside numpy
    y_true = np.asarray(y_true)

    cost_history = np.zeros(iterations)
    theta_history = np.zeros([iterations,np.shape(theta)[1]])
    for it in range(iterations):

        residual = predict(X,theta) - y_true
        delta_theta = np.zeros(np.shape(theta)) # the step we take
        # Steps follow the derivatives of the cost function; the constant
        # factor 2 of the MSE derivative is left out, which only rescales
        # the learning rate.
        # step for the intercept
        delta_theta[0,0] = (1/m) * learning_rate * np.sum(residual)
        # step for the slopes
        delta_theta[0,1:] = (1/m) * learning_rate * X.T.dot(residual)
        theta = theta - delta_theta # update theta so we move down the gradient
        theta_history[it] = theta[0]
        cost_history[it] = cost_function(X,y_true,theta[0])

    return theta[0], cost_history, theta_history

## Function to generate gradient descent path data

In [None]:
def generate_grad_desc_data(X,y,theta,learning_rates,iterations=100):
    """Run gradient descent once per learning rate and collect the results.

    X              : feature matrix; only its first column is used
    y              : labels, one per row of X
    theta          : initial [intercept, slope] every run starts from
    learning_rates : iterable of learning rates to try
    iterations     : number of gradient descent iterations per run

    Returns a dictionary of theta history, a dictionary of cost history, and a
    dictionary of final theta, each keyed by learning rate."""

    # theta history, cost history and final theta per learning rate
    theta_hist_dict = {}
    cost_hist_dict = {}
    theta_dict = {}

    # create df of input data
    df = pd.DataFrame()
    df['x1'] = X[:,0]
    df['y'] = y
    features = df['x1'].values[:,np.newaxis]
    labels = df['y']

    # calculate theta and cost history for each learning rate
    for lr in learning_rates:
        # Start every run from the caller's theta. The result gets its own
        # name so that it cannot overwrite the starting point of the next run.
        final_theta, cost_history, theta_hist = gradient_descent(features,labels,theta,lr,iterations)

        theta_hist_dict[lr] = theta_hist
        cost_hist_dict[lr] = cost_history
        theta_dict[lr] = final_theta

    return theta_hist_dict, cost_hist_dict, theta_dict


## Generate Data

In [None]:
# initial [intercept, slope] passed to the gradient descent runs
theta = [100,-100]
# learning rates 0.1, 0.2, ..., 2.0; built from an integer range because
# np.arange with a fractional step can gain or lose its end point through
# floating point rounding, and the division yields the same clean values
# that rounding to two decimals produced
learning_rates = np.arange(1, 21) / 10
# generate gradient descent data (one run per learning rate)
theta_hist_dict, cost_hist_dict, theta_dict = generate_grad_desc_data(X,y,theta,learning_rates,iterations=100)

## Interactive Gradient Descent Plots

### Double Plot - Cost History and Gradient Descent Path

In [None]:
# Double plot code: cost history (left panel) next to the gradient descent
# path drawn on top of the cost contours (right panel)

# set plot range
x_min, x_max = np.min(theta0), np.max(theta0)
y_min, y_max = np.min(theta1), np.max(theta1)

# create the two-panel figure widget
fig = go.FigureWidget( make_subplots(rows=1, cols=2,
                                     shared_xaxes=False,
                                     subplot_titles=("Cost History", "Gradient Descent Path"),
                                     horizontal_spacing = 0.1))

# Cell-private handle to this figure. The slider callback below looks up
# module-level names every time it runs, and the name `fig` is rebound by a
# later cell to a figure with fewer traces; going through `fig` would then
# make this slider write to the wrong widget and fail on data[2].
double_fig = fig

# add axis labels
double_fig['layout']['xaxis']['title']='nr. of iterations'
double_fig['layout']['xaxis2']['title']='intercept'
double_fig['layout']['yaxis']['title']='cost function'
double_fig['layout']['yaxis2']['title']='slope'


# The order of the add_* calls fixes the trace indices used in update():
# data[0] contour, data[1] path, data[2] path end point, data[3] cost history.

# add contour plot
double_fig.add_contour(row=1, col=2)
# set plot size
double_fig.update_layout(
    autosize=False,
    width=1000,
    height=500)
# add first scatter plot (gradient descent path)
double_fig.add_scatter(row=1, col=2)

# add second scatter plot (path end point)
double_fig.add_scatter(row=1, col=2)

# add third scatter plot (cost history)
double_fig.add_scatter(row=1, col=1)

# create slider for learning rate
slider = ipywidgets.SelectionSlider(options=learning_rates,description="learning rate", orientation='horizontal')

# create interactive plot with slider that updates learning rate
@interact(x=slider)
def update(x=learning_rates[0]):
    """Redraw both panels for the learning rate x selected on the slider."""
    with double_fig.batch_update():
        # set contour plot colorbar title
        double_fig.data[0].colorbar={"title": 'cost function'}

        # set contour plot data and axes; cost is indexed [intercept, slope]
        # and is passed transposed
        double_fig.data[0].x=theta0
        double_fig.data[0].y=theta1
        double_fig.data[0].z=cost.T

        # set grad descent path scatterplot point color and shape
        double_fig.data[1].marker.color="red"
        double_fig.data[1].marker.size=8
        double_fig.data[1].marker.symbol=1
        double_fig.data[1].marker.line.width=1
        double_fig.data[1].showlegend= False
        # set grad desc path data (every 10th iteration only)
        double_fig.data[1].x=theta_hist_dict[x][::10,0]
        double_fig.data[1].y=theta_hist_dict[x][::10,1]

        # set grad descent path end point point color and shape
        double_fig.data[2].marker.color="white"
        double_fig.data[2].marker.size=8
        double_fig.data[2].marker.symbol=1
        double_fig.data[2].marker.line.width=1
        double_fig.data[2].showlegend= False
        double_fig.data[2].mode='lines+markers+text'
        double_fig.data[2].text='end'
        double_fig.data[2].textposition='top right'
        double_fig.data[2].textfont=dict(color='white')

        # set path end point data: the last of the plotted (every 10th)
        # path points, which need not be the final iteration
        double_fig.data[2].x=np.array(theta_hist_dict[x][::10,0][-1])
        double_fig.data[2].y=np.array(theta_hist_dict[x][::10,1][-1])

        # cost history line plot
        # set cost history scatterplot point color and shape
        double_fig.data[3].marker.color="red"
        double_fig.data[3].marker.size=8
        double_fig.data[3].marker.symbol=1
        double_fig.data[3].marker.line.width=1
        # set cost history data (only y is given)
        double_fig.data[3].y=cost_hist_dict[x]
        double_fig.data[3].showlegend= False # hide legend

        # generate title
        title="learning rate = " + str(x)

        # update title
        double_fig.update_layout(
            title={
                'text': title,
                'y':0.9,
                'x':0.41,
                'xanchor': 'center',
                'yanchor': 'top'},
            font=dict(
                family="arial, monospace",
                size=16)
        )
        # update y-axis label distance
        double_fig.update_yaxes(
        title_standoff = 0)


# display the widget
double_fig

interactive(children=(SelectionSlider(description='learning rate', options=(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7,…

FigureWidget({
    'data': [{'colorbar': {'title': {'text': 'cost function'}},
              'type': 'contour'…

### Gradient Descent Path Plot

In [None]:
# Single plot: gradient descent path drawn on top of the cost contours.

# set plot range
x_min, x_max = np.min(theta0), np.max(theta0)
y_min, y_max = np.min(theta1), np.max(theta1)

# create the figure widget
# NOTE(review): this rebinds the module-level name `fig`, which the double
# plot cell above also assigns -- confirm nothing still relies on the earlier
# figure being reachable under this name.
fig = go.FigureWidget()
# add contour plot (becomes fig.data[0])
fig.add_contour()
# set plot size
fig.update_layout(
    autosize=False,
    width=800,
    height=640)
# add scatter plot (becomes fig.data[1])
fig.add_scatter()

# create slider for learning rate
slider = ipywidgets.SelectionSlider(options=learning_rates,description="learning rate", orientation='horizontal')

# create interactive plot with slider that updates learning rate
@interact(x=slider)
def update(x=learning_rates[0]):
    """Redraw the contour plot and the descent path for learning rate x."""
    with fig.batch_update():
        # optional contour styling, currently disabled:
        #fig.data[0].colorscale = "RdBu" # set red blue colorscale
        #fig.data[0].contours=dict(
        #    size=0.05,
        #    start=0,
        #    end=1)
        fig.data[0].colorbar={"title": 'cost function'}
        # set contourplot data and axes; cost is indexed [intercept, slope]
        # and is passed transposed
        fig.data[0].z=cost.T
        fig.data[0].x=theta0
        fig.data[0].y=theta1


        # set scatterplot point color and shape
        fig.data[1].marker.color="red"
        fig.data[1].marker.size=8
        fig.data[1].marker.symbol=1
        fig.data[1].marker.line.width=1
        # set grad desc path data (every 10th iteration only)
        fig.data[1].x=theta_hist_dict[x][::10,0]
        fig.data[1].y=theta_hist_dict[x][::10,1]

        # generate title
        title="learning rate = " + str(x)

        # title, axes labels and font
        fig.update_layout(
            title={
                'text': title,
                'y':0.9,
                'x':0.41,
                'xanchor': 'center',
                'yanchor': 'top'},
            xaxis_title="intercept",
            yaxis_title="slope",
            font=dict(
                family="arial, monospace",
                size=16)
        )

# display the widget
fig

interactive(children=(SelectionSlider(description='learning rate', options=(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7,…

FigureWidget({
    'data': [{'colorbar': {'title': {'text': 'cost function'}},
              'type': 'contour'…

### Cost History Plot

In [None]:
# Single plot: cost function value per iteration for the selected learning rate.

# create figure widget
fig2 = go.FigureWidget()

# set plot size
fig2.update_layout(
    autosize=False,
    width=800,
    height=640)
# add scatter plot (becomes fig2.data[0])
fig2.add_scatter()

# create slider for learning rate
slider = ipywidgets.SelectionSlider(options=learning_rates,description="learning rate", orientation='horizontal')

# create interactive plot with slider that updates learning rate
@interact(x=slider)
def update(x=learning_rates[0]):
    """Redraw the cost history for the learning rate x selected on the slider."""
    with fig2.batch_update():

        # set scatterplot point color and shape
        fig2.data[0].marker.color="red"
        fig2.data[0].marker.size=8
        fig2.data[0].marker.symbol=1
        fig2.data[0].marker.line.width=1
        # set cost history data (only y is given)
        fig2.data[0].y=cost_hist_dict[x]

        # generate title
        title="cost function history, learning rate = " + str(x)

        # title, axes labels and font
        fig2.update_layout(
            title={
                'text': title,
                'y':0.9,
                'x':0.41,
                'xanchor': 'center',
                'yanchor': 'top'},
            xaxis_title="nr. of iterations",
            yaxis_title="cost function",
            font=dict(
                family="arial, monospace",
                size=16)
        )

# display the widget
fig2

interactive(children=(SelectionSlider(description='learning rate', options=(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7,…

FigureWidget({
    'data': [{'marker': {'color': 'red', 'line': {'width': 1}, 'size': 8, 'symbol': 1},
       …