In [None]:
####################################################################
# Machine Learning Primer - Workshop
# Day 1 - September 2021
####################################################################

# python package imports
from typing import Callable, List, Optional, Tuple

import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [None]:
####################################################################
# Functions to generate (fake) data that we would use for supervised
# linear regression
####################################################################

def line_hypothesis(x: np.ndarray, m: float, c: float) -> np.ndarray:
    """Evaluate the straight line y = m * x + c.

    Args:
        x: The x value(s) to evaluate the line at.
        m: The slope of the line.
        c: The intercept of the line.

    Returns:
        The result of (m * x) + c, following the usual numpy broadcasting
        rules between x, m and c.
    """
    return (m * x) + c

def generate_fake_single_var_data(
    n_points: int,
    true_m: float,
    true_c: float,
    min_x: float,
    max_x: float,
    noise_std: float,
) -> Tuple[np.ndarray, np.ndarray]:
    """Sample noisy (x, y) points scattered around a known straight line.

    Args:
        n_points: how many (x, y) samples to draw.
        true_m: slope of the line the samples are generated from.
        true_c: intercept of the line the samples are generated from.
        min_x: lower bound of the uniform range x is drawn from.
        max_x: upper bound of the uniform range x is drawn from.
        noise_std: standard deviation of the zero-mean Gaussian noise
            added to each y value.

    Returns:
        x: Vector of size (n_points) holding the sampled x values.
        noisy_y: Vector of size (n_points) holding the line's y value at
            each x plus noise.
    """
    # x is drawn before the noise; keeping this order keeps the output
    # unchanged for a given np.random seed
    sample_x = np.random.uniform(min_x, max_x, n_points)
    # noise-free y values lying exactly on the line
    clean_y = line_hypothesis(sample_x, m=true_m, c=true_c)
    # zero-mean Gaussian perturbation, one value per sample
    jitter = np.random.normal(loc=0.0, scale=noise_std, size=n_points)
    return sample_x, clean_y + jitter

In [None]:
####################################################################
# Data plotting functionality
####################################################################

def get_hypothesis_scatter_plots(
    data_x: np.ndarray,
    data_y: np.ndarray,
    hypothesis_m: Optional[float] = None,
    hypothesis_c: Optional[float] = None,
    gt_m: Optional[float] = None,
    gt_c: Optional[float] = None,
    x_range: Optional[Tuple[float, float]] = None,
) -> List[go.Scatter]:
    """Gets the scatter plots to draw the data, true line, and hypothesis
    line if parameters given.

    Args:
        data_x: Vector of x values
        data_y: Vector of corresponding y values
        hypothesis_m: The m (slope) hypothesis. If given, the hypothesis
            line is drawn and hypothesis_c must be given as well.
        hypothesis_c: The c (intercept) hypothesis. Only used together
            with hypothesis_m.
        gt_m: Slope of the ground-truth line. When omitted, the
            notebook-level variable `true_m` is used.
        gt_c: Intercept of the ground-truth line. When omitted, the
            notebook-level variable `true_c` is used.
        x_range: (low, high) x values between which the ground-truth and
            hypothesis lines are drawn. When omitted, the notebook-level
            variables `min_x` and `max_x` are used.

    Returns:
        List of scatter plots for drawing data, true line, and hypothesis.

    Raises:
        ValueError: If hypothesis_m is given without hypothesis_c.
    """
    # fail early with a clear message instead of a TypeError from adding
    # None to an array inside line_hypothesis
    if hypothesis_m is not None and hypothesis_c is None:
        raise ValueError(
            "hypothesis_c must be given when hypothesis_m is given"
        )

    # anything not passed explicitly falls back to the notebook-level
    # variables, so existing calls keep working unchanged
    if gt_m is None:
        gt_m = true_m
    if gt_c is None:
        gt_c = true_c
    x_low, x_high = (min_x, max_x) if x_range is None else x_range
    # a straight line only needs its two end points
    line_x = [x_low, x_high]

    scatter_plots = [
        # plot the data
        go.Scatter(
            x=data_x,
            y=data_y,
            mode="markers",
            marker_size=10,
            marker_color='blue',
            name="data"
        ),
        # plot the underlying GT line
        go.Scatter(
            x=line_x,
            y=line_hypothesis(np.array(line_x), m=gt_m, c=gt_c),
            mode='lines',
            line_dash='dash',
            line_color='green',
            name='ground-truth'
        )
    ]
    if hypothesis_m is not None:
        # plot the current hypothesis
        scatter_plots.append(
            go.Scatter(
                x=line_x,
                y=line_hypothesis(np.array(line_x), m=hypothesis_m, c=hypothesis_c),
                mode='lines',
                line_dash='dash',
                line_color='red',
                name='hypothesis'
            )
        )
    return scatter_plots


def plot_gradient_descent_info(
    data_x: np.ndarray,
    data_y: np.ndarray,
    m_history: np.ndarray,
    c_history: np.ndarray,
) -> None:
    """Draws data/hypothesis; cost contour map; and cost vs training iterations.

    The figure has three panels: the data with the ground-truth line and
    the latest hypothesis (top left), the cost contour over m and c with
    the path taken by the parameters (top right), and the cost at each
    recorded step (bottom, full width).

    Besides its arguments, this function reads the notebook-level
    variables `m_plot_range`, `c_plot_range`, `grid_costs`, `true_m` and
    `true_c`, and calls `compute_cost`, so the cells defining them must
    have been run first. `grid_costs` is used as-is for the contour; it
    is not recomputed from data_x / data_y.

    Args:
        data_x: Vector of x values
        data_y: Vector of corresponding y values
        m_history: Vector of m (slope) values as training progresses
            (from the oldest to the newest).
        c_history: Vector of c (intercept) values as training progresses
            (from the oldest to the newest).

    Raises:
        ValueError: If the histories are empty or differ in length.
    """
    # the last entry of each history is the hypothesis that gets drawn and
    # the two are zipped pairwise below, so both must be non-empty and of
    # equal length
    if len(m_history) == 0 or len(m_history) != len(c_history):
        raise ValueError(
            "m_history and c_history must be non-empty and of equal length"
        )

    # axis styling shared by all three panels
    common_axis = dict(
        mirror=True,
        ticks='outside',
        showline=True,
        linewidth=2,
        linecolor='black'
    )

    # two panels on the top row, one full-width panel on the bottom row
    fig = make_subplots(
        rows=2,
        cols=2,
        specs=[[{}, {}], [{"colspan": 2}, None]],
        row_heights=[0.7, 0.3],
        vertical_spacing=0.2,
        subplot_titles=("Hypothesis", "Cost map", "Cost w/ iteration")
    )

    # plot the data, the hypothesis, and true line
    scatter_plots = get_hypothesis_scatter_plots(
        data_x=data_x,
        data_y=data_y,
        hypothesis_m=m_history[-1],
        hypothesis_c=c_history[-1],
    )
    for plot in scatter_plots:
        fig.add_trace(plot, row=1, col=1)
    fig.update_xaxes(title_text="time (mins)", row=1, col=1, **common_axis)
    fig.update_yaxes(title_text="distance (km)", row=1, col=1, **common_axis)

    # plot the cost contour map against m and c parameters
    fig.add_trace(
        go.Contour(x=m_plot_range, y=c_plot_range, z=grid_costs),
        row=1, col=2
    )
    # plot the history of m/c values on the cost contour map; the newest
    # point gets a larger marker so it stands out
    fig.add_trace(
        go.Scatter(
            x=m_history,
            y=c_history,
            mode="markers+lines",
            marker_size=[5]*(len(m_history)-1) + [10],
            marker_color='white',
            name="m/c history"
        ),
        row=1, col=2,
    )
    # draw a marker for true m/c
    fig.add_trace(
        go.Scatter(
            x=[true_m],
            y=[true_c],
            mode="markers",
            marker_size=10,
            marker_color='yellow',
            marker_symbol='x',
            name="True m/c"
        ),
        row=1, col=2,
    )
    # clamp the axes to the extent of the contour grid
    fig.update_xaxes(
        title_text="m (gradient)",
        range=[np.min(m_plot_range), np.max(m_plot_range)],
        row=1, col=2, **common_axis
    )
    fig.update_yaxes(
        title_text="c (intercept)",
        range=[np.min(c_plot_range), np.max(c_plot_range)],
        row=1, col=2, **common_axis
    )

    # plot history of the cost against training iterations
    cost_history = [
        compute_cost(data_x=data_x, data_y=data_y, curr_m=curr_m, curr_c=curr_c)
        for curr_m, curr_c in zip(m_history, c_history)
    ]
    fig.add_trace(
        go.Scatter(
            # steps are numbered from 1
            x=list(range(1, len(cost_history) + 1)),
            y=cost_history,
            mode="markers+lines",
            name="cost",
        ),
        row=2, col=1,
    )
    fig.update_xaxes(title_text="Step", row=2, col=1, **common_axis)
    fig.update_yaxes(title_text="Cost", row=2, col=1, **common_axis)

    # move the legend
    fig.update_layout(
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.05,
            xanchor="right",
            x=1
        )
    )

    fig.show()

In [None]:
####################################################################
# Generate some (fake) data to do supervised regression
####################################################################

# fix the seed so the same samples are drawn on every run
np.random.seed(1)

# number of samples and the interval their x values are drawn from
N = 20
min_x, max_x = 0, 1

# slope / intercept of the underlying line; fixed here, but they could be
# drawn at random instead, e.g. np.random.uniform(low=0, high=1)
true_m = 0.5
true_c = 0.5

# noisy samples scattered around the line y = true_m * x + true_c
x, y = generate_fake_single_var_data(
    n_points=N,
    true_m=true_m,
    true_c=true_c,
    min_x=min_x,
    max_x=max_x,
    noise_std=0.05,
)

####################################################################
# Plot the generated data (and the underlying ground-truth line)
####################################################################

fig = go.Figure()
fig.update_layout(
    xaxis_title="time (mins)",
    yaxis_title="distance (km)",
    title=f"Line data: m: {true_m}, c: {true_c}",
    title_x=0.5,
)

# no hypothesis is passed, so only the data and ground-truth traces come back
scatter_plots = get_hypothesis_scatter_plots(data_x=x, data_y=y)
for plot in scatter_plots:
    fig.add_trace(plot)

fig.show()

In [None]:
def compute_cost(
    data_x: np.ndarray,
    data_y: np.ndarray,
    curr_m: float,
    curr_c: float
) -> float:
    """Half the mean squared error of the line (curr_m, curr_c) on the data.

    Args:
        data_x: Vector of x values
        data_y: Vector of corresponding y values
        curr_m: The m (slope) value at which the cost is evaluated
        curr_c: The c (intercept) value at which the cost is evaluated

    Returns:
        The cost value. The mean is taken over the last axis only, so
        curr_m / curr_c arrays that broadcast against data_x yield one
        cost per leading entry.
    """
    # signed distance between the predicted and the observed y values
    residuals = line_hypothesis(data_x, m=curr_m, c=curr_c) - data_y
    # average the squared residuals over the samples, then halve
    return np.mean(np.square(residuals), axis=-1) / 2


def gradient_m(
    data_x: np.ndarray,
    data_y: np.ndarray,
    curr_m: float,
    curr_c: float
) -> float:
    """Compute the gradient of the cost with respect to m (slope)

    Args:
        data_x: Vector of x values
        data_y: Vector of corresponding y values
        curr_m: The m (slope) value to compute the gradient at
        curr_c: The c (intercept) value to compute the gradient at

    Returns:
        Gradient of cost with respect to m (slope)
    """
    # per-sample residual of the current line, weighted by x
    grad_m = (curr_c + (curr_m * data_x) - data_y) * data_x
    # average over the samples that were passed in (data_y), not over
    # whatever notebook-level `y` happens to exist
    return np.sum(grad_m) / data_y.size


def gradient_c(
    data_x: np.ndarray,
    data_y: np.ndarray,
    curr_m: float,
    curr_c: float
) -> float:
    """Partial derivative of the cost with respect to c (intercept).

    Args:
        data_x: Vector of x values
        data_y: Vector of corresponding y values
        curr_m: The m (slope) value at which the gradient is evaluated
        curr_c: The c (intercept) value at which the gradient is evaluated

    Returns:
        Gradient of cost with respect to c (intercept)
    """
    # how far the current line sits above (+) or below (-) each sample
    residuals = curr_c + (curr_m * data_x) - data_y
    # averaged over the number of samples
    return residuals.sum() / data_y.size


    
def gradient_descent_step(
    data_x: np.ndarray,
    data_y: np.ndarray,
    curr_m: float,
    curr_c: float,
    learning_rate: float
) -> Tuple[float, float]:
    """Moves (m, c) one gradient descent step.

    Args:
        data_x: Vector of x values
        data_y: Vector of corresponding y values
        curr_m: The m (slope) value the step starts from
        curr_c: The c (intercept) value the step starts from
        learning_rate: Step size for gradient descent

    Returns:
        new_m: The m (slope) value after the step.
        new_c: The c (intercept) value after the step.
    """
    # both partial derivatives are evaluated at the current (m, c) before
    # either parameter is moved
    current_point = dict(
        data_x=data_x, data_y=data_y, curr_m=curr_m, curr_c=curr_c
    )
    slope_grad = gradient_m(**current_point)
    intercept_grad = gradient_c(**current_point)

    # step against the gradient, scaled by the learning rate
    return (
        curr_m - learning_rate * slope_grad,
        curr_c - learning_rate * intercept_grad,
    )

In [None]:
####################################################################
# Get costs at different m/c parameters so we can draw a contour
# plot showing how the cost changes with different parameter values
####################################################################

# the range of m/c values
# candidate slope / intercept values spanning the two contour plot axes
m_plot_range = np.linspace(0, 1, 101)
c_plot_range = np.linspace(0, 1, 101)

# every (m, c) combination, flattened to one vector per parameter
grid_m, grid_c = (
    mesh.reshape(-1) for mesh in np.meshgrid(m_plot_range, c_plot_range)
)

# column vectors broadcast against the data vector, so one call evaluates
# the cost at every (m, c) pair; the result is then folded back into a
# (number of c values, number of m values) grid
grid_costs = compute_cost(
    data_x=x,
    data_y=y,
    curr_m=grid_m.reshape(-1, 1),
    curr_c=grid_c.reshape(-1, 1),
).reshape(c_plot_range.size, m_plot_range.size)

In [None]:
####################################################################
# Run gradient descent for n_steps
####################################################################

# how many gradient descent updates to perform
n_steps = 1

# the step size used during gradient descent
learning_rate = 1e-1

# starting guess for slope / intercept (any values would do)
hypothesis_m, hypothesis_c = 0.1, 0.9

# parameter values after each update, seeded with the starting guess;
# used for plotting the progress of the parameters
m_history, c_history = [hypothesis_m], [hypothesis_c]

for idx in range(n_steps):
    # cost (loss) before this update; the update itself does not use it,
    # it is only there for optional progress logging, e.g.
    # print(f"Step {idx: <3},  Cost: {cost:.5f},  {hypothesis_m},  {hypothesis_c}")
    cost = compute_cost(
        data_x=x, data_y=y, curr_m=hypothesis_m, curr_c=hypothesis_c
    )

    # replace slope / intercept with their values after one descent step
    hypothesis_m, hypothesis_c = gradient_descent_step(
        data_x=x,
        data_y=y,
        curr_m=hypothesis_m,
        curr_c=hypothesis_c,
        learning_rate=learning_rate,
    )

    m_history.append(hypothesis_m)
    c_history.append(hypothesis_c)

# plot the whole gradient descent process
plot_gradient_descent_info(
    data_x=x,
    data_y=y,
    m_history=np.asarray(m_history),
    c_history=np.asarray(c_history),
)