# Lecture 10 Supplementary Notebook

## DSC 40A, Summer 2024

The following cell sets up the necessary imports – don't worry too much about it.

In [None]:
# Data-handling and plotting libraries used throughout the notebook.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import seaborn as sns

# Ask the inline matplotlib backend to emit SVG figures.
from matplotlib_inline.backend_inline import set_matplotlib_formats
set_matplotlib_formats("svg")

# Route pandas' .plot() calls through plotly instead of matplotlib.
pd.options.plotting.backend = "plotly"

# DSC 40A preferred styles
pio.templates["dsc40a"] = go.layout.Template(
    layout=dict(
        margin=dict(l=30, r=30, t=30, b=30),
        autosize=True,
        xaxis=dict(showgrid=True),
        yaxis=dict(showgrid=True),
        title=dict(x=0.5, xanchor="center"),
    )
)
# Combine plotly's "simple_white" template with the course template registered above.
pio.templates.default = "simple_white+dsc40a"

# Notebook display helpers and interactive widgets (`interact` is used for the tangent-line slider).
from IPython.display import HTML
from ipywidgets import interact, widgets

In [None]:
def solve_normal_equations(X, y):
    """Solve the normal equations (X^T X) w = X^T y and return w*.

    X is the design matrix and y is the observation vector. The system is
    handed to np.linalg.solve, so the result is the optimal parameter
    vector for that X and y.
    """
    gram_matrix = X.T @ X
    moment_vector = X.T @ y
    return np.linalg.solve(gram_matrix, moment_vector)

## Feature engineering and transformations

### Example: Quadratic hypothesis functions

Let's look at a new dataset of cars.

In [None]:
# Load seaborn's 'mpg' example dataset and drop every row that has a missing value.
cars = sns.load_dataset('mpg').dropna()
# Preview the first few rows.
cars.head()

In [None]:
# Optional: switch plotly to its 'notebook' renderer. Run this cell before the
# plotting cells below if their figures fail to display.
pio.renderers.default = 'notebook' # If the plot doesn't load for you, run this first.

In [None]:
# Scatter plot of the 'mpg' column against the 'horsepower' column.
fig = px.scatter(cars, x='horsepower', y='mpg', title='MPG vs. Horsepower')
scatter_labels = dict(xaxis_title='Horsepower', yaxis_title='MPG', title='MPG vs. Horsepower')
# update_layout returns the figure, so it is displayed as the cell's output.
fig.update_layout(**scatter_labels)

A regular linear model here isn't great.

In [None]:
# Column of ones: its coefficient plays the role of the intercept w_0.
cars['1'] = 1
design_one_feature = cars[['1', 'horsepower']]
w_cars_one_feature = solve_normal_equations(design_one_feature, cars['mpg'])
w_cars_one_feature

In [None]:
# Horsepower values at which the fitted line is evaluated.
x_range = np.linspace(40, 220)

# Overlay the fitted linear hypothesis function on the raw data.
# (The stray px.scatter(...) call that used to open this cell was removed:
# its figure was never assigned or displayed.)
fig = go.Figure()
fig.add_trace(go.Scatter(x=cars['horsepower'], y=cars['mpg'], mode='markers', name='actual'))
fig.add_trace(go.Scatter(x=x_range,
                         y=w_cars_one_feature[0] + w_cars_one_feature[1] * x_range,
                         name='Linear Hypothesis Function',
                         line=dict(color='red')))

# update_layout returns the figure, so it is displayed as the cell's output.
fig.update_layout(xaxis_title='Horsepower', yaxis_title='MPG', title='MPG vs. Horsepower')

What if we add $\text{horsepower}^2$ as a feature? This would mean fitting a hypothesis function of the form

$$\text{predicted MPG} = w_0 + w_1 \cdot \text{horsepower} + w_2 \cdot \text{horsepower}^2$$

In [None]:
# Store the squared horsepower as its own column so it can serve as a feature.
cars['horsepower^2'] = cars['horsepower']**2

In [None]:
# Display the three columns that make up the design matrix of the quadratic model.
cars[['1', 'horsepower', 'horsepower^2']]

In [None]:
# Columns of the design matrix: intercept, horsepower, horsepower squared.
quadratic_features = ['1', 'horsepower', 'horsepower^2']
w_cars_squared = solve_normal_equations(cars[quadratic_features], cars['mpg'])
w_cars_squared

Let's look at the resulting hypothesis function.

In [None]:
# Define the evaluation grid in this cell rather than relying on an `x_range`
# left over from another cell: the name is rebound to a different grid later
# in the notebook, so re-running this cell out of order would otherwise plot
# over the wrong horsepower values.
x_range = np.linspace(40, 220)

# Compare the linear and quadratic hypothesis functions against the raw data.
# (The stray px.scatter(...) call that used to open this cell was removed:
# its figure was never assigned or displayed.)
fig = go.Figure()
fig.add_trace(go.Scatter(x=cars['horsepower'], y=cars['mpg'], mode='markers', name='actual'))
fig.add_trace(go.Scatter(x=x_range,
                         y=w_cars_one_feature[0] + w_cars_one_feature[1] * x_range,
                         name='Linear Hypothesis Function',
                         line=dict(color='red')))
# Both x and y use the same grid, so the plotted curve cannot drift out of sync.
fig.add_trace(go.Scatter(x=x_range,
                         y=w_cars_squared[0] + w_cars_squared[1] * x_range + w_cars_squared[2] * x_range**2,
                         name='Quadratic Hypothesis Function',
                         line=dict(color='#F7CF5D', width=5)))

# update_layout returns the figure, so it is displayed as the cell's output.
fig.update_layout(xaxis_title='Horsepower', yaxis_title='MPG', title='MPG vs. Horsepower')

Note: this hypothesis function is **quadratic as a function of horsepower**, but it's still **linear as a function of the parameters**. This means we can still use the normal equations to find $\vec{w}^*$.

### Example: Amdahl's Law

In [None]:
# Design matrix: a column of ones next to the feature values 1, 1/2, 1/4.
second_column = [1, 1/2, 1/4]
X_amdahl = np.array([[1, value] for value in second_column])

# Observation vector, one entry per row of X_amdahl.
y_amdahl = np.array([8, 4, 3])

In [None]:
# Solve the normal equations for the Amdahl's Law data and display the fitted parameters.
solve_normal_equations(X_amdahl, y_amdahl)

### Example: Transformations

In [None]:
# This cell generates our dataset.
np.random.seed(28)
x_fake = np.linspace(0, 20, 50) + np.random.normal(loc=0, scale=0.5, size=50)
y_fake = 0.5*np.random.normal(loc=2, scale=0.5, size=50) * np.e**(0.2 * x_fake)

In [None]:
# Scatter plot of the synthetic data generated above.
px.scatter(x=x_fake, y=y_fake)

As per the lecture slides, we're trying to find a hypothesis function of the form

$$H(x) = w_0 e^{w_1 x}$$

We re-wrote this as

$$\log H(x) = \log w_0 + w_1 x$$

As a result, our design matrix $X$ is still 

$$X = \begin{bmatrix}1 & x_1 \\ 1 & x_2 \\ \vdots & \vdots \\ 1 & x_n \end{bmatrix}$$

but our observation vector is now

$$\vec{z} = \begin{bmatrix} \log y_1 \\ \log y_2 \\ \vdots \\ \log y_n \end{bmatrix}$$

and our parameter vector is $$\vec{b} = \begin{bmatrix} b_0 \\ b_1 \end{bmatrix} = \begin{bmatrix} \log w_0 \\ w_1 \end{bmatrix}$$

In [None]:
# Design matrix: a column of ones followed by the x values.
intercept_column = np.ones_like(x_fake)
X_trans = np.transpose(np.vstack((intercept_column, x_fake)))

# Observation vector of the transformed problem: log of each y value.
z_trans = np.log(y_fake)

In [None]:
# Fit the linear model on the log-transformed observations and display the result.
b_trans = solve_normal_equations(X_trans, z_trans)
b_trans

Now that we have $\vec{b}^*$, we need to solve for $\vec{w}^*$:

In [None]:
# Unpack the two fitted parameters of the log-transformed model.
b0, b1 = b_trans

In [None]:
# Undo the transformation: b0 = log(w0), so w0 = e^b0, while w1 = b1 directly.
w0_star, w1_star = np.e**b0, b1

In [None]:
# Display both recovered parameters of the exponential hypothesis function.
w0_star, w1_star

Let's look at a plot of the resulting hypothesis function, $H(x) = 0.965 e^{0.196 x}$, to make sure it looks reasonable.

In [None]:
# x values at which the fitted exponential curve is evaluated.
x_range = np.arange(0, 25)

# Overlay the fitted exponential hypothesis function on the synthetic data.
fig = go.Figure()
fig.add_trace(go.Scatter(x=x_fake, y=y_fake, mode='markers', name='actual'))
fig.add_trace(go.Scatter(x=x_range,
                         y=w0_star * np.e**(w1_star * x_range),
                         name='Exponential Hypothesis Function',
                         line=dict(color='red')))

# Label the figure like every other plot in the notebook so it stands on its own.
# update_layout returns the figure, so it is displayed as the cell's output.
fig.update_layout(xaxis_title='$x$', yaxis_title='$y$', title='Exponential Hypothesis Function')

## Gradient descent

$$f(t) = 5t^4 - t^3 - 5t^2 + 2t - 9$$

In [None]:
def f(t):
    """Evaluate f(t) = 5t^4 - t^3 - 5t^2 + 2t - 9."""
    quartic = 5 * (t**4)
    cubic = t**3
    quadratic = 5 * (t**2)
    linear = 2 * t
    return quartic - cubic - quadratic + linear - 9

def df(t):
    """Evaluate the derivative f'(t) = 20t^3 - 3t^2 - 10t + 2."""
    cubic = 20 * (t**3)
    quadratic = 3 * (t**2)
    linear = 10 * t
    return cubic - quadratic - linear + 2

def create_tangent_line(t):
    """Return a function of x that evaluates the line tangent to f at the point (t, f(t))."""
    slope = df(t)
    # Choose the intercept so that the line passes through (t, f(t)).
    intercept = f(t) - slope * t

    def tangent(x):
        return intercept + slope * x

    return tangent

**Note**: This notebook has _lots_ of code, but you're not expected to understand most of it. Instead, most of it is there in order to set up the visualizations.

Run the cell below to see a plot of our function, $f$.

In [None]:
# Sample f on a fine grid over [-1.25, 1.25].
ts = np.linspace(-1.25, 1.25, 1000)
ys = f(ts)

# Line plot of f; later cells reuse `fig.data` as the background curve.
fig = px.line(x=ts, y=ys)
curve_layout = dict(
    title='$f(t) = 5t^4 - t^3 - 5t^2 + 2t - 9$',
    xaxis_title='$t$',
    yaxis_title='$f(t)$',
    width=800,
)
# update_layout returns the figure, so it is displayed as the cell's output.
fig.update_layout(**curve_layout)

Run the cell below to see an interactive visualization, where you can change the value of $t$ and see the line tangent to $f$ that passes through the point $(t, f(t))$.

In [None]:
def show_tangent(t0):
    """Return a figure showing f together with its tangent line at t = t0.

    The figure is built from the traces of the module-level `fig` (the plot
    of f), plus a red marker at (t0, f(t0)) and a red tangent-line segment.
    """
    tan_fn = create_tangent_line(t0)

    point_trace = go.Scatter(x=[t0], y=[f(t0)], marker={'color': 'red', 'size': 20}, showlegend=False)
    # The segment spans x in [-5, 5], wider than the visible x-range set below.
    line_trace = go.Scatter(x=[-5, 5], y=[tan_fn(-5), tan_fn(5)], line={'color': 'red'}, name='Tangent Line')

    fig2 = go.Figure(fig.data)
    fig2.add_trace(point_trace)
    fig2.add_trace(line_trace)

    # Fix both axis ranges so the view does not change as t0 changes.
    fig2.update_xaxes(range=[-1.25, 1.25])
    fig2.update_yaxes(range=[-12, -4])
    fig2.update_layout(
        title=f'Tangent line to f(t) at t = {round(t0, 2)}<br>Slope of tangent line: {round(df(t0), 5)}',
        xaxis_title='$t$',
        yaxis_title='$f(t) = 5t^4 - t^3 - 5t^2 + 2t - 9$',
        showlegend=False,
    )
    return fig2

# Slider over t0 in [-1.25, 1.25] that redraws the tangent line.
interact(show_tangent, t0=(-1.25, 1.25))

Run the cell below and click the **▶️ Start animation** button to see an **animated** version of the previous plot.

In [None]:
# Buttons shared by every animation in this notebook.
play_button = {'label': '▶️ Start animation', 'method': 'animate', 'args': [None]}

stop_button = dict(label='⏯️ Stop animation', method='animate', visible = True,
            args=[(), {'frame': {'duration': 0, 'redraw': False}, 'mode': 'next', 'fromcurrent': True}])

# One animation frame per value of t, sweeping left to right across the plot.
t_range = np.arange(-1.25, 1.26, 0.1)

# Build each tangent figure once and take its three traces (curve, point,
# tangent line), instead of rebuilding the same figure once per trace.
initial_traces = list(show_tangent(-1.25).data[:3])
tangent_frames = [go.Frame(data=list(show_tangent(t).data[:3])) for t in t_range]

anim_fig = go.Figure(
    data=initial_traces,
    frames=tangent_frames,
    layout=go.Layout(updatemenus=[dict(
            type="buttons",
            buttons=[play_button, stop_button])])
)
# Fixed axis ranges keep the view steady while the frames play.
anim_fig.update_xaxes(title='$t$', range=[-1.25, 1.25]).update_yaxes(title='$f(t) = 5t^4 - t^3 - 5t^2 + 2t - 9$', range=[-12, -4])

### Gradient descent update rule

Let's start with an initial guess $t_0 = 0$ and a learning rate $\alpha = 0.01$.

$$t_{i + 1} = t_i - \alpha \frac{df}{dt}(t_i)$$

In [None]:
# Gradient descent on f: initial guess t = 0, learning rate 0.01, 50 updates.
alpha = 0.01
t = 0
for i in range(50):
    # Print the current iterate and the function value there, then take a step.
    print(round(t, 4), round(f(t), 4))
    t -= alpha * df(t)

We see that pretty quickly, $t_i$ converges to $-0.727$! What does this look like animated? Run the cell below!

In [None]:
def minimizing_animation(t0, alpha, n_steps=50):
    """Return an animated figure of gradient descent on f.

    Parameters
    ----------
    t0 : number
        Initial guess.
    alpha : number
        Learning rate (step size) in the update t <- t - alpha * df(t).
    n_steps : int, optional
        Number of iterates to animate, one frame each. Defaults to 50.

    Returns
    -------
    plotly.graph_objects.Figure
        The curve taken from the module-level `fig`, with a marker that moves
        through the iterates, plus the shared play and stop buttons.

    Raises
    ------
    ValueError
        If n_steps is less than 1, since there would be no frame to show.
    """
    if n_steps < 1:
        raise ValueError('n_steps must be at least 1')

    # Record the iterate at the start of every update.
    t = t0
    ts = []
    for _ in range(n_steps):
        ts.append(t)
        t = t - alpha * df(t)

    def marker_at(t_i):
        # Marker for a single iterate, placed on the curve at (t_i, f(t_i)).
        return go.Scatter(x=[t_i], y=[f(t_i)], marker={'size': 20}, showlegend=False)

    curve = fig.data[0]
    grad_anim = go.Figure(
        data=[curve, marker_at(ts[0])],
        frames=[go.Frame(data=[curve, marker_at(t_i)]) for t_i in ts],
        layout=go.Layout(
            updatemenus=[dict(
                type="buttons",
                buttons=[play_button, stop_button])],
            title=f'Gradient Descent<br>Initial Guess = {t0}&nbsp;&nbsp;&nbsp;&nbsp;Step Size = {alpha}'))

    return grad_anim

In [None]:
# Animate gradient descent with initial guess 0 and learning rate 0.01.
minimizing_animation(t0=0, alpha=0.01)

What if we start with a different initial guess?

In [None]:
# Same learning rate, but starting from the initial guess 1.1.
minimizing_animation(t0=1.1, alpha=0.01)

What if we use a different learning rate?

In [None]:
# Initial guess 0 again, but with a learning rate ten times larger (0.1).
minimizing_animation(t0=0, alpha=0.1)

Some learning rates are so large that the values of $t$ diverge, growing without bound in magnitude! Watch what happens when we use a learning rate of 1:

In [None]:
# Start from a float. With the integer 0 and an integer learning rate, every
# iterate stays an arbitrary-precision Python int (f and df only do integer
# arithmetic), and its digit count roughly triples per step. The cell would
# then either fail when printing an int that exceeds Python's int-to-str digit
# limit or grind through enormous integers instead of showing the divergence.
t = 0.0
for i in range(50):
    try:
        print(round(t, 4), round(f(t), 4))
        t = t - 1 * df(t)
    except OverflowError:
        # Float powers raise OverflowError once the result is too large to represent.
        print(f'Stopped after {i} updates: the values grew too large to represent as floats.')
        break