# Lecture 9 Supplementary Notebook

## DSC 40A, Spring 2024

The following cell sets up the necessary imports – don't worry too much about it.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import seaborn as sns

from matplotlib_inline.backend_inline import set_matplotlib_formats
# Render inline matplotlib figures as SVG.
set_matplotlib_formats("svg")

# Make pandas' .plot() produce plotly figures.
pd.options.plotting.backend = "plotly"

# DSC 80 preferred styles: register a custom plotly template named "dsc80"
# with 30px margins, grid lines on both axes, and horizontally centered titles.
pio.templates["dsc80"] = go.layout.Template(
    layout=dict(
        margin=dict(l=30, r=30, t=30, b=30),
        autosize=True,
        xaxis=dict(showgrid=True),
        yaxis=dict(showgrid=True),
        title=dict(x=0.5, xanchor="center"),
    )
)
# Use plotly's built-in "simple_white" template combined with "dsc80" as the default.
pio.templates.default = "simple_white+dsc80"

from IPython.display import HTML

In [None]:
def solve_normal_equations(X, y):
    '''Solve the normal equations (X^T X) w = X^T y for w.

    X is the design matrix and y is the observation vector; the returned
    value is the optimal parameter vector, w*.
    '''
    gram_matrix = X.T @ X
    moment_vector = X.T @ y
    return np.linalg.solve(gram_matrix, moment_vector)

In [None]:
def mean_squared_error(X, y, w):
    '''Returns the mean squared error of the predictions X @ w against the observations y.

    X is the design matrix, y is the observation vector, and w is the
    parameter vector.
    '''
    # Average the squared residuals directly. Summing them first would
    # collapse them to a single scalar, making the mean a no-op.
    return np.mean((y - X @ w)**2)



## Feature engineering and transformations

### Example: Quadratic hypothesis functions

Let's look at a new dataset of cars.

In [None]:
# Load seaborn's 'mpg' dataset and discard rows with missing values.
mpg_raw = sns.load_dataset('mpg')
cars = mpg_raw.dropna()
cars.head()

In [None]:
# Scatter plot of mpg against horsepower for every car.
px.scatter(
    cars,
    x='horsepower',
    y='mpg',
    title='MPG vs. Horsepower',
)

A regular linear model here isn't great.

In [None]:
# Add a constant column of 1s so the hypothesis function has an intercept term.
cars['1'] = 1
design_one_feature = cars[['1', 'horsepower']]
w_cars_one_feature = solve_normal_equations(design_one_feature, cars['mpg'])
w_cars_one_feature

In [None]:
# Grid of horsepower values over which the fitted line is drawn.
x_range = np.linspace(40, 220)

fig = go.Figure()
# Raw observations.
fig.add_trace(go.Scatter(x=cars['horsepower'], y=cars['mpg'], mode='markers', name='actual'))
# Fitted line: intercept + slope * horsepower.
fig.add_trace(go.Scatter(x=x_range, 
                         y=w_cars_one_feature[0] + w_cars_one_feature[1] * x_range, 
                         name = 'Linear Hypothesis Function', 
                         line=dict(color='red')))

# update_layout returns the figure, so as the last expression it displays the plot.
fig.update_layout(xaxis_title='Horsepower', yaxis_title='MPG', title='MPG vs. Horsepower')

What if we add $\text{horsepower}^2$ as a feature? This would mean fitting a hypothesis function of the form

$$\text{predicted MPG} = w_0 + w_1 \cdot \text{horsepower} + w_2 \cdot \text{horsepower}^2$$

In [None]:
# New feature column: the square of each car's horsepower.
cars['horsepower^2'] = cars['horsepower'].pow(2)

In [None]:
# Design matrix for the quadratic model: intercept, horsepower, horsepower squared.
cars.loc[:, ['1', 'horsepower', 'horsepower^2']]

In [None]:
# Fit the quadratic hypothesis function using the three-column design matrix.
design_squared = cars[['1', 'horsepower', 'horsepower^2']]
w_cars_squared = solve_normal_equations(design_squared, cars['mpg'])
w_cars_squared

Let's look at the resulting hypothesis function.

In [None]:
# Evaluate both fitted curves on one grid defined in this cell. The global
# x_range is reassigned to a different-length array later in the notebook, so
# relying on it here would plot mismatched x/y values if this cell were re-run
# after that one.
hp_range = np.linspace(40, 220)
linear_pred = w_cars_one_feature[0] + w_cars_one_feature[1] * hp_range
quadratic_pred = w_cars_squared[0] + w_cars_squared[1] * hp_range + w_cars_squared[2] * hp_range**2

fig = go.Figure()
# Raw observations.
fig.add_trace(go.Scatter(x=cars['horsepower'], y=cars['mpg'], mode='markers', name='actual'))
fig.add_trace(go.Scatter(x=hp_range, 
                         y=linear_pred, 
                         name='Linear Hypothesis Function', 
                         line=dict(color='red')))
fig.add_trace(go.Scatter(x=hp_range, 
                         y=quadratic_pred, 
                         name='Quadratic Hypothesis Function', 
                         line=dict(color='#F7CF5D', width=5)))

# update_layout returns the figure, so as the last expression it displays the plot.
fig.update_layout(xaxis_title='Horsepower', yaxis_title='MPG', title='MPG vs. Horsepower')

Note: this hypothesis function is **quadratic as a function of horsepower**, but it's still **linear as a function of the parameters**. This means we can still use the normal equations to find $\vec{w}^*$.

### Example: Amdahl's Law

In [None]:
# Second column holds the reciprocals 1, 1/2, 1/4 -- presumably 1/(number of
# processors) for Amdahl's Law; TODO confirm against the lecture slides.
reciprocals = 1 / np.array([1, 2, 4])
# First column is all 1s (intercept term).
X_amdahl = np.column_stack([np.ones_like(reciprocals), reciprocals])

# One observed value per row of X_amdahl.
y_amdahl = np.array([8, 4, 3])

In [None]:
# Optimal parameters for the Amdahl's Law example.
w_amdahl = solve_normal_equations(X_amdahl, y_amdahl)
w_amdahl

### Example: Transformations

In [None]:
# This cell generates our dataset: noisy x values spread over [0, 20] and
# y values that grow exponentially in x with multiplicative noise.
np.random.seed(28)
n_points = 50
x_noise = np.random.normal(loc=0, scale=0.5, size=n_points)
x_fake = np.linspace(0, 20, n_points) + x_noise
y_scale = np.random.normal(loc=2, scale=0.5, size=n_points)
y_fake = 0.5 * y_scale * np.e**(0.2 * x_fake)

In [None]:
# Scatter plot of the generated dataset, titled so the figure stands on its
# own like the other figures in this notebook.
px.scatter(x=x_fake, y=y_fake, title='Generated Dataset')

As per the lecture slides, we're trying to find a hypothesis function of the form

$$H(x) = w_0 e^{w_1 x}$$

We re-wrote this as

$$\log H(x) = \log w_0 + w_1 x$$

As a result, our design matrix $X$ is still 

$$X = \begin{bmatrix}1 & x_1 \\ 1 & x_2 \\ \vdots & \vdots \\ 1 & x_n \end{bmatrix}$$

but our observation vector is now

$$\vec{z} = \begin{bmatrix} \log y_1 \\ \log y_2 \\ \vdots \\ \log y_n \end{bmatrix}$$

and our parameter vector is

$$\vec{b} = \begin{bmatrix} b_0 \\ b_1 \end{bmatrix} = \begin{bmatrix} \log w_0 \\ w_1 \end{bmatrix}$$

In [None]:
# Design matrix: a column of 1s (intercept) next to the raw x values.
intercept_column = np.ones_like(x_fake)
X_trans = np.stack([intercept_column, x_fake], axis=0).T

# Transformed observation vector: the natural log of each y value.
z_trans = np.log(y_fake)

In [None]:
# Fit the linear model in log space: z is approximated by b0 + b1 * x.
b_trans = solve_normal_equations(X=X_trans, y=z_trans)
b_trans

Now that we have $\vec{b}^*$, we need to solve for $\vec{w}^*$:

In [None]:
# Pull the intercept and slope out of the fitted parameter vector.
b0 = b_trans[0]
b1 = b_trans[1]

In [None]:
# Undo the log transform: b0 = log(w0), so w0 = e^b0; b1 is w1 unchanged.
w0_star, w1_star = np.e**b0, b1

In [None]:
# Display the recovered parameters of the exponential hypothesis function.
(w0_star, w1_star)

Let's look at a plot of the resulting hypothesis function, $H(x) = 0.965 e^{0.196 x}$, to make sure it looks reasonable.

In [None]:
# Evaluate the fitted exponential curve on an integer grid of x values.
x_range = np.arange(0, 25)
y_fitted = w0_star * np.e**(w1_star * x_range)

fig = go.Figure()
# Raw observations.
fig.add_trace(go.Scatter(x=x_fake, y=y_fake, mode='markers', name='actual'))
# Fitted curve H(x) = w0 * e^(w1 * x).
fig.add_trace(
    go.Scatter(
        x=x_range,
        y=y_fitted,
        name='Exponential Hypothesis Function',
        line=dict(color='red'),
    )
)