In [None]:
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go

N = 100  # number of training samples

np.random.seed(42)
# Three hidden ground-truth parameters (two weights and one bias),
# each rounded to one decimal place.
secret_numbers = np.round(100*(1.5*np.random.rand(3)-1)/2)/10

# Ground-truth linear model: y = x @ w + b + noise.
w = np.array([secret_numbers[0], secret_numbers[1]])
# Use the third secret number for the bias. Reusing secret_numbers[1] made the
# bias identical to w[1] and left the third drawn value unused.
b = secret_numbers[2]
x = np.random.randn(N, 2)
# Gaussian noise with standard deviation sqrt(pi), i.e. variance pi.
eps = np.sqrt(np.pi)*np.random.randn(N)
y = x@w + b + eps

# Plotting utilities
(see below for how to use them)

In [None]:
def plot_scatter_data():
    """Build a 3D scatter figure of the module-level training data.

    The two columns of ``x`` are placed on the x and y axes and ``y`` on the
    z axis.

    Returns:
        The Plotly figure, so further traces can be added before showing it.
    """
    points = go.Scatter3d(
        x=x[:, 0],
        y=x[:, 1],
        z=y,
        mode='markers',
        name='Data Points',
    )
    axis_titles = dict(
        xaxis_title='Feature 1 (x1)',
        yaxis_title='Feature 2 (x2)',
        zaxis_title='Target (y)',
    )

    fig = go.Figure(data=[points])
    fig.update_layout(title='3D Scatter Plot', scene=axis_titles)
    return fig

def plot_plane(fig, w, b, alpha=0.5):
    """Add the plane ``z = w[0]*x1 + w[1]*x2 + b`` to ``fig`` as a surface.

    The plane is evaluated on a 10x10 grid spanning the min/max of each column
    of the module-level ``x``.

    Args:
        fig: Plotly figure that receives the surface trace (modified in place).
        w: two-entry weight vector; ``w[0]`` and ``w[1]`` are the slopes.
        b: intercept of the plane.
        alpha: opacity passed to the surface trace.
    """
    x1_ticks = np.linspace(x[:, 0].min(), x[:, 0].max(), 10)
    x2_ticks = np.linspace(x[:, 1].min(), x[:, 1].max(), 10)
    grid_x1, grid_x2 = np.meshgrid(x1_ticks, x2_ticks)

    height = w[0] * grid_x1 + w[1] * grid_x2 + b

    surface = go.Surface(
        x=grid_x1,
        y=grid_x2,
        z=height,
        name='Regression Plane',
        opacity=alpha,
    )
    fig.add_trace(surface)

# Make a "random" guess of the parameters

In [None]:
# Arbitrary starting parameters, chosen by hand rather than fitted.
w0, b0 = np.array([1, 1]), 1

# Show the data together with the plane these parameters describe.
fig = plot_scatter_data()
plot_plane(fig, w=w0, b=b0)
fig.show()

# Live demo

## Creating vectors

In [None]:
# A 1-D array built from a Python list.
v = np.array([0, 1, 2, 3, 4, 5])

## Slicing vectors

In [None]:
# 1 through 3

## Slicing off the end

In [None]:
# drop the last element

## Concatenation
(make `[0,1,4,5]` using `v` and slicing)

## Vector of length 1 versus accessing the entry of the vector

## Creating matrices

In [None]:
# A 2x2 matrix is written as a list of rows.
A = np.array([
    [1, 2],
    [3, 4],
])

# TODO (live demo): build B the same way, so that
#   B = [[5, 6],
#        [7, 8]]
B = None  # placeholder until B is implemented

## Transpose of a matrix

## Creating constant matrices

In [None]:
# 2x3 matrix filled with ones; np.zeros accepts the same shape argument.
C = np.ones((2, 3))
C

## Stacking matrices horizontally

In [None]:
# A (2x2) and C (2x3) have the same number of rows, so they can sit side by
# side; the result is 2x5.
D = np.hstack((A, C))

## How about vertically?

In [None]:
# A is 2x2 and C is 2x3: the column counts differ, so stacking them
# vertically raises a ValueError. Catch it so the failure is displayed
# without stopping "Run All".
try:
    np.vstack([A, C])
except ValueError as err:
    print(f"ValueError: {err}")

# Transposing C gives a 3x2 matrix whose column count matches A's,
# so the vertical stack succeeds with a 5x2 result.
np.vstack([A, C.T])

## Matrix-vector multiplication

In [None]:
# A times [1;2]

## Matrix-matrix multiplication

In [None]:
# A is 2x2 and C.T is 3x2, so the inner dimensions (2 and 3) disagree and
# A @ C.T raises a ValueError. Catch it so the failure is displayed without
# stopping "Run All".
try:
    A @ C.T
except ValueError as err:
    print(f"ValueError: {err}")

# A @ C is conformable: (2x2) @ (2x3) gives a 2x3 result.
A @ C

# Exercise 2.a


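Implement a single gradient descent update with step size = 1/10. Start with the helpers `pack` (combine `w` and `b` into one vector `theta`) and `unpack` (split `theta` back into `w` and `b`).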

In [None]:


def pack(w,b):
  """Exercise stub: combine the weights ``w`` and bias ``b`` into one vector ``theta``."""
  pass # delete this line
  # YOUR CODE HERE
  # return theta

# delete in student version
def pack(w, b):
    """Append the bias ``b`` to the weight vector ``w``, giving ``theta = [w..., b]``."""
    bias_entry = np.array([b])
    return np.concatenate([w, bias_entry])


def unpack(theta):
  """Exercise stub: split ``theta`` back into the weights ``w`` and the bias ``b``."""
  pass # delete this line
  # YOUR CODE HERE
  # return w,b

# delete in student version
def unpack(theta):
    """Split ``theta`` into weights (all but the last entry) and bias (the last entry)."""
    return theta[:-1], theta[-1]




# Exercise 2.b
Implement gradient descent using linear algebra

In [None]:
step_size = 0.1

theta0 = pack(w0, b0)

theta1 = None # delete this line

# DELETE THIS IN STUDENT VERSION
# Append a column of ones to x so the bias acts as a third weight.
Xtilde = np.hstack((x, np.ones((N, 1))))
# Gradient of the mean squared error at theta0:
#   (2/N) * (Xtilde^T Xtilde theta0 - Xtilde^T y)
gram_term = Xtilde.T @ Xtilde @ theta0
target_term = Xtilde.T @ y
grad_theta = (2/N)*(gram_term - target_term)
theta1 = theta0 - step_size*grad_theta
# END DELETE


w1, b1 = unpack(theta1)


# Visualize your solution

In [None]:

# Overlay the plane obtained after one gradient step on the training data.
fig = plot_scatter_data()
plot_plane(fig, w=w1, b=b1)
fig.show()

# Exercise 2.c

1. Implement the mean squared error (MSE) function `J(theta)`
2. Run 100 iterations of gradient descent with step size = 1/10
3. Plot `J(theta_list[t])` with `t` on the x-axis
4. Why is the value `J(theta_list[-1])` close to `pi`?

In [None]:
def J(theta):
    """Training mean squared error of the linear model with parameters ``theta``.

    Reads the module-level design matrix ``Xtilde`` and targets ``y``.
    """
    return np.mean((y - Xtilde@theta)**2)

step_size=1/10

# DELETE THIS IN THE STUDENT VERSION
# Design matrix with a trailing column of ones. It does not change between
# iterations, so it is built once here instead of inside the loop.
Xtilde = np.hstack([x,np.ones(shape=(N,1))])
# END DELETE

theta0 = pack(w0,b0)
theta_list = [theta0]
mse_list = [J(theta0)]
w_list = [w0]
b_list = [b0]

for t in range(100):
    pass
    # YOUR CODE GOES HERE
    # (compute theta_next, w_t, b_t and mse for this iteration)

    # DELETE THIS IN THE STUDENT VERSION
    theta = theta_list[-1]

    grad_theta = (2/N)*(Xtilde.T @ Xtilde @ theta - Xtilde.T @ y)

    theta_next = theta - step_size*grad_theta

    # Use iteration-specific names: assigning to w and b here would overwrite
    # the ground-truth parameters that generated the data.
    w_t,b_t = unpack(theta_next)
    mse = J(theta_next)
    # END DELETE

    theta_list.append(theta_next)
    w_list.append(w_t)
    b_list.append(b_t)
    mse_list.append(mse)

plt.plot(mse_list)
plt.xlabel("iteration t")
plt.ylabel("J(theta)")

# Dotted reference line at pi, the variance of the noise added to y.
plt.axhline(np.pi, color='k', linestyle=":")


# Exercise 2.d

1. Create a new batch of testing data called `x_test` and `y_test`
2. Define `J_test`
3. Plot them together

In [None]:
np.random.seed(43)  # a different seed from the one used for the training data
N_test = 10 * N
x_test = None
y_test = None

# YOUR CODE GOES HERE
# Hint: You should define Xtilde_test

# delete this in student version
# Same recipe as the training data: standard-normal features, noise with
# standard deviation sqrt(pi), and the module-level w and b.
x_test = np.random.randn(N_test, 2)
eps_test = np.sqrt(np.pi) * np.random.randn(N_test)
y_test = x_test @ w + b + eps_test
Xtilde_test = np.hstack((x_test, np.ones((N_test, 1))))
# end delete


def J_test(theta):
    """Exercise stub: return the mean squared error of ``theta`` on the test data."""
    pass # delete this line
    # YOUR CODE GOES HERE
    return mse_test

# delete this in student version
def J_test(theta):
    """Mean squared error of the linear model ``theta`` on ``Xtilde_test`` / ``y_test``."""
    residual = y_test - Xtilde_test @ theta
    return np.mean(residual**2)


# Evaluate every stored iterate on the test data.
mse_test_list = list(map(J_test, theta_list))

# Training and testing curves on the same axes.
for curve, curve_label in ((mse_list, 'Training MSE'), (mse_test_list, 'Testing MSE')):
    plt.plot(curve, label=curve_label)

# Dotted reference line at pi.
plt.axhline(np.pi, color='k', linestyle=":")
plt.legend()

# Visualize the models through the training process

In [None]:

fig = plot_scatter_data()

# Planes for entries 0-5 of the parameter history; opacity grows by 0.1 per
# plane, up to 0.6 for the last one drawn.
for i in range(6):
    opacity = 0.6 - (6 - i - 1) * .1
    plot_plane(fig, w_list[i], b_list[i], alpha=opacity)
fig.show()

In [None]:

fig = plot_scatter_data()

# Planes for entries 6-11 of the parameter history; opacity grows by 0.1 per
# plane, up to 0.6 for the last one drawn.
for i in range(6):
    opacity = 0.6 - (6 - i - 1) * .1
    plot_plane(fig, w_list[i + 6], b_list[i + 6], alpha=opacity)
fig.show()

In [None]:
# Plane described by the last entry of the parameter history.
fig = plot_scatter_data()
plot_plane(fig, w=w_list[-1], b=b_list[-1])
fig.show()