# 1) + 2)

In [107]:
import numpy as np
from varname import nameof

# adapted from stack overflow
def bmatrix(a, var_name, exercise_number=3):
    """Write an array to a ``.tex`` file as a LaTeX ``bmatrix`` environment.

    The file is written to
    ``./docs/aux-matrices/ex-{exercise_number}/{var_name}.tex``; the
    directory is created when it does not exist yet.

    Args:
        a: Array-like with at most two dimensions (ndarray, ``np.matrix``,
            nested lists, ...). Scalars and 1-D input are rendered as a
            single row; empty input produces an environment with no rows.
        var_name: Base name (without extension) of the output file.
        exercise_number: Selects the ``ex-{exercise_number}`` subdirectory.

    Raises:
        ValueError: If ``a`` has more than two dimensions.
    """
    import os  # local import so the helper does not depend on the import cell

    arr = np.asarray(a)
    if arr.ndim > 2:
        raise ValueError('bmatrix can at most display two dimensions')

    # Format the actual values instead of parsing str(a): the printed form
    # wraps rows wider than numpy's line width, abbreviates large arrays
    # with '...', and rounds to the print precision before '.6g' is applied.
    rows = np.atleast_2d(arr) if arr.size else []
    rv = [r'\begin{bmatrix}']
    rv += ['  ' + ' & '.join(f"{float(x):.6g}" for x in row) + r'\\' for row in rows]
    rv += [r'\end{bmatrix}']

    out_dir = f"./docs/aux-matrices/ex-{exercise_number}"
    os.makedirs(out_dir, exist_ok=True)
    with open(f"{out_dir}/{var_name}.tex", 'w') as f:
        f.write('\n'.join(rv))

### First question

In [108]:
# Closed-form ridge regression: w = (Phi^T Phi + lambda * I)^-1 Phi^T z.
# Plain 2-D ndarrays are used instead of np.matrix, which numpy no longer
# recommends; every product below goes through np.matmul, so the results
# and the generated .tex files are unchanged.
RIDGE_LAMBDA = 2  # regularisation strength multiplying the identity

# Design matrix: each row is [1, x, x^2, x^3] for x = 0.8, 1, 1.2, 1.4, 1.6.
Phi = np.array([
  [1, 0.8, 0.64, 0.512],
  [1, 1, 1, 1],
  [1, 1.2, 1.44, 1.728],
  [1, 1.4, 1.96, 2.744],
  [1, 1.6, 2.56, 4.096],
])

bmatrix(Phi, nameof(Phi), exercise_number=1)

Phi_T = Phi.transpose()
# Regularisation term, sized from the number of basis functions in Phi.
# NOTE: `I` holds lambda * identity, not the bare identity; the name is kept
# because nameof(I) determines the output file name.
I = RIDGE_LAMBDA * np.identity(Phi.shape[1])
bmatrix(I, nameof(I), exercise_number=1)
# Targets as a column vector, one entry per row of Phi.
z = np.array([[24], [20], [10], [13], [12]])
bmatrix(z, nameof(z), exercise_number=1)

# The explicit inverse is kept (rather than np.linalg.solve) because the
# intermediate factors are themselves exported.
left = np.linalg.inv(np.add(np.matmul(Phi_T, Phi), I))
bmatrix(left, nameof(left), exercise_number=1)
right = np.matmul(Phi_T, z)
bmatrix(right, nameof(right), exercise_number=1)
w = np.matmul(left, right)
bmatrix(w, nameof(w), exercise_number=1)

### Second question

In [109]:
# Predictions of the fitted weights on the training inputs.
z_hat = Phi @ w
bmatrix(z_hat, nameof(z_hat), exercise_number=2)

# Root-mean-square error between predictions and targets.
n = len(z_hat)
residuals = z_hat - z
squared_error_total = np.sum(np.square(residuals))
rmse = np.sqrt(squared_error_total / n)
print(f"Training RMSE: {rmse}")

Training RMSE: 6.843294892094978


# 3)

In [110]:
# NOTE: np.matmul is regular matrix multiplication, np.multiply is element-wise multiplication

def activation(x):
  """Return exp(0.1 * x), applied element-wise when x is an array."""
  scaled = 0.1 * x
  return np.exp(scaled)

# Desired outputs, shape (1, 3) to line up with the network output x_2.
TARGET = np.array([[24, 20, 10]])

# --- Inputs and first-layer parameters ---
x_0 = np.array([[0.8, 1, 1.2]])   # shape (1, 3)
w_1 = np.array([[1], [1]])        # shape (2, 1)
b_1 = np.array([[1], [1]])        # shape (2, 1), broadcast across columns
bmatrix(x_0, nameof(x_0))
bmatrix(w_1, nameof(w_1))
bmatrix(b_1, nameof(b_1))

# --- First layer: affine map followed by the activation ---
z_1 = w_1 @ x_0 + b_1
x_1 = activation(z_1)
bmatrix(z_1, nameof(z_1))
bmatrix(x_1, nameof(x_1))

# --- Second-layer parameters ---
w_2 = np.array([[1, 1]])          # shape (1, 2)
b_2 = np.array([[1]])             # shape (1, 1)
bmatrix(w_2, nameof(w_2))
bmatrix(b_2, nameof(b_2))

# --- Second layer: affine map followed by the activation ---
z_2 = w_2 @ x_1 + b_2
x_2 = activation(z_2)
bmatrix(z_2, nameof(z_2))
bmatrix(x_2, nameof(x_2))

In [111]:
# Element-wise output error: network output minus the matching target
# (no summation happens here despite the variable's name).
delta_sum = x_2 - TARGET
bmatrix(delta_sum, nameof(delta_sum))

# Derivative of the activation with respect to its input at the output
# layer, written as 0.1 * activation(z).
d_x_2_z_2 = 0.1 * activation(z_2)
bmatrix(d_x_2_z_2, nameof(d_x_2_z_2))

# Output-layer delta: Hadamard (element-wise) product of error and derivative.
delta_2 = delta_sum * d_x_2_z_2
bmatrix(delta_2, nameof(delta_2))

# Hidden-layer delta: push delta_2 back through w_2^T, then scale
# element-wise by the activation derivative at z_1.
d_z_2_x_1_T = w_2.T
bmatrix(d_z_2_x_1_T, nameof(d_z_2_x_1_T))
d_x_1_z_1 = 0.1 * activation(z_1)
bmatrix(d_x_1_z_1, nameof(d_x_1_z_1))

left_delta_1 = d_z_2_x_1_T @ delta_2
bmatrix(left_delta_1, nameof(left_delta_1))
right_delta_1 = d_x_1_z_1
bmatrix(right_delta_1, nameof(right_delta_1))

delta_1 = left_delta_1 * right_delta_1
bmatrix(delta_1, nameof(delta_1))

In [112]:
# Weight gradients. Summing the outer products delta[:, i] * x[:, i]^T over
# every column i is exactly the matrix product delta @ x^T, so one matmul
# replaces the manual accumulation loop (and its integer zero-initialised
# accumulators and leaked loop temporaries).
transposed_x_0 = x_0.transpose()
transposed_x_1 = x_1.transpose()

d_E_w_1 = np.matmul(delta_1, transposed_x_0)
bmatrix(d_E_w_1, nameof(d_E_w_1))

d_E_w_2 = np.matmul(delta_2, transposed_x_1)
bmatrix(d_E_w_2, nameof(d_E_w_2))

# Bias gradients are the deltas themselves, still one column per input
# column; they are not summed across columns in this cell.
d_E_b_1 = delta_1
bmatrix(d_E_b_1, nameof(d_E_b_1))
d_E_b_2 = delta_2
bmatrix(d_E_b_2, nameof(d_E_b_2))

In [113]:
LEARNING_RATE = 0.1

# Gradient-descent step on the weights: w <- w - learning_rate * dE/dw.
w_1_after_update = w_1 - LEARNING_RATE * d_E_w_1
bmatrix(w_1_after_update, nameof(w_1_after_update))
w_2_after_update = w_2 - LEARNING_RATE * d_E_w_2
bmatrix(w_2_after_update, nameof(w_2_after_update))

# Collapse each bias gradient across its columns (one total per row, kept
# as a column vector) and scale it by the learning rate.
b_1_lines_sum = LEARNING_RATE * np.sum(d_E_b_1, axis=1, keepdims=True)
b_2_lines_sum = LEARNING_RATE * np.sum(d_E_b_2, axis=1, keepdims=True)

bmatrix(b_1_lines_sum, nameof(b_1_lines_sum))
bmatrix(b_2_lines_sum, nameof(b_2_lines_sum))

# Gradient-descent step on the biases.
b_1_after_update = b_1 - b_1_lines_sum
bmatrix(b_1_after_update, nameof(b_1_after_update))
b_2_after_update = b_2 - b_2_lines_sum
bmatrix(b_2_after_update, nameof(b_2_after_update))