# 1) + 2) Regularised polynomial regression: closed-form weights and training RMSE

In [184]:
import sys

import numpy as np

# shamelessly stolen from stack overflow
def bmatrix(a):
    """Returns a LaTeX bmatrix

    The array is rendered with NumPy's own element formatting; every row of
    the array becomes exactly one row of the LaTeX matrix.

    :a: numpy array (or array-like) with at most two dimensions
    :returns: LaTeX bmatrix as a string
    :raises ValueError: if `a` has more than two dimensions
    """
    a = np.asarray(a)
    if len(a.shape) > 2:
        raise ValueError('bmatrix can at most display two dimensions')
    # str() obeys NumPy's print options: by default it wraps rows that are
    # wider than the line width and abbreviates large arrays with '...'.
    # Either would corrupt the matrix (extra rows / literal dots), so both
    # limits are lifted while the array is rendered.
    with np.printoptions(linewidth=sys.maxsize, threshold=sys.maxsize):
        text = str(a)
    lines = text.replace('[', '').replace(']', '').splitlines()
    rv = [r'\begin{bmatrix}']
    # Cells are separated by '&', rows are terminated by a LaTeX line break.
    rv += ['  ' + ' & '.join(line.split()) + r'\\' for line in lines]
    rv += [r'\end{bmatrix}']
    return '\n'.join(rv)

# Design matrix: one row per training input x in {0.8, 1, 1.2, 1.4, 1.6},
# holding the powers [1, x, x^2, x^3].
# np.matrix is used on purpose: `*` between these objects is a matrix product.
Phi = np.matrix([
  [1, 0.8, 0.64, 0.512],
  [1, 1, 1, 1],
  [1, 1.2, 1.44, 1.728],
  [1, 1.4, 1.96, 2.744],
  [1, 1.6, 2.56, 4.096],
])
Phi_T = Phi.T

# Regularisation term added to Phi^T Phi: the 4x4 identity scaled by 2.
I = 2 * np.eye(4)

# Observed outputs as a column vector, one entry per row of Phi.
z = np.matrix([24, 20, 10, 13, 12]).T

# Closed-form weights: w = (Phi^T Phi + 2 I)^-1 Phi^T z.
# `left` and `right` are kept as separate names because both factors are
# printed individually further down.
left = np.linalg.inv(
  Phi_T * Phi + I
)
right = Phi_T * z
w = left * right

def print_first_question_matrices():
  """Print the first question's matrices as LaTeX, each followed by '---'.

  The matrices are the module-level Phi, Phi_T, I, z, left, right and w, in
  that order. They are looked up when the function is called, so the output
  reflects whatever those names hold at that moment.
  """
  matrices = (Phi, Phi_T, I, z, left, right, w)
  for current in matrices:
    print(bmatrix(current), "---", sep="\n")

def print_second_question_results():
  """Print the model's predictions on the training inputs and the training RMSE.

  Uses the module-level Phi, w and z. The predictions are printed as a LaTeX
  bmatrix under the label "z hat:".
  """
  forecasts = Phi * w
  print("z hat:", bmatrix(forecasts), sep="\n")

  # np.square is element-wise; `** 2` on an np.matrix would be a matrix power.
  residuals = forecasts - z
  mean_squared_error = np.sum(np.square(residuals)) / len(forecasts)
  rmse = np.sqrt(mean_squared_error)
  print("Training RMSE: {}".format(rmse))

In [185]:
print_first_question_matrices()

\begin{bmatrix}
  1. & 0.8 & 0.64 & 0.512\\
  1. & 1. & 1. & 1.\\
  1. & 1.2 & 1.44 & 1.728\\
  1. & 1.4 & 1.96 & 2.744\\
  1. & 1.6 & 2.56 & 4.096\\
\end{bmatrix}
---
\begin{bmatrix}
  1. & 1. & 1. & 1. & 1.\\
  0.8 & 1. & 1.2 & 1.4 & 1.6\\
  0.64 & 1. & 1.44 & 1.96 & 2.56\\
  0.512 & 1. & 1.728 & 2.744 & 4.096\\
\end{bmatrix}
---
\begin{bmatrix}
  2. & 0. & 0. & 0.\\
  0. & 2. & 0. & 0.\\
  0. & 0. & 2. & 0.\\
  0. & 0. & 0. & 2.\\
\end{bmatrix}
---
\begin{bmatrix}
  24\\
  20\\
  10\\
  13\\
  12\\
\end{bmatrix}
---
\begin{bmatrix}
  0.34168753 & -0.1214259 & -0.07490231 & -0.00932537\\
  -0.1214259 & 0.3892078 & -0.09667718 & -0.07445624\\
  -0.07490231 & -0.09667718 & 0.37257788 & -0.17135047\\
  -0.00932537 & -0.07445624 & -0.17135047 & 0.17998796\\
\end{bmatrix}
---
\begin{bmatrix}
  79.\\
  88.6\\
  105.96\\
  134.392\\
\end{bmatrix}
---
\begin{bmatrix}
  7.0450759\\
  4.64092765\\
  1.96734046\\
  -1.30088142\\
\end{bmatrix}
---


In [186]:
print_second_question_results()

z hat:
\begin{bmatrix}
  11.35086463\\
  12.35246259\\
  13.19923625\\
  13.8287433\\
  14.17854143\\
\end{bmatrix}
Training RMSE: 6.843294892094976


# 3) Two-layer network: forward pass and one gradient-descent update

In [187]:
# Activation function: f(x) = e^(0.1 * x)

# NOTE: np.matmul is regular matrix multiplication, np.multiply is element-wise multiplication

def activation(x):
  """Apply the activation f(x) = e^(0.1 * x) element-wise.

  :x: scalar or numpy array
  :returns: np.exp(0.1 * x), with the same shape as `x`
  """
  scaled = 0.1 * x
  return np.exp(scaled)

# Target values the network output is compared against, one column per sample.
TARGET = np.array([[24, 20, 10]])

# Layer 0: the inputs, stored as a single row with one column per sample.
x_0 = np.array([[0.8, 1, 1.2]])
print("x_0:", bmatrix(x_0), sep="\n")

# Layer 1: two units, every weight and bias initialised to 1.
w_1 = np.ones((2, 1), dtype=int)
b_1 = np.ones((2, 1), dtype=int)

# `@` is the matrix product (np.matmul); b_1 broadcasts across the columns.
z_1 = w_1 @ x_0 + b_1
print("z_1:", bmatrix(z_1), sep="\n")
x_1 = activation(z_1)
print("x_1:", bmatrix(x_1), sep="\n")

# Layer 2: a single output unit, again with weights and bias set to 1.
w_2 = np.ones((1, 2), dtype=int)
b_2 = np.ones((1, 1), dtype=int)

z_2 = w_2 @ x_1 + b_2
print("z_2:", bmatrix(z_2), sep="\n")
x_2 = activation(z_2)
print("x_2:", bmatrix(x_2), sep="\n")

x_0:
\begin{bmatrix}
  0.8 & 1. & 1.2\\
\end{bmatrix}
z_1:
\begin{bmatrix}
  1.8 & 2. & 2.2\\
  1.8 & 2. & 2.2\\
\end{bmatrix}
x_1:
\begin{bmatrix}
  1.19721736 & 1.22140276 & 1.24607673\\
  1.19721736 & 1.22140276 & 1.24607673\\
\end{bmatrix}
z_2:
\begin{bmatrix}
  3.39443473 & 3.44280552 & 3.49215346\\
\end{bmatrix}
x_2:
\begin{bmatrix}
  1.40416592 & 1.41097443 & 1.41795451\\
\end{bmatrix}


In [188]:
# Output error: the difference between each network output in x_2 and its
# target. Despite the name, nothing is summed here; there is one entry per
# training sample.
delta_sum = x_2 - TARGET
print("delta_sum:")
print(bmatrix(delta_sum))
# f(z) = e^(0.1 z), hence f'(z) = 0.1 * e^(0.1 z) = 0.1 * f(z).
d_x_2_z_2 = 0.1 * activation(z_2)
print("d_x_2_z_2:")
print(bmatrix(d_x_2_z_2))

# hadamard product
delta_2 = np.multiply(delta_sum, d_x_2_z_2)
print("delta_2:")
print(bmatrix(delta_2))

# z_2 = w_2 x_1 + b_2, so the derivative of z_2 with respect to x_1 is w_2.
d_z_2_x_1_T = w_2.transpose()
print("d_z_2_x_1_T:")
print(bmatrix(d_z_2_x_1_T))
d_x_1_z_1 = 0.1 * activation(z_1)
print("d_x_1_z_1:")
print(bmatrix(d_x_1_z_1))

# delta_1 = (w_2^T delta_2) hadamard f'(z_1).
# The two factors get their own names: storing them in `left` / `right` would
# overwrite the regression matrices of the same name that
# print_first_question_matrices() reads when it is called.
backprop_left = np.matmul(d_z_2_x_1_T, delta_2)
print("left:")
print(bmatrix(backprop_left))
backprop_right = d_x_1_z_1
print("right:")
print(bmatrix(backprop_right))
delta_1 = np.multiply(backprop_left, backprop_right)
print("delta_1:")
print(bmatrix(delta_1))

delta_sum:
\begin{bmatrix}
  -22.59583408 & -18.58902557 & -8.58204549\\
\end{bmatrix}
d_x_2_z_2:
\begin{bmatrix}
  0.14041659 & 0.14109744 & 0.14179545\\
\end{bmatrix}
delta_2:
\begin{bmatrix}
  -3.17283001 & -2.62286398 & -1.21689501\\
\end{bmatrix}
d_z_2_x_1_T:
\begin{bmatrix}
  1\\
  1\\
\end{bmatrix}
d_x_1_z_1:
\begin{bmatrix}
  0.11972174 & 0.12214028 & 0.12460767\\
  0.11972174 & 0.12214028 & 0.12460767\\
\end{bmatrix}
left:
\begin{bmatrix}
  -3.17283001 & -2.62286398 & -1.21689501\\
  -3.17283001 & -2.62286398 & -1.21689501\\
\end{bmatrix}
right:
\begin{bmatrix}
  0.11972174 & 0.12214028 & 0.12460767\\
  0.11972174 & 0.12214028 & 0.12460767\\
\end{bmatrix}
delta_1:
\begin{bmatrix}
  -0.37985672 & -0.32035733 & -0.15163446\\
  -0.37985672 & -0.32035733 & -0.15163446\\
\end{bmatrix}


In [189]:
# Gradient of the error with respect to each weight matrix, accumulated over
# all training samples. Summing the per-sample outer products
# delta[:, i] * x[:, i]^T over i is exactly the matrix product delta * x^T,
# so one matmul per layer replaces the explicit loop over samples.
d_E_w_1 = np.matmul(delta_1, x_0.transpose())
print("d_E_w_1:")
print(bmatrix(d_E_w_1))

d_E_w_2 = np.matmul(delta_2, x_1.transpose())
print("d_E_w_2:")
print(bmatrix(d_E_w_2))

# The bias gradients are kept per sample here (one column per sample); they
# are only summed over the samples when the biases are updated.
d_E_b_1 = delta_1
print("d_E_b_1:")
print(bmatrix(d_E_b_1))
d_E_b_2 = delta_2
print("d_E_b_2:")
print(bmatrix(d_E_b_2))

d_E_w_1:
\begin{bmatrix}
  -0.80620405\\
  -0.80620405\\
\end{bmatrix}
d_E_w_2:
\begin{bmatrix}
  -8.51848503 & -8.51848503\\
\end{bmatrix}
d_E_b_1:
\begin{bmatrix}
  -0.37985672 & -0.32035733 & -0.15163446\\
  -0.37985672 & -0.32035733 & -0.15163446\\
\end{bmatrix}
d_E_b_2:
\begin{bmatrix}
  -3.17283001 & -2.62286398 & -1.21689501\\
\end{bmatrix}


In [190]:
# Step size of the gradient-descent update.
LEARNING_RATE = 0.1

# NOTE: each assignment below overwrites the parameter it reads from, so
# running this cell a second time applies a second update step.
w_1 = w_1 - LEARNING_RATE * d_E_w_1
print("w_1:", bmatrix(w_1), sep="\n")
w_2 = w_2 - LEARNING_RATE * d_E_w_2
print("w_2:", bmatrix(w_2), sep="\n")

# Per-sample bias gradients summed along each row (i.e. over the samples) and
# already scaled by the learning rate; the result is one column per layer.
b_1_lines_sum = LEARNING_RATE * d_E_b_1.sum(axis=1, keepdims=True)
b_2_lines_sum = LEARNING_RATE * d_E_b_2.sum(axis=1, keepdims=True)

print("Previously, b_1 was:", bmatrix(b_1), sep="\n")
print("b_1_lines_sum:", bmatrix(b_1_lines_sum), sep="\n")
print("b_2_lines_sum:", bmatrix(b_2_lines_sum), sep="\n")

b_1 = b_1 - b_1_lines_sum
print("b_1:", bmatrix(b_1), sep="\n")
b_2 = b_2 - b_2_lines_sum
print("b_2:", bmatrix(b_2), sep="\n")

w_1:
\begin{bmatrix}
  1.08062041\\
  1.08062041\\
\end{bmatrix}
w_2:
\begin{bmatrix}
  1.8518485 & 1.8518485\\
\end{bmatrix}
Previously, b_1 was:
\begin{bmatrix}
  1\\
  1\\
\end{bmatrix}
b_1_lines_sum:
\begin{bmatrix}
  -0.08518485\\
  -0.08518485\\
\end{bmatrix}
b_2_lines_sum:
\begin{bmatrix}
  -0.7012589\\
\end{bmatrix}
b_1:
\begin{bmatrix}
  1.08518485\\
  1.08518485\\
\end{bmatrix}
b_2:
\begin{bmatrix}
  1.7012589\\
\end{bmatrix}
