In [1]:
import numpy as np
from scipy.special import softmax

# Print NumPy floating-point values with a precision of 10 digits for the rest of the notebook.
np.set_printoptions(precision=10)

def relu(z):
    """Rectified linear unit: the element-wise maximum of `z` and 0.

    Works on scalars and arrays alike; the result is whatever `np.maximum`
    produces for the given input.
    """
    rectified = np.maximum(z, 0)
    return rectified

def add_bias(x):
    """Return `x` flattened to 1-D with a constant bias entry of 1 appended.

    Note that the input's shape is not preserved: a column vector of shape
    (n, 1) comes back as a flat vector of shape (n + 1,).
    """
    flat = np.ravel(x)
    bias = np.ravel(1)
    return np.concatenate((flat, bias))

In [92]:
# Forward pass of the two-layer network for the input (3, 14).
# add_bias returns a flat 1-D vector, so no reshape is needed beforehand.
x = add_bias(np.array([3, 14]))

# w: hidden-layer weights, one row per hidden unit, last column multiplies the bias entry.
# v: output-layer weights, one row per output unit, last column multiplies the bias entry.
w = np.array([[1, 0, -1], [0, 1, -1], [-1, 0, -1], [0, -1, -1]])
v = np.array([[1, 1, 1, 1, 0], [-1, -1, -1, -1, 2]])

print(w.shape, x.shape)
print('w:', w, '\n')
print('x:', x)

# Hidden layer: compute the pre-activation once and reuse it for the ReLU,
# so the printed z1 is exactly what feeds the activation.
z1 = w @ x
print('z1:', z1, '\n')

a1 = relu(z1)
print('a1:', a1, '\n')

# Append the bias entry for the output layer.
a1 = add_bias(a1)

print(v.shape, a1.shape)
# Output layer: ReLU is applied before the softmax, so z2 holds the rectified
# value of v @ a1 rather than the raw pre-activation.
z2 = relu(v @ a1)
a2 = softmax(z2)

print(f'[{a2[0]:.10}, {a2[1]:.10}]')

(4, 3) (3,)
w: [[ 1  0 -1]
 [ 0  1 -1]
 [-1  0 -1]
 [ 0 -1 -1]] 

x: [ 3 14  1]
z1: [  2  13  -4 -15] 

a1: [ 2 13  0  0] 

(2, 5) (5,)
[0.9999996941, 3.059022269e-07]


In [122]:
# Output-layer weights: one row per output unit; the last column multiplies
# the bias entry that add_bias appends to the hidden activations.
v = np.array([
    [1, 1, 1, 1, 0],
    [-1, -1, -1, -1, 2],
])

def calc_o1(a1):
    """Print the output layer's response to a vector of four hidden activations.

    A bias entry of 1 is appended to `a1` and the result is shaped into a
    (5, 1) column. The column is multiplied by the module-level output
    weights `v`, rectified with `relu`, and passed through `softmax`. The
    rectified values and the softmax result are printed; nothing is returned.
    """
    column = np.reshape(add_bias(np.array(a1)), (5, 1))
    rectified = relu(v @ column)
    print('z2:', rectified, '\n')
    probabilities = softmax(rectified)
    print('a2:', probabilities, '\n')

# Evaluate the output layer for three uniform hidden-activation vectors.
for level in (0.25, 0, 0.75):
    calc_o1([level] * 4)

z2: [[1.]
 [1.]] 

a2: [[0.5]
 [0.5]] 

z2: [[0]
 [2]] 

a2: [[0.119202922]
 [0.880797078]] 

z2: [[3.]
 [0.]] 

a2: [[0.9525741268]
 [0.0474258732]] 



In [127]:
def softmax_with_temp(x, t):
    """Softmax of `t * x`, normalised over all elements of `x`.

    Note that `t` multiplies the inputs (it is not a divisor), so larger `t`
    gives a sharper distribution and `t = 0` gives a uniform one.

    Parameters
    ----------
    x : array_like
        Input scores. Lists are accepted and converted to an array.
    t : scalar
        Multiplicative scaling applied to `x` before exponentiation.

    Returns
    -------
    numpy.ndarray or numpy scalar
        Values with the same shape as `x` that sum to 1.
    """
    # Convert first so that `t * x` scales the values; multiplying a plain
    # Python list by an integer would repeat the list instead.
    scaled = t * np.asarray(x)
    if scaled.size == 0:
        # Nothing to normalise; return an empty float array.
        return np.exp(scaled)
    # Subtracting the maximum leaves the result mathematically unchanged but
    # keeps every exponent <= 0, so np.exp cannot overflow to inf.
    weights = np.exp(scaled - np.max(scaled))
    return weights / np.sum(weights)

# NOTE(review): the result of this call is neither printed nor the cell's last
# expression, so nothing from it is displayed — confirm whether it was meant to be shown.
softmax_with_temp(np.array([3, 1]), 1)

# Print e^5.
print(np.exp(5))

148.4131591025766


In [5]:
# Question 2 - LSTM

def sigmoid(x):
    """Logistic sigmoid of `x`, rounded to the nearest integer.

    The value 1 / (1 + e^(-x)) is passed through the built-in `round`, so the
    result is a hard 0/1 decision rather than a probability.
    """
    activation = 1 / (1 + np.exp(-x))
    return round(activation)

def forget_gate(h, x):
    """Forget-gate activation for previous hidden state `h` and input `x`.

    Applies `sigmoid` to w_fh * h + w_fx * x + b_f, using the module-level
    forget-gate parameters.
    """
    pre_activation = w_fh * h + w_fx * x + b_f
    return sigmoid(pre_activation)

def input_gate(h, x):
    """Input-gate activation for previous hidden state `h` and input `x`.

    Applies `sigmoid` to w_ih * h + w_ix * x + b_i, using the module-level
    input-gate parameters.
    """
    pre_activation = w_ih * h + w_ix * x + b_i
    return sigmoid(pre_activation)

def output_gate(h, x):
    """Output-gate activation for previous hidden state `h` and input `x`.

    Applies `sigmoid` to w_oh * h + w_ox * x + b_o, using the module-level
    output-gate parameters.
    """
    pre_activation = w_oh * h + w_ox * x + b_o
    return sigmoid(pre_activation)

def memory_cell(c_prev, h, x):
    """Compute the new cell value from the previous cell value, `h` and `x`.

    The previous cell value is scaled by the forget gate, and a candidate
    value, round(tanh(w_ch * h + w_cx * x + b_c)), is scaled by the input
    gate; the two contributions are summed.
    """
    keep = forget_gate(h, x)
    write = input_gate(h, x)
    candidate = round(np.tanh(w_ch * h + w_cx * x + b_c))
    return keep * c_prev + write * candidate

def hidden_state(c_prev, h, x):
    """Advance the LSTM by one step.

    Returns a `(c_new, h_new)` tuple: the updated cell value from
    `memory_cell`, and the new hidden state, which is the output gate times
    round(tanh(c_new)).
    """
    gate = output_gate(h, x)
    c_new = memory_cell(c_prev, h, x)
    squashed = round(np.tanh(c_new))
    return c_new, gate * squashed

# LSTM parameters, grouped by the unit that reads them.
# Each line lists: weight on the hidden state h, weight on the input x, bias.

# Forget gate
w_fh, w_fx, b_f = 0, 0, -100

# Input gate
w_ih, w_ix, b_i = 0, 100, 100

# Output gate
w_oh, w_ox, b_o = 0, 100, 0

# Candidate cell value (the tanh term in memory_cell)
w_ch, w_cx, b_c = -100, 50, 0

def rnn(seq):
    """Run the LSTM over `seq` and print the hidden state after every step.

    Both the cell value and the hidden state start at 0. Each element of
    `seq` is fed through `hidden_state`; the input sequence and the collected
    hidden states (cast to int) are printed. Nothing is returned.
    """
    cell, hidden = 0, 0
    outputs = []
    for token in seq:
        cell, hidden = hidden_state(cell, hidden, token)
        outputs.append(hidden)
    print(f'x: {seq}\nh: {[int(i) for i in outputs]}')
        
        
# Input sequences to run through the LSTM, printed with a blank line between runs.
sequences = [
    [0, 0, 1, 1, 1, 0],
    [1, 1, 0, 1, 1],
    [1, 0, 1, 1, 1],
    [1, 1, 0, 1, 1, 0, 1, 1, 1],
    [1, 0, 0, 1, 1, 0, 1, 1, 1],
    [0, 0, 0, 0, 0, 1, 1, 1, 1, 1],
]
for position, sequence in enumerate(sequences):
    if position > 0:
        print('')
    rnn(sequence)

x: [0, 0, 1, 1, 1, 0]
h: [0, 0, 1, -1, 1, 0]

x: [1, 1, 0, 1, 1]
h: [1, -1, 0, 1, -1]

x: [1, 0, 1, 1, 1]
h: [1, 0, 1, -1, 1]

x: [1, 1, 0, 1, 1, 0, 1, 1, 1]
h: [1, -1, 0, 1, -1, 0, 1, -1, 1]

x: [1, 0, 0, 1, 1, 0, 1, 1, 1]
h: [1, 0, 0, 1, -1, 0, 1, -1, 1]

x: [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
h: [0, 0, 0, 0, 0, 1, -1, 1, -1, 1]


In [13]:
# Forward Propagation

# Training example (x, y) and the parameters of the scalar two-layer network.
x, y = 3, 1
w_1, w_2, b = 0.01, -5, -1

# Layer 1: linear map followed by ReLU.
z_1 = w_1 * x
a_1 = np.maximum(0, z_1)

# Layer 2: affine map followed by the logistic sigmoid.
z_2 = w_2 * a_1 + b
a_2 = 1 / (1 + np.exp(-z_2))

# Squared-error loss: half the squared difference between target and output.
loss = 0.5 * (y - a_2) ** 2

print('loss', round(loss, 3))
print('vars', [z_1, a_1, z_2, a_2])

loss 0.288
vars [0.03, 0.03, -1.15, 0.24048908305088898]


In [18]:
# Backward Propagation
#
# Chain-rule gradients of the loss with respect to w_2, b and w_1.
# Reads y, x, w_2, z_1, a_1 and a_2 from the forward-propagation cell.

# Output side: derivative of 1/2 * (y - a_2)^2 with respect to a_2, and the
# sigmoid derivative a_2 * (1 - a_2).
dloss = -(y-a_2)
da2_z2 = a_2 * (1 - a_2)
dloss_z2 = dloss * da2_z2 # chain rule

# z_2 = w_2 * a_1 + b, so dz_2/da_1 = w_2 and dz_2/dw_2 = a_1.
dz2_a1 = w_2
dz_w2 = a_1
dloss_a1 = dloss_z2 * dz2_a1
dloss_w2 = dloss_z2 * dz_w2

# ReLU derivative: 1 where the pre-activation is positive, 0 otherwise.
# Derived from z_1 instead of being hard-coded, so the gradient stays correct
# if the inputs or weights change and z_1 becomes non-positive.
da1_z1 = 1 if z_1 > 0 else 0
dloss_z1 = dloss_a1 * da1_z1

# z_1 = w_1 * x, so dz_1/dw_1 = x.
dz1_w1 = x
dloss_w1 = dloss_z1 * dz1_w1

# dz_2/db = 1, so the gradient with respect to b equals dloss_z2.
print('Loss wrt w2:', round(dloss_w2, 3))
print('Loss wrt b:', round(dloss_z2, 3))
print('Loss wrt w1:', round(dloss_w1, 3))

Loss wrt w2: -0.004
Loss wrt b: -0.139
Loss wrt w1: 2.081
