![Simple LSTM](./assets/images/simple_lstm.png)

Q19

In [1]:
import numpy as np

def sigmoid(x):
    """
    Compute the logistic sigmoid 1 / (1 + exp(-x)) element-wise.

    The value is evaluated in a numerically stable form: only exp(-|x|) is
    ever computed, so inputs with a large magnitude cannot overflow.

    Args:
        x (float or array-like): Input value(s); lists and scalars are
            converted with np.asarray.

    Returns:
        NumPy scalar or np.array: Sigmoid of x, with the same shape as x.
    """
    x = np.asarray(x)
    if not np.issubdtype(x.dtype, np.floating):
        # Non-float input (e.g. ints): promote so the result is floating point.
        x = x.astype(float)

    # exp(-|x|) lies in (0, 1], so neither branch below can overflow.
    decay = np.exp(-np.abs(x))
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))

    # Indexing with () turns a 0-d result back into a scalar and leaves
    # n-d arrays untouched.
    return result[()]

def calc_input_gate(x_t, h_prev, W_xi, W_hi):
    """
    Compute the LSTM input gate activation.

    The gate is sigmoid(W_xi . x_t + W_hi . h_prev); no bias term is used.

    Args:
        x_t (np.array): Input vector at time t
        h_prev (np.array): Hidden state from the previous time step
        W_xi (np.array): Weights applied to x_t for the input gate
        W_hi (np.array): Weights applied to h_prev for the input gate

    Returns:
        np.array: Input gate values after the sigmoid
    """
    # Sum the input and recurrent contributions, then squash.
    pre_activation = np.dot(W_xi, x_t) + np.dot(W_hi, h_prev)
    return sigmoid(pre_activation)


# Q19: current input vector and an all-zero previous hidden state.
x_t = np.array([1.0, 2.0, 3.0])
h_prev = np.array([0.0, 0.0])

# Input-gate weights: W_xi multiplies x_t, W_hi multiplies h_prev.
W_xi = np.array([
    [-1.0, 0.0, 0.2],
    [-1.0, 0.0, -0.2],
])
W_hi = np.array([
    [0.0, 1.0],
    [0.0, -1.0],
])

i_t = calc_input_gate(x_t=x_t, h_prev=h_prev, W_xi=W_xi, W_hi=W_hi)
print("Input gate output (i_t):", i_t)

Input gate output (i_t): [0.40131234 0.16798161]


Q20

In [2]:
# Q20 values as plain Python lists.
# NOTE(review): Wxo, Who and xt are not referenced elsewhere in the visible
# cells; confirm whether they are still needed.
xt = [1.0, 2.0, 3.0]
h_prev = [0.0, 0.0]
Wxo = [
    [1.0, 1.0, 0.4],
    [1.0, -1.0, -0.4],
]
Who = [
    [0.0, 1.0],
    [0.0, -1.0],
]

def calc_output_gate(x_t, h_prev, W_xo, W_ho):
    """
    Compute the LSTM output gate activation.

    The gate is sigmoid(W_xo . x_t + W_ho . h_prev); no bias term is used.

    Args:
        x_t (np.array): Input vector at time t
        h_prev (np.array): Hidden state from the previous time step
        W_xo (np.array): Weights applied to x_t for the output gate
        W_ho (np.array): Weights applied to h_prev for the output gate

    Returns:
        np.array: Output gate values after the sigmoid
    """
    # Sum the input and recurrent contributions, then squash.
    pre_activation = np.dot(W_xo, x_t) + np.dot(W_ho, h_prev)
    return sigmoid(pre_activation)


# Q20: current input vector and an all-zero previous hidden state.
x_t = np.array([1.0, 2.0, 3.0])
h_prev = np.array([0.0, 0.0])

# Output-gate weights: W_xo multiplies x_t, W_ho multiplies h_prev.
W_xo = np.array([
    [1.0, 1.0, 0.4],
    [1.0, -1.0, -0.4],
])
W_ho = np.array([
    [0.0, 1.0],
    [0.0, -1.0],
])

o_t = calc_output_gate(x_t=x_t, h_prev=h_prev, W_xo=W_xo, W_ho=W_ho)
print("Output gate (o_t):", o_t)

Output gate (o_t): [0.98522597 0.09975049]


Q21

In [4]:
def calc_forget_gate(x_t, h_prev, W_xf, W_hf):
    """
    Compute the LSTM forget gate activation.

    The gate is sigmoid(W_xf . x_t + W_hf . h_prev); no bias term is used.

    Args:
        x_t (np.array): Input vector at time t
        h_prev (np.array): Hidden state from the previous time step
        W_xf (np.array): Weights applied to x_t for the forget gate
        W_hf (np.array): Weights applied to h_prev for the forget gate

    Returns:
        np.array: Forget gate values after the sigmoid
    """
    # Sum the input and recurrent contributions, then squash.
    pre_activation = np.dot(W_xf, x_t) + np.dot(W_hf, h_prev)
    return sigmoid(pre_activation)

def calc_input_gate(x_t, h_prev, W_xi, W_hi):
    """
    Compute the LSTM input gate activation.

    The gate is sigmoid(W_xi . x_t + W_hi . h_prev); no bias term is used.

    Args:
        x_t (np.array): Input vector at time t
        h_prev (np.array): Hidden state from the previous time step
        W_xi (np.array): Weights applied to x_t for the input gate
        W_hi (np.array): Weights applied to h_prev for the input gate

    Returns:
        np.array: Input gate values after the sigmoid
    """
    # Sum the input and recurrent contributions, then squash.
    pre_activation = np.dot(W_xi, x_t) + np.dot(W_hi, h_prev)
    return sigmoid(pre_activation)

def calc_candidate_state(x_t, h_prev, W_xc, W_hc):
    """
    Compute the LSTM candidate cell state.

    The candidate is tanh(W_xc . x_t + W_hc . h_prev); no bias term is used.

    Args:
        x_t (np.array): Input vector at time t
        h_prev (np.array): Hidden state from the previous time step
        W_xc (np.array): Weights applied to x_t for the candidate state
        W_hc (np.array): Weights applied to h_prev for the candidate state

    Returns:
        np.array: Candidate cell state after the tanh
    """
    # Sum the input and recurrent contributions, then squash with tanh.
    pre_activation = np.dot(W_xc, x_t) + np.dot(W_hc, h_prev)
    return np.tanh(pre_activation)

def calc_cell_state(f_t, c_prev, i_t, c_tilde):
    """
    Combine the previous cell state and the candidate into the new cell state.

    Computes f_t * c_prev + i_t * c_tilde element-wise.

    Args:
        f_t (np.array): Forget gate values
        c_prev (np.array): Previous cell state
        i_t (np.array): Input gate values
        c_tilde (np.array): Candidate cell state

    Returns:
        np.array: New cell state
    """
    retained = f_t * c_prev   # part of the old state kept by the forget gate
    written = i_t * c_tilde   # part of the candidate admitted by the input gate
    return retained + written


# Q21: input vector plus all-zero previous hidden and cell states.
x_t = np.array([1.0, 2.0, 3.0])
h_prev = np.array([0.0, 0.0])
c_prev = np.array([0.0, 0.0])

# Forget-gate weights.
W_xf = np.array([
    [1.0, 0.0, 0.1],
    [1.0, 0.0, -0.1],
])
W_hf = np.array([
    [0.0, 1.0],
    [0.0, -1.0],
])

# Input-gate weights.
W_xi = np.array([
    [-1.0, 0.0, 0.2],
    [-1.0, 0.0, -0.2],
])
W_hi = np.array([
    [0.0, 1.0],
    [0.0, -1.0],
])

# Candidate-state weights.
W_xc = np.array([
    [1.0, 0.0, -0.3],
    [1.0, 0.0, -0.3],
])
W_hc = np.array([
    [0.0, 1.0],
    [0.0, -1.0],
])

# Gates and candidate first, then the cell state that combines them.
f_t = calc_forget_gate(x_t=x_t, h_prev=h_prev, W_xf=W_xf, W_hf=W_hf)
i_t = calc_input_gate(x_t=x_t, h_prev=h_prev, W_xi=W_xi, W_hi=W_hi)
c_tilde = calc_candidate_state(x_t=x_t, h_prev=h_prev, W_xc=W_xc, W_hc=W_hc)
c_t = calc_cell_state(f_t=f_t, c_prev=c_prev, i_t=i_t, c_tilde=c_tilde)

print("Forget gate (f_t):", f_t)
print("Input gate (i_t):", i_t)
print("Candidate state (c_tilde):", c_tilde)
print("Cell state (c_t):", c_t)

Forget gate (f_t): [0.78583498 0.66818777]
Input gate (i_t): [0.40131234 0.16798161]
Candidate state (c_tilde): [0.09966799 0.09966799]
Cell state (c_t): [0.039998   0.01674239]


Q22

In [6]:
def calc_hidden_state(o_t, c_t):
    """
    Compute the new LSTM hidden state.

    The hidden state is o_t * tanh(c_t), evaluated element-wise.

    Args:
        o_t (np.array): Output gate values
        c_t (np.array): Current cell state

    Returns:
        np.array: New hidden state
    """
    squashed_cell = np.tanh(c_t)
    return o_t * squashed_cell


# Q22: same input vector and all-zero previous states as the earlier questions.
x_t = np.array([1.0, 2.0, 3.0])
h_prev = np.array([0.0, 0.0])
c_prev = np.array([0.0, 0.0])

# Output gate: weights, then activation.
W_xo = np.array([
    [1.0, 1.0, 0.4],
    [1.0, -1.0, -0.4],
])
W_ho = np.array([
    [0.0, 1.0],
    [0.0, -1.0],
])
o_t = sigmoid(np.dot(W_xo, x_t) + np.dot(W_ho, h_prev))

# Forget gate: weights, then activation.
W_xf = np.array([
    [1.0, 0.0, 0.1],
    [1.0, 0.0, -0.1],
])
W_hf = np.array([
    [0.0, 1.0],
    [0.0, -1.0],
])
f_t = sigmoid(np.dot(W_xf, x_t) + np.dot(W_hf, h_prev))

# Input gate: weights, then activation.
W_xi = np.array([
    [-1.0, 0.0, 0.2],
    [-1.0, 0.0, -0.2],
])
W_hi = np.array([
    [0.0, 1.0],
    [0.0, -1.0],
])
i_t = sigmoid(np.dot(W_xi, x_t) + np.dot(W_hi, h_prev))

# Candidate cell state: weights, then tanh activation.
W_xc = np.array([
    [1.0, 0.0, -0.3],
    [1.0, 0.0, -0.3],
])
W_hc = np.array([
    [0.0, 1.0],
    [0.0, -1.0],
])
c_tilde = np.tanh(np.dot(W_xc, x_t) + np.dot(W_hc, h_prev))

# New cell state from the gated previous state and gated candidate.
c_t = f_t * c_prev + i_t * c_tilde

# Hidden state is the output gate applied to tanh of the cell state.
h_t = calc_hidden_state(o_t, c_t)

print("Output gate (o_t):", o_t)
print("Cell state (c_t):", c_t)
print("tanh(c_t):", np.tanh(c_t))
print("Hidden state (h_t):", h_t)

Output gate (o_t): [0.98522597 0.09975049]
Cell state (c_t): [0.039998   0.01674239]
tanh(c_t): [0.03997668 0.01674083]
Hidden state (h_t): [0.03938606 0.00166991]
