In [1]:
import math

# Activation functions
def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + e^(-x)) of a real number.

    The exponential is always evaluated at a non-positive argument, so very
    negative inputs yield 0.0 instead of raising ``OverflowError`` from
    ``math.exp``.

    Args:
        x: A real number (int or float).

    Returns:
        A float in the closed interval [0.0, 1.0].
    """
    if x >= 0:
        # -x <= 0 here, so exp(-x) is in (0, 1] and cannot overflow.
        return 1 / (1 + math.exp(-x))
    # For negative x use the algebraically equal form e^x / (1 + e^x);
    # exp(x) may underflow to 0.0, which gives the correct limit.
    exp_x = math.exp(x)
    return exp_x / (1 + exp_x)

def tanh(x):
    """Return the hyperbolic tangent of ``x``, delegating to ``math.tanh``."""
    squashed = math.tanh(x)
    return squashed

# Scalar LSTM parameters. Each gate has an input weight (W*), a recurrent
# weight applied to the previous hidden state (Wh*), and a bias (b*).

# Forget gate
Wf = 0.5
Whf = 0.1
bf = 0

# Input gate
Wi = 0.6
Whi = 0.2
bi = 0

# Candidate gate
Wc = 0.7
Whc = 0.3
bc = 0

# Output gate
Wo = 0.8
Who = 0.4
bo = 0

# Output layer: weight and bias applied to the final hidden state
Wy = 4
by = 0

# Recurrent state carried across time steps, both starting at zero
h = 0  # hidden state
C = 0  # cell state

# Inputs fed to the cell, one per time step
X = [1, 2, 3]

# Run the LSTM cell over the sequence, printing every intermediate value
for t, x in enumerate(X):
    print(f"Time step {t+1}: Input = {x}")

    # Every gate reads the current input and the *previous* hidden state,
    # so all pre-activations are formed before h is overwritten below.
    forget_pre = Wf * x + Whf * h + bf
    input_pre = Wi * x + Whi * h + bi
    candidate_pre = Wc * x + Whc * h + bc
    output_pre = Wo * x + Who * h + bo

    ft = sigmoid(forget_pre)        # forget gate
    it = sigmoid(input_pre)         # input gate
    Ct_tilde = tanh(candidate_pre)  # candidate memory
    ot = sigmoid(output_pre)        # output gate

    # Cell state: the gated part of the old memory plus the gated candidate
    kept_memory = ft * C
    new_memory = it * Ct_tilde
    C = kept_memory + new_memory

    # Hidden state: output gate applied to the squashed cell state
    h = ot * tanh(C)

    # Report all gate values and states for this step
    report = (
        f"  Forget gate: f_t = {ft:.4f}",
        f"  Input gate: i_t = {it:.4f}",
        f"  Candidate memory: Ct_tilde = {Ct_tilde:.4f}",
        f"  Cell state: C_t = {C:.4f}",
        f"  Output gate: o_t = {ot:.4f}",
        f"  Hidden state: h_t = {h:.4f}\n",
    )
    for line in report:
        print(line)

# Linear read-out of the hidden state left after the last time step
y = Wy * h + by
print(f"Final output y: {y:.4f}")


Time step 1: Input = 1
  Forget gate: f_t = 0.6225
  Input gate: i_t = 0.6457
  Candidate memory: Ct_tilde = 0.6044
  Cell state: C_t = 0.3902
  Output gate: o_t = 0.6900
  Hidden state: h_t = 0.2564

Time step 2: Input = 2
  Forget gate: f_t = 0.7361
  Input gate: i_t = 0.7775
  Candidate memory: Ct_tilde = 0.9009
  Cell state: C_t = 0.9877
  Output gate: o_t = 0.8459
  Hidden state: h_t = 0.6398

Time step 3: Input = 3
  Forget gate: f_t = 0.8269
  Input gate: i_t = 0.8730
  Candidate memory: Ct_tilde = 0.9798
  Cell state: C_t = 1.6721
  Output gate: o_t = 0.9344
  Hidden state: h_t = 0.8707

Final output y: 3.4827
