In [8]:
import numpy as np



In [13]:
# Redefine the environment after reset
np.set_printoptions(precision=4, suppress=True, linewidth=200)


# Dimensions
d = 1  # Input size (1 feature)
h = 2  # Hidden size (2 units) -> this is abstract representation of the sequence, bigger dimension means more complex patterns can be learned
batch_size = 1  # Batch size (1 sequence)

# Initialize weights and biases
np.random.seed(0)
W = np.random.randn(d, 4 * h)  # Input weights [d, 4h]
U = np.random.randn(h, 4 * h)  # Recurrent weights [h, 4h]
b = np.random.randn(1, 4 * h)  # Bias [1, 4h]

print("Initializing weights and biases...")
print("W: ", W.flatten())
print("U: ", U.flatten())
print("b: ", b.flatten())

# Initialize hidden state and cell state
h_t = np.zeros((batch_size, h))  # Hidden state [batch, h]
C_t = np.zeros((batch_size, h))  # Cell state [batch, h]

# Define single sequence with 1 feature (e.g., [2, 4, 6])
X = np.array([[[2], [4], [6]]])  # Shape: [batch, timesteps, features]

# Detailed forward pass with print statements for each step
print("h_t before LSTM Forward Pass: ", h_t)
print("C_t before LSTM Forward Pass: ", C_t)

print("--------------------------------")

print("Starting LSTM Forward Pass...\n")

for t in range(X.shape[1]):  # Loop through each timestep
    print(f"Time step {t + 1}, Input: {X[:, t, :].flatten()}")
    
    x_t = X[:, t, :]  # Input at timestep t [batch, d]

    print("x_t * W: ", x_t, " * ", W, " = ", np.dot(x_t, W))
    print("h_t * U: ", h_t, " * ", U, " = ", np.dot(h_t, U))


    gates = np.dot(x_t, W) + np.dot(h_t, U) + b  # [batch, 4h]
    print(f"  Gates raw values: {gates.flatten()}")
    
    print("--------------------------------")

    print("Input to F gate  ", gates[:, :h])

    f_t = 1 / (1 + np.exp(-gates[:, :h]))  # Forget gate [batch, h]
    print(f"  Forget Gate (f_t): {f_t.flatten()}")

    i_t = 1 / (1 + np.exp(-gates[:, h:2 * h]))  # Input gate [batch, h]
    print(f"  Input Gate (i_t): {i_t.flatten()}")

    o_t = 1 / (1 + np.exp(-gates[:, 2 * h:3 * h]))  # Output gate [batch, h]
    print(f"  Output Gate (o_t): {o_t.flatten()}")

    C_t_candidate = np.tanh(gates[:, 3 * h:])  # Candidate cell state [batch, h]
    print(f"  Candidate Cell State (C~_t): {C_t_candidate.flatten()}")

    # Update cell state
    C_t = f_t * C_t + i_t * C_t_candidate  # [batch, h]
    print(f"  Updated Cell State (C_t): {C_t.flatten()}")

    # Update hidden state
    h_t = o_t * np.tanh(C_t)  # [batch, h]
    print(f"  Updated Hidden State (h_t): {h_t.flatten()}\n")

print("LSTM Forward Pass Completed.")


Initializing weights and biases...
W:  [ 1.7641  0.4002  0.9787  2.2409  1.8676 -0.9773  0.9501 -0.1514]
U:  [-0.1032  0.4106  0.144   1.4543  0.761   0.1217  0.4439  0.3337  1.4941 -0.2052  0.3131 -0.8541 -2.553   0.6536  0.8644 -0.7422]
b:  [ 2.2698 -1.4544  0.0458 -0.1872  1.5328  1.4694  0.1549  0.3782]
h_t before LSTM Forward Pass:  [[0. 0.]]
C_t before LSTM Forward Pass:  [[0. 0.]]
--------------------------------
Starting LSTM Forward Pass...

Time step 1, Input: [2]
x_t * W:  [[2]]  *  [[ 1.7641  0.4002  0.9787  2.2409  1.8676 -0.9773  0.9501 -0.1514]]  =  [[ 3.5281  0.8003  1.9575  4.4818  3.7351 -1.9546  1.9002 -0.3027]]
h_t * U:  [[0. 0.]]  *  [[-0.1032  0.4106  0.144   1.4543  0.761   0.1217  0.4439  0.3337]
 [ 1.4941 -0.2052  0.3131 -0.8541 -2.553   0.6536  0.8644 -0.7422]]  =  [[0. 0. 0. 0. 0. 0. 0. 0.]]
  Gates raw values: [ 5.7979 -0.6541  2.0032  4.2946  5.2679 -0.4852  2.0551  0.0754]
--------------------------------
Input to F gate   [[ 5.7979 -0.6541]]
  Forget Gate

In [2]:
# Define single sequence with 1 feature (e.g., [2, 4, 6])
X = np.array([[[2], [4], [6]]])  # Shape: [batch, timesteps, features]


In [14]:
X

array([[[2],
        [4],
        [6]]])

In [15]:
# Dimensions
d = 1  # Input size (1 feature)
h = 2  # Hidden size (2 units)
batch_size = 1  # Batch size (1 sequence)

In [16]:
# Initialize weights and biases
np.random.seed(0)
W = np.random.randn(d, 4 * h)  # Input weights [d, 4h]
U = np.random.randn(h, 4 * h)  # Recurrent weights [h, 4h]
b = np.random.randn(1, 4 * h)  # Bias [1, 4h]

In [17]:
# Initialize hidden state and cell state
h_t = np.zeros((batch_size, h))  # Hidden state [batch, h]
C_t = np.zeros((batch_size, h))  # Cell state [batch, h]

In [19]:
results = []
for t in range(X.shape[1]):  # Loop through each timestep
    x_t = X[:, t, :]  # Input at timestep t [batch, d]
    print("At timestep", t, "x_t is", x_t)
    # Compute gates
    gates = np.dot(x_t, W) + np.dot(h_t, U) + b  # [batch, 4h]
    print("At timestep", t, "gates are", gates)
    f_t = 1 / (1 + np.exp(-gates[:, :h]))  # Forget gate [batch, h]
    i_t = 1 / (1 + np.exp(-gates[:, h:2 * h]))  # Input gate [batch, h]
    o_t = 1 / (1 + np.exp(-gates[:, 2 * h:3 * h]))  # Output gate [batch, h]
    C_t_candidate = np.tanh(gates[:, 3 * h:])  # Candidate cell state [batch, h]

    # Update cell state
    C_t = f_t * C_t + i_t * C_t_candidate  # [batch, h]

    # Update hidden state
    h_t = o_t * np.tanh(C_t)  # [batch, h]

    # Store results for this timestep
    results.append({
        "x_t": x_t.flatten(),
        "f_t": f_t.flatten(),
        "i_t": i_t.flatten(),
        "o_t": o_t.flatten(),
        "C_t_candidate": C_t_candidate.flatten(),
        "C_t": C_t.flatten(),
        "h_t": h_t.flatten(),
    })

# Display results for each timestep
results

At timestep 0 x_t is [[2]]
At timestep 0 gates are [[ 5.69150442 -0.24575738  2.14547783  5.74104175  6.03025755 -0.366045
   2.49369845  0.40873171]]
At timestep 1 x_t is [[4]]
At timestep 1 gates are [[ 9.40280444  0.53070912  4.14188673 10.12270543  9.4542689  -2.23990763
   4.50152825  0.01594867]]
At timestep 2 x_t is [[6]]
At timestep 2 gates are [[12.78014531  1.3530555   6.06832762 14.69535743 13.44896222 -4.25985873
   6.31616641 -0.2109283 ]]


[{'x_t': array([2]),
  'f_t': array([0.99663684, 0.43886803]),
  'i_t': array([0.89524544, 0.99679886]),
  'o_t': array([0.9976009 , 0.40949703]),
  'C_t_candidate': array([0.98644568, 0.38739526]),
  'C_t': array([3.70666547, 0.30260307]),
  'h_t': array([0.9963982, 0.1202664])},
 {'x_t': array([4]),
  'f_t': array([0.99991751, 0.62964849]),
  'i_t': array([0.98435579, 0.99995984]),
  'o_t': array([0.99992165, 0.09622357]),
  'C_t_candidate': array([0.99975396, 0.01594732]),
  'C_t': array([4.69047333, 0.20648024]),
  'h_t': array([0.99975305, 0.01959065])},
 {'x_t': array([6]),
  'f_t': array([0.99999718, 0.79462872]),
  'i_t': array([0.99769031, 0.99999959]),
  'o_t': array([0.99999856, 0.01392758]),
  'C_t_candidate': array([ 0.99999347, -0.20785486]),
  'C_t': array([ 5.68814392, -0.04377964]),
  'h_t': array([ 9.99975629e-01, -6.09355263e-04])}]