In [1]:
import numpy as np 
from scipy.special import expit # sigmoid

In [2]:
# LSTM Step By Step Example
# 1. Define the activation functions
#    1.1. Sigmoid (expit(x) = 1/(1+exp(-x)))
#    1.2. Tanh (tanh(x) = (exp(x) - exp(-x)) / (exp(x) + exp(-x)))


In [3]:
# Sigmoid function
def sigmoid(x):
    """Logistic sigmoid 1 / (1 + exp(-x)), evaluated element-wise.

    The naive formula overflows inside ``np.exp`` for large negative ``x``
    (emitting a RuntimeWarning). Here ``exp`` only ever receives
    non-positive arguments, so the intermediate value stays within [0, 1].

    Parameters
    ----------
    x : scalar or array_like of real numbers
        Input value(s).

    Returns
    -------
    numpy scalar or ndarray
        Sigmoid of ``x`` with the same shape as ``x``; a scalar input
        yields a NumPy scalar.
    """
    x = np.asarray(x)
    if x.dtype.kind in "biu":
        # Promote bool/integer input so that negation and abs behave like real arithmetic
        x = x.astype(np.float64)
    z = np.exp(-np.abs(x))  # exp of a non-positive number: cannot overflow
    # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x)) -- algebraically the same value
    result = np.where(x >= 0, 1 / (1 + z), z / (1 + z))
    return result[()]  # unwraps a 0-d result to a scalar, leaves arrays as arrays

# Evaluate both activations on five sample points:
# the hand-written sigmoid above and NumPy's built-in hyperbolic tangent.
x = np.arange(-2, 3)  # [-2, -1, 0, 1, 2]
sigmoid_values, tanh_values = sigmoid(x), np.tanh(x)

print("Sigmoid:", sigmoid_values)
print("Tanh:", tanh_values)

Sigmoid: [0.11920292 0.26894142 0.5        0.73105858 0.88079708]
Tanh: [-0.96402758 -0.76159416  0.          0.76159416  0.96402758]


In [4]:
# Same five sample points, this time through SciPy's expit
x = np.arange(-2, 3)  # [-2, -1, 0, 1, 2]
sigmoid_values = expit(x)  # Equivalent to sigmoid(x)

print("Sigmoid (using SciPy):", sigmoid_values)

Sigmoid (using SciPy): [0.11920292 0.26894142 0.5        0.73105858 0.88079708]


In [6]:
# 2. Set the input size, the hidden size, and the scale factor
input_size, hidden_size = 3, 2  # length of the input vector, length of the hidden/cell state
scale = 0.01                    # multiplier applied to the random initial weights

In [8]:
# 2. Initialize weights and biases
# Seed the global NumPy RNG so the parameters printed below are reproducible.
np.random.seed(1010)

# One weight matrix per gate, drawn in the order
# forget -> input -> candidate -> output (the draw order decides which random
# values each gate receives). Every matrix has hidden_size rows and
# hidden_size + input_size columns, filled with standard-normal draws times `scale`.
Wf, Wi, Wc, Wo = (
    scale * np.random.randn(hidden_size, hidden_size + input_size) for _ in range(4)
)

# Matching biases: a separate zero column vector (hidden_size x 1) for each gate.
bf, bi, bc, bo = (np.zeros(shape=(hidden_size, 1)) for _ in range(4))

print("*" * 72)
# NOTE(review): this header is printed once, ahead of all four gates, and repeats
# the forget-gate label emitted by the next line -- possibly a leftover; confirm.
print("The forget Gate:\n")
print(f"The forget gate:\nThe weights:\n{Wf}\nThe bias:\n{bf}")
print(f"The input gate:\nThe weights:\n{Wi}\nThe bias:\n{bi}")
print(f"The candidate gate:\nThe weights:\n{Wc}\nThe bias:\n{bc}")
print(f"The output gate:\nThe weights:\n{Wo}\nThe bias:\n{bo}")
print("*" * 72)

************************************************************************
The forget Gate:

The forget gate:
The weights:
[[-0.01175448 -0.00383148 -0.01471366 -0.01800569  0.001301  ]
 [ 0.01595619  0.00993161 -0.02363707 -0.00479592 -0.01650382]]
The bias:
[[0.]
 [0.]]
The input gate:
The weights:
[[-0.0054349   0.00779611 -0.00502609  0.0028589   0.02703237]
 [-0.00074517 -0.01370103  0.00358587  0.0059805  -0.00306799]]
The bias:
[[0.]
 [0.]]
The candidate gate:
The weights:
[[ 0.00762969 -0.00998878  0.01009264 -0.00247785 -0.02583517]
 [ 0.00757221 -0.01776461 -0.00779696 -0.00918535 -0.00394989]]
The bias:
[[0.]
 [0.]]
The output gate:
The weights:
[[ 0.00248006  0.00298038  0.00283817 -0.00471223  0.00952028]
 [-0.00638603 -0.01260901 -0.00558495  0.00172743 -0.00055948]]
The bias:
[[0.]
 [0.]]
************************************************************************


In [9]:
# 3. Create sample input
# Each vector is written flat and then reshaped into a single column (n x 1).
x_t = np.array([0.5, 0.8, 0.2]).reshape(-1, 1)  # Input vector (3x1)
h_prev = np.array([0.3, 0.4]).reshape(-1, 1)    # Previous hidden state (2x1)
c_prev = np.array([0.2, 0.3]).reshape(-1, 1)    # Previous cell state (2x1)

print(f"The input vector\n {x_t}")
print(f"The previous hidden state:\n {h_prev}")
print(f"The previous cell state:\n {c_prev}")

The input vector
 [[0.5]
 [0.8]
 [0.2]]
The previous hidden state:
 [[0.3]
 [0.4]]
The previous cell state:
 [[0.2]
 [0.3]]


In [10]:
# 4. Combine the inputs
# Stack the previous hidden state on top of the current input so that each gate
# can be computed with a single matrix product against one column vector.
combined_input = np.concatenate((h_prev, x_t), axis=0)
print(f"The combined input:\n {combined_input}")

The combined input:
 [[0.3]
 [0.4]
 [0.5]
 [0.8]
 [0.2]]


In [11]:
# 5. Compute forget gate activation
# Show the operand shapes first: (rows, cols) of the weights and of the stacked vector.
print(Wf.shape)
print(combined_input.shape)

# Affine transform of the stacked vector, then the sigmoid (expit) squashes it into (0, 1)
f_t = expit(Wf.dot(combined_input) + bf)

print(f_t.shape)
print(f"The activated forget gate:\n {f_t}")

(2, 5)
(5, 1)
(2, 1)
The activated forget gate:
 [[0.49336036]
 [0.49745089]]


In [12]:
# Compute input gate activation and candidate values
#     i_t: input gate, a sigmoid output that later scales the candidate values
#     c_tilde: candidate values, a tanh output
# expit is used so this gate goes through the same sigmoid implementation as the
# forget gate instead of mixing the hand-written sigmoid with the SciPy one.

i_t = expit(np.dot(Wi, combined_input) + bi)
c_tilde = np.tanh(np.dot(Wc, combined_input) + bc)
print(f"The input gate:\n {i_t}")
print(f"The c_tilde gate:\n {c_tilde}")

The input gate:
 [[0.50166712]
 [0.50006494]]
The c_tilde gate:
 [[-0.00380958]
 [-0.01686931]]


In [13]:
# Compute output gate activation
# expit is used so this gate goes through the same sigmoid implementation as the
# forget gate instead of mixing the hand-written sigmoid with the SciPy one.

o_t = expit(np.dot(Wo, combined_input) + bo)
print(f"The output gate:\n {o_t}")

The output gate:
 [[0.50037238]
 [0.49787955]]


In [15]:
# Update cell state: the sum of two element-wise products
#     f_t * c_prev  : previous cell state scaled by the forget gate output
#     i_t * c_tilde : candidate values scaled by the input gate output

c_t = np.multiply(f_t, c_prev) + np.multiply(i_t, c_tilde)
print(c_t)

[[0.09676093]
 [0.14079951]]


In [16]:
# Compute hidden state
# Squash the new cell state with tanh, then scale it element-wise by the output gate.
h_t = np.multiply(o_t, np.tanh(c_t))
print(f"The hidden state:\n {h_t}")

The hidden state:
 [[0.04826596]
 [0.0696416 ]]
