In [117]:
import numpy as np

class RNN:
    def __init__(self, input_size, hidden_size, output_size):
        """
        Initialize the RNN model.

        Args:
            input_size (int): The size of the input vector.
            hidden_size (int): The number of units in the hidden layer.
            output_size (int): The size of the output vector.
        """
        # Initialize the RNN parameters
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        # Initialize the weights and biases for the RNN
        self.Wxh = np.random.normal(size=(input_size, hidden_size))  # Weight matrix for input to hidden
        self.Whh = np.random.normal(size=(hidden_size, hidden_size))   # Weight matrix for hidden to hidden
        self.Why = np.random.normal(size=(hidden_size, output_size))   # Weight matrix for hidden to output
        self.bh = np.zeros(hidden_size)   # Bias vector for hidden
        self.by = np.zeros(output_size)   # Bias vector for output

        # Initialize the hidden state
        self.h = np.zeros(hidden_size)

    def forward(self, inputs):
        """
        Perform forward pass through the RNN model.

        Args:
            inputs (ndarray): Array of input vectors with shape (sequence_length, input_size).

        Returns:
            hiddens (ndarray): Array of hidden states with shape (sequence_length, hidden_size).
            outputs (ndarray): Array of output vectors with shape (sequence_length, output_size).
            p_softmaxs (ndarray): Array of softmax probabilities with shape (sequence_length, output_size).
        """
        hiddens = []
        outputs = []
        p_softmaxs = []
        hidden_i = self.h

        for i in range(len(inputs)):
            hidden_i = np.tanh(np.dot(inputs[i], self.Wxh) + np.dot(hidden_i, self.Whh) + self.bh)
            output_i = np.dot(hidden_i, self.Why) + self.by
            output_exp = np.exp(output_i)
            p_softmaxs_i = output_exp / np.sum(output_exp)
            hiddens.append(hidden_i)
            outputs.append(output_i)
            p_softmaxs.append(p_softmaxs_i)

        return np.array(hiddens), np.array(outputs), np.array(p_softmaxs)


    def backward(self, inputs, targets):
        """
        Perform backward pass through the RNN model.

        Args:
            inputs (ndarray): Array of input vectors with shape (sequence_length, input_size).
            targets (ndarray): Array of target vectors with shape (sequence_length, output_size).

        Returns:
            dWxh (ndarray): Gradient of the weight matrix Wxh.
            dWhh (ndarray): Gradient of the weight matrix Whh.
            dWhy (ndarray): Gradient of the weight matrix Why.
            dbh (ndarray): Gradient of the bias vector bh.
            dby (ndarray): Gradient of the bias vector by.
        """
            
        h, y, p = self.forward(inputs)
        dWxh, dWhh, dWhy = np.zeros_like(self.Wxh), np.zeros_like(self.Whh), np.zeros_like(self.Why)
        dbh, dby = np.zeros_like(self.bh), np.zeros_like(self.by)
        d_h_next = np.zeros_like(h[0])

        for i in reversed(range(len(inputs))):
            dy = p[i] - targets[i]
            dWhy += np.dot(h[i].T, dy)
            dby += dy.sum(axis=0)

            dh = np.dot(dy, self.Why.T) + d_h_next
            a = 1 - h[i] ** 2
            dbh = a
            dWxh += np.dot(inputs[i].T, a)
            dWhh += np.dot(h[i-1].T, a)

            d_h_next = np.dot(a, self.Whh.T)

        return dWxh, dWhh, dWhy, dbh, dby

    def train(self, inputs, targets, num_epochs):
        """
        Train the RNN model on the given inputs and targets.

        Args:
            inputs (list): List of input vectors.
            targets (list): List of target vectors.
            num_epochs (int): Number of training epochs.
        """
        for epoch in range(num_epochs):
            loss = 0
            for i in range(len(inputs)):
                x = inputs[i]
                y_true = targets[i]

                # Forward pass
                h, y, p = self.forward(x)

                # Compute loss
                loss += -np.sum(np.log(p) * y_true)

                # Backward pass
                dWxh, dWhh, dWhy, dbh, dby = self.backward(x, y_true)

                # Update weights and biases
                self.Wxh -= dWxh
                self.Whh -= dWhh
                self.Why -= dWhy
                self.bh -= dbh.sum(axis = 0 )
                self.by -= dby.sum(axis = 0 )

            print(f"Epoch {epoch + 1} Loss: {loss}")

    def sample(self, seed, num_chars):
        """
        Generate a sequence of characters using the trained RNN model.

        Args:
            seed (ndarray): The seed vector for generating the sequence.
            num_chars (int): Number of characters to generate.

        Returns:
            result (list): List of generated characters.
        """
        h = self.h
        x = seed
        result = []
        for _ in range(num_chars):
            h, _, p = self.forward(x)
            p = p.flatten()  # Flatten the probability array
            p /= np.sum(p)  # Normalize the probabilities to sum to 1
            idx = np.random.choice(range(len(p)), p=p)
            x = np.zeros_like(x)
            result.append(idx)

        return result





In [120]:
# Create an instance of the RNN
input_size = 3
hidden_size = 4
output_size = 3
rnn = RNN(input_size, hidden_size, output_size)

# Generate some dummy input and target data
inputs = [np.eye(input_size)[np.random.choice(input_size)] for _ in range(4)]
targets = [np.eye(output_size)[np.random.choice(output_size)] for _ in range(4)]

# Train the RNN
rnn.train(inputs, targets, 6)

# Generate a sample
seed = np.eye(input_size)[np.random.choice(input_size)]
num_chars = 3
sample = rnn.sample(seed, num_chars)


Epoch 1 Loss: 96.05617072760823
Epoch 2 Loss: 123.67677428091359
Epoch 3 Loss: 131.47820300323775
Epoch 4 Loss: 126.75024079816481
Epoch 5 Loss: 89.46563059921911
Epoch 6 Loss: 79.71362920729528


In [121]:
sample

[19, 6, 2]