In [None]:
#Cell 1: Import Libraries
import numpy as np

In [None]:
#Cell 2: Define the Custom Tensor Class
class NeuralTensor:
    """
    A custom tensor class that supports automatic differentiation.

    Every tensor wraps a NumPy array (``data``) and, when it is the result of
    an operation, remembers the tensors it was built from (``creators``) and
    the operation name (``op_name``). Each creator keeps a ``children`` map of
    ``child tensor_id -> number of gradients still expected from that child``
    so that gradients are only propagated further once all of them arrived.

    :param data: Array-like payload; copied into a NumPy array.
    :param requires_grad: If True, ``backward`` accumulates a gradient here.
    :param creators: Tensors this tensor was computed from, or None.
    :param op_name: Name of the operation that produced this tensor.
    :param tensor_id: Explicit id; a fresh unique id is assigned when None.
    """

    # Source of default ids. A process-wide counter guarantees uniqueness;
    # randomly drawn ids can collide, which would merge the bookkeeping of two
    # unrelated children inside a creator's ``children`` map.
    _next_id = 0

    def __init__(self, data, requires_grad=False, creators=None, op_name=None, tensor_id=None):
        self.data = np.array(data)
        self.requires_grad = requires_grad
        self.gradient = None
        if tensor_id is None:
            tensor_id = NeuralTensor._next_id
            NeuralTensor._next_id += 1
        self.tensor_id = tensor_id
        self.creators = creators
        self.op_name = op_name
        self.children = {}

        if creators is not None:
            # Register this tensor with each creator. The same creator may
            # appear more than once (e.g. ``a + a``), hence a count.
            for creator in creators:
                creator.children[self.tensor_id] = creator.children.get(self.tensor_id, 0) + 1

    def backward(self, grad=None, grad_origin=None):
        """
        Accumulate ``grad`` into this tensor and propagate it to the creators.

        :param grad: Incoming gradient (NeuralTensor or array-like). Defaults
            to ones with the shape of ``data``.
        :param grad_origin: The child tensor sending the gradient, or None
            when backpropagation starts at this tensor.
        :raises Exception: If ``grad_origin`` already delivered every gradient
            it was registered for.
        """
        if not self.requires_grad:
            return

        if grad is None:
            grad = NeuralTensor(np.ones_like(self.data))
        grad_data = grad.data if isinstance(grad, NeuralTensor) else np.asarray(grad)

        if grad_origin is not None:
            if self.children[grad_origin.tensor_id] == 0:
                raise Exception("Cannot backprop more than once")
            self.children[grad_origin.tensor_id] -= 1

        if self.gradient is None:
            # Store a private copy: the same gradient tensor is handed to
            # several creators, and they must not share one buffer.
            self.gradient = NeuralTensor(grad_data)
        else:
            self.gradient.data = self.gradient.data + grad_data

        ready = grad_origin is None or self._all_children_grads_accounted_for()
        if self.creators is not None and ready:
            if self.op_name == "add":
                self.creators[0].backward(self.gradient, self)
                self.creators[1].backward(self.gradient, self)
            # Additional operations like 'sub', 'mul' etc. can be added here

    def _all_children_grads_accounted_for(self):
        """Return True when no child still owes this tensor a gradient."""
        return all(count == 0 for count in self.children.values())

    def __add__(self, other):
        """
        Element-wise addition.

        The result is tracked in the graph (``op_name == "add"``) only when
        both operands require gradients; otherwise a plain tensor is returned.
        """
        if not isinstance(other, NeuralTensor):
            return NotImplemented
        if self.requires_grad and other.requires_grad:
            return NeuralTensor(self.data + other.data,
                                requires_grad=True,
                                creators=[self, other],
                                op_name="add")
        return NeuralTensor(self.data + other.data)

    # Define other tensor operations (__sub__, __mul__, mm, etc.) here


In [None]:
#Cell 3: Define Neural Network Layers
class Layer:
    """
    Common interface shared by every layer of the network.

    A layer owns a list of trainable parameters and is expected to provide
    ``forward`` and ``backward``; both raise here and must be overridden.
    """

    def __init__(self):
        # A fresh layer has no trainable parameters; subclasses add their own.
        self.parameters = list()

    def forward(self, input):
        """
        Run the layer on ``input``.

        :param input: Input data to the layer
        :return: Layer output
        :raises NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()

    def backward(self, grad):
        """
        Propagate a gradient back through the layer.

        :param grad: Gradient of the loss with respect to the output of the layer
        :return: Gradient of the loss with respect to the input of the layer
        :raises NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()

    def get_parameters(self):
        """
        Give access to the layer's parameter list.

        :return: The list object stored in ``self.parameters`` (not a copy)
        """
        return self.parameters


class LinearLayer(Layer):
    """
    Fully connected layer: a matrix product with a weight tensor plus a bias.
    """

    def __init__(self, n_inputs, n_outputs):
        """
        Create the weight and bias tensors and register them as parameters.

        Weights are drawn from a standard normal distribution and scaled by
        ``sqrt(2 / n_inputs)``; the bias starts at zero.

        :param n_inputs: Number of input features.
        :param n_outputs: Number of output features.
        """
        super().__init__()
        gaussian = np.random.randn(n_inputs, n_outputs)
        self.weights = NeuralTensor(gaussian * np.sqrt(2. / n_inputs), requires_grad=True)
        self.bias = NeuralTensor(np.zeros(n_outputs), requires_grad=True)
        self.parameters.extend((self.weights, self.bias))

    def forward(self, input):
        """
        Apply the linear transformation.

        :param input: Input tensor; it must provide an ``mm`` method.
        :return: ``input.mm(weights) + bias``.
        """
        projected = input.mm(self.weights)
        return projected + self.bias

    def backward(self, grad):
        """
        Intentionally a no-op: differentiation is delegated to the
        NeuralTensor objects produced in ``forward``.

        :param grad: Ignored.
        :return: None
        """
        return None


class SigmoidLayer(Layer):
    """
    Activation layer computing the logistic sigmoid element-wise.
    """

    def forward(self, input):
        """
        Evaluate ``sigmoid(x) = 1 / (1 + exp(-x))`` and remember the input.

        :param input: Input for the layer; it must support unary minus and
            ``np.exp``.
        :return: The sigmoid of ``input``.
        """
        self.input = input
        denominator = 1 + np.exp(-input)
        return 1 / denominator

    def backward(self, grad_output):
        """
        Chain ``grad_output`` with the sigmoid derivative ``s * (1 - s)``,
        where ``s`` is recomputed from the input saved by ``forward``.

        :param grad_output: Gradient of the loss function with respect to the output of this layer.
        :return: Gradient of the loss function with respect to the input of this layer.
        """
        activation = 1 / (1 + np.exp(-self.input))
        scaled = grad_output * activation
        return scaled * (1 - activation)


class TanhLayer(Layer):
    """
    Activation layer computing the hyperbolic tangent element-wise.
    """

    def forward(self, input):
        """
        Evaluate ``tanh`` on the input and remember the input.

        :param input: Input for the layer, passed to ``np.tanh``.
        :return: ``np.tanh(input)``.
        """
        self.input = input
        return np.tanh(input)

    def backward(self, grad_output):
        """
        Chain ``grad_output`` with the tanh derivative ``1 - tanh(x)**2``,
        evaluated on the input saved by ``forward``.

        :param grad_output: Gradient of the loss function with respect to the output of this layer.
        :return: Gradient of the loss function with respect to the input of this layer.
        """
        squashed = np.tanh(self.input)
        return grad_output * (1 - squashed ** 2)



In [None]:
#Cell 4: Define RNN Layer
class RNNLayer(Layer):
    """
    A layer in a Recurrent Neural Network.

    It combines three linear layers (input->hidden, hidden->hidden and
    hidden->output) with one activation applied to the new hidden state.
    """

    def __init__(self, input_size, hidden_size, output_size, activation='sigmoid'):
        """
        Build the sub-layers and collect their parameters.

        :param input_size: Number of features of each input vector.
        :param hidden_size: Size of the hidden state.
        :param output_size: Number of features of each output vector.
        :param activation: ``'sigmoid'`` selects SigmoidLayer; any other value
            selects TanhLayer.
        """
        # Initialise the base class so its state exists before it is extended.
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        self.activation = SigmoidLayer() if activation == 'sigmoid' else TanhLayer()

        self.input_hidden_layer = LinearLayer(input_size, hidden_size)
        self.hidden_hidden_layer = LinearLayer(hidden_size, hidden_size)
        self.hidden_output_layer = LinearLayer(hidden_size, output_size)

        # A new list holding the parameters of all three linear layers.
        self.parameters = (self.input_hidden_layer.get_parameters() +
                           self.hidden_hidden_layer.get_parameters() +
                           self.hidden_output_layer.get_parameters())

    def forward(self, input_tensor, hidden_tensor):
        """
        Advance the recurrent layer by one time step.

        :param input_tensor: Input for the current step.
        :param hidden_tensor: Hidden state from the previous step.
        :return: Tuple ``(output, new_hidden)``.
        """
        from_prev_hidden = self.hidden_hidden_layer.forward(hidden_tensor)
        combined = self.input_hidden_layer.forward(input_tensor) + from_prev_hidden
        new_hidden = self.activation.forward(combined)
        output = self.hidden_output_layer.forward(new_hidden)
        return output, new_hidden

    def init_hidden_state(self, batch_size=1):
        """
        Create an all-zero initial hidden state.

        :param batch_size: Number of rows of the hidden state.
        :return: NeuralTensor of shape ``(batch_size, hidden_size)``.
        """
        return NeuralTensor(np.zeros((batch_size, self.hidden_size)), requires_grad=True)

In [None]:
#Cell 5: Define Additional Components
class EmbeddingLayer(Layer):
    """
    An embedding layer to map input indices to dense vectors.
    """

    def __init__(self, vocab_size, embedding_dim):
        """
        Initializes the EmbeddingLayer with random weights.

        The weights are uniform in ``[-0.5, 0.5)`` divided by
        ``embedding_dim``.

        :param vocab_size: The size of the vocabulary.
        :param embedding_dim: The dimensionality of the embeddings.
        """
        super().__init__()
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim

        # Initialize the embeddings. The tensor class of this file is
        # NeuralTensor and its gradient flag is ``requires_grad``.
        initial = (np.random.rand(vocab_size, embedding_dim) - 0.5) / embedding_dim
        self.weights = NeuralTensor(initial, requires_grad=True)
        self.parameters.append(self.weights)

    def forward(self, input):
        """
        Forward pass of the embedding layer. Maps input indices to embeddings.

        Delegates to ``self.weights.index_select``, which the weight tensor
        class must provide.

        :param input: A batch of indices with shape (batch_size,).
        :return: The corresponding embeddings with shape (batch_size, embedding_dim).
        """
        return self.weights.index_select(input)

    def backward(self, grad_output):
        """
        Backward pass of the embedding layer. Forwards the gradient to the
        weight tensor's own ``backward``.

        :param grad_output: The gradient of the loss with respect to the output of the embedding layer.
        """
        self.weights.backward(grad_output)


class CrossEntropyLoss:
    """
    A class to compute the cross entropy loss.

    ``forward`` caches the softmax probabilities so that ``backward`` can
    return the gradient with respect to the raw scores.
    """

    def forward(self, input, target):
        """
        Forward pass for computing the cross entropy loss.

        :param input: Predictions from the model, shape (batch_size, num_classes).
        :param target: Ground truth labels, shape (batch_size,).
        :return: The computed cross entropy loss.
        """
        self.input = input
        self.target = target

        # Work in log space: taking log(softmax) directly yields -inf/inf as
        # soon as a probability underflows to zero.
        log_probs = self._log_softmax(input)
        self.softmax_output = np.exp(log_probs)

        rows = np.arange(target.shape[0])
        self.log_likelihood = -log_probs[rows, target]
        loss = np.sum(self.log_likelihood) / input.shape[0]
        return loss

    def backward(self):
        """
        Backward pass for computing the gradient of the cross entropy loss
        with respect to the input.

        The cached softmax output is left untouched, so the method can be
        called repeatedly with identical results.

        :return: The gradients with respect to the input.
        """
        n_samples = self.target.shape[0]
        # Copy first: subtracting in place would corrupt the cached softmax.
        dx = self.softmax_output.copy()
        dx[np.arange(n_samples), self.target] -= 1
        return dx / n_samples

    def _softmax(self, x):
        """
        Private method to compute softmax values for each set of scores in x.

        :param x: Input array.
        :return: Softmax output array.
        """
        exps = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exps / np.sum(exps, axis=1, keepdims=True)

    def _log_softmax(self, x):
        """
        Private method to compute the logarithm of the softmax of each row.

        :param x: Input array.
        :return: Array of log-probabilities with the shape of ``x``.
        """
        # Subtracting the row maximum keeps every exponent <= 0.
        shifted = x - np.max(x, axis=1, keepdims=True)
        return shifted - np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))


class SGD:
    """
    Stochastic Gradient Descent (SGD) optimizer.
    """

    def __init__(self, parameters, alpha=0.01):
        """
        Initializes the SGD optimizer.

        :param parameters: A list of parameters to optimize.
        :param alpha: The learning rate.
        """
        self.parameters = parameters
        self.alpha = alpha

    @staticmethod
    def _gradient_of(parameter):
        """
        Return the gradient tensor attached to ``parameter``, or None.

        NeuralTensor stores its gradient in ``gradient``; ``grad`` is still
        honoured for parameter objects that use that attribute name.
        """
        gradient = getattr(parameter, "gradient", None)
        if gradient is None:
            gradient = getattr(parameter, "grad", None)
        return gradient

    def zero(self):
        """
        Resets the gradients of all parameters to zero.

        Parameters that have not received a gradient yet are skipped.
        """
        for p in self.parameters:
            gradient = self._gradient_of(p)
            if gradient is not None:
                gradient.data *= 0

    def step(self, zero=True):
        """
        Performs a single optimization step.

        Each parameter's data is decreased in place by ``alpha`` times its
        gradient. Parameters without a gradient are left unchanged.

        :param zero: If True, resets gradients to zero after the step.
        """
        for p in self.parameters:
            gradient = self._gradient_of(p)
            if gradient is None:
                continue

            p.data -= gradient.data * self.alpha

            if zero:
                gradient.data *= 0



In [None]:
#Cell 6: Define Preprocessing Function
def preprocess_shakespeare_text(file_path):
    """
    Preprocesses the Shakespeare text dataset.

    Reads the file as UTF-8 text and builds a character-level vocabulary.
    The vocabulary is sorted, so the character/index mapping is identical on
    every run (plain set iteration order varies between interpreter runs).

    :param file_path: Path of the text file to read.
    :return: Tuple ``(raw_text, vocab, char_to_index, index_to_char,
        indexed_data)`` where ``vocab`` is the sorted list of distinct
        characters and ``indexed_data`` is an integer NumPy array holding the
        vocabulary index of every character of ``raw_text``.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        raw_text = file.read()

    vocab = sorted(set(raw_text))
    char_to_index = {char: i for i, char in enumerate(vocab)}
    index_to_char = dict(enumerate(vocab))
    # dtype=int keeps the array integer-typed even for an empty file.
    indexed_data = np.array([char_to_index[char] for char in raw_text], dtype=int)

    return raw_text, vocab, char_to_index, index_to_char, indexed_data


In [None]:
#Cell 7: Define Training Function
def train_rnn_model(model, data, epochs=10, batch_size=32, sequence_length=100, learning_rate=0.1):
    """
    Trains the RNN model with truncated backpropagation through time.

    ``data`` is split into ``batch_size`` parallel streams of equal length
    (trailing items that do not fill a stream are dropped). Each training
    window covers ``sequence_length`` consecutive time steps; at step ``t``
    the model receives one index per stream and is asked to predict the index
    at ``t + 1``.

    The module-level ``embed`` layer turns indices into model inputs, and its
    parameters are optimised together with the model's.

    :param model: Recurrent model providing ``get_parameters``,
        ``init_hidden_state(batch_size)`` and
        ``forward(input_tensor, hidden_tensor) -> (output, hidden)``.
    :param data: One-dimensional sequence of integer indices.
    :param epochs: Number of passes over the data.
    :param batch_size: Number of parallel streams.
    :param sequence_length: Number of time steps per parameter update.
    :param learning_rate: Step size passed to the SGD optimizer.
    :raises ValueError: If ``batch_size`` or ``sequence_length`` is not positive.
    """
    if batch_size <= 0 or sequence_length <= 0:
        raise ValueError("batch_size and sequence_length must be positive")

    criterion = CrossEntropyLoss()
    optimizer = SGD(model.get_parameters() + embed.get_parameters(), alpha=learning_rate)

    # One row per stream, so every time step yields exactly batch_size inputs,
    # matching the hidden state created by init_hidden_state(batch_size).
    data = np.asarray(data)
    stream_length = len(data) // batch_size
    streams = data[:stream_length * batch_size].reshape(batch_size, stream_length)

    for epoch in range(epochs):
        total_loss = 0

        hidden = model.init_hidden_state(batch_size)
        # The upper bound leaves room for the target at step + 1.
        for batch_i in range(0, stream_length - sequence_length, sequence_length):
            # Re-wrap the hidden state so gradients stop at the window start.
            hidden = NeuralTensor(hidden.data, requires_grad=True)
            loss = None

            for t in range(sequence_length):
                step = batch_i + t
                step_input = NeuralTensor(streams[:, step], requires_grad=True)
                rnn_input = embed.forward(step_input)
                # Positional call: the model names these parameters
                # input_tensor / hidden_tensor.
                output, hidden = model.forward(rnn_input, hidden)

                target = NeuralTensor(streams[:, step + 1], requires_grad=True)
                # NOTE(review): the loop needs a loss tensor exposing
                # ``backward()`` and ``data``; confirm that
                # ``criterion.forward`` returns one for tensor inputs.
                batch_loss = criterion.forward(output, target)
                loss = batch_loss if loss is None else loss + batch_loss

            loss.backward()
            # step() clears the gradients by default, so no separate zero()
            # call is needed before the next window.
            optimizer.step()
            total_loss += loss.data

        print(f'Epoch {epoch}, Loss: {total_loss}')


In [None]:
#Cell 8: Load Data and Initialize Model
# Read the corpus and build the character vocabulary.
file_path = 'tinyshakespeare.txt'
raw_text, vocab, char_to_index, index_to_char, indexed_data = preprocess_shakespeare_text(file_path)

# EmbeddingLayer's constructor names its second parameter ``embedding_dim``.
# The embedding width must equal the RNN's input_size.
embed = EmbeddingLayer(vocab_size=len(vocab), embedding_dim=512)
model = RNNLayer(input_size=512, hidden_size=512, output_size=len(vocab))


In [None]:
#Cell 9: Train the Model
# Start training on the indexed corpus with the notebook's hyperparameters.
train_rnn_model(
    model,
    indexed_data,
    epochs=40,
    batch_size=32,
    sequence_length=100,
    learning_rate=0.05,
)
