# PyTorch: Learn the Basics

In [2]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

In [5]:
import torch
import torch.nn as nn

class SimpleRNN(nn.Module):
    """A minimal Elman RNN followed by a linear read-out of the last time step.

    The model runs ``nn.RNN`` (with ``batch_first=True``) over the input
    sequence and feeds the top layer's output at the final time step through
    a fully connected layer.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        """
        Initializes the SimpleRNN model.

        Args:
            input_size (int): The number of expected features in the input `x`.
                              For word embeddings, this would be the embedding dimension.
            hidden_size (int): The number of features in the hidden state `h`.
            output_size (int): The size of the output layer (e.g., number of classes for classification).
            num_layers (int, optional): Number of recurrent layers. Defaults to 1.
        """
        super().__init__()

        # Kept on the instance because forward() needs them to size h0.
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # Define the RNN layer
        # batch_first=True means input/output tensors are (batch, seq, feature)
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)

        # Define a linear layer to map the RNN's output to the desired output_size
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """
        Defines the forward pass of the RNN model.

        Args:
            x (torch.Tensor): Batched input tensor of shape
                (batch_size, sequence_length, input_size).

        Returns:
            torch.Tensor: Output tensor of shape (batch_size, output_size).
        """
        # Initialize hidden state with zeros
        # The hidden state tensor has shape (num_layers * num_directions, batch_size, hidden_size)
        # For a simple RNN, num_directions is 1.
        # new_zeros() inherits dtype and device from x, so the hidden state
        # matches the input when the model runs in float64/half or on an
        # accelerator, and no intermediate CPU tensor is created.
        h0 = x.new_zeros(self.num_layers, x.size(0), self.hidden_size)

        # Pass input through the RNN layer
        # output: (batch_size, sequence_length, hidden_size * num_directions)
        # The final hidden state returned alongside it is not needed here.
        output, _ = self.rnn(x, h0)

        # We are interested in the output of the last time step for classification/prediction.
        # In batch_first=True mode, output[:, -1, :] gives the last time step's output for all batches.
        # For this unidirectional RNN it equals the final hidden state of the last layer.
        final_output = self.fc(output[:, -1, :])

        return final_output

In [4]:
%watermark

Last updated: 2025-07-28T22:57:59.989127+00:00

Python implementation: CPython
Python version       : 3.12.11
IPython version      : 9.4.0

Compiler    : GCC 12.2.0
OS          : Linux
Release     : 6.10.14-linuxkit
Machine     : aarch64
Processor   : 
CPU cores   : 7
Architecture: 64bit



In [12]:
%watermark -iv

torchvision: 0.22.1
torch      : 2.7.1

