In [13]:
import torch
import torchvision

In [14]:
# Build the MNIST training and test splits under 'dataset/'; ToTensor is
# applied to every image as the per-sample transform.
train_dataset = torchvision.datasets.MNIST(
    root='dataset/',
    train=True,
    download=True,
    transform=torchvision.transforms.ToTensor(),
)
test_dataset = torchvision.datasets.MNIST(
    root='dataset/',
    train=False,
    download=True,
    transform=torchvision.transforms.ToTensor(),
)

In [15]:
# Wrap both splits in batched loaders; only the training loader is shuffled.
batch_size = 128
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

In [16]:
# Take the first batch from the (unshuffled) test loader and show the shape
# of its image tensor.
images, labels = next(iter(test_loader))
print(images.size())

torch.Size([128, 1, 28, 28])


In [17]:
# Shape of the label tensor for the batch fetched above (one label per image).
labels.size()

torch.Size([128])

In [18]:
# Label of the first image in the batch, as a plain Python int.
int(labels[0])

7

In [19]:
import torch

# Report the PyTorch build and CUDA status, then pick the compute device.
print(torch.cuda.is_available())
print(torch.__version__)
print(torch.version.cuda)
# get_device_name(0) raises when no CUDA device is usable, so only query it
# when CUDA is actually available; otherwise this cell would crash before the
# CPU fallback below is ever reached.
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))
else:
    print('No CUDA device available; using CPU')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

True
2.2.1+cu121
12.1
NVIDIA GeForce RTX 3050 6GB Laptop GPU


In [20]:
# Hyperparameters for the RNN classifier.
# The image batch printed above is 28x28 per image, hence 28 for both values.
input_size = sequence_length = 28

# Recurrent stack: depth and hidden-state width.
num_layers = 2
hidden_size = 256

# Output size and optimiser step size.
num_classes = 10
learning_rate = 0.01

In [21]:
import torch.nn as nn 
class RNN(nn.Module):
    """Sequence classifier: a stacked ``nn.RNN`` followed by a linear layer.

    Args:
        input_size: number of features per time step.
        hidden_size: width of the recurrent hidden state.
        num_layers: number of stacked recurrent layers.
        num_classes: number of output scores produced per sequence.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return raw class scores of shape ``(batch, num_classes)``.

        ``x`` is laid out as ``(batch, seq_len, input_size)`` because the RNN
        is constructed with ``batch_first=True``.
        """
        # Allocate the zero initial state directly on x's device and in x's
        # dtype: this avoids a CPU allocation followed by a copy, and keeps
        # the hidden state consistent with a model cast to another precision.
        h0 = torch.zeros(
            self.num_layers,
            x.size(0),
            self.hidden_size,
            device=x.device,
            dtype=x.dtype,
        )
        out, _ = self.rnn(x, h0)
        # Classify from the hidden state of the last time step only.
        return self.fc(out[:, -1, :])

In [22]:
# Instantiate the classifier from the hyperparameters defined earlier and
# move its parameters to the selected device.
net = RNN(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    num_classes=num_classes,
).to(device)

In [23]:
# Forward one test image through the network and show the highest score
# together with its class index. images[0] keeps its leading axis of size 1
# (see the batch shape printed earlier), which the batch_first RNN treats as
# a batch of one sequence.
torch.max(net(images[0].to(device)), dim=1)

torch.return_types.max(
values=tensor([0.1119], device='cuda:0', grad_fn=<MaxBackward0>),
indices=tensor([4], device='cuda:0'))

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.datasets import MNIST
from torchvision import transforms
from torch.utils.data import DataLoader


# Model and optimisation hyperparameters for this run.
input_size = sequence_length = 28
hidden_size = 128
num_layers = 1
num_classes = 10
batch_size = 100
learning_rate = 0.001
num_epochs = 5

# Run on the GPU when CUDA is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")


# Per-image transform shared by both splits.
transform = transforms.ToTensor()

# MNIST training and test splits stored under 'data'.
train_dataset = MNIST(
    root='data',
    train=True,
    transform=transform,
    download=True,
)
test_dataset = MNIST(
    root='data',
    train=False,
    transform=transform,
    download=True,
)

# Batched loaders; only the training data is shuffled.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)


class RNNNet(nn.Module):
    """Sequence classifier: an ``nn.RNN`` followed by a linear layer.

    Args:
        input_size: number of features per time step.
        hidden_size: width of the recurrent hidden state.
        num_layers: number of stacked recurrent layers.
        num_classes: number of output scores produced per sequence.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return raw class scores of shape ``(batch, num_classes)``.

        ``x`` is laid out as ``(batch, seq_len, input_size)`` because the RNN
        is constructed with ``batch_first=True``.
        """
        # Place the zero initial state on the input's own device and dtype
        # rather than on a module-level `device` global, so the model does
        # not depend on notebook state and works wherever it is moved.
        h0 = torch.zeros(
            self.num_layers,
            x.size(0),
            self.hidden_size,
            device=x.device,
            dtype=x.dtype,
        )
        out, _ = self.rnn(x, h0)
        # Classify from the hidden state of the last time step only.
        return self.fc(out[:, -1, :])

# Build the model on the selected device.
net = RNNNet(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    num_classes=num_classes,
).to(device)

# Cross-entropy loss on the network's raw scores, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=net.parameters(), lr=learning_rate)


# Train for `num_epochs` epochs, evaluating on the test split after each one.
for epoch in range(num_epochs):
    # ---- training pass ----
    net.train()
    train_loss = 0.0
    train_acc = 0

    for images, labels in train_loader:
        # Reshape to (batch, sequence_length, input_size), the layout the
        # batch_first RNN expects.
        images = images.reshape(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)

        outputs = net(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # `loss` is the mean over this batch, so weight it by the batch size
        # before accumulating; a smaller final batch then contributes in
        # proportion to its size instead of counting as a full batch.
        train_loss += loss.item() * labels.size(0)
        # Take `.indices` instead of unpacking into `_`, which would shadow
        # IPython's last-output variable.
        predicted = torch.max(outputs, 1).indices
        train_acc += (predicted == labels).sum().item()

    # Both metrics are per-sample averages over the whole training split.
    avg_train_loss = train_loss / len(train_loader.dataset)
    avg_train_acc = train_acc / len(train_loader.dataset)

    # ---- evaluation pass ----
    net.eval()
    test_loss = 0.0
    test_acc = 0

    # No gradients are needed while measuring test metrics.
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.reshape(-1, sequence_length, input_size).to(device)
            labels = labels.to(device)

            outputs = net(images)
            loss = criterion(outputs, labels)

            test_loss += loss.item() * labels.size(0)
            predicted = torch.max(outputs, 1).indices
            test_acc += (predicted == labels).sum().item()

    avg_test_loss = test_loss / len(test_loader.dataset)
    avg_test_acc = test_acc / len(test_loader.dataset)

    print(f'Epoch [{epoch+1}/{num_epochs}], '
          f'Train Acc: {avg_train_acc*100:.2f}%, Train Loss: {avg_train_loss:.4f}, '
          f'Test Acc: {avg_test_acc*100:.2f}%, Test Loss: {avg_test_loss:.4f}')

Epoch [1/5], Train Acc: 72.05%, Train Loss: 0.8514, Test Acc: 84.95%, Test Loss: 0.5011
Epoch [2/5], Train Acc: 88.85%, Train Loss: 0.3801, Test Acc: 92.13%, Test Loss: 0.2761
Epoch [3/5], Train Acc: 92.36%, Train Loss: 0.2643, Test Acc: 94.54%, Test Loss: 0.1936
Epoch [4/5], Train Acc: 93.81%, Train Loss: 0.2152, Test Acc: 94.40%, Test Loss: 0.1956
Epoch [5/5], Train Acc: 94.72%, Train Loss: 0.1865, Test Acc: 94.64%, Test Loss: 0.1924
