In [22]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms

In [24]:
# Select the compute device: prefer CUDA, then Apple's MPS backend, else CPU.
# `torch.backends.mps` is looked up defensively because PyTorch builds that
# predate the MPS backend do not have that attribute at all.
_mps_backend = getattr(torch.backends, 'mps', None)
if torch.cuda.is_available():
    device = 'cuda'
elif _mps_backend is not None and _mps_backend.is_available():
    device = 'mps'
else:
    device = 'cpu'

# Hyperparameters
# Each image is fed to the RNN as a sequence: after the channel axis is
# squeezed away, axis 1 is the time step and axis 2 the features per step.
input_size = 28        # features per time step
seq_len = 28           # time steps per sequence
num_layer = 2          # stacked recurrent layers
hidden_size = 256      # hidden-state width of each recurrent layer
num_classes = 10       # number of output classes
learning_rate = 0.001  # Adam step size
batch_size = 64        # samples per mini-batch
num_epoch = 2          # full passes over the training set

#create RNN
class RNN(nn.Module):
    """Many-to-one RNN classifier over fixed-length sequences.

    The input is run through a stacked ``nn.RNN``; the hidden states of every
    time step are flattened together and mapped to class scores by a single
    linear layer. Because of that flattening, the sequence length is baked
    into the linear layer's input width.

    Args:
        input_size: number of features per time step.
        hidden_size: width of each recurrent layer's hidden state.
        num_layer: number of stacked recurrent layers.
        num_classes: number of scores produced per sample.
        seq_len: number of time steps in every input sequence. Defaults to
            28 so existing four-argument constructions keep their behavior
            without depending on a module-level ``seq_len`` variable.
    """

    def __init__(self, input_size, hidden_size, num_layer, num_classes=10, seq_len=28):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layer = num_layer
        self.seq_len = seq_len
        self.rnn = nn.RNN(input_size, hidden_size, num_layer, batch_first=True)
        # All time steps' hidden states are concatenated before classification.
        self.fc = nn.Linear(hidden_size * seq_len, num_classes)

    def forward(self, x):
        """Return class scores of shape (batch, num_classes).

        Args:
            x: tensor of shape (batch, seq_len, input_size).
        """
        # Create the initial hidden state on the input's device and dtype so
        # the module works wherever it is moved, not only on a global device.
        h0 = torch.zeros(
            self.num_layer, x.size(0), self.hidden_size,
            device=x.device, dtype=x.dtype,
        )
        out, _ = self.rnn(x, h0)
        # (batch, seq_len, hidden_size) -> (batch, seq_len * hidden_size)
        out = out.reshape(out.shape[0], -1)
        return self.fc(out)

# init network
# Pass the `num_classes` hyperparameter rather than repeating the literal 10,
# so changing it in the configuration actually changes the model.
model = RNN(input_size, hidden_size, num_layer, num_classes=num_classes).to(device)

# loss and optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)


# load data
# MNIST is downloaded into `datasets/` on first use; ToTensor turns each
# image into a float tensor with a leading channel axis.
train_dataset = datasets.MNIST(
    root='datasets/', train=True, transform=transforms.ToTensor(), download=True
)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = datasets.MNIST(
    root='datasets/', train=False, transform=transforms.ToTensor(), download=True
)
# Accuracy does not depend on sample order, so the test split is not shuffled.
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)


# train network
for epoch in range(num_epoch):
    for batch_idx, (data, targets) in enumerate(train_loader):
        # Move the batch to `device`; squeeze(1) drops the size-1 axis at
        # position 1 so each sample is a 2-D (time step, feature) sequence.
        data = data.to(device).squeeze(1)
        targets = targets.to(device)

        # Clear the gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass and loss
        scores = model(data)
        loss = loss_fn(scores, targets)

        # Backward pass, then one optimizer update
        loss.backward()
        optimizer.step()

#check accuracy on both train and test set
def check_accuracy(loader, model):
    """Print the classification accuracy (in percent) of ``model`` on ``loader``.

    Args:
        loader: iterable of ``(inputs, labels)`` batches. Its ``dataset``
            must expose a boolean ``train`` attribute, which only selects
            the banner that is printed.
        model: ``nn.Module`` mapping a batch of inputs to per-class scores.

    Returns:
        None. The accuracy is printed, rounded to two decimals.
    """
    if loader.dataset.train:
        print("checking accuracy on train dataset")
    else:
        print("checking accuravy on test dataset")

    # Evaluate on whatever device the model already lives on instead of
    # relying on a module-level `device` variable.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    num_correct = 0
    num_samples = 0

    # Remember the current mode so a model that was already in eval mode is
    # not silently switched to training mode when this function returns.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for x, y in loader:
                if model_device is not None:
                    x = x.to(model_device)
                x = x.squeeze(1)

                # Count on the CPU with plain Python ints so the bookkeeping
                # does not depend on the accelerator's integer/bool ops.
                # NOTE(review): the recorded run printed 0.0% on both splits,
                # which is below chance; presumably a backend issue - confirm
                # by re-running.
                pred = model(x).cpu().argmax(dim=1)
                num_correct += (pred == y.cpu()).sum().item()
                num_samples += pred.size(0)
    finally:
        model.train(was_training)

    if num_samples == 0:
        # An empty loader would otherwise divide by zero.
        print("Accuracy is : n/a (no samples)")
        return
    print("Accuracy is :", round(num_correct / num_samples * 100, 2))
    return

# Report accuracy on the training split first, then on the test split.
for loader in (train_loader, test_loader):
    check_accuracy(loader, model)

checking accuracy on train dataset
Accuracy is : 0.0
checking accuravy on test dataset
Accuracy is : 0.0
