<a href="https://colab.research.google.com/github/Tongxi-Hu/deep-learning/blob/main/misc/mnist_with_different_modelling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
import torchvision.transforms as transforms

### 1-Fully connected network (MLP)

In [None]:
class NN(nn.Module):
    """Two-layer fully connected classifier: Linear -> ReLU -> Linear.

    Args:
        input_size: Number of features per (flattened) input sample.
        num_classes: Number of output scores per sample.
        hidden_size: Width of the hidden layer. Defaults to 50, the value
            that used to be hard-coded.
    """

    def __init__(self, input_size, num_classes, hidden_size=50) -> None:
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Map ``x`` of shape ``(batch, input_size)`` to raw scores of shape ``(batch, num_classes)``."""
        hidden = F.relu(self.fc1(x))
        # No softmax here: the scores are returned unnormalised.
        return self.fc2(hidden)

# Smoke test: a random batch of 64 vectors with 784 features each
# should come out as one row of 10 scores per sample.
model = NN(input_size=784, num_classes=10)
x = torch.randn(64, 784)
model(x).shape

torch.Size([64, 10])

In [None]:
# Use the GPU when one is present, otherwise the CPU.
# `is_available` has to be *called*: the bare function object is always
# truthy, which would pick 'cuda' even on machines without a GPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyperparameters for the fully connected model.
input_size = 784       # features per flattened input sample
num_classes = 10       # number of output scores
learning_rate = 0.001  # Adam step size
batch_size = 64        # samples per mini-batch
num_epochs = 1         # full passes over the training loader

In [None]:
# MNIST training split (downloaded into 'dataset/' on first use), served in
# shuffled mini-batches.
train_dataset = datasets.MNIST(
    root='dataset/', train=True, transform=transforms.ToTensor(), download=True
)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

# MNIST test split. Shuffling has no effect on the measured accuracy, so the
# evaluation loader keeps a fixed order.
test_dataset = datasets.MNIST(
    root='dataset/', train=False, transform=transforms.ToTensor(), download=True
)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Fresh fully connected model on the selected device, plus its loss and optimizer.
model = NN(input_size=input_size, num_classes=num_classes)
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# Train the fully connected model: one optimizer step per mini-batch.
# check_accuracy() leaves the model in eval mode, so switch back explicitly
# in case this cell is re-run after an evaluation.
model.train()
for epoch in range(num_epochs):
    for data, targets in train_loader:
        # Flatten every sample to a vector, the layout NN.forward expects.
        data = data.to(device=device).reshape(data.shape[0], -1)
        targets = targets.to(device=device)

        scores = model(data)
        loss = criterion(scores, targets)

        # Clear old gradients, backpropagate, then update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

In [None]:
def _match_model_input(x, model):
    """Reshape a batch to the layout expected by the first layer of ``model``.

    The first ``nn.Linear``, ``nn.Conv2d`` or recurrent (``nn.RNNBase``)
    submodule found in ``model.modules()`` decides:

    * ``nn.Conv2d``  -> batch is returned unchanged
    * ``nn.RNNBase`` -> axis 1 is removed with ``squeeze(1)``
    * anything else  -> batch is flattened to ``(batch, -1)``
    """
    first_layer = next(
        (m for m in model.modules() if isinstance(m, (nn.Linear, nn.Conv2d, nn.RNNBase))),
        None,
    )
    if isinstance(first_layer, nn.Conv2d):
        return x
    if isinstance(first_layer, nn.RNNBase):
        return x.squeeze(1)
    return x.reshape(x.shape[0], -1)


def check_accuracy(loader, model):
    """Print the fraction of samples in ``loader`` that ``model`` classifies correctly.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` batches.
        model: Module returning per-class scores of shape ``(batch, num_classes)``.

    Returns:
        None. The accuracy is printed as a float in ``[0, 1]``.

    Notes:
        * The model is switched to eval mode and is left in that mode.
        * Batches are moved to the device of the model's parameters, so no
          global ``device`` variable is required.
        * Inputs are reshaped per model type (see ``_match_model_input``), so
          the same function works for the linear, convolutional and recurrent
          models without being redefined.
    """
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    num_correct = 0
    num_samples = 0
    model.eval()
    with torch.no_grad():
        for x, y in loader:
            x = _match_model_input(x.to(device=target_device), model)
            y = y.to(device=target_device)
            scores = model(x)
            # Index of the highest score along the class axis is the prediction.
            _, prediction = scores.max(1)
            # .item() keeps the running count a plain Python int.
            num_correct += (prediction == y).sum().item()
            num_samples += prediction.size(0)
    print(f'{float(num_correct)/float(num_samples)}')

# Accuracy on the training split first, then on the test split.
check_accuracy(loader=train_loader, model=model)
check_accuracy(loader=test_loader, model=model)

0.9272333333333334
0.927


### 2-CNN




In [4]:
class CNN(nn.Module):
    """Small convolutional classifier.

    Two stages of (3x3 conv, ReLU, 2x2 max-pool) followed by one linear layer.

    Args:
        in_channels: Number of channels in the input images.
        num_classes: Number of output scores per sample.

    Note:
        The linear layer is sized ``16*7*7``: 16 channels from ``conv2`` and a
        7x7 map, which is what 28x28 inputs become after two 2x2 poolings
        (the padded 3x3 convolutions keep the spatial size).
    """

    def __init__(self, in_channels=1, num_classes=10) -> None:
        super().__init__()
        # Honour the `in_channels` argument instead of a hard-coded 1.
        self.conv1 = nn.Conv2d(
            in_channels=in_channels, out_channels=8,
            kernel_size=(3, 3), stride=(1, 1), padding=(1, 1),
        )
        self.pool = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        self.conv2 = nn.Conv2d(
            in_channels=8, out_channels=16,
            kernel_size=(3, 3), stride=(1, 1), padding=(1, 1),
        )
        self.fc1 = nn.Linear(16 * 7 * 7, num_classes)

    def forward(self, x):
        """Map images ``(batch, in_channels, H, W)`` to raw scores ``(batch, num_classes)``."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten the feature maps into one vector per sample for the linear layer.
        x = x.reshape(x.shape[0], -1)
        return self.fc1(x)

# Smoke test: 64 random single-channel 28x28 images should give 64x10 scores.
model = CNN()
x = torch.randn(64, 1, 28, 28)
model(x).shape

torch.Size([64, 10])

In [5]:
# Use the GPU when one is present, otherwise the CPU.
# `is_available` has to be *called*: the bare function object is always
# truthy, which would pick 'cuda' even on machines without a GPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyperparameters for the convolutional model.
in_channel = 1         # channels per input image
num_classes = 10       # number of output scores
learning_rate = 0.001  # Adam step size
batch_size = 64        # samples per mini-batch
num_epochs = 1         # full passes over the training loader

In [7]:
# Fresh convolutional model on the selected device, plus its loss and optimizer.
model = CNN(in_channels=in_channel, num_classes=num_classes)
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [8]:
# Train the convolutional model: one optimizer step per mini-batch.
# check_accuracy() leaves the model in eval mode, so switch back explicitly
# in case this cell is re-run after an evaluation.
model.train()
for epoch in range(num_epochs):
    for data, targets in train_loader:
        # Batches are passed to the model in the layout the loader yields.
        data = data.to(device=device)
        targets = targets.to(device=device)

        scores = model(data)
        loss = criterion(scores, targets)

        # Clear old gradients, backpropagate, then update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

In [9]:
# Accuracy on the training split first, then on the test split.
check_accuracy(loader=train_loader, model=model)
check_accuracy(loader=test_loader, model=model)

0.9679166666666666
0.9709


### 3-RNN

In [10]:
class RNN(nn.Module):
    """Stacked ``nn.RNN`` classifier with a linear layer over all time steps.

    The recurrent outputs of every time step are concatenated and passed to a
    single linear layer.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the recurrent hidden state.
        num_layers: Number of stacked recurrent layers.
        num_classes: Number of output scores per sample.
        seq_len: Number of time steps per sample; the linear layer has
            ``hidden_size * seq_len`` inputs. When omitted, the notebook-level
            ``sequence_length`` variable is used, as before.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes, seq_len=None) -> None:
        super().__init__()
        if seq_len is None:
            # Backward-compatible fallback to the global configuration value.
            seq_len = sequence_length
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size * seq_len, num_classes)

    def forward(self, x):
        """Map ``x`` of shape ``(batch, seq_len, input_size)`` to scores ``(batch, num_classes)``."""
        # Build the zero initial state on the input's own device and dtype,
        # so the module does not depend on a global `device` variable.
        h0 = torch.zeros(
            self.num_layers, x.size(0), self.hidden_size,
            device=x.device, dtype=x.dtype,
        )
        out, _ = self.rnn(x, h0)
        # Concatenate the outputs of all time steps into one vector per sample.
        out = out.reshape(out.shape[0], -1)
        return self.fc(out)

In [11]:
# Use the GPU when one is present, otherwise the CPU.
# `is_available` has to be *called*: the bare function object is always
# truthy, which would pick 'cuda' even on machines without a GPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyperparameters shared by the recurrent models.
input_size = 28        # features per time step
sequence_length = 28   # time steps per sample
num_layers = 2         # stacked recurrent layers
hidden_size = 256      # size of the recurrent hidden state
num_classes = 10       # number of output scores
learning_rate = 0.001  # Adam step size
batch_size = 64        # samples per mini-batch
num_epochs = 2         # full passes over the training loader

In [13]:
# Fresh RNN model on the selected device, plus its loss and optimizer.
model = RNN(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    num_classes=num_classes,
)
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [14]:
# Train the RNN model: one optimizer step per mini-batch.
# check_accuracy() leaves the model in eval mode, so switch back explicitly
# in case this cell is re-run after an evaluation.
model.train()
for epoch in range(num_epochs):
    for data, targets in train_loader:
        # Drop axis 1 (when it has size 1) so each sample is a 2-D sequence.
        data = data.to(device=device).squeeze(1)
        targets = targets.to(device=device)

        scores = model(data)
        loss = criterion(scores, targets)

        # Clear old gradients, backpropagate, then update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

In [16]:
# Accuracy on the training split first, then on the test split.
check_accuracy(loader=train_loader, model=model)
check_accuracy(loader=test_loader, model=model)

0.9651833333333333
0.9635


### 4-GRU

In [17]:
class GRU(nn.Module):
    """Stacked ``nn.GRU`` classifier with a linear layer over all time steps.

    The recurrent outputs of every time step are concatenated and passed to a
    single linear layer.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the recurrent hidden state.
        num_layers: Number of stacked recurrent layers.
        num_classes: Number of output scores per sample.
        seq_len: Number of time steps per sample; the linear layer has
            ``hidden_size * seq_len`` inputs. When omitted, the notebook-level
            ``sequence_length`` variable is used, as before.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes, seq_len=None) -> None:
        super().__init__()
        if seq_len is None:
            # Backward-compatible fallback to the global configuration value.
            seq_len = sequence_length
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size * seq_len, num_classes)

    def forward(self, x):
        """Map ``x`` of shape ``(batch, seq_len, input_size)`` to scores ``(batch, num_classes)``."""
        # Build the zero initial state on the input's own device and dtype,
        # so the module does not depend on a global `device` variable.
        h0 = torch.zeros(
            self.num_layers, x.size(0), self.hidden_size,
            device=x.device, dtype=x.dtype,
        )
        out, _ = self.gru(x, h0)
        # Concatenate the outputs of all time steps into one vector per sample.
        out = out.reshape(out.shape[0], -1)
        return self.fc(out)

In [18]:
# Fresh GRU model on the selected device, plus its loss and optimizer.
model = GRU(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    num_classes=num_classes,
)
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [19]:
# Train the GRU model: one optimizer step per mini-batch.
# check_accuracy() leaves the model in eval mode, so switch back explicitly
# in case this cell is re-run after an evaluation.
model.train()
for epoch in range(num_epochs):
    for data, targets in train_loader:
        # Drop axis 1 (when it has size 1) so each sample is a 2-D sequence.
        data = data.to(device=device).squeeze(1)
        targets = targets.to(device=device)

        scores = model(data)
        loss = criterion(scores, targets)

        # Clear old gradients, backpropagate, then update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

In [20]:
# Accuracy on the training split first, then on the test split.
check_accuracy(loader=train_loader, model=model)
check_accuracy(loader=test_loader, model=model)

0.9843666666666666
0.9835


### 5-LSTM

In [21]:
class LSTM(nn.Module):
    """Stacked ``nn.LSTM`` classifier with a linear layer over all time steps.

    The recurrent outputs of every time step are concatenated and passed to a
    single linear layer.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the hidden and cell states.
        num_layers: Number of stacked recurrent layers.
        num_classes: Number of output scores per sample.
        seq_len: Number of time steps per sample; the linear layer has
            ``hidden_size * seq_len`` inputs. When omitted, the notebook-level
            ``sequence_length`` variable is used, as before.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes, seq_len=None) -> None:
        super().__init__()
        if seq_len is None:
            # Backward-compatible fallback to the global configuration value.
            seq_len = sequence_length
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size * seq_len, num_classes)

    def forward(self, x):
        """Map ``x`` of shape ``(batch, seq_len, input_size)`` to scores ``(batch, num_classes)``."""
        # Build the zero initial hidden and cell states on the input's own
        # device and dtype, so the module does not depend on a global `device`.
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        h0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
        c0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
        out, _ = self.lstm(x, (h0, c0))
        # Concatenate the outputs of all time steps into one vector per sample.
        out = out.reshape(out.shape[0], -1)
        return self.fc(out)

In [22]:
# Fresh LSTM model on the selected device, plus its loss and optimizer.
model = LSTM(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    num_classes=num_classes,
)
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [23]:
# Train the LSTM model: one optimizer step per mini-batch.
# check_accuracy() leaves the model in eval mode, so switch back explicitly
# in case this cell is re-run after an evaluation.
model.train()
for epoch in range(num_epochs):
    for data, targets in train_loader:
        # Drop axis 1 (when it has size 1) so each sample is a 2-D sequence.
        data = data.to(device=device).squeeze(1)
        targets = targets.to(device=device)

        scores = model(data)
        loss = criterion(scores, targets)

        # Clear old gradients, backpropagate, then update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

In [24]:
# Accuracy on the training split first, then on the test split.
check_accuracy(loader=train_loader, model=model)
check_accuracy(loader=test_loader, model=model)

0.9858833333333333
0.9853
