In [10]:
import torch
from torchvision import datasets
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as opt
import os

In [33]:
LEARNING_RATE = 0.001
BATCH_SIZE = 4096
N_EPOCHS = 40
dataDirectory = 'dataSet'
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device = ", device)

Device =  cuda


In [12]:
if not os.path.exists(dataDirectory):
    os.makedirs(dataDirectory)

downLoad = False
if not os.path.exists(os.path.join(dataDirectory, 'MNIST')):
    downLoad = True
trainData = datasets.MNIST(
    root = dataDirectory,
    train = True,
    transform = ToTensor(),
    download=downLoad
)
testData = datasets.MNIST(
    root = dataDirectory,
    train = False,
    transform = ToTensor(),
    download=downLoad
)

In [13]:
print("train data : ", trainData.data.shape)
print("test data : ", testData.data.shape)

train data :  torch.Size([60000, 28, 28])
test data :  torch.Size([10000, 28, 28])


In [14]:
loaders = {
    'trainLoader' : DataLoader(
        dataset=trainData, 
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=1
    )
    ,
    'testLoader' : DataLoader(
        dataset=testData, 
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=1
    )
}
loaders

{'trainLoader': <torch.utils.data.dataloader.DataLoader at 0x191222afb80>,
 'testLoader': <torch.utils.data.dataloader.DataLoader at 0x191222af040>}

In [15]:
def train(
        model : nn.Module,
        optimizer : opt.Adam,
        lossFunction : nn.CrossEntropyLoss
):
        model.train()
        for idx, (data, target) in enumerate(loaders['trainLoader']):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = lossFunction(output, target)
            loss.backward()
            optimizer.step()


@torch.no_grad()
def test(
        model : nn.Module,
        lossFunction : nn.CrossEntropyLoss
):
    model.eval()
    testLoss = 0
    correct = 0
    for data, target in loaders['testLoader']:
        data, target = data.to(device), target.to(device)
        output = model(data)
        testLoss += lossFunction(output, target).item()
        pred = output.argmax(dim = 1, keepdim = True)
        correct += pred.eq(target.view_as(pred)).sum().item()
    testLoss /= len(loaders['testLoader'].dataset)
    print("Accuracy : ", 100*correct / len(loaders['testLoader'].dataset))

In [16]:
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(784, 300)
        self.fc2 = nn.Linear(300, 50)
        self.fc3 = nn.Linear(50, 10)
    def forward(self, x):
        x = x.view(-1, 784)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return F.softmax(x, dim=1)

In [17]:
mlp = MLP().to(device=device)
optimizer = opt.Adam(mlp.parameters(), lr=LEARNING_RATE)
lossFunction = nn.CrossEntropyLoss()

In [18]:
for _ in range(N_EPOCHS):
    train(
        model=mlp,
        optimizer=optimizer,
        lossFunction=lossFunction
    )
test(
    model=mlp,
    lossFunction=lossFunction
)   
    

Accuracy :  92.08


In [30]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.activation = F.relu
        self.conv1 = nn.Conv2d(1, 6, 5, padding=2)
        self.pool1 = nn.AvgPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.pool2 = nn.AvgPool2d(2, 2)

        self.fc1 = nn.Linear(400, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool1(self.activation(self.conv1(x)))
        x = self.pool2(self.activation(self.conv2(x)))
        x = x.view(-1, 400)
        x = self.activation(self.fc1(x))
        x = self.activation(self.fc2(x))
        x = self.fc3(x)
        return F.softmax(x, dim=1)
        

In [36]:
cnn = CNN().to(device=device)
optimizer2 = opt.Adam(cnn.parameters(), lr=0.001)
lossFunction2 = nn.CrossEntropyLoss()

In [37]:
for _ in range(N_EPOCHS):
    train(
        model=cnn,
        optimizer=optimizer2,
        lossFunction=lossFunction2
    )
    test(
        model=cnn,
        lossFunction=lossFunction2
    )

Accuracy :  11.35
Accuracy :  11.35
Accuracy :  11.35
Accuracy :  11.35
Accuracy :  11.35
Accuracy :  11.91
Accuracy :  20.96
Accuracy :  26.79
Accuracy :  29.47
Accuracy :  34.88
Accuracy :  42.83
Accuracy :  49.13
Accuracy :  53.22
Accuracy :  60.31
Accuracy :  62.76
Accuracy :  64.2
Accuracy :  68.2
Accuracy :  69.94
Accuracy :  71.0
Accuracy :  71.51
Accuracy :  72.33
Accuracy :  72.83
Accuracy :  73.13
Accuracy :  73.44
Accuracy :  73.9
Accuracy :  74.19
Accuracy :  74.48
Accuracy :  78.16
Accuracy :  79.87
Accuracy :  80.57
Accuracy :  81.73
Accuracy :  82.21
Accuracy :  82.51
Accuracy :  82.82
Accuracy :  83.06
Accuracy :  83.54
Accuracy :  83.74
Accuracy :  84.04
Accuracy :  84.36
Accuracy :  84.55
