In [18]:
import torch
import torchvision
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import os
from torch.utils.tensorboard import SummaryWriter
from torchsummary import summary

In [19]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

continue_train = False

os.makedirs('results', exist_ok=True)
model_path = 'results/model.pth'
optimizer_path = 'results/optimizer.pth'

n_epochs = 10
batch_size_train = 64
batch_size_test = 1000
learning_rate = 0.001
momentum = 0.9
log_interval = 200

writer = SummaryWriter('logs')

random_seed = 1
torch.backends.cudnn.enabled = False
torch.manual_seed(random_seed)

<torch._C.Generator at 0x7fbe100f9dd0>

In [20]:
train_loader = torch.utils.data.DataLoader(
        torchvision.datasets.EMNIST(
            '../datasets/',
            split = 'balanced',
            train = True,
            download = True,
            transform = torchvision.transforms.Compose([ 
                torchvision.transforms.ToTensor(),
                torchvision.transforms.Normalize(
                    (0.1307,),(0.3081,))
                ])),
        batch_size = batch_size_train,
        shuffle = True,
        num_workers=4,
        pin_memory=True)
 
test_loader = torch.utils.data.DataLoader(
        torchvision.datasets.EMNIST(
            '../datasets/',
            split = 'balanced',
            train = False,
            download = True,
            transform = torchvision.transforms.Compose([
                torchvision.transforms.ToTensor(),
                torchvision.transforms.Normalize(
                    (0.1307,),(0.3081,))
                ])),
        batch_size = batch_size_test,
        shuffle = True,
        num_workers = 4,
        pin_memory = True)

In [21]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 50, kernel_size=5)
        self.conv2 = nn.Conv2d(50, 100, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(100*4*4, 100)
        self.fc2 = nn.Linear(100, 47)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

In [23]:
network = Net()
network.to(device)
optimizer = optim.SGD(network.parameters(), lr=learning_rate, momentum=momentum)

if continue_train:
    network_state_dict = torch.load(model_path)
    network.load_state_dict(network_state_dict)

    optimizer_state_dict = torch.load(optimizer_path)
    optimizer.load_state_dict(optimizer_state_dict)

In [24]:
summary(network, (1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 50, 24, 24]           1,300
            Conv2d-2            [-1, 100, 8, 8]         125,100
         Dropout2d-3            [-1, 100, 8, 8]               0
            Linear-4                  [-1, 100]         160,100
            Linear-5                   [-1, 47]           4,747
Total params: 291,247
Trainable params: 291,247
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.32
Params size (MB): 1.11
Estimated Total Size (MB): 1.43
----------------------------------------------------------------


In [25]:
def train(epoch):
    running_loss = 0.0
    network.train()
    for i, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        output = network(inputs)
        loss = F.nll_loss(output, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        if i % log_interval == 0:
            step = (i+1) * epoch + (epoch-1) *len(train_loader)
            writer.add_scalar('Loss/train', loss.item(), step)
            
            j = i * len(inputs)
            k = round(100. * i / len(train_loader))

            print(f'Train Epoch: {epoch} [{j}/{len(train_loader.dataset)} ({k}%)]\tLoss: {round(loss.item(), 4)}')

    torch.save(network.state_dict(), model_path)
    torch.save(optimizer.state_dict(), optimizer_path)
    return output

In [26]:
def test(epoch):
    network.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for i, (inputs, labels) in enumerate(test_loader):
            inputs, labels = inputs.to(device), labels.to(device)
            output = network(inputs)
            test_loss += F.nll_loss(output, labels, reduction='sum').item()
            pred = output.data.max(1, keepdim=True)[1]
            correct += pred.eq(labels.data.view_as(pred)).sum()
    
            if i % log_interval == 0:
                step = (i+1) * epoch + (epoch-1) *len(test_loader)
                writer.add_scalar('Loss/test', test_loss / len(test_loader.dataset), step)
                
    test_loss /= len(test_loader.dataset)
        
    test_accuracy = 100. * correct / len(test_loader.dataset)
    test_accuracy = int(test_accuracy)
    writer.add_scalar('Accuracy/test', test_accuracy, epoch)
    print(f'\nTest set: Avg. loss: {round(test_loss, 4)}, Accuracy: {correct}/{len(test_loader.dataset)} ({round(test_accuracy)}%)\n')

In [27]:
for epoch in range(1, n_epochs + 1):
    train(epoch)
    test(epoch)
writer.close()


Test set: Avg. loss: 0.903, Accuracy: 13746/18800 (73%)


Test set: Avg. loss: 0.6523, Accuracy: 14897/18800 (79%)


Test set: Avg. loss: 0.5708, Accuracy: 15306/18800 (81%)


Test set: Avg. loss: 0.5253, Accuracy: 15562/18800 (82%)


Test set: Avg. loss: 0.4977, Accuracy: 15711/18800 (83%)


Test set: Avg. loss: 0.4759, Accuracy: 15670/18800 (83%)


Test set: Avg. loss: 0.459, Accuracy: 15847/18800 (84%)


Test set: Avg. loss: 0.4445, Accuracy: 15916/18800 (84%)


Test set: Avg. loss: 0.4355, Accuracy: 16019/18800 (85%)


Test set: Avg. loss: 0.4298, Accuracy: 15995/18800 (85%)

