In [None]:
import numpy as np
import torch
import torchvision
import matplotlib.pyplot as plt
from time import time
from torchvision import datasets, transforms
from torch import nn, optim
import os
from pathlib import Path
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.optim as optim
from torchvision.transforms.functional import InterpolationMode

In [None]:
# Dataset locations, all rooted at the directory the notebook is launched from.
dataset_path = f"{os.getcwd()}/datasets"
train_dataset_path = f"{dataset_path}/train"
test_dataset_path = f"{dataset_path}/test"

In [None]:
class AddGaussianNoise(object):
    """Transform that adds Gaussian noise to a tensor.

    Each call draws fresh standard-normal noise with the same shape as the
    input, scales it by ``std`` and shifts it by ``mean``. The input tensor
    itself is not modified; a new tensor is returned.

    Args:
        mean: Constant offset added to every element.
        std: Scale applied to the standard-normal noise.
    """

    def __init__(self, mean=0., std=0.1):
        self.std = std
        self.mean = mean

    def __call__(self, tensor):
        # Allocate the noise on the input's device: a bare torch.randn(size)
        # always lands on the CPU, which makes the addition below fail for
        # tensors that live on another device.
        noise = torch.randn(tensor.size(), device=tensor.device)
        return tensor + noise * self.std + self.mean

    def __repr__(self):
        return self.__class__.__name__ + '(mean={0}, std={1})'.format(self.mean, self.std)
    
class AddWhiteBlocks(object):
    """Transform that overwrites a thin strip along one image edge with random 0/1 pixels.

    On each call a strip 1 or 2 pixels thick is written along one randomly
    chosen edge (top, bottom, left or right) of channel 0. The tensor is
    indexed as (channel, row, column); it is modified in place and also
    returned.
    """

    def __call__(self, tensor):
        height, width = tensor.shape[1], tensor.shape[2]
        border_width = np.random.randint(1, 3)  # upper bound is exclusive: 1 or 2
        side = np.random.randint(0, 4)
        if side in (0, 1):
            # Top/bottom strips span every column, so they are `width` wide.
            # Sizing them from the row count only works for square images.
            border = torch.from_numpy(np.random.choice([0., 1.], (border_width, width)))
            if side == 0:
                tensor[0, 0:border_width, :] = border
            else:
                tensor[0, height - border_width:height, :] = border
        else:
            # Left/right strips span every row, so they are `height` tall.
            border = torch.from_numpy(np.random.choice([0., 1.], (height, border_width)))
            if side == 2:
                tensor[0, :, 0:border_width] = border
            else:
                tensor[0, :, width - border_width:width] = border
        return tensor

    def __repr__(self):
        return self.__class__.__name__

# Training-time augmentation pipeline: convert to tensor, optionally corrupt an
# edge, apply a random affine warp, optionally add Gaussian noise, then normalize
# with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.RandomApply([AddWhiteBlocks()], p=0.5),
    transforms.RandomAffine(
        # `degrees` is a (min, max) range. (10, 10) would rotate every training
        # image by exactly +10 degrees instead of sampling a rotation, so use a
        # symmetric range around zero.
        degrees=(-10, 10),
        translate=(0.2, 0.2),
        scale=(0.7, 1.2),
        fill=0
    ),
    transforms.RandomApply([AddGaussianNoise()], p=0.5),
    transforms.Normalize((0.5,), (0.5,)),
])

# Evaluation pipeline: no augmentation, only tensor conversion and the same
# mean/std normalization used for training.
test_transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(mean=(0.5,), std=(0.5,))]
)

In [None]:
# Point the MNIST downloader at an S3 mirror, keeping each file name and checksum.
# NOTE(review): assumes every entry of MNIST.resources is a (url, md5) pair --
# confirm against the installed torchvision version.
new_mirror = 'https://ossci-datasets.s3.amazonaws.com/mnist'
datasets.MNIST.resources = [
    (f"{new_mirror}/{resource_url.split('/')[-1]}", checksum)
    for resource_url, checksum in datasets.MNIST.resources
]

# Training split uses the augmenting pipeline, validation split the plain one.
trainset = datasets.MNIST(root=train_dataset_path, train=True, download=True, transform=transform)
valset = datasets.MNIST(root=test_dataset_path, train=False, download=True, transform=test_transform)
trainloader = DataLoader(dataset=trainset, batch_size=64, shuffle=True)
valloader = DataLoader(dataset=valset, batch_size=1000, shuffle=True)

print(trainset)

In [None]:
# Pull one batch from the training loader and show the second image in it.
dataiter = iter(trainloader)
# Use the built-in next(): the iterator's .next() method is a Python 2 style
# alias that is not available on recent PyTorch DataLoader iterators.
images, labels = next(dataiter)
plt.imshow(images[1].numpy().squeeze(), cmap="gray")

In [None]:
# Report the shapes of the sampled image and label batches, one per line.
print(images.shape, labels.shape, sep="\n")

In [None]:
class SimpleDigitModel(nn.Module):
    """Small convolutional classifier that outputs log-probabilities over 10 classes.

    Three 3x3 convolutions (the first two followed by 2x2 max pooling) feed two
    fully connected layers. The flatten step hard-codes 576 features, which is
    64 channels x 3 x 3: the size the conv/pool stack produces for a
    1 x 28 x 28 input.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3)
        self.maxPool1 = nn.MaxPool2d(2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3)
        self.maxPool2 = nn.MaxPool2d(2)
        # Not used by forward(), which applies element-wise F.dropout instead.
        # Kept so the module's attributes stay as they were.
        self.conv2_drop = nn.Dropout2d()
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3)
        self.fc1 = nn.Linear(576, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Return per-class log-probabilities of shape (N, 10) for input ``x``."""
        x = F.relu(self.maxPool1(self.conv1(x)))
        x = self.maxPool2(self.conv2(x))
        # Dropout is active only in training mode.
        x = F.dropout(x, training=self.training)
        x = F.relu(x)
        x = F.relu(self.conv3(x))
        x = x.view(-1, 576)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        # x is 2-D after the view above, so the class axis is dim 1. Passing it
        # explicitly avoids the deprecated implicit-dimension warning.
        return F.log_softmax(x, dim=1)

In [None]:
# Loss, network and optimizer used by the training cells below.
criterion = nn.NLLLoss()
model = SimpleDigitModel()
optimizer = optim.SGD(params=model.parameters(), lr=0.003, momentum=0.9)

In [None]:
# Number of passes over the training set, plus the histories that the loss plot reads.
n_epochs = 20
train_losses, train_counter, test_losses = [], [], []
# x positions for the evaluation points: multiples of the training-set size,
# from 0 up to and including n_epochs.
test_counter = [len(trainloader.dataset) * k for k in range(n_epochs + 1)]

def train(epoch):
    """Run one training epoch over ``trainloader``.

    Uses the notebook-level ``model``, ``optimizer`` and ``criterion``. Every
    10th batch the current loss is printed and appended to ``train_losses``,
    and the matching number of samples seen is appended to ``train_counter``.

    Args:
        epoch: 1-based epoch number, used in the log line and to offset the
            sample count recorded in ``train_counter``.
    """
    model.train()
    dataset_size = len(trainloader.dataset)
    for batch_idx, (data, target) in enumerate(trainloader):
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 10 == 0:
            loss_value = loss.item()
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data),
                dataset_size,
                100. * batch_idx / len(trainloader),
                loss_value
            ))
            train_losses.append(loss_value)
            # Read the batch size from the loader rather than a literal 64, so
            # the plot's x-axis stays correct if the loader is reconfigured.
            # Fall back to the current batch when the loader reports none.
            batch_size = trainloader.batch_size or len(data)
            train_counter.append(batch_idx * batch_size + (epoch - 1) * dataset_size)

def test():
    """Evaluate ``model`` on ``valloader`` and record the result.

    Uses the notebook-level ``model``, ``criterion`` and ``valloader``. The
    per-batch losses are averaged over the number of batches; the average is
    appended to ``test_losses`` and printed together with the accuracy.
    """
    model.eval()
    test_loss = 0.0
    correct = 0
    with torch.no_grad():
        for data, target in valloader:
            output = model(data)
            # .item() keeps the running totals as plain Python numbers, so
            # test_losses holds floats (not 0-dim tensors) for plotting and
            # the printed counts are ordinary ints.
            test_loss += criterion(output, target).item()
            _, pred_indices = output.max(1, keepdim=True)
            correct += pred_indices.eq(target.view_as(pred_indices)).sum().item()
    test_loss /= len(valloader)
    test_losses.append(test_loss)
    print('\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss,
        correct,
        len(valloader.dataset),
        100. * correct / len(valloader.dataset)
    ))
            
# Evaluate once before any training so the loss history starts with the
# untrained model, matching the extra leading entry in test_counter.
test()
# Then alternate one training epoch with one evaluation pass.
for epoch in range(1, n_epochs + 1):
    train(epoch)
    test()

In [None]:
# Loss curves: training loss as a line, evaluation loss as red markers.
fig = plt.figure()
plt.plot(train_counter, train_losses, color='blue', label='Train Loss')
plt.scatter(test_counter, test_losses, color='red', label='Test Loss')
plt.legend(loc='upper right')
plt.xlabel('number of training examples seen')
plt.ylabel('negative log likelihood loss')

In [None]:
# Save the final weights and optimizer state as a checkpoint.
checkpoint_path = Path("models/checkpoints/simple_digit_model_end.pt")
# torch.save does not create missing parent directories, so make sure the
# target folder exists before writing.
checkpoint_path.parent.mkdir(parents=True, exist_ok=True)
torch.save({
            # Record the epoch count actually used instead of a literal 20.
            'epoch': n_epochs,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict()
            }, checkpoint_path)

In [None]:
import cv2

# Load the test image, convert it to grayscale and scale pixel values to [0, 1].
iin_path = "iin_test2.jpg"
iin = cv2.imread(iin_path)
# cv2.imread reports failure by returning None rather than raising. Fail here
# with a clear message instead of an obscure error inside cvtColor.
if iin is None:
    raise FileNotFoundError(f"Could not read image: {iin_path}")
iin = cv2.cvtColor(iin, cv2.COLOR_BGR2GRAY) / 255
plt.imshow(iin)
iin.shape

In [None]:
import imutils

# Split the image into equal-width vertical slices, one per digit, and resize
# each slice to 28x28. Any columns left over after the integer division at the
# right edge are ignored.
num_digits = 12
height, width = iin.shape
width_per_digit = width // num_digits
digits = []
# Loop over num_digits (not a literal 12) so the slice count always agrees
# with the slice width computed above.
for i in range(num_digits):
    digit = iin[:, i*width_per_digit: (i+1)*width_per_digit]
    digit = cv2.resize(digit, (28,28))
    digit = digit[np.newaxis, ...]  # add a leading channel axis
    digits.append(digit)
digits = np.stack(digits)
digits = torch.from_numpy(digits).float()

In [None]:
# Show every extracted digit in a grid four columns wide.
fig = plt.figure()
n_cols = 4
# Round up so a final, partly filled row still gets its own grid row.
n_rows = (len(digits) + n_cols - 1) // n_cols
for i in range(len(digits)):
    plt.subplot(n_rows, n_cols, i + 1)
    plt.imshow(digits[i][0], cmap='gray', interpolation='none')
    plt.xticks([])
    plt.yticks([])
# Lay out once after all subplots exist rather than on every iteration.
plt.tight_layout()

In [None]:
def getLabels(digits):
    """Classify a batch of digit images and print the predictions as one string.

    The batch is normalized with mean 0.5 and std 0.5, passed through the
    notebook-level ``model`` in eval mode without gradient tracking, and the
    highest-scoring class index of each image is printed, concatenated in
    batch order. Nothing is returned.

    Args:
        digits: Tensor batch of images accepted by ``model``.
    """
    normalized = transforms.Normalize(0.5, 0.5)(digits)
    model.eval()
    with torch.no_grad():
        scores = model(normalized)
        _, pred_indices = scores.max(1, keepdim=True)
        predicted = pred_indices.numpy().flatten().tolist()
        print("".join(str(label) for label in predicted))
getLabels(digits)