In [95]:
import numpy as np
import os
import torch
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
from PIL import Image

In [96]:
class KuzushijiDataset(Dataset):
    def __init__(self, root_dir, train=True, transform=None):
        train_data_imgs = os.path.join(root_dir, "k49-train-imgs.npz")
        train_data_imgs = np.load(train_data_imgs[0:70000])
        train_data_imgs = train_data_imgs.f.arr_0

        train_data_labels = os.path.join(root_dir, "k49-train-labels.npz")
        train_data_labels = np.load(train_data_labels[0:70000])
        train_data_labels = train_data_labels.f.arr_0
        #self.train_data = [train_data_imgs, train_data_labels] # train dataset

        test_data_imgs = os.path.join(root_dir, "k49-test-imgs.npz")
        test_data_imgs = np.load(test_data_imgs)
        test_data_imgs = test_data_imgs.f.arr_0
        
        test_data_labels = os.path.join(root_dir, "k49-test-labels.npz")
        test_data_labels = np.load(test_data_labels)
        test_data_labels = test_data_labels.f.arr_0
        #self.test_data = [test_data_imgs, test_data_labels] # test dataset
        
        self.transform = transform
        #self.train = train
        
        if train:
            self.data = train_data_imgs
            self.targets = train_data_labels
        else:
            self.data = test_data_imgs
            self.targets = test_data_labels
            
        #print(self.data.shape)
        #print(self.targets.shape)

    def __len__(self):
        return(len(self.data))
    
    def __getitem__(self, idx):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (image, target) where target is index of the target class.
        """
        img, target = self.data[idx], int(self.targets[idx])
        #print(self.data[1].shape)
        # doing this so that it is consistent with all other datasets
        # to return a PIL Image
        img = Image.fromarray(img, mode='L') # mode='L' - (8-bit pixels, black and white)

        if self.transform is not None:
            img = self.transform(img)

        #if self.transform is not None:
        #    target = self.transform(target)
            
        

        return img, target        

        

In [97]:
# Load train and test datasets
train_data = KuzushijiDataset("data", train=True, transform=transforms.ToTensor())

test_data = KuzushijiDataset("data", train=False, transform=transforms.ToTensor())

#train_data = train_data[:70000]

print("Train dataset size: ", len(train_data))
print("Test dataset size: ", len(test_data))

Train dataset size:  232365
Test dataset size:  38547


In [98]:
# Split the training dataset into training and validation
#train_data, val_data = torch.utils.data.random_split(train_data[:70000], [50000, 20000])    # split into 200k training & ~32k validation (roughly the size of the test dataset)
train_data, val_data = torch.utils.data.random_split(train_data, [200000, 32365])    # split into 200k training & ~32k validation (roughly the size of the test dataset)

In [117]:
batch_size = 128

# create training data loader
train_loader = torch.utils.data.DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True) 


# create validation data loader
val_loader = torch.utils.data.DataLoader(dataset=val_data, batch_size=batch_size, shuffle=True)

# create test data loader
test_loader = torch.utils.data.DataLoader(dataset=test_data, batch_size=batch_size, shuffle=True)


In [118]:
# define the model

import torch.nn.functional as F

class ConvNet(torch.nn.Module):
    def __init__(self):
        super().__init__()
            # torch.nn.conv2d(in_channels, out_channels, kernel_size)
            # in_channels is the number of layers which it takes in (i.e.num color channels in 1st layer)
            # out_channels is the number of different filters that we use
            # kernel_size is the depthxwidthxheight of the kernel#
            # stride is how many pixels we shift the kernel by each time
        self.conv_layers = torch.nn.Sequential(
            torch.nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5, stride=1), # -4
            torch.nn.ReLU(),
            torch.nn.Conv2d(in_channels=32, out_channels= 64, kernel_size=5, stride=1),#
            torch.nn.ReLU()
        )
        self.fc_layers = torch.nn.Sequential(
            torch.nn.Linear(64 * 20 * 20, 120),
            torch.nn.Linear(120, 49)
        )
        
        # Spatial transformer localization-network
        self.localization = torch.torch.nn.Sequential(
            torch.nn.Conv2d(1, 8, kernel_size=7),
            torch.nn.MaxPool2d(2, stride=2),
            torch.nn.ReLU(True),
            torch.nn.Conv2d(8, 10, kernel_size=5),
            torch.nn.MaxPool2d(2, stride=2),
            torch.nn.ReLU(True)
        )

        # Regressor for the 3 * 2 affine matrix
        self.fc_loc = torch.nn.Sequential(
            torch.nn.Linear(10 * 3 * 3, 32),
            torch.nn.ReLU(True),
            torch.nn.Linear(32, 3 * 2)
        )

        # Initialize the weights/bias with identity transformation
        self.fc_loc[2].weight.data.zero_()
        self.fc_loc[2].bias.data.copy_(torch.tensor([1, 0, 0, 0, 1, 0], dtype=torch.float))

        
    def stn(self, x):
        xs = self.localization(x)
        xs = xs.view(-1, 10 * 3 * 3)
        theta = self.fc_loc(xs)
        theta = theta.view(-1, 2, 3)

        grid = F.affine_grid(theta, x.size())
        x = F.grid_sample(x, grid)

        return x
    
    def forward(self, x):
        x = self.stn(x)
        x = self.conv_layers(x)
        x = x.view(x.shape[0], -1)
        x = self.fc_layers(x)
        X = F.softmax(x, dim=1)
        return x

    
    
class ConvNet2(torch.nn.Module):
    def __init__(self):
        super().__init__()
            # torch.nn.conv2d(in_channels, out_channels, kernel_size)
            # in_channels is the number of layers which it takes in (i.e.num color channels in 1st layer)
            # out_channels is the number of different filters that we use
            # kernel_size is the depthxwidthxheight of the kernel#
            # stride is how many pixels we shift the kernel by each time
        self.cnn1 = torch.nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5, stride=1) #out 32x24x24
        self.cnn2 = torch.nn.Conv2d(in_channels=32, out_channels= 64, kernel_size=5, stride=1) #out 64x20x20
        self.fc1 = torch.nn.Linear(64 * 20 * 20, 120)
        self.fc2 = torch.nn.Linear(120, 49)
    def forward(self, x):
        x = F.relu(self.cnn1(x))
        x = F.relu(self.cnn2(x))
        x = x.view(x.shape[0], -1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        x = F.softmax(x, dim=1)
        return x

In [119]:
use_cuda = torch.cuda.is_available()
use_cuda = False
device = torch.device("cuda" if use_cuda else "cpu")
print(use_cuda)

False


In [120]:
CNN = ConvNet().to(device)

criterion = torch.nn.CrossEntropyLoss()
#criterion = torch.nn.BCELoss()
# SET UP TRAINING VISUALISATION
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter() # we will use this to show our models performance on a graph

In [121]:
def train(model, epochs, optimiser='Adam', learning_rate = 0.0001, verbose=False, tag='Loss/Train'):
    if(optimiser=='Adam'):
        optimiser = torch.optim.Adam(model.parameters(), lr= learning_rate)
    else:
        optimiser = torch.optim.SGD(model.parameters(), lr= learning_rate)
        
    
    model.train()                                  # put the model into training mode (more on this later)
    for epoch in range(epochs):
        for idx, (inputs, labels) in enumerate(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)
            #print(inputs.shape)
            #print(labels.shape)
        
            
            prediction = model(inputs) # pass the data forward through the model
            #print(prediction.shape)
            #print('lab.type:', type(labels), '  pred.type:', type(prediction))
            
            #print(labels)
            #break
            loss = criterion(prediction, labels) # compute the loss
            #if verbose: print('Epoch:', epoch, '\tBatch:', idx, '\tLoss:', loss)
            optimiser.zero_grad() # reset the gradients attribute of each of the model's params to zero
            loss.backward() # backward pass to compute and set all of the model param's gradients
            optimiser.step() # update the model's parameters
            writer.add_scalar(tag, loss, epoch*len(train_loader) + idx)    # write loss to a graph
        if verbose: print('Epoch:', epoch, '\tAccuracy:', calc_accuracy(model, val_loader), '\tLoss:', loss)

In [122]:
import numpy as np
            
def calc_accuracy(model, dataloader, testset=False):
    misclass_i = []
    misclass_l = []
    num_correct = 0
    num_examples = len(dataloader.dataset)                       # test DATA not test LOADER
    for inputs, labels in dataloader:                  # for all exampls, over all mini-batches in the test dataset
        inputs, labels = inputs.to(device), labels.to(device)
        predictions = model(inputs)
        predictions = torch.max(predictions, axis=1)    # reduce to find max indices along direction which column varies
        predictions = predictions[1]                    # torch.max returns (values, indices)
        num_correct += int(sum(predictions == labels))

        if testset: #generate a list of misclassified data
            if sum(predictions != labels):
                for i in range(len(predictions)):
                    if(predictions[i] != labels[i]):
                        misclass_i.append(inputs[i])
                        misclass_l.append(labels[i])
                
    percent_correct = num_correct / num_examples * 100
    return percent_correct, misclass_i, misclass_l

In [123]:
model = CNN

train(model, epochs=8, optimiser='Adam', learning_rate=0.0001, verbose=True)
print('Train Accuracy:', calc_accuracy(model, train_loader))
print('Test Accuracy:', calc_accuracy(model, test_loader))

Epoch: 0 	Accuracy: (88.18785725320562, [], []) 	Loss: tensor(0.3839, grad_fn=<NllLossBackward>)
Epoch: 1 	Accuracy: (91.41356403522323, [], []) 	Loss: tensor(0.3764, grad_fn=<NllLossBackward>)
Epoch: 2 	Accuracy: (92.93990421751892, [], []) 	Loss: tensor(0.0724, grad_fn=<NllLossBackward>)
Epoch: 3 	Accuracy: (93.72470261084504, [], []) 	Loss: tensor(0.1173, grad_fn=<NllLossBackward>)
Epoch: 4 	Accuracy: (94.38591070600958, [], []) 	Loss: tensor(0.1190, grad_fn=<NllLossBackward>)
Epoch: 5 	Accuracy: (94.3333848292909, [], []) 	Loss: tensor(0.1911, grad_fn=<NllLossBackward>)
Epoch: 6 	Accuracy: (94.57129615325198, [], []) 	Loss: tensor(0.1334, grad_fn=<NllLossBackward>)
Epoch: 7 	Accuracy: (95.19233740151398, [], []) 	Loss: tensor(0.1645, grad_fn=<NllLossBackward>)


NameError: name 'cnn' is not defined