In [1]:
%reset -f
import os
import gzip
import numpy as np
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
from tqdm import tqdm

In [2]:
def load_mnist(path, kind='train'):
    """Read one gzip-compressed IDX label/image pair from `path`.

    Args:
        path: Directory holding `<kind>-labels-idx1-ubyte.gz` and
            `<kind>-images-idx3-ubyte.gz`.
        kind: File-name prefix selecting the split (e.g. 'train' or 't10k').

    Returns:
        A tuple `(images, labels)` of uint8 arrays; `images` has shape
        `(len(labels), 784)`, one flattened image per row.
    """
    def _read_payload(filename, header_bytes):
        # Decompress the whole file and skip the fixed-size header.
        with gzip.open(os.path.join(path, filename), 'rb') as stream:
            return np.frombuffer(stream.read(), dtype=np.uint8,
                                 offset=header_bytes)

    # Labels are read first because their count fixes the image row count.
    labels = _read_payload('%s-labels-idx1-ubyte.gz' % kind, 8)
    pixels = _read_payload('%s-images-idx3-ubyte.gz' % kind, 16)

    images = pixels.reshape(len(labels), 784)
    return images, labels

In [3]:
def _filter(xs, ys, lbls):
    idxs = [i for (i, l) in enumerate(ys) if l in lbls]
    return xs[idxs, :], ys[idxs]

In [4]:
def clear_gpu(model):
    """Move `model` to the CPU and release PyTorch's cached GPU memory.

    Args:
        model: The module to move off the GPU; it is moved in place.

    Returns:
        None.

    Note:
        This function cannot delete the caller's reference to the model. To
        let the model itself be garbage-collected, the caller must drop its
        own reference (e.g. `del net`) after calling this.
    """
    # A `del model` here would only unbind the local name, so it is omitted.
    model.to('cpu')
    # Hand unused cached blocks back to the driver.
    torch.cuda.empty_cache()

In [5]:
class Dataset(torch.utils.data.Dataset):
    """Minimal map-style dataset pairing examples with labels.

    Wraps two equally long indexable containers so they can be fed to
    `torch.utils.data.DataLoader`.
    """

    def __init__(self, X, y):
        """Store the examples `X` and labels `y`.

        Raises:
            AssertionError: If `X` and `y` differ in length.
        """
        # Use a real message: `print(...)` as the assert message evaluates to
        # None and writes to stdout instead of describing the failure.
        assert len(X) == len(y), "Number of examples don't match up"

        self.X = X
        self.y = y

    def __len__(self):
        """Return the number of examples."""
        return len(self.X)

    def __getitem__(self, index):
        """Return the `(example, label)` pair at `index`."""
        return self.X[index], self.y[index]

In [6]:
class Net(torch.nn.Module):
    """Small convolutional classifier with 10 output logits.

    Two conv + ReLU + 2x2 max-pool stages feed three fully connected layers.
    The flatten width `16 * 5 * 5` matches single-channel 28x28 inputs:
    28 -> (conv1, padded) 28 -> (pool) 14 -> (conv2, 5x5) 10 -> (pool) 5.
    """

    def __init__(self):
        super().__init__()
        nn = torch.nn
        # Layers are created in the same order as before so that seeded
        # parameter initialization is unchanged.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6,
                               kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Return unnormalized class scores of shape `(batch, 10)`."""
        # Feature extractor: conv -> ReLU -> shared 2x2 max-pool, twice.
        for conv in (self.conv1, self.conv2):
            x = self.pool(F.relu(conv(x)))

        # Flatten the feature maps into one row per example.
        x = x.view(-1, 16 * 5 * 5)

        # Classifier head: two hidden ReLU layers, then a linear output.
        for hidden in (self.fc1, self.fc2):
            x = F.relu(hidden(x))
        return self.fc3(x)

In [7]:
# Keyword arguments forwarded to torch.utils.data.DataLoader.
dataloader_params = {'batch_size': 32, 'shuffle': True, 'num_workers': 6}
# Use the first CUDA device when available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# NOTE(review): the training loop visible in this notebook hard-codes its own
# epoch count and does not read MAX_EPOCH — confirm which one is intended.
MAX_EPOCH = 100

# Seed PyTorch's RNG so weight initialization and batch shuffling repeat
# across "Restart & Run All" runs.
SEED = 0
torch.manual_seed(SEED)

# Load Data

In [8]:
# Read both splits from the local `data` directory.
train_images, train_labels = load_mnist('data', kind='train')
test_images, test_labels = load_mnist('data', kind='t10k')

# Hold out everything from index 50000 onward as the validation set; the
# validation slices are taken before the training arrays are truncated.
val_images, val_labels = train_images[50000:], train_labels[50000:]
train_images, train_labels = train_images[:50000], train_labels[:50000]

In [9]:
# Task 1: restrict every split to the classes 0, 1, 4, 5 and 8.
(X_train1, y_train1), (X_val1, y_val1), (X_test1, y_test1) = (
    _filter(images, labels, [0, 1, 4, 5, 8])
    for images, labels in (
        (train_images, train_labels),
        (val_images, val_labels),
        (test_images, test_labels),
    )
)

In [10]:
# Task 2: restrict every split to the complementary classes 2, 3, 6, 7 and 9.
(X_train2, y_train2), (X_val2, y_val2), (X_test2, y_test2) = (
    _filter(images, labels, [2, 3, 6, 7, 9])
    for images, labels in (
        (train_images, train_labels),
        (val_images, val_labels),
        (test_images, test_labels),
    )
)

# FMNIST 2 Training

In [11]:
# Unflatten each 784-value row into a 28x28 image, keeping the example count.
# The original assignment order (train, test, val) is preserved.
X_train2, X_test2, X_val2 = (
    split.reshape(len(split), 28, 28)
    for split in (X_train2, X_test2, X_val2)
)

In [12]:
# Insert the single channel axis expected by Conv2d: (N, 28, 28) -> (N, 1, 28, 28).
# An explicit reshape (rather than indexing with np.newaxis) makes the cell
# safe to re-run: an input that is already (N, 1, 28, 28) is left unchanged
# instead of gaining another axis.
X_train2 = X_train2.reshape(len(X_train2), 1, 28, 28)
X_test2 = X_test2.reshape(len(X_test2), 1, 28, 28)
X_val2 = X_val2.reshape(len(X_val2), 1, 28, 28)

In [13]:
# Build the mini-batch loader from the arrays as they are at this point,
# before the names are rebound to device tensors below.
train_data2 = Dataset(X_train2, y_train2)
train_generator2 = torch.utils.data.DataLoader(train_data2, **dataloader_params)

# Full-split tensors on `device` for whole-set evaluation. Every split uses
# the same dtypes: float32 inputs for the conv layers and int64 class indices
# for CrossEntropyLoss. The test split previously kept the raw input dtype and
# float labels, which did not match train/val.
X_train2 = torch.as_tensor(X_train2, dtype=torch.float32, device=device)
y_train2 = torch.as_tensor(y_train2, dtype=torch.long, device=device)

X_val2 = torch.as_tensor(X_val2, dtype=torch.float32, device=device)
y_val2 = torch.as_tensor(y_val2, dtype=torch.long, device=device)

X_test2 = torch.as_tensor(X_test2, dtype=torch.float32, device=device)
y_test2 = torch.as_tensor(y_test2, dtype=torch.long, device=device)

In [14]:
# Instantiate the network and move its parameters to the selected device.
net = Net().to(device)

# Cross-entropy on the network's raw outputs; Adam with its default settings.
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters())

# NOTE(review): `Net` as defined in this notebook has no `fit` method, so the
# call below cannot run as written; training uses the explicit loop that follows.
# history = net.fit(train_generator2, 100, X_val2, y_val2, loss_fn, optimizer)

In [39]:
# Full training split, used for the end-of-epoch metrics.
X = X_train2
y = y_train2

# `self` is kept as an alias of `net` because later cells refer to it.
self = net

# Per-epoch training metrics, stored as plain Python floats so they can be
# plotted directly and do not keep device tensors alive.
history = {
    'acc': [],
    'loss': []
}

# NOTE(review): the epoch count is the literal used for the recorded run;
# MAX_EPOCH from the config cell is not read here.
for epoch in tqdm(range(30)):
    for inputs, labels in train_generator2:
        # Cast each batch to the dtypes the model and loss expect, then move it.
        inputs = inputs.to(device=device, dtype=torch.float32)
        labels = labels.to(device=device, dtype=torch.long)

        optimizer.zero_grad()

        # Call the module rather than `.forward()` so hooks are honoured.
        outputs = net(inputs)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()

    # End-of-epoch metrics on the whole training split, without gradients.
    with torch.no_grad():
        out = net(X)
        preds = out.argmax(dim=1)

        # Vectorized mean; Python's built-in sum() would iterate the tensor
        # one element at a time.
        epoch_acc = (preds == y).float().mean().item()
        epoch_loss = loss_fn(out, y).item()

        history['acc'].append(epoch_acc)
        history['loss'].append(epoch_loss)

100%|██████████| 30/30 [01:30<00:00,  3.03s/it]


In [45]:
# Evaluate the trained network on the validation split.
X = X_val2
y = y_val2

with torch.no_grad():
    # Use `net` directly instead of relying on the `self` alias from another cell.
    out = net(X)
    preds = out.argmax(dim=1)

    # Vectorized mean; Python's built-in sum() would iterate the tensor one
    # element at a time. Both values stay tensors, so the print format is unchanged.
    accuracy = (preds == y).float().mean()
    loss = loss_fn(out, y)

    print(accuracy, loss)

tensor(0.9327, device='cuda:0') tensor(0.4972, device='cuda:0')
