# Import packages

In [67]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
%matplotlib inline

In [68]:
import torch 
from torch import nn 
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import torchvision as tv 
from torchvision.datasets import MNIST
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import Subset

# Configurations

In [69]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
config = {
    "batch_size": 32,
    "test_batch_size": 1000,
    "epochs": 14,
    "lr": 1.0,
    "gamma": 0.7,
    "dry_run": False,
    "seed": 1,
    "log_interval": 10,
    "save_model": False,
    "data_dir": "./data",
    "subset_size": 500
}

# Import and Preprocessing Dataset

In [70]:
def get_balanced_subset_indices(targets, num_classes, num_per_class):
    indices = []
    for i in range(num_classes):
        class_indices = (targets == i).nonzero(as_tuple=True)[0]
        random_indices = np.random.choice(class_indices, num_per_class, replace=False)
        indices.extend(random_indices)
    return indices

In [71]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

full_train_dataset = MNIST(root = './data', train = True, download = True, transform = transform)
test_set = MNIST(root = './data', train = False, download = False, transform = transform)

num_classes = 10
num_per_class = 50

train_indices = get_balanced_subset_indices(full_train_dataset.targets, num_classes, num_per_class)

train_set = Subset(full_train_dataset, train_indices) if config['subset_size'] is None else full_train_dataset

In [72]:
print(train_indices)

[46896, 51980, 47389, 46740, 22208, 45235, 23210, 39856, 28971, 30434, 19504, 29052, 15711, 19303, 51892, 4756, 2493, 19292, 58952, 7465, 55917, 29560, 48196, 46871, 25650, 48322, 25278, 24346, 37079, 24661, 45511, 42157, 46627, 13880, 49639, 34580, 19490, 55006, 21635, 24388, 43728, 4002, 10471, 54595, 53893, 2411, 26040, 42552, 31079, 26713, 16718, 4160, 32054, 26125, 12226, 25436, 34272, 32795, 58887, 48361, 25853, 41282, 4066, 57599, 34010, 11236, 27501, 25751, 39776, 40004, 20699, 4972, 26827, 11352, 40696, 4129, 16233, 42874, 36145, 9069, 14310, 397, 40551, 43118, 16525, 44650, 32353, 24626, 1132, 43872, 55609, 48741, 33110, 4186, 29062, 14992, 59114, 49592, 5245, 10741, 44639, 48017, 10582, 7266, 32627, 46867, 5135, 45022, 15363, 23578, 46574, 11109, 15712, 54330, 42789, 14517, 52839, 14458, 51458, 33775, 50913, 52083, 16018, 19115, 48220, 1754, 18815, 30997, 38692, 42439, 43376, 50271, 4380, 24035, 28335, 1920, 53541, 44993, 16527, 59621, 44347, 27525, 1188, 36485, 2171, 18633,

# Nerual Network Definition

In [73]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(1, 32, 3, 1),
            nn.ReLU(),
            nn.Conv2d(32, 64, 3, 1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Dropout(0.25)
        )
        self.fc_layers = nn.Sequential(
            nn.Linear(9216, 128),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, 10)
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = torch.flatten(x, 1)
        x = self.fc_layers(x)
        return F.log_softmax(x, dim=1)

# Evaluate the model

In [74]:
def train(args, model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % args['log_interval'] == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
            if args['dry_run']:
                break

In [75]:
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [78]:
train_kwargs = {'batch_size': config['batch_size']}
test_kwargs = {'batch_size': config['test_batch_size']}
if torch.cuda.is_available():
    cuda_kwargs = {'num_workers': 1,
                   'pin_memory': True,
                   'shuffle': True}
    train_kwargs.update(cuda_kwargs)
    test_kwargs.update(cuda_kwargs)

for i in range(len(train_set)):
    data, target = train_set[i]
    print(f'{i}:{target}')
train_loader = DataLoader(train_set, **train_kwargs)
test_loader = DataLoader(test_set, **test_kwargs)

0:0
1:0
2:0
3:0
4:0
5:0
6:0
7:0
8:0
9:0
10:0
11:0
12:0
13:0
14:0
15:0
16:0
17:0
18:0
19:0
20:0
21:0
22:0
23:0
24:0
25:0
26:0
27:0
28:0
29:0
30:0
31:0
32:0
33:0
34:0
35:0
36:0
37:0
38:0
39:0
40:0
41:0
42:0
43:0
44:0
45:0
46:0
47:0
48:0
49:0
50:1
51:1
52:1
53:1
54:1
55:1
56:1
57:1
58:1
59:1
60:1
61:1
62:1
63:1
64:1
65:1
66:1
67:1
68:1
69:1
70:1
71:1
72:1
73:1
74:1
75:1
76:1
77:1
78:1
79:1
80:1
81:1
82:1
83:1
84:1
85:1
86:1
87:1
88:1
89:1
90:1
91:1
92:1
93:1
94:1
95:1
96:1
97:1
98:1
99:1
100:2
101:2
102:2
103:2
104:2
105:2
106:2
107:2
108:2
109:2
110:2
111:2
112:2
113:2
114:2
115:2
116:2
117:2
118:2
119:2
120:2
121:2
122:2
123:2
124:2
125:2
126:2
127:2
128:2
129:2
130:2
131:2
132:2
133:2
134:2
135:2
136:2
137:2
138:2
139:2
140:2
141:2
142:2
143:2
144:2
145:2
146:2
147:2
148:2
149:2
150:3
151:3
152:3
153:3
154:3
155:3
156:3
157:3
158:3
159:3
160:3
161:3
162:3
163:3
164:3
165:3
166:3
167:3
168:3
169:3
170:3
171:3
172:3
173:3
174:3
175:3
176:3
177:3
178:3
179:3
180:3
181:3
182:3
183:3
184:3


In [77]:
model = Net().to(device)
optimizer = optim.Adadelta(model.parameters(), lr=config['lr'])

scheduler = StepLR(optimizer, step_size=1, gamma=config['gamma'])
for epoch in range(1, config['epochs'] + 1):
    train(config, model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)
    scheduler.step()

if config['save_model']:
    torch.save(model.state_dict(), "mnist_cnn.pt")


Test set: Average loss: 2.2987, Accuracy: 1614/10000 (16%)


Test set: Average loss: 2.2787, Accuracy: 1829/10000 (18%)


Test set: Average loss: 2.1761, Accuracy: 2891/10000 (29%)


Test set: Average loss: 1.9264, Accuracy: 4421/10000 (44%)


Test set: Average loss: 1.6092, Accuracy: 5993/10000 (60%)


Test set: Average loss: 1.2803, Accuracy: 6673/10000 (67%)


Test set: Average loss: 1.0566, Accuracy: 7353/10000 (74%)


Test set: Average loss: 0.9064, Accuracy: 7733/10000 (77%)


Test set: Average loss: 0.8180, Accuracy: 7998/10000 (80%)


Test set: Average loss: 0.7553, Accuracy: 8173/10000 (82%)


Test set: Average loss: 0.7194, Accuracy: 8284/10000 (83%)


Test set: Average loss: 0.6947, Accuracy: 8352/10000 (84%)


Test set: Average loss: 0.6797, Accuracy: 8392/10000 (84%)


Test set: Average loss: 0.6697, Accuracy: 8411/10000 (84%)

