In [1]:
import sys

# Record which interpreter build is running and where it looks for modules,
# one search-path entry per line.
print(sys.version)
print('\n'.join(map(str, sys.path)))

3.6.9 (default, Nov  7 2019, 10:44:02) 
[GCC 8.3.0]
/usr/lib/python36.zip
/usr/lib/python3.6
/usr/lib/python3.6/lib-dynload

/home/lukec/venv/lib/python3.6/site-packages
/home/lukec/Downloads/jax/build
/home/lukec/.local/lib/python3.6/site-packages
/usr/local/lib/python3.6/dist-packages
/usr/lib/python3/dist-packages
/home/lukec/venv/lib/python3.6/site-packages/IPython/extensions
/home/lukec/.ipython


In [2]:
import numpy as np
import matplotlib.pyplot as plt
import time
import os
import copy
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision as tv

In [3]:
# Use the first CUDA device when one is visible, otherwise fall back to CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

# os.cpu_count() returns None when the number of CPUs cannot be determined,
# but DataLoader needs an integer num_workers. Fall back to 0 in that case,
# which makes the DataLoader load batches in the main process.
num_workers = os.cpu_count() or 0
print(num_workers)

cuda:0
6


In [4]:
# Hyper-parameters used to train the CNN below.

# Directory handed to the MNIST dataset objects (download target / cache).
root = os.path.join('.', 'dataset_root')
# mean, std = [0.13066046], [0.30150425] # based on training set

# Mini-batch size for both data loaders, and the number of training epochs.
batch_size, epochs = 128, 40

# SGD optimizer settings.
lr, momentum = 0.01, 0.9

# StepLR schedule: the learning rate is multiplied by `gamma` every
# `step_size` scheduler steps.
step_size, gamma = 6, 0.1

In [5]:
def imshow(tensor_grid, mean=0., std=1., title=None):
    """Show a batch of images as one tiled grid.

    Parameters
    ----------
    tensor_grid : torch.Tensor
        A 4-dimensional batch of images. It is tiled with
        ``torchvision.utils.make_grid`` and plotted channels-last.
    mean, std : float or array-like, optional
        Applied to the tiled image as ``std * image + mean`` before plotting
        (e.g. to undo a normalisation). The result is clipped to ``[0, 1]``.
    title : str, optional
        Figure title; omitted when ``None``.

    Raises
    ------
    AssertionError
        If ``tensor_grid`` is not a ``torch.Tensor`` or is not 4-dimensional.
    """
    assert isinstance(tensor_grid, torch.Tensor)
    assert tensor_grid.dim() == 4, \
        f'For a batch of images only, {tensor_grid.size()} '

    # Tensor.numpy() only works for CPU tensors that do not require grad, so
    # drop the autograd graph and copy to host memory first. This lets the
    # function be called directly on batches that live on the GPU.
    host_batch = tensor_grid.detach().cpu()

    tiled = tv.utils.make_grid(host_batch)
    # make_grid returns channels-first; matplotlib wants channels-last.
    grid = tiled.numpy().transpose((1, 2, 0))
    grid = std * grid + mean
    grid = np.clip(grid, 0, 1)
    plt.imshow(grid)

    if title is not None:
        plt.title(title)

    # Brief pause so that the figure gets a chance to be drawn.
    plt.pause(0.001)

In [6]:
# prepare data
# foolbox model expects raw numpy array as image
transform = tv.transforms.Compose([
    tv.transforms.ToTensor(),
    # tv.transforms.Normalize(mean, std)
])

# Both MNIST splits share the same location, download flag and transform;
# only the `train` switch differs.
mnist_kwargs = dict(root=root, download=True, transform=transform)
train_dataset = tv.datasets.MNIST(train=True, **mnist_kwargs)
test_dataset = tv.datasets.MNIST(train=False, **mnist_kwargs)

# Both loaders are configured identically (note: the test loader is shuffled
# as well).
loader_kwargs = dict(
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
)
train_loader = DataLoader(train_dataset, **loader_kwargs)
test_loader = DataLoader(test_dataset, **loader_kwargs)

In [7]:
class Net1(nn.Module):
    """Convolutional feature extractor producing a 128-dimensional vector.

    Two 3x3 convolutions (32 then 64 channels), each followed by ReLU and
    2x2 max-pooling, then channel-wise dropout and two fully connected
    layers (256 then 128 units), both with ReLU. ``fc1`` expects 64 feature
    maps of size 5x5, which is what a 1x28x28 input produces.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1)
        self.dropout1 = nn.Dropout2d(p=0.25)
        self.fc1 = nn.Linear(in_features=64 * 5 * 5, out_features=256)
        self.fc2 = nn.Linear(in_features=256, out_features=128)

    def forward(self, x):
        """Map a batch of images to non-negative 128-dimensional features."""
        # Convolution stage: conv -> ReLU -> 2x2 max-pool, twice.
        feats = F.max_pool2d(F.relu(self.conv1(x)), kernel_size=2)
        feats = F.max_pool2d(F.relu(self.conv2(feats)), kernel_size=2)

        # Dropout on whole feature maps, then flatten everything but the batch axis.
        feats = self.dropout1(feats).flatten(start_dim=1)

        # Fully connected stage; the output passes through a final ReLU.
        hidden = F.relu(self.fc1(feats))
        return F.relu(self.fc2(hidden))

In [8]:
class Model_linear(nn.Module):
    """Linear classification head: 128 input features to 10 output scores."""

    def __init__(self):
        super().__init__()
        self.fc3 = nn.Linear(in_features=128, out_features=10)

    def forward(self, x):
        """Return the raw (un-normalised) output of the linear layer."""
        return self.fc3(x)

In [9]:
def train(model, loader, optimizer):
    """Train ``model`` for one pass over ``loader``.

    Parameters
    ----------
    model : torch.nn.Module
        Network to optimise. Its output is fed to ``F.nll_loss``, so it must
        produce log-probabilities.
    loader : iterable of (x, y) batches
        Source of training batches; each batch is moved to the notebook-level
        ``device`` before use.
    optimizer : torch.optim.Optimizer
        Optimizer that updates ``model``'s parameters.

    Returns
    -------
    (float, float)
        Mean loss per sample and accuracy (fraction in ``[0, 1]``), both
        computed over the samples actually drawn from ``loader``.

    Raises
    ------
    ValueError
        If ``loader`` yields no samples.
    """
    model.train()
    total_loss = 0.
    corrects = 0
    n_seen = 0

    for x, y in loader:
        x = x.to(device)
        y = y.to(device)
        n_batch = x.size(0)

        optimizer.zero_grad()
        output = model(x)
        loss = F.nll_loss(output, y)
        loss.backward()
        optimizer.step()

        # for display
        # nll_loss averages over the batch, so weight it by the batch size to
        # get a correct per-sample mean even when the last batch is smaller.
        total_loss += loss.item() * n_batch
        preds = output.max(1, keepdim=True)[1]
        corrects += preds.eq(y.view_as(preds)).sum().item()
        n_seen += n_batch

    # Normalise by the samples seen through *this* loader rather than by the
    # size of the global train_loader, so the metrics are right for any loader.
    if n_seen == 0:
        raise ValueError('loader yielded no samples')
    return total_loss / n_seen, corrects / n_seen

In [10]:
def validate(model, loader):
    """Evaluate ``model`` on every batch of ``loader`` without updating it.

    Parameters
    ----------
    model : torch.nn.Module
        Network to evaluate. Its output is fed to ``F.nll_loss``, so it must
        produce log-probabilities.
    loader : iterable of (x, y) batches
        Source of evaluation batches; each batch is moved to the
        notebook-level ``device`` before use.

    Returns
    -------
    (float, float)
        Mean loss per sample and accuracy (fraction in ``[0, 1]``), both
        computed over the samples actually drawn from ``loader``.

    Raises
    ------
    ValueError
        If ``loader`` yields no samples.
    """
    model.eval()
    total_loss = 0.
    corrects = 0
    n_seen = 0

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device)
            y = y.to(device)
            n_batch = x.size(0)
            output = model(x)
            loss = F.nll_loss(output, y)
            # Weight the batch-mean loss by the batch size (see return value).
            total_loss += loss.item() * n_batch
            preds = output.max(1, keepdim=True)[1]
            corrects += preds.eq(y.view_as(preds)).sum().item()
            n_seen += n_batch

    # Normalise by the samples seen through *this* loader rather than by the
    # size of the global test_loader, so the metrics are right for any loader.
    if n_seen == 0:
        raise ValueError('loader yielded no samples')
    return total_loss / n_seen, corrects / n_seen

In [11]:
# NOTE: NO GPU AT SCHOOL!
# The feature extractor and the linear head are separate modules; they are
# chained here and followed by LogSoftmax so that the pipeline outputs
# log-probabilities.
model1 = Net1()
model_linear = Model_linear()
model_softmax = nn.Sequential(
    model1,
    model_linear,
    nn.LogSoftmax(dim=1),
).to(device)

# SGD with momentum over every parameter of the chained model, with a
# step-wise learning-rate schedule on top.
optimizer = torch.optim.SGD(
    model_softmax.parameters(),
    lr=lr,
    momentum=momentum,
)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)

print(model_softmax)

Sequential(
  (0): Net1(
    (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
    (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
    (dropout1): Dropout2d(p=0.25, inplace=False)
    (fc1): Linear(in_features=1600, out_features=256, bias=True)
    (fc2): Linear(in_features=256, out_features=128, bias=True)
  )
  (1): Model_linear(
    (fc3): Linear(in_features=128, out_features=10, bias=True)
  )
  (2): LogSoftmax()
)


In [12]:
since = time.time()

# Best-so-far bookkeeping has to be initialised once, before the epoch loop.
# Resetting it at the top of every epoch would make each epoch look like the
# best one, so the "best" state would always be the last epoch's.
best_model_state = copy.deepcopy(model_softmax.state_dict())
best_tr_acc = 0.0
best_va_acc = 0.0
prev_loss = 1e10  # not read in this cell; kept in case other cells use it

for epoch in range(epochs):
    start = time.time()

    tr_loss, tr_acc = train(model_softmax, train_loader, optimizer)
    va_loss, va_acc = validate(model_softmax, test_loader)
    # Advance the learning-rate schedule once per epoch, after the optimizer
    # steps taken inside train().
    lr_scheduler.step()

    # save best result
    # NOTE(review): a snapshot is taken only when train AND test accuracy are
    # both at least as good as the stored best, so best_va_acc is not
    # necessarily the highest test accuracy observed -- confirm this is the
    # intended selection rule.
    if tr_acc >= best_tr_acc and va_acc >= best_va_acc:
        best_model_state = copy.deepcopy(model_softmax.state_dict())
        best_tr_acc = tr_acc
        best_va_acc = va_acc

    # display
    time_elapsed = time.time() - start
    print(('[{:2d}] {:.0f}m {:.1f}s Train Loss: {:.4f} Accuracy: {:.4f}%, ' +
        'Test Loss: {:.4f} Accuracy: {:.4f}%').format(
            epoch+1, time_elapsed // 60, time_elapsed % 60,
            tr_loss, tr_acc*100.,
            va_loss, va_acc*100.))

time_elapsed = time.time() - since
print('Training completed in {:.0f}m {:.1f}s'.format(
    time_elapsed // 60,
    time_elapsed % 60))
# best_va_acc is a fraction in [0, 1]; show it with four decimals
# (the previous ':4f' spec set a minimum width, not a precision).
print(f'Best val Acc: {best_va_acc:.4f}')

[ 1] 0m 2.2s Train Loss: 0.7668 Accuracy: 76.4517%, Test Loss: 0.1245 Accuracy: 95.9500%
[ 2] 0m 2.0s Train Loss: 0.1258 Accuracy: 96.0767%, Test Loss: 0.0735 Accuracy: 97.6000%
[ 3] 0m 2.1s Train Loss: 0.0844 Accuracy: 97.3783%, Test Loss: 0.0543 Accuracy: 98.1800%
[ 4] 0m 2.1s Train Loss: 0.0650 Accuracy: 98.0100%, Test Loss: 0.0445 Accuracy: 98.4400%
[ 5] 0m 2.4s Train Loss: 0.0533 Accuracy: 98.3517%, Test Loss: 0.0364 Accuracy: 98.7600%
[ 6] 0m 2.0s Train Loss: 0.0459 Accuracy: 98.5283%, Test Loss: 0.0344 Accuracy: 98.8400%
[ 7] 0m 2.1s Train Loss: 0.0327 Accuracy: 99.0067%, Test Loss: 0.0287 Accuracy: 98.9600%
[ 8] 0m 2.0s Train Loss: 0.0306 Accuracy: 99.0750%, Test Loss: 0.0274 Accuracy: 99.0000%
[ 9] 0m 2.1s Train Loss: 0.0302 Accuracy: 99.0733%, Test Loss: 0.0269 Accuracy: 99.0200%
[10] 0m 2.0s Train Loss: 0.0296 Accuracy: 99.0900%, Test Loss: 0.0266 Accuracy: 99.0300%
[11] 0m 2.1s Train Loss: 0.0281 Accuracy: 99.1450%, Test Loss: 0.0266 Accuracy: 99.0300%
[12] 0m 2.0s Train Lo

In [13]:
# Persist the weights currently held by the chained model. Note that this is
# the live model_softmax state, not the best_model_state snapshot.
torch.save(obj=model_softmax.state_dict(), f='mnist_model3.pt')