# Machine Intelligence with Deep Learning
## Importance batching for improved training of neural networks
---

In [1]:
import timeit

In [2]:
from models.resnet import ResNet18
from utils.data_utils import DataLoader

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn

import os
import pandas as pd

In [3]:
SEEDS = [10, 42, 4] # don't change!

In [4]:
### Training
def train(epoch, optimizer, criterion):
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()
    train_acc = 100.*correct/total
    train_loss /= total
    return train_acc, train_loss

### Testing
def test(epoch, best_acc, seed):
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, targets)

            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

    # Save checkpoint.
    test_acc = 100.*correct/total
    test_loss /= total
    if test_acc > best_acc:
        best_acc = test_acc
        print("==> Saving to checkpoint..")
        net.save(best_acc, epoch, seed)
    return test_acc, test_loss

In [None]:
resume = False
device = 'cuda' if torch.cuda.is_available() else 'cpu'
if device == 'cuda':
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    
### task: classification of the following classes
classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
    
### hyperparameters
test_acc = 0  # best test accuracy
start_epoch = 0  # start from epoch 0 or last checkpoint epoch
num_epochs = 300 # number of iterations the model gets trained
learning_rate = 0.1 # factor for weight updates
learning_rate_switches = { # learning rate is reset after specific epochs
    '150': 0.01,
    '250': 0.001
}
momentum = 0.9
weight_decay = 5e-4

start = timeit.default_timer()

rows = []
for seed in SEEDS:
    for shuffle_setting in [{'train': False, 'test': False}, {'train': True, 'test': True}]:
        print("\n==> Beginning training with seed {} and shuffle setting {}".format(seed, shuffle_setting))
        print("-" * 30)
        torch.manual_seed(seed)
        if device == 'cuda':
            torch.cuda.manual_seed_all(seed)
             
        ### Model
        print('==> Building model..')
        net = ResNet18()
        net = net.to(device)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=momentum, weight_decay=weight_decay)
        
        ### load the data
        # if needed, specify batch sizes and shuffle settings
        print('==> Loading data..')
        dataloader = DataLoader(batch_sizes={'train': 64, 'test': 64}, shuffle=shuffle_setting)
        dataloader.download_cifar()
        trainloader, testloader = dataloader.get_loaders()

        if resume:
            print('==> Resuming from checkpoint..')
            assert os.path.isdir('serialized'), 'Error: no serialized directory found!'
            ckpt = torch.load('./serialized/ckpt_{}.pth'.format(seed))
            test_acc, start_epoch, net = net.load(ckpt)

        for epoch in range(start_epoch, start_epoch+num_epochs):
            if str(epoch+1) in learning_rate_switches.keys():
                print('==> Resetting learning rate')
                for param_group in optimizer.param_groups:
                    param_group['lr'] = learning_rate_switches[str(epoch+1)]
                    
            train_acc, train_loss = train(epoch, optimizer, criterion)
            test_acc, test_loss = test(epoch, test_acc, seed)
            print("[{}/{}]: Train Acc: {} | Test Acc: {} | Train Loss: {} | Test Loss: {}"\
                  .format(epoch+1, num_epochs, train_acc, test_acc, train_loss, test_loss))
            row = {
                'epoch': epoch + 1,
                'seed': seed,
                'train': True,
                'shuffle': True if shuffle_setting['train'] == True else False,
                'accuracy': train_acc,
                'loss': train_loss
            }
            rows.append(row)
            row = {
                'epoch': epoch + 1,
                'seed': seed,
                'train': False,
                'shuffle': True if shuffle_setting['train'] == True else False,
                'accuracy': test_acc,
                'loss': test_loss
            }
            rows.append(row)
            
stop = timeit.default_timer()
logging_df = pd.DataFrame(rows, columns=['epoch', 'seed', 'train', 'shuffle', 'accuracy', 'loss'])        
logging_df.to_csv('training_logs.txt', sep='\t', index=False)
print(stop - start)


==> Beginning training with seed 10 and shuffle setting {'train': False, 'test': False}
------------------------------
==> Building model..
==> Loading data..
==> Preparing data..
Files already downloaded and verified
Files already downloaded and verified
==> Saving to checkpoint..
[1/300]: Train Acc: 30.204 | Test Acc: 42.21 | Train Loss: 0.029692338154315948 | Test Loss: 0.024461873602867126
==> Saving to checkpoint..
[2/300]: Train Acc: 47.41 | Test Acc: 54.88 | Train Loss: 0.022342565851211547 | Test Loss: 0.020094860738515855
==> Saving to checkpoint..
[3/300]: Train Acc: 61.142 | Test Acc: 62.8 | Train Loss: 0.017034988161325454 | Test Loss: 0.016805401432514192
==> Saving to checkpoint..
[4/300]: Train Acc: 70.042 | Test Acc: 68.6 | Train Loss: 0.013407085995078087 | Test Loss: 0.01476631572842598
==> Saving to checkpoint..
[5/300]: Train Acc: 74.12 | Test Acc: 74.18 | Train Loss: 0.011554079818725586 | Test Loss: 0.012024174857139587
==> Saving to checkpoint..
[6/300]: Train A

[63/300]: Train Acc: 84.456 | Test Acc: 79.68 | Train Loss: 0.007154313747435808 | Test Loss: 0.009137991181015968
==> Saving to checkpoint..
[64/300]: Train Acc: 84.56 | Test Acc: 82.22 | Train Loss: 0.007115069808065891 | Test Loss: 0.008532966715097427
[65/300]: Train Acc: 84.486 | Test Acc: 76.39 | Train Loss: 0.007089261915683746 | Test Loss: 0.011465628811717034
==> Saving to checkpoint..
[66/300]: Train Acc: 84.54 | Test Acc: 78.68 | Train Loss: 0.007164024459421635 | Test Loss: 0.010375807008147239
[67/300]: Train Acc: 84.398 | Test Acc: 77.29 | Train Loss: 0.007120339586436749 | Test Loss: 0.010807789206504822
==> Saving to checkpoint..
[68/300]: Train Acc: 84.514 | Test Acc: 80.71 | Train Loss: 0.00713280066549778 | Test Loss: 0.008897367417812347
[69/300]: Train Acc: 84.588 | Test Acc: 78.57 | Train Loss: 0.007102197563052177 | Test Loss: 0.009732581359148026
[70/300]: Train Acc: 84.48 | Test Acc: 76.84 | Train Loss: 0.007141555557847023 | Test Loss: 0.011013723051548005
==>

[127/300]: Train Acc: 84.692 | Test Acc: 77.76 | Train Loss: 0.007000757067501545 | Test Loss: 0.01033462873697281
==> Saving to checkpoint..
[128/300]: Train Acc: 84.956 | Test Acc: 80.11 | Train Loss: 0.006963993461132049 | Test Loss: 0.009009414859116077
==> Saving to checkpoint..
[129/300]: Train Acc: 84.682 | Test Acc: 80.72 | Train Loss: 0.007077049410045147 | Test Loss: 0.008867117609083652
[130/300]: Train Acc: 84.732 | Test Acc: 79.36 | Train Loss: 0.006974721609354019 | Test Loss: 0.009563758796453476
==> Saving to checkpoint..
[131/300]: Train Acc: 84.7 | Test Acc: 82.3 | Train Loss: 0.007016783650517464 | Test Loss: 0.008271797536313534
[132/300]: Train Acc: 84.854 | Test Acc: 77.39 | Train Loss: 0.007003361097872258 | Test Loss: 0.010534577694535256
==> Saving to checkpoint..
[133/300]: Train Acc: 84.91 | Test Acc: 79.51 | Train Loss: 0.006966587016880512 | Test Loss: 0.009603984582424164
==> Saving to checkpoint..
[134/300]: Train Acc: 84.626 | Test Acc: 80.85 | Train Los

==> Saving to checkpoint..
[190/300]: Train Acc: 96.94 | Test Acc: 90.93 | Train Loss: 0.001416290818527341 | Test Loss: 0.005013859637081623
[191/300]: Train Acc: 96.818 | Test Acc: 90.63 | Train Loss: 0.0014115072971582414 | Test Loss: 0.0051766605123877525
[192/300]: Train Acc: 96.988 | Test Acc: 90.59 | Train Loss: 0.001381943310946226 | Test Loss: 0.005429361417889595
==> Saving to checkpoint..
[193/300]: Train Acc: 96.884 | Test Acc: 90.83 | Train Loss: 0.0013956366735696792 | Test Loss: 0.00525816668793559
[194/300]: Train Acc: 97.254 | Test Acc: 90.38 | Train Loss: 0.0012799927805364133 | Test Loss: 0.0053899135690182445
==> Saving to checkpoint..
[195/300]: Train Acc: 96.788 | Test Acc: 91.36 | Train Loss: 0.0014493197614699601 | Test Loss: 0.004959633735194802
[196/300]: Train Acc: 97.212 | Test Acc: 91.23 | Train Loss: 0.001304693985953927 | Test Loss: 0.005114466837421059
[197/300]: Train Acc: 97.046 | Test Acc: 90.31 | Train Loss: 0.0013368735709786415 | Test Loss: 0.00560

==> Saving to checkpoint..
[253/300]: Train Acc: 99.782 | Test Acc: 94.05 | Train Loss: 0.00016435858950018882 | Test Loss: 0.003591726702079177
==> Saving to checkpoint..
[254/300]: Train Acc: 99.808 | Test Acc: 94.06 | Train Loss: 0.0001429077585786581 | Test Loss: 0.0036480036940425634
[255/300]: Train Acc: 99.826 | Test Acc: 93.99 | Train Loss: 0.0001257695323973894 | Test Loss: 0.0037085915580391884
==> Saving to checkpoint..
[256/300]: Train Acc: 99.85 | Test Acc: 94.03 | Train Loss: 0.00011956680737435818 | Test Loss: 0.003658270237967372
==> Saving to checkpoint..
[257/300]: Train Acc: 99.88 | Test Acc: 94.13 | Train Loss: 0.00010427257023751736 | Test Loss: 0.00367501832023263
==> Saving to checkpoint..
[258/300]: Train Acc: 99.916 | Test Acc: 94.18 | Train Loss: 8.816946782171727e-05 | Test Loss: 0.003650773920863867
[259/300]: Train Acc: 99.884 | Test Acc: 94.18 | Train Loss: 9.015406638383866e-05 | Test Loss: 0.00368080006018281
==> Saving to checkpoint..
[260/300]: Train A