# Machine Intelligence with Deep Learning
## Importance batching for improved training of neural networks
---

In [1]:
import timeit

In [2]:
from models.resnet import ResNet18
from utils.data_utils import DataLoader
from utils.logging_utils import *

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn

import os
import pandas as pd

from datetime import datetime
today = datetime.today().strftime('%Y%m%d')

In [3]:
SEEDS = [10, 42, 4] # don't change!
#STRATEGIES = ['freeze', 'shuffle', 'homogeneous', 'heterogeneous', 'max_k_loss', 'min_k_loss'] # can be changed
STRATEGIES = ['max_k_loss', 'min_k_loss']

In [4]:
### Training
def train(epoch, optimizer, criterion_fn, seed, dataloader, strategy, device):
    criterion = criterion_fn()
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(dataloader.yield_batches(strategy, \
                                                  random_state=seed, use_train=True, \
                                                  criterion=criterion_fn(reduction='none'),\
                                                  device=device)):
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()
    train_acc = 100.*correct/total
    train_loss /= total
    return train_acc, train_loss

### Testing
def test(epoch, best_acc, criterion_fn, seed, dataloader):
    criterion = criterion_fn()
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(dataloader.yield_batches('shuffle', \
                                                                               random_state=seed, use_train=False, 
                                                                               num_repeats=200)):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, targets)

            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

    # Save checkpoint.
    test_acc = 100.*correct/total
    test_loss /= total
    if test_acc > best_acc:
        best_acc = test_acc
        net.save(best_acc, epoch, seed, strategy)
    return test_acc, test_loss

In [5]:
resume = False
given_date = '20191113' #only needed if resumed from checkpoint
device = 'cuda' if torch.cuda.is_available() else 'cpu'
if device == 'cuda':
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    
### task: classification of the following classes
classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']

    
### hyperparameters
test_acc = 0  # best test accuracy
start_epoch = 0  # start from epoch 0 or last checkpoint epoch
num_epochs = 100 #100 # number of epochs the model gets trained
#learning_rates = { # learning rate is reset after specific epochs
#    '1': 0.1, # 50 epochs
#    '50': 0.01, # 40 epochs
#    '90': 0.005, # 30 epochs
#    '120': 0.001 # 30 epochs
#}
learning_rate = 0.01
momentum = 0.9
weight_decay = 5e-4

print("Begin training.")
start = timeit.default_timer()
#Logging header
length_table = 90
log_separating_line(length_table)
log_header_line("Seeds: {}".format(SEEDS), length_table)
log_header_line("Strategies: {}".format(STRATEGIES), length_table)
log_header_line("-> Resulting number of iterations: {}".format(len(SEEDS) * len(STRATEGIES)), length_table)
log_header_line("Number of iterations: {}".format(num_epochs), length_table)
log_header_line("Learning rate: {}".format(learning_rate), length_table)
log_header_line("Resuming from checkpoint: {}".format(True if resume else False), length_table)
log_separating_line(length_table)

rows = []
for seed in SEEDS:
    for strategy in STRATEGIES:
                
        np.random.seed(seed)
        torch.manual_seed(seed)
        if device == 'cuda':
            torch.cuda.manual_seed_all(seed)
             
        ### Model
        net = ResNet18()
        net = net.to(device)
        criterion = nn.CrossEntropyLoss # no function!
        optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=momentum, weight_decay=weight_decay)
        
        ### load the data
        # if needed, specify batch sizes and shuffle settings
        dataloader = DataLoader()
        dataloader.download_cifar()
        dataloader.set_model(net)
        print()
        log_separating_line(length_table)
        log_position_header(seed, strategy, length_table)
        log_separating_line(length_table)

        if resume:
            assert os.path.isdir('serialized'), 'Error: no serialized directory found!'
            ckpt = torch.load('./serialized/{}/{}_ckpt_{}.pth'.format(given_date, strategy, seed))
            test_acc, start_epoch, net = net.load(ckpt)

        for epoch in range(start_epoch, start_epoch+num_epochs):
            if strategy in ['max_k_loss', 'min_k_loss']:
                dataloader.initialize_weights(criterion(reduction='none'), device, seed=seed, 
                                              dump='./dump_logs/{}_{}_{}.txt'.format(strategy, seed, epoch))
            # reset learning rate at specific epochs
            #if str(epoch+1) in learning_rates.keys():
            #    for param_group in optimizer.param_groups:
            #        param_group['lr'] = learning_rates[str(epoch+1)]
                    
            train_acc, train_loss = train(epoch, optimizer, criterion, seed, dataloader, \
                                          strategy, device)
            test_acc, test_loss = test(epoch, test_acc, criterion, seed, dataloader)
            log_position_line(epoch + 1, num_epochs, train_acc, test_acc, train_loss, test_loss, length_table)
            row = {
                'epoch': epoch + 1,
                'seed': seed,
                'train': True,
                'strategy': strategy,
                'accuracy': train_acc,
                'loss': train_loss
            }
            rows.append(row)
            row = {
                'epoch': epoch + 1,
                'seed': seed,
                'train': False,
                'strategy': strategy,
                'accuracy': test_acc,
                'loss': test_loss
            }
            rows.append(row)
        log_separating_line(length_table)
            
stop = timeit.default_timer()
time_needed = stop - start
hrs = int(time_needed / 3600)
mins = int((time_needed / 60) % 60)
secs = int(time_needed % 60)
print()
print("Finished training. Time needed: {} hrs {} mins {} secs".format(hrs, mins, secs))

logging_df = pd.DataFrame(rows, columns=['epoch', 'seed', 'train', 'strategy', 'accuracy', 'loss'])   
training_logs_dir = 'evaluation_logs'
logging_df.to_csv('{}.txt'.format(os.path.join(training_logs_dir, today)), sep='\t', index=False)

Begin training.
+----------------------------------------------------------------------------------------+
| Seeds: [10, 42, 4]                                                                     |
| Strategies: ['max_k_loss', 'min_k_loss']                                               |
| -> Resulting number of iterations: 6                                                   |
| Number of iterations: 100                                                              |
| Learning rate: 0.01                                                                    |
| Resuming from checkpoint: False                                                        |
+----------------------------------------------------------------------------------------+
Files already downloaded and verified
Files already downloaded and verified

+----------------------------------------------------------------------------------------+
| Seed: 10      Strategy: max_k_loss                                                    

| [078/100]:    34.64           010.0           0.03384      00.0461                     |
| [079/100]:    22.06           010.0           0.03513      0.04598                     |
| [080/100]:    34.05           08.58           0.03219      0.04214                     |
| [081/100]:    26.39           12.26           0.03112      0.04399                     |
| [082/100]:    43.84           19.48           0.02877      0.04791                     |
| [083/100]:    37.83           08.85           0.02763      0.04712                     |
| [084/100]:    32.11           17.37           0.03268      0.04247                     |
| [085/100]:    43.88           009.9           0.02897      0.04474                     |
| [086/100]:    26.98           17.01           0.03394      0.05459                     |
| [087/100]:    32.95           19.68           0.02893      0.04097                     |
| [088/100]:    040.3           17.93           0.02666      0.05362                     |

| [063/100]:    100.0           010.0           00000.0      0.39326                     |
| [064/100]:    100.0           010.0           00000.0      0.38912                     |
| [065/100]:    100.0           010.0           00000.0      0.38509                     |
| [066/100]:    100.0           010.0           00000.0      0.38113                     |
| [067/100]:    100.0           010.0           00000.0      0.37725                     |
| [068/100]:    100.0           010.0           00000.0      0.37392                     |
| [069/100]:    100.0           010.0           00000.0      0.37009                     |
| [070/100]:    100.0           010.0           00000.0      0.36639                     |
| [071/100]:    100.0           010.0           00000.0      0.36293                     |
| [072/100]:    100.0           010.0           00000.0      0.35893                     |
| [073/100]:    100.0           010.0           00000.0      0.35571                     |

| [048/100]:    62.84           15.88           0.01937      0.06793                     |
| [049/100]:    52.02           10.09           0.03106      00.0539                     |
| [050/100]:    24.75           11.05           0.03471      0.04181                     |
| [051/100]:    29.59           12.21           0.03212      0.04611                     |
| [052/100]:    64.02           10.01           00.0182      0.09011                     |
| [053/100]:    25.45           12.95           0.03926      0.10262                     |
| [054/100]:    32.14           011.0           0.03118      0.03937                     |
| [055/100]:    059.3           12.87           000.024      0.04399                     |
| [056/100]:    23.45           010.0           0.03364      0.04206                     |
| [057/100]:    58.11           14.42           0.02184      0.05404                     |
| [058/100]:    23.92           12.88           0.03515      0.03847                     |

| [033/100]:    100.0           010.0           00000.0      02.6727                     |
| [034/100]:    100.0           010.0           00000.0      2.64603                     |
| [035/100]:    100.0           010.0           00000.0      2.62031                     |
| [036/100]:    100.0           010.0           00000.0      2.58885                     |
| [037/100]:    100.0           010.0           00000.0      2.56457                     |
| [038/100]:    100.0           010.0           00000.0      2.54252                     |
| [039/100]:    100.0           010.0           00000.0      2.51751                     |
| [040/100]:    100.0           010.0           00000.0      2.49209                     |
| [041/100]:    100.0           010.0           00000.0      2.46746                     |
| [042/100]:    100.0           010.0           00000.0      02.4421                     |
| [043/100]:    100.0           010.0           00000.0      2.41979                     |

| [018/100]:    20.61           14.28           0.03763      0.13414                     |
| [019/100]:    24.22           13.15           0.03381      0.04724                     |
| [020/100]:    35.17           10.01           0.02984      00.0585                     |
| [021/100]:    32.38           11.43           0.02923      0.04361                     |
| [022/100]:    19.22           10.48           0.03493      0.04362                     |
| [023/100]:    36.41           012.2           0.02854      0.04914                     |
| [024/100]:    34.17           010.0           0.03193      0.07339                     |
| [025/100]:    026.8           12.39           0.03327      0.05412                     |
| [026/100]:    17.42           10.97           0.03692      0.03901                     |
| [027/100]:    63.72           10.33           0.02162      0.03799                     |
| [028/100]:    50.06           010.0           0.02733      0.04711                     |

| [003/100]:    100.0           010.0           00000.0      2.08469                     |
| [004/100]:    100.0           010.0           00000.0      2.06365                     |
| [005/100]:    100.0           010.0           00000.0      2.04422                     |
| [006/100]:    100.0           010.0           00000.0      2.02489                     |
| [007/100]:    100.0           010.0           00000.0      2.00519                     |
| [008/100]:    100.0           010.0           00000.0      1.97748                     |
| [009/100]:    100.0           010.0           00000.0      1.96287                     |
| [010/100]:    100.0           010.0           00000.0      1.94655                     |
| [011/100]:    100.0           010.0           00000.0      1.92789                     |
| [012/100]:    100.0           010.0           00000.0      1.90602                     |
| [013/100]:    100.0           010.0           00000.0      1.88462                     |

| [094/100]:    100.0           010.0           00000.0      0.60549                     |
| [095/100]:    100.0           010.0           00000.0      0.22669                     |
| [096/100]:    100.0           010.0           00000.0      0.16938                     |
| [097/100]:    100.0           010.0           00000.0      0.20217                     |
| [098/100]:    100.0           010.0           00000.0      0.16714                     |
| [099/100]:    100.0           010.0           00000.0      0.16343                     |
| [100/100]:    100.0           010.0           00000.0      0.16743                     |
+----------------------------------------------------------------------------------------+

Finished training. Time needed: 8 hrs 35 mins 38 secs
