***Challenge 1***

Here the goal is to train on 100 samples. In this preliminary testbed, evaluation is done on a 2000-sample validation set. Note that the final evaluation will be done on the full CIFAR-10 test set and potentially on a separate dataset as well. The validation samples here must not be used for training in any way; the final evaluation will provide only random samples of 100 from a data source that is not the CIFAR-10 training data. 

Feel free to modify this testbed to your liking, including the normalization transformations etc. Note however the final evaluation testbed will have a rigid set of components where you will need to place your answer. The only constraint is the data. Refer to the full project instructions for more information.


In [1]:
import os
output_path = 'output_txt/'
img_path = 'img/'
Colab = False
Kaggle = 'kaggle' in os.getcwd()
root = '.' # Root to download dataset
if 'google.colab' in str(get_ipython()):
    print('Running on CoLab')
    Colab = True  
    from google.colab import drive
    if not os.path.exists('/content/drive/MyDrive/'):
        drive.mount('/content/drive', force_remount=False)
    else:
        print('Drive already mounted at at /content/drive')
    Google_path = '/content/drive/MyDrive/Colab Notebooks/COMP691_project/'
    if not os.path.exists(Google_path):
        os.mkdir(Google_path)
    img_path = Google_path + img_path
    if not os.path.exists(img_path):
        os.mkdir(img_path)
    output_path = Google_path + output_path  
else:
    if Kaggle:
        root = '../input/cifar10'
        output_path = ''
        img_path = ''
        print('Running in Kaggle')
    else:
        print('Not running on CoLab or Kaggle')
output_file_name = 'report_ADAM_cosine_improve_ResNet9_RandomSearch_new.txt'
output_file_path = output_path + output_file_name
progress_file = output_path + 'Random_search_ResNet9_progress_new.txt'
img_file_name_prefix = output_file_name.replace('.txt', '')
img_file_path = img_path + img_file_name_prefix + '/'
save_state_file_path = output_file_path.replace('.txt', '.pkl')
if not os.path.exists(img_file_path):
    os.mkdir(img_file_path)
if not os.path.exists(output_path):
    os.mkdir(output_path)

Running on CoLab
Drive already mounted at at /content/drive


Set up the training functions. Again, you are free to fully modify this testbed in your prototyping, within the constraints of the data used. You can also use tools outside of PyTorch for training models if desired, although the torchvision dataloaders will still be useful for interacting with the CIFAR-10 dataset. 

In [2]:
import gc
import math
from matplotlib import pyplot as plt 
from numpy import unravel_index

def train(model, device, train_loader, optimizer, epoch, grad_clip=None, sched=None, display=True):
    model.train()
    loss_function = nn.CosineEmbeddingLoss()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data).to(device)

        GT=torch.zeros((len(target),10))
        for idx in range(len(target)):
            GT[idx][target[idx]]=1

        GT=GT.to(device)
        
        loss = loss_function(output, GT, torch.Tensor(output.size(0)).to(device).fill_(1.0))
        #loss = F.cross_entropy(output, target)
        loss.backward()
        if grad_clip:
            nn.utils.clip_grad_value_(model.parameters(), grad_clip)
        optimizer.step()
        if sched:
            sched.step()
        if display and (batch_idx == 0 or batch_idx + 1 == len(train_loader)):
          print('   Train Epoch: {} [step {}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
              epoch + 1, batch_idx + 1, len(train_loader),
              100. * batch_idx / len(train_loader), loss.detach().item()))
        if device == torch.device('cuda'):
            del loss, output
            gc.collect()
            torch.cuda.empty_cache()

def test(model, device, test_loader, display=True):
    model.eval()
    test_loss = 0
    correct = 0
    loss_function = nn.CosineEmbeddingLoss()
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            GT=torch.zeros((len(target),10))
            for idx in range(len(target)):
                GT[idx][target[idx]]=1

            GT=GT.to(device)
            
            test_loss += loss_function(output, GT, torch.Tensor(output.size(0)).to(device).fill_(1.0)).item() # sum up batch loss
            #test_loss += F.cross_entropy(output, target, size_average=False).item()
            pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    if display:
        print('   Test set: Average loss: {:.6f}, Accuracy: {}/{} ({:.2f}%)'.format(
            test_loss, correct, len(test_loader.dataset),
            100. * correct / len(test_loader.dataset)))
    return 100. * correct / len(test_loader.dataset)

def plot_accs(accs_accross_runs, display_str, comment='N/A', save_img=True):
    plt.figure();
    #epochs = list(range(1, len(accs_accross_runs[0]) + 1))
    max_acc_display = ''
    for i, run_accs in enumerate(accs_accross_runs):
        max_acc = max(run_accs)
        max_epochs = [index + 1 for index, acc in enumerate(run_accs) if acc == max_acc]
        max_acc_display = f' - max acc: {max_acc}% at epochs {max_epochs}'
        plt.plot(run_accs, label=f'Run #{i + 1}' + max_acc_display)
    plt.xlabel('epochs')
    plt.ylabel('Test accuracy (%)')
    plt.legend();
    plt.title(f'Test accuracies for \n{display_str}Note: {comment}');

    if save_img:
        scenario = display_str[display_str.index(' ') + 1: display_str.index('/')].replace(' ','')
        img_name = img_file_path + f'{scenario}' + generate_image_suffix()
        #print(img_name)
        plt.savefig(img_name, bbox_inches='tight')

def generate_image_suffix():
    return f'_{time.time()%10000000:.0f}' + '.png'

def performance_summary(accumulated_accs):
    scenarios = len(accumulated_accs)
    str_output = f'\nCurrent performance summary over {scenarios} completed scenario(s): '
    top_20_percent_count = math.ceil(scenarios/10) # Top k performances
    k = top_20_percent_count
    top_20_final_accs = [(1, 0, 0) for _ in range(1, k + 1)] # (scenario, final acc means, std)
    top_20_max_accs = [(1, 0, 0, 0) for _ in range(1, k + 1)] # (scenario, run, epoch, max_acc)
    top_20_least_variant = [(1, 0, 999) for _ in range(1, k + 1)] # (scenario, final acc means, std)
    for scenario, scenario_accs in enumerate(accumulated_accs):
        
        scenario_accs = np.array(scenario_accs)
        run_final_accs_mean, run_final_acc_std = scenario_accs[:, -1].mean(), scenario_accs[:, -1].std()
        #print(f'scen {scenario} - acc {run_final_accs_mean:.3f} - std {run_final_acc_std:.3f}')
        idx = np.argpartition(scenario_accs, -k, axis=None)
        top_k_scenario_max_indices = [unravel_index(i, scenario_accs.shape) for i in np.sort(idx[-k:])]
        #print(top_k_scenario_max_indices)
        for run, epoch in top_k_scenario_max_indices:
            #print(run, epoch)
            top_scenario_max_acc = scenario_accs[run, epoch]
            top_20_max_accs.append((scenario, run, epoch, top_scenario_max_acc))
        #print(top_20_max_accs)
        if k < len(top_20_max_accs):
            global_max_accs = [top_scenario_max_acc for (_, _, _, top_scenario_max_acc) in top_20_max_accs]
            idx = np.argpartition(global_max_accs, -k, axis=None)
            top_20_max_accs = [top_20_max_accs[i] for i in sorted(idx[-k:].tolist(), reverse=True)]
        #for i, (max_scen, max_acc_mean, max_acc_std) in enumerate(top_20_final_accs):
        top_20_final_accs.append((scenario, run_final_accs_mean, run_final_acc_std))
        if k < len(top_20_final_accs):
            global_final_accs = [run_final_accs_mean for (_, run_final_accs_mean, _) in top_20_final_accs]
            idx = np.argpartition(global_final_accs, -k, axis=None)
            top_20_final_accs = [top_20_final_accs[i]  for i in sorted(idx[-k:].tolist(), reverse=True)]
        top_20_least_variant.append((scenario, run_final_accs_mean, run_final_acc_std))
        #print(top_20_least_variant)
        if k < len(top_20_least_variant):
            global_final_stds = [run_final_acc_std for (_, _, run_final_acc_std) in top_20_least_variant]
            #print(global_final_stds)
            idx = np.argpartition(global_final_stds, k, axis=None)
            #print(idx)
            top_20_least_variant = [top_20_least_variant[i] for i in sorted(idx[:k].tolist())]
    str_output += f'\n - Top {k} final average accuracy:'
    for (scenario, run_final_accs_mean, run_final_acc_std) in top_20_final_accs:
        str_output += f'\n   + Scenario: {scenario + 1} - Final average accuracy: {run_final_accs_mean:.2f} +- {run_final_acc_std:.2f}%'
    str_output += f'\n - Top {k} max accuracy:'
    for (scenario, run, epoch, top_scenario_max_acc) in top_20_max_accs:
        str_output += f'\n   + Scenario: {scenario + 1} - run {run + 1} - epoch {epoch + 1} - Max accuracy: {top_scenario_max_acc:.2f}'
    str_output += f'\n - Top {k} least final accuracy variation:'
    for (scenario, run_final_accs_mean, run_final_acc_std) in top_20_least_variant:
        str_output += f'\n   + Scenario: {scenario + 1} - Final average accuracy: {run_final_accs_mean:.2f} +- {run_final_acc_std:.2f}%'
    return str_output

In [3]:
import torch.nn as nn 
import torch.nn.functional as F
num_classes = 10
in_channels = 3

def conv_block(in_channels, out_channels, drop_out=0, pool=False):
    layers = [nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1), 
              nn.BatchNorm2d(out_channels), 
              nn.ReLU(inplace=True), nn.Dropout(drop_out)
              ]
    if pool: layers.append(nn.MaxPool2d(2))
    return nn.Sequential(*layers)

class NET(nn.Module):
    def __init__(self, in_channels, num_classes, drop_out):
#         super().__init__()
#         # Use a pretrained model
#         self.network = models.resnet34(pretrained=True)
#         # Replace last layer
#         num_ftrs = self.network.fc.in_features
#         self.network.fc = nn.Linear(num_ftrs, num_classes)
        super().__init__()
        
        self.conv1 = conv_block(in_channels, 64, drop_out)
        self.conv2 = conv_block(64, 128, drop_out, pool=True)
        self.res1 = nn.Sequential(conv_block(128, 128, drop_out), conv_block(128, 128, drop_out))
        self.dropout = nn.Dropout(drop_out)
        self.conv3 = conv_block(128, 256, drop_out, pool=True)
        self.conv4 = conv_block(256, 512, drop_out, pool=True)
        self.res2 = nn.Sequential(conv_block(512, 512, drop_out), conv_block(512, 512, drop_out))
        self.conv5 = conv_block(512, 1028, drop_out, pool=True)
        self.res3 = nn.Sequential(conv_block(1028, 1028, drop_out), conv_block(1028, 1028, drop_out))
        
        self.classifier = nn.Sequential(nn.MaxPool2d(2), 
                                        nn.Flatten(), 
                                        nn.Linear(1028, num_classes))
        
    
#     def forward(self, xb):
        
#         return torch.relu(self.network(xb))
    def forward(self, xb):
        out = self.conv1(xb)
        out = self.conv2(out)
        out = self.res1(out) + out
        out = self.conv3(out)
        out = self.dropout(out)
        out = self.conv4(out)
        out = self.dropout(out)
        out = self.res2(out) + out
        out = self.conv5(out)
        out = self.res3(out) + out
        out = self.classifier(out)
        return out



The cell below tries several random problem instances (the number is set by `runs`). In your development you may choose to prototype with 1 problem instance, but keep in mind that for small-sample problems the variance is high, so continuously evaluating on several subsets will be important.

In [None]:
#%%time
import time
import pickle
from scipy.stats import loguniform
from numpy.random import RandomState
import torchvision
import numpy as np
import torch
import torch.optim as optim
from torch.utils.data import Subset
from torchvision import datasets, transforms


search_plot = True
log_enabled = True
save_image = True
summarize = True
comment = 'ResNet9 + Adam optim + Data Augmentation + Random search'
save_state = {}

if summarize:
    if not log_enabled:
        raise NameError('log_enabled should be True in order to enable summarize!')
    if os.path.exists(save_state_file_path):
        with open(save_state_file_path ,'rb') as dataHandle:
            save_state = pickle.load(dataHandle)
accumulated_accs = []
# Epochs: 300 - lr: - 0.001 - dropout: 0 - Weight_decay: 1e-05 - Grad_clip: 0.005 
#epochs_list = [200, 500, 800, 1000]
if len(save_state) == 0:
    epochs_list = [700]
    save_state['epochs'] =  epochs_list
    #grad_clips = [0.005, 0.01, 0.1, 1]
    grad_clips = sorted(list(loguniform(1e-4, 1).rvs(5, random_state=0)))
    save_state['grad_clips'] = grad_clips
    #weight_decays = [1e-3, 1e-4, 1e-5]
    weight_decays = sorted(list(loguniform(1e-5, 1e-3).rvs(5, random_state=0)))
    save_state['weight_decays'] = weight_decays
    lrs = [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05]
    save_state['lrs'] = lrs
    #lrs = [0.01, 0.005, 1e-3, 1e-4]
    #drop_outs = [0, 0.1, 0.2]
    drop_outs = [0, 0.1]
    save_state['drop_outs'] = drop_outs
    if summarize:
        with open(save_state_file_path, 'wb') as dataHandle:
            pickle.dump(save_state, dataHandle)
    
    print(f'First time run on profile {output_file_name}') 
else:
    epochs_list = save_state['epochs']
    grad_clips = save_state['grad_clips']
    weight_decays = save_state['weight_decays']
    lrs = save_state['lrs']
    drop_outs = save_state['drop_outs']
    accumulated_accs = save_state['accs']
    print(f'Successfully loaded save state from profile {output_file_name}') 

# Training/evaluation settings shared by every scenario.
batch_size = 128
runs = 5  # number of random problem instances (seeds) per scenario
epoch_display_range = 100  # print progress every this many epochs

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
#device = torch.device('cpu')
device_name = torch.cuda.get_device_name(0) if device == torch.device('cuda') else 'cpu'
# One scenario per combination of the hyperparameter lists.
scenario_count = len(epochs_list) * len(weight_decays) * len(lrs) * len(drop_outs) * len(grad_clips)
print(f'Total scenarios: {scenario_count}')
# Echo the search space; the accumulated accuracies are skipped.
for key, value in save_state.items():
    if key != 'accs':
        print(f'{key}: {value}')  

# NOTE(review): these look like the commonly used ImageNet channel statistics
# rather than CIFAR-10's - confirm this is intended.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                  std=[0.229, 0.224, 0.225])

# Augmentation is applied to training images only.
transform_train = transforms.Compose([
                                    transforms.RandomCrop(32, padding=4, padding_mode='reflect'),
                                    transforms.RandomGrayscale(),
                                    transforms.RandomHorizontalFlip(),
                                    torchvision.transforms.RandomAffine(degrees=30),
                                    transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.2), 
                                    #transforms.ColorJitter(), 
                                    transforms.ToTensor(), 
                                    normalize]) #careful to keep this one same
# Validation images are only converted to tensors and normalized.
transform_val = transforms.Compose([transforms.ToTensor(), normalize]) 


print('Running on {}'.format(device_name))

##### Cifar Data
# Both datasets read the CIFAR-10 *training* split from the current directory;
# the `root` variable set in the first cell is not used here.
cifar_data = datasets.CIFAR10(root='.',train=True, transform=transform_train, download=True)
    
# A second copy of the same split is needed because the transform is bound to
# the dataset object: this one uses the un-augmented validation transform.
cifar_data_val = datasets.CIFAR10(root='.',train=True, transform=transform_val, download=True)


# Resume bookkeeping. The class-balanced subsets themselves (10 training
# samples per class, 100 in total) are drawn later, inside the run loop.
training_done = False
count = 1      # position counter used while skipping to the scenario to resume
scenario = 1   # 1-based number of the scenario to run next
next_run = 1   # 1-based run (seed) at which that scenario starts
# Results of runs already completed in an interrupted scenario.
previous_runs_accs = []
previous_train_times = []
previous_eval_times = []
previous_exec_times = []

ran_in_middle = False

if log_enabled:
    if os.path.exists(progress_file):
        with open(progress_file, 'r') as file_read:
            progress_content = file_read.readlines()
        # print(progress_content)
        if progress_content[0].replace('\n', '') == output_file_name:
            previous_scenario = progress_content[1].replace('\n', '')
            previous_runs_accs = eval(progress_content[2].replace('\n', ''))
            previous_runs = len(previous_runs_accs)
            print(f'Previous progress on {output_file_name} stopped at scenario {previous_scenario}/{scenario_count}' +\
                 f', run {previous_runs}/{runs}')
            if previous_runs >= runs: # Already complete the previous scenario
                scenario = int(previous_scenario) + 1

            else: # Previous scenario just completed partially, resume in the next run
                ran_in_middle = True
                scenario = int(previous_scenario)
                next_run = previous_runs + 1
                previous_execution_times = eval(progress_content[3].replace('\n', ''))
                prev_accs_accross_runs_plot = eval(progress_content[4].replace('\n', ''))
                for i, previous_execution_time in enumerate(previous_execution_times):
                    previous_train_times.append(previous_execution_time[0]) 
                    previous_eval_times.append(previous_execution_time[1]) 
                    previous_exec_times.append(previous_execution_time[2])  
            if scenario > scenario_count:
                training_done = True
                print('Training was already done!')
            else:
                print(f'Will resume training at scenario: {scenario}, run# {next_run}')


if not training_done:
    for epochs in epochs_list:
        for lr in lrs:
            for drop_out in drop_outs:
                for weight_decay in weight_decays:
                    for grad_clip in grad_clips:

                        if not ran_in_middle: 

                            accs = []
                            train_times = []
                            evaluation_times = []
                            total_times = []
                            run_execution_times = []
                            accs_accross_runs_plot = []
                        else:
                            if count < scenario:
                                count += 1
                                continue #skip until reaching the scenario to run
                            accs = previous_runs_accs
                            train_times = previous_train_times
                            evaluation_times = previous_eval_times
                            total_times = previous_exec_times
                            run_execution_times = previous_execution_times
                            accs_accross_runs_plot = prev_accs_accross_runs_plot
                        #scenario += 1
                        scenario_description = 'Scenario %d/%d - Epochs: %d - lr: - %s - dropout: %s - Weight_decay: %s - Grad_clip: %s'%\
                        (scenario, scenario_count, epochs, lr, drop_out, weight_decay, grad_clip)
                        print('\n' + scenario_description)
                        
                        for seed in range(next_run, runs + 1):
                            start_time = time.time()

                            prng = RandomState(seed)
                            random_permute = prng.permutation(np.arange(0, 5000))
                            indx_train = np.concatenate([np.where(np.array(cifar_data.targets) == classe)[0][random_permute[0:10]] for classe in range(0, 10)])
                            indx_val = np.concatenate([np.where(np.array(cifar_data_val.targets) == classe)[0][random_permute[10:210]] for classe in range(0, 10)])


                            train_data = Subset(cifar_data, indx_train)
                            val_data = Subset(cifar_data_val, indx_val)

                            print('  Run# [%d/%d] - Num Samples For Training %d - Num Samples For Val %d'%(seed, runs, train_data.indices.shape[0],val_data.indices.shape[0]))

                            train_loader = torch.utils.data.DataLoader(train_data,
                                                                        batch_size=batch_size, 
                                                                        shuffle=True)

                            val_loader = torch.utils.data.DataLoader(val_data,
                                                                    batch_size=batch_size, 
                                                                    shuffle=False)

                            model = NET(in_channels, num_classes, drop_out)
                            model.to(device)
                            optimizer = torch.optim.Adam(model.parameters(), 
                                                        lr=lr, 
                                                        #momentum=0.9,
                                                        weight_decay=weight_decay)
                            sched = torch.optim.lr_scheduler.OneCycleLR(optimizer, lr, epochs=epochs, 
                                                                            steps_per_epoch=len(train_loader))
                            test_accs = []
                            eval_time = 0
                            for epoch in range(epochs):
                                print_condition = epoch%epoch_display_range==0 or epoch==epochs-1
                                train(model, device, train_loader, optimizer, epoch, grad_clip=grad_clip,
                                    sched=sched, display=print_condition)
                                if search_plot:
                                    eval_start = time.time()
                                    test_acc = test(model, device, val_loader, display=print_condition)
                                    eval_time = time.time() - eval_start
                                    test_accs.append(test_acc)

                                    
                            train_time = time.time() - start_time    
                            train_times.append(train_time)
                            final_eval_start = time.time()
                            final_acc = test_accs[-1] if search_plot else test(model, device, val_loader)
                            accs.append(final_acc)
                            final_eval_time = eval_time if search_plot else time.time() - final_eval_start
                            evaluation_times.append(final_eval_time)
                            if search_plot:
                                accs_accross_runs_plot.append(test_accs)
                            total_time = time.time() - start_time
                            total_times.append(total_time)
                            run_execution_times.append((train_time, final_eval_time, total_time))
                            if log_enabled:
                                progress_str = f'{output_file_name}\n{scenario}\n{accs}\n{run_execution_times}' +\
                                f'\n{accs_accross_runs_plot}'

                                with open(progress_file, 'w') as progress_write:
                                    progress_write.write(progress_str)
                            if device == torch.device('cuda'):
                                del optimizer
                                gc.collect()
                                torch.cuda.empty_cache()
                            print('  Run execution time: train: %.3f (s) - eval: %.3f (s)- total: %.3f (s)'%\
                                  (train_time, final_eval_time, total_time))
                        accs = np.array(accs)
                        train_times = np.array(train_times)
                        evaluation_times = np.array(evaluation_times)
                        total_times = np.array(total_times)
                        
                        accuracy_description = '\n  Final acc over %d instances: %.2f +- %.2f\n'%(runs, accs.mean(), accs.std())
                        # print(train_times.mean(), evaluation_times.mean(), total_times.mean())
                        display_str = '  %s'%(scenario_description) +\
                        '\n  Train time %.3f +- %.3f (s) - eval time %.3f +- %.3f (s) - total: %.3f +- %.3f (s) on %s'%\
                        (train_times.mean(), train_times.std(), evaluation_times.mean(), evaluation_times.std(),
                             total_times.mean(), total_times.std(), device_name) + accuracy_description
                        accumulated_accs.append(accs_accross_runs_plot)
                        #progress_str = f'{output_file_name}\n{scenario}\n{accs}'
                        print(display_str)
                        if search_plot:
                            plot_str = display_str # scenario_description + accuracy_description
                            plot_accs(accs_accross_runs_plot, plot_str, comment, save_image)
                        if summarize:
                            save_state['accs'] = accumulated_accs
                            with open(save_state_file_path, 'wb') as dataHandle:
                                pickle.dump(save_state, dataHandle)
                            summary = performance_summary(accumulated_accs)
                            print(summary)
                            display_str += summary
                        
                        if log_enabled:
                            mode = 'a' if os.path.exists(output_file_path) else 'w'

                            with open(output_file_path, mode) as output_write:
                                output_write.write('\n' + display_str)
                        ran_in_middle = False
                        next_run = 1
                        scenario += 1

Successfully loaded save state from profile report_ADAM_cosine_improve_ResNet9_RandomSearch_new.txt
Total scenarios: 300
epochs: [700]
grad_clips: [0.004950159553733192, 0.015119336467640998, 0.01567667719550606, 0.02576638574613588, 0.07257005721594274]
weight_decays: [7.035737028722145e-05, 0.00012296071107325705, 0.0001252065381499946, 0.00016051911333587627, 0.000269388301928541]
lrs: [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05]
drop_outs: [0, 0.1]
Running on Tesla P100-PCIE-16GB
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=0.0, max=170498071.0), HTML(value='')))


Extracting ./cifar-10-python.tar.gz to .
Files already downloaded and verified
Previous progress on report_ADAM_cosine_improve_ResNet9_RandomSearch_new.txt stopped at scenario 84/300, run 3/5
Will resume training at scenario: 84, run# 4

Scenario 84/300 - Epochs: 700 - lr: - 0.0005 - dropout: 0.1 - Weight_decay: 0.00012296071107325705 - Grad_clip: 0.02576638574613588
  Run# [4/5] - Num Samples For Training 100 - Num Samples For Val 2000
   Test set: Average loss: 0.007183, Accuracy: 204/2000 (10.20%)
   Test set: Average loss: 0.004804, Accuracy: 565/2000 (28.25%)
   Test set: Average loss: 0.004721, Accuracy: 647/2000 (32.35%)
   Test set: Average loss: 0.004954, Accuracy: 687/2000 (34.35%)
   Test set: Average loss: 0.004904, Accuracy: 664/2000 (33.20%)
   Test set: Average loss: 0.004944, Accuracy: 678/2000 (33.90%)
   Test set: Average loss: 0.004546, Accuracy: 762/2000 (38.10%)
   Test set: Average loss: 0.004553, Accuracy: 759/2000 (37.95%)
  Run execution time: train: 827.166 (