# 1. Library and Data Loading

## 1.1 Load the required Libraries

In [None]:
import torch, torchvision
import torchvision.transforms as transforms
import json
import os
import statistics
from fastai.learner import Learner
import torch.nn.functional as F
from fastai.metrics import accuracy, Precision, Recall
import fastai.callback.schedule
from fastai.optimizer import OptimWrapper
from torch import optim
from functools import partial
from torchvision import datasets
from torch.utils.data import DataLoader
from torch.utils.data import Dataset, TensorDataset
from torch import nn
from fastai.data.core import DataLoaders
from fastai.data.all import *
from fastai.vision.all import *
from sklearn.model_selection import train_test_split

## 1.2 Data Loading

Create helper functions to automatically generate dataloaders for both the training and test datasets.

In [None]:
def create_training_dataloaders(dataset):
    '''
    A helper function that automatically creates fastai dataloaders for a
    given training dataset.

    dataset: A mapping (such as the object returned by np.load on one of the
        .npz files) that provides the images under 'X_tr' and the training
        labels under 'S_tr'.

    The images are converted to float32, rearranged to channels-first layout
    and scaled with normalise_data. A randomly selected 20% of the examples
    form the validation set (80:20 training:validation).

    Returns the fastai DataLoaders object after moving it to the GPU.
    '''
    images = torch.tensor(dataset['X_tr'], dtype=torch.float32)

    # Choose the layout from the array's rank instead of parsing repr(dataset):
    # the repr format is not a stable interface, and str.strip() removes a set
    # of characters rather than a path prefix.
    if images.ndim == 4:
        # Colour images, assumed channels-last (N, H, W, C) -> (N, C, H, W)
        images = images.permute(0, 3, 1, 2)
    else:
        # Single-channel images, assumed (N, H, W) -> (N, 1, H, W)
        images = images.unsqueeze(1)

    X_train = normalise_data(images)
    Y_train = torch.tensor(dataset['S_tr'])

    # Create a datablock to handle the training information, with the validation set being
    # a randomly selected 20% of the training data. Items are integer indices
    # into X_train / Y_train.
    train_data_block = DataBlock(
        blocks=(TransformBlock, CategoryBlock),
        get_x=lambda i: X_train[i],
        get_y=lambda i: Y_train[i].item(),
        splitter=RandomSplitter(valid_pct=0.2)
    )

    # Create the training dataloaders for the information
    train_dataloader = train_data_block.dataloaders(range(len(X_train)))
    train_dataloader.to('cuda')
    return train_dataloader

In [None]:
def create_test_dataloader(dataset):
    '''
    A helper function that automatically creates a dataloader for a given
    test dataset.

    dataset: A mapping (such as the object returned by np.load on one of the
        .npz files) that provides the test images under 'X_ts' and the test
        labels under 'Y_ts'.

    Returns a dataloader that yields one (image, label) example per batch, in
    the stored order, after moving it to the GPU.
    '''
    images = torch.tensor(dataset['X_ts'], dtype=torch.float32)

    # Choose the layout from the array's rank instead of parsing repr(dataset):
    # the repr format is not a stable interface, and str.strip() removes a set
    # of characters rather than a path prefix.
    if images.ndim == 4:
        # Colour images, assumed channels-last (N, H, W, C) -> (N, C, H, W)
        images = images.permute(0, 3, 1, 2)
    else:
        # Single-channel images, assumed (N, H, W) -> (N, 1, H, W)
        images = images.unsqueeze(1)

    test_dataset = TensorDataset(normalise_data(images), torch.tensor(dataset['Y_ts']))

    # NOTE(review): .to() requires a DataLoader class that provides it
    # (presumably fastai's, brought in by the star imports) - confirm.
    test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=False)
    test_dataloader.to('cuda')
    return test_dataloader

In [None]:
def create_train_valid_split(dataset):
    '''
    Helper function to create a train/valid split for the training data.

    dataset: A mapping (such as the object returned by np.load on one of the
        .npz files) that provides the images under 'X_tr' and the training
        labels under 'S_tr'.

    Returns the result of sklearn's train_test_split, i.e.
    (X_train, X_valid, Y_train, Y_valid), holding out 10% of the examples for
    validation. The split uses a fixed random_state, so it is identical on
    every call for the same data.
    '''
    images = torch.tensor(dataset['X_tr'], dtype=torch.float32)

    # Choose the layout from the array's rank instead of parsing repr(dataset):
    # the repr format is not a stable interface, and str.strip() removes a set
    # of characters rather than a path prefix.
    if images.ndim == 4:
        # Colour images, assumed channels-last (N, H, W, C) -> (N, C, H, W)
        images = images.permute(0, 3, 1, 2)
    else:
        # Single-channel images, assumed (N, H, W) -> (N, 1, H, W)
        images = images.unsqueeze(1)

    X_train = normalise_data(images)
    Y_train = torch.tensor(dataset['S_tr'])

    return train_test_split(X_train, Y_train, test_size=0.1, random_state=42)

# 2. Preprocessing

A helper preprocessing function to normalise data to the range of 0-1. This function is used in the data loading functions to normalise the data provided to the dataloaders.

In [None]:
def normalise_data(tensor):
    '''
    Scale a tensor by 1/255.

    For inputs holding 8-bit pixel intensities (0-255) the result lies in the
    range 0-1. A new tensor is returned; the input is not modified.
    '''
    max_intensity = 255.0
    return torch.div(tensor, max_intensity)

# 3. Custom Model and Loss Definitions

## 3.1 CNN Model Definition

In [None]:
class Net(nn.Sequential):
    '''
    A configurable CNN: a stack of convolutions followed by a stack of fully
    connected layers, with LeakyReLU activations in between.

    input_dimension_size: Side length of the (square) input images.
    conv_layers: Number of convolutional layers.
    conv_dimensions: Kernel size given to every nn.Conv2d layer.
    fc_layers: Number of fully connected layers. Values of 1 or less produce a
        single linear layer mapping straight to the class scores.
    fc_neurons: Width of each hidden fully connected layer.
    rgb: True for 3-channel inputs, False for single-channel inputs.
    num_classes: Number of output scores (defaults to 4).

    Raises ValueError when the convolutions would shrink the image below 1x1.
    '''
    def __init__(self, input_dimension_size, conv_layers, conv_dimensions, fc_layers, fc_neurons, rgb, num_classes=4):
        channels = 3 if rgb else 1

        # Each convolution uses nn.Conv2d's default stride and padding, so it
        # shrinks every side by (kernel size - 1).
        output_dim = input_dimension_size - conv_layers * (conv_dimensions - 1)
        if output_dim < 1:
            # Without this check a negative output_dim would still yield a
            # positive output_dim**2 and silently build an unusable model.
            raise ValueError(
                f'{conv_layers} convolution(s) with kernel size {conv_dimensions} '
                f'reduce a {input_dimension_size}x{input_dimension_size} input below 1x1'
            )

        layers = []

        # The convolutions keep the channel count unchanged
        for _ in range(conv_layers):
            layers.append(nn.Conv2d(channels, channels, conv_dimensions))
            layers.append(nn.LeakyReLU())

        layers.append(nn.Flatten())
        flattened_size = channels * output_dim**2

        if fc_layers > 1:
            # Input layer, (fc_layers - 2) hidden layers, then the output layer
            layers.append(nn.Linear(flattened_size, fc_neurons))
            layers.append(nn.LeakyReLU())
            for _ in range(fc_layers - 2):
                layers.append(nn.Linear(fc_neurons, fc_neurons))
                layers.append(nn.LeakyReLU())
            layers.append(nn.Linear(fc_neurons, num_classes))
        else:
            layers.append(nn.Linear(flattened_size, num_classes))

        super().__init__(*layers)

## 3.2 Custom Loss Definition

In [None]:
class NoisyCrossEntropyLoss(nn.Module):
    '''
    Cross-entropy loss with an optional label-noise transition matrix.

    The softmax of the logits is multiplied by the transition matrix
    (prob @ T) before the negative log-likelihood is taken, so row i of T is
    read as the distribution over observed labels for class i. With no matrix
    the loss is the negative log-likelihood of the plain softmax.

    transition_matrix: Optional (classes x classes) tensor, or None.
    '''
    def __init__(self, transition_matrix=None):
        super().__init__()
        self.register_buffer("transition_matrix", transition_matrix)

    def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
        '''
        x: Logits with the class scores along dim 1.
        y: Target class indices, as accepted by F.nll_loss.
        Returns the mean negative log-likelihood.
        '''
        prob = F.softmax(x, dim=1)
        if self.transition_matrix is not None:
            # The buffer follows this module, not the model, so it may differ
            # from the logits in device or dtype; align it before multiplying.
            # (.to() returns the same tensor when nothing needs changing.)
            transition = self.transition_matrix.to(device=prob.device, dtype=prob.dtype)
            prob = torch.matmul(prob, transition)
        # Smallest normal value of the probabilities' own dtype keeps log()
        # finite when a probability is exactly zero.
        eps = torch.finfo(prob.dtype).smallest_normal
        loss = F.nll_loss(torch.log(prob + eps), y)
        return loss

# 4. Hyperparameter Exploration

## 4.1 Helper Functions

### 4.1.1 Function to find a transition matrix.

In [None]:
def find_transition_matrix(model, train_dataloader, num_classes):
    '''
    Estimates the transition matrix from a trained model using anchor points.

    model: The trained model used to estimate the transition matrix.
    train_dataloader: An iterable of (data, targets) batches containing the training information.
    num_classes: The number of unique classes/labels in the dataset.

    For each class i the anchor point is the training example to which the
    model assigns the highest probability of class i; that example's full
    probability vector becomes ROW i of the matrix. Rows therefore sum to 1,
    which is the orientation NoisyCrossEntropyLoss needs for `prob @ T`.

    Returns a float32 tensor of shape (num_classes, num_classes). The model is
    put back into training mode before returning.

    Raises ValueError if the dataloader yields no batches.
    '''

    # Set up the model to be in evaluation mode
    model.eval()

    batch_probabilities = []
    try:
        # Make predictions without calculating gradients, since
        # we are only interested in the outputs instead of training
        with torch.inference_mode():
            for data, _targets in train_dataloader:
                outputs = model(data).cpu()
                batch_probabilities.append(F.softmax(outputs, dim=1))
    finally:
        # Return the model to training mode, even if a batch failed
        model.train()

    if not batch_probabilities:
        raise ValueError('train_dataloader produced no batches')

    # Concatenate once, outside inference mode, so the result is an ordinary
    # tensor that can later take part in autograd as a loss buffer.
    predictions = torch.cat(batch_probabilities)

    transition_matrix = torch.zeros(num_classes, num_classes, dtype=torch.float32)

    # For each class, find the anchor point and set that class's row to the
    # anchor point's probabilities
    for i in range(num_classes):
        anchor = torch.argmax(predictions[:, i])
        transition_matrix[i, :] = predictions[anchor, :]

    return transition_matrix

### 4.1.2 Create a training wrapper to isolate the training functionality.

In [None]:
def create_custom_dataloaders(X_train, Y_train, X_valid, Y_valid):
    '''
    Wrap pre-split training and validation tensors in dataloaders.

    X_train, Y_train: Training inputs and labels.
    X_valid, Y_valid: Validation inputs and labels.

    Both loaders use batches of 64; only the training loader shuffles.
    Returns (train_dataloader, valid_dataloader), each moved to the GPU.
    '''
    train_dataloader = DataLoader(TensorDataset(X_train, Y_train), batch_size=64, shuffle=True)
    valid_dataloader = DataLoader(TensorDataset(X_valid, Y_valid), batch_size=64, shuffle=False)

    # NOTE(review): .to() requires a DataLoader class that provides it
    # (presumably fastai's, brought in by the star imports) - confirm.
    for loader in (train_dataloader, valid_dataloader):
        loader.to('cuda')

    return train_dataloader, valid_dataloader

In [None]:
def custom_train(model, training_dataloaders, num_epochs=10, transition_matrix=None):
    '''
    Function that trains a model.
    model: The model being trained.
    training_dataloaders: The training and validation dataloaders.
    num_epochs: The number of epochs to train this model for.
    transition_matrix: The transition matrix to use during training. Note this is optional;
        when it is None the loss is computed on the model's softmax output directly.

    Returns a (model, recorder) tuple: the learner's trained model and the
    fastai recorder holding the per-epoch losses and metrics.
    '''
    # Use an Adam optimiser wrapped for fastai
    opt_func = partial(OptimWrapper, opt=optim.Adam)

    # NoisyCrossEntropyLoss already treats None as "no transition matrix", so
    # no branching is needed (and comparing a tensor with `!= None` is avoided).
    loss_fn = NoisyCrossEntropyLoss(transition_matrix)

    # Create the learner object that contains all this information, with
    # accuracy as the reported metric
    learn = Learner(
        training_dataloaders,
        model,
        loss_func=loss_fn,
        opt_func=opt_func,
        metrics=accuracy
    )

    # Make sure we get training metrics too
    learn.recorder.train_metrics = True

    # Train the model
    learn.fit(n_epoch=num_epochs)

    # Return the model and the recorder
    return learn.model, learn.recorder

### 4.1.3 Create a testing wrapper to isolate the testing functionality.

In [None]:
def custom_test(dataset, model):
    '''
    Measure a model's accuracy on the test split of a dataset.

    dataset: The loaded dataset, passed on to create_test_dataloader.
    model: The trained model to evaluate.

    Returns the fraction of test examples whose highest-scoring class equals
    the stored label. The model is switched to evaluation mode for the
    measurement and back to training mode afterwards.
    '''
    model.eval()

    correct = 0.0
    counter = 0
    # The test dataloader yields one example at a time, so argmax over the
    # whole output gives that example's predicted class.
    with torch.inference_mode():
        for inputs, label in create_test_dataloader(dataset):
            counter += 1
            if torch.argmax(model(inputs)) == label.item():
                correct += 1

    test_accuracy = correct / counter

    # Bring the model back to training mode
    model.train()

    return test_accuracy

### 4.1.4 Set up each of the hyperparameters.

In [None]:
# Search space for custom_grid_search: every combination of the values below
# is treated as one trial.

# Number of convolutional layers
conv_layers = [1, 2, 5, 10]
# Kernel size handed to each convolutional layer
conv_dimension_size = [2, 4, 6, 8, 10]
# Number of fully connected layers
fc_layers = [1, 2, 5, 10]
# Width of each hidden fully connected layer
fc_neurons = [100, 500, 1000]
# Number of training epochs
num_epochs = [10, 100]

### 4.1.5 Create a custom grid search function to undertake grid search.

In [None]:
from IPython.display import clear_output
def custom_grid_search(dataset, **hyperparameters):

    # Set up some variables to manage the grid search
    counter = 0
    total_searches = len(hyperparameters['conv_layers']) * len(hyperparameters['conv_dimension_size']) * len(hyperparameters['fc_layers']) * len(hyperparameters['fc_neurons']) * len(hyperparameters['num_epochs'])
    best_model = {
        'trial': None,
        'valid': False,
        'num_epochs': None,
        'conv_layers': None,
        'conv_dimension_size': None,
        'fc_layers': None,
        'fc_neurons': None,
        'avg_val_accuracy': 0,
        'transition_matrix': None
    }

    match dataset:
        case 'CIFAR10':
            data = np.load('2024_A2_datasets/CIFAR10.npz')
            input_dim_size = 32
            rgb = True
            transition_matrix = hyperparameters['find_transition_matrix']
        case 'FashionMINIST0.3':
            data = np.load('2024_A2_datasets/FashionMNIST0.3.npz')
            input_dim_size = 28
            rgb = False
            transition_matrix = torch.tensor([
                [0.7, 0.3, 0, 0],
                [0, 0.7, 0.3, 0],
                [0, 0, 0.7, 0.3],
                [0.3, 0, 0, 0.7]
            ])
        case 'FashionMINIST0.6':
            data = np.load('2024_A2_datasets/FashionMNIST0.6.npz')
            input_dim_size = 28
            rgb = False
            transition_matrix = torch.tensor([
                [0.4, 0.2, 0.2, 0.2],
                [0.2, 0.4, 0.2, 0.2],
                [0.2, 0.2, 0.4, 0.2],
                [0.2, 0.2, 0.2, 0.4]
            ])

    # Conduct the grid search
    for epochs in hyperparameters['num_epochs']:
        for num_conv_layers in hyperparameters['conv_layers']:
            for dimension_size in hyperparameters['conv_dimension_size']:
                for num_fc_layers in hyperparameters['fc_layers']:
                    for num_fc_neurons in hyperparameters['fc_neurons']:
                        counter += 1

                        # Check if this model has already been trained
                        if os.path.exists(f'{dataset}_cnn_results/trial_{counter}'):
                            continue

                        # Check if this combination of hyperparameters will be valid
                        if input_dim_size - num_conv_layers*dimension_size + num_conv_layers < 1:
                            with open(f'{dataset}_cnn_results/trial_{counter}', 'w') as f:
                                f.write(json.dumps({
                                    'trial': counter,
                                    'valid': False,
                                    'num_epochs': epochs,
                                    'conv_layers': num_conv_layers,
                                    'conv_dimension_size': dimension_size,
                                    'fc_layers': num_fc_layers,
                                    'fc_neurons': num_fc_neurons,
                                }))
                                continue

                        X_train, X_valid, Y_train, Y_valid = create_train_valid_split(data)

                        # Print some information for the user
                        print(f'Running model {counter} out of {total_searches} ({float(counter)/total_searches * 100:.2f}%)')
                        print(f'''Current model:
                        num_epochs: {epochs}
                        conv_layers: {num_conv_layers}
                        conv_dimension_size: {dimension_size}
                        fc_layers: {num_fc_layers}
                        fc_neurons: {num_fc_neurons}\n
                        ''')
                        print(f'''Best model: {best_model['avg_val_accuracy'] * 100:.2f}%
                        num_epochs: {best_model['num_epochs']}
                        conv_layers: {best_model['conv_layers']}
                        conv_dimension_size: {best_model['conv_dimension_size']}
                        fc_layers: {best_model['fc_layers']}
                        fc_neurons: {best_model['fc_neurons']}\n
                        ''')

                        # Define the model to train
                        model = Net(input_dim_size, num_conv_layers, dimension_size, num_fc_layers, num_fc_neurons, rgb)

                        # Create the training/validation dataloaders
                        dls = create_custom_dataloaders(X_train, Y_train, X_valid, Y_valid)
                        train_dataloaders = DataLoaders(dls[0], dls[1])

                        # Find the transition matrix of the data (if needed), otherwise just train the model
                        # using its existin transition matrix
                        if hyperparameters['find_transition_matrix']:
                            model, recorder = custom_train(model, train_dataloaders, num_epochs=10, transition_matrix=None)
                            transition_matrix = find_transition_matrix(model, train_dataloaders.train, 4)

                            # Once the transition matrix is found, create a new model that uses the transition matrix
                            model = Net(input_dim_size, num_conv_layers, dimension_size, num_fc_layers, num_fc_neurons, rgb)
                            model.cuda()
                            model, recorder = custom_train(model, train_dataloaders, num_epochs=epochs, transition_matrix=transition_matrix)
                        else:
                            model, recorder = custom_train(model, train_dataloaders, num_epochs=epochs, transition_matrix=transition_matrix)

                        print(model)

                        clear_output()

                        # Set up the model's result information
                        model_info = {
                            'trial': counter,
                            'valid': True,
                            'num_epochs': epochs,
                            'conv_layers': num_conv_layers,
                            'conv_dimension_size': dimension_size,
                            'fc_layers': num_fc_layers,
                            'fc_neurons': num_fc_neurons,
                            'avg_val_accuracy': recorder.values[0][-1],
                            'transition_matrix': transition_matrix.tolist()
                        }

                        # Save the trial information to the hard disk
                        with open(f'{dataset}_cnn_results/trial_{counter}', 'w') as f:
                            f.write(json.dumps(model_info))

                        # Replace the best model with the new best model
                        if model_info['avg_val_accuracy'] > best_model['avg_val_accuracy']:
                            best_model = model_info

## 4.2 Dataset 1: FashionMNIST0.3.npz

In [None]:
# Grid search on FashionMNIST0.3 using the dataset's known transition matrix
custom_grid_search(
    'FashionMINIST0.3',
    find_transition_matrix=False,
    conv_layers=conv_layers,
    conv_dimension_size=conv_dimension_size,
    fc_layers=fc_layers,
    fc_neurons=fc_neurons,
    num_epochs=num_epochs,
)

## 4.3 Dataset 2: FashionMNIST0.6.npz

In [None]:
# Grid search on FashionMNIST0.6 using the dataset's known transition matrix
custom_grid_search(
    'FashionMINIST0.6',
    find_transition_matrix=False,
    conv_layers=conv_layers,
    conv_dimension_size=conv_dimension_size,
    fc_layers=fc_layers,
    fc_neurons=fc_neurons,
    num_epochs=num_epochs,
)

## 4.4 Dataset 3: CIFAR10.npz

In [None]:
# Grid search on CIFAR10, estimating the transition matrix for each trial
custom_grid_search(
    'CIFAR10',
    find_transition_matrix=True,
    conv_layers=conv_layers,
    conv_dimension_size=conv_dimension_size,
    fc_layers=fc_layers,
    fc_neurons=fc_neurons,
    num_epochs=num_epochs,
)

# 5. Results

## 5.1 Find the best performing model for each dataset (judged by highest validation accuracy)

In [None]:
def get_best_model(dataset):
    '''
    Return the stored trial with the highest validation accuracy.

    dataset: Prefix of the results directory; every file inside
        '<dataset>_cnn_results' is read as one JSON trial record.

    Only records whose 'valid' entry is truthy are considered, and a record
    must have an 'avg_val_accuracy' strictly above the best seen so far
    (starting from 0) to be selected. Returns the winning record as a dict,
    or None when no record qualifies.
    '''
    results_dir = f'{dataset}_cnn_results'

    best_accuracy = 0
    best_model = None
    for trial_name in os.listdir(results_dir):
        with open(f'{results_dir}/{trial_name}', 'r') as trial_file:
            results = json.load(trial_file)

        # Invalid trials carry no accuracy, so skip them before comparing
        if not results['valid']:
            continue

        if results['avg_val_accuracy'] > best_accuracy:
            best_accuracy = results['avg_val_accuracy']
            best_model = results

    return best_model

In [None]:
# Look up the best recorded trial for each dataset. The names are the
# results-directory prefixes that the grid search wrote to.
best_cifar_cnn, best_fashion3_cnn, best_fashion6_cnn = [
    get_best_model(results_prefix)
    for results_prefix in ('CIFAR10', 'FashionMINIST0.3', 'FashionMINIST0.6')
]

for best_trial in (best_cifar_cnn, best_fashion3_cnn, best_fashion6_cnn):
    print(best_trial)

{'trial': 282, 'valid': True, 'num_epochs': 100, 'conv_layers': 1, 'conv_dimension_size': 8, 'fc_layers': 2, 'fc_neurons': 1000, 'avg_val_accuracy': 0.5945000052452087, 'transition_matrix': [[0.9996451139450073, 9.148226620681044e-11, 3.619066410465166e-05, 5.073326065030415e-06], [0.0002931593044195324, 0.9999998807907104, 5.171649536350742e-05, 2.4422119437872425e-08], [5.638430957333185e-05, 9.323639460490085e-08, 0.999903678894043, 5.463606385092135e-07], [5.37204277861747e-06, 5.1986872584564026e-09, 8.429781701124739e-06, 0.9999943971633911]]}
{'trial': 33, 'valid': True, 'num_epochs': 10, 'conv_layers': 1, 'conv_dimension_size': 6, 'fc_layers': 5, 'fc_neurons': 1000, 'avg_val_accuracy': 0.6637499928474426, 'transition_matrix': [[0.699999988079071, 0.30000001192092896, 0.0, 0.0], [0.0, 0.699999988079071, 0.30000001192092896, 0.0], [0.0, 0.0, 0.699999988079071, 0.30000001192092896], [0.30000001192092896, 0.0, 0.0, 0.699999988079071]]}
{'trial': 42, 'valid': True, 'num_epochs': 10,

## 5.2 Evaluate model performance on test set
For each CNN model, train it on a random 80:20 split of training:validation data, find the transition matrix if necessary, and then obtain the test accuracy. Repeat this 10 times.

In [None]:
def train_transition_test_loop(dataset, best_model, find_the_transition_matrix=True):

    # Set up variables for the relevant dataset
    match dataset:
        case 'CIFAR10':
            data = np.load('2024_A2_datasets/CIFAR10.npz')
            input_dim_size = 32
            rgb = True
            transition_matrix = None
            find_the_transition_matrix = find_the_transition_matrix
        case 'FashionMNIST0.3':
            data = np.load('2024_A2_datasets/FashionMNIST0.3.npz')
            input_dim_size = 28
            rgb = False
            transition_matrix = None
            #DEBUG - uncomment for submission
    # torch.tensor([
            #     [0.7, 0.3, 0, 0],
            #     [0, 0.7, 0.3, 0],
            #     [0, 0, 0.7, 0.3],
            #     [0.3, 0, 0, 0.7]
            # ])
        case 'FashionMNIST0.6':
            data = np.load('2024_A2_datasets/FashionMNIST0.6.npz')
            input_dim_size = 28
            rgb = False
            transition_matrix = None
            #DEBUG - uncomment for submission
            # torch.tensor([
            #     [0.4, 0.2, 0.2, 0.2],
            #     [0.2, 0.4, 0.2, 0.2],
            #     [0.2, 0.2, 0.4, 0.2],
            #     [0.2, 0.2, 0.2, 0.4]
            # ])



    # Conduct 10 experiments on the datasets
    test_accuracies = []
    for i in range(10):
        # Create the training dataloaders (random split of 80:20 training:validation)
        train_valid_dataloaders = create_training_dataloaders(data)

        # Create the model
        model = Net(
            input_dim_size,
            best_model['conv_layers'],
            best_model['conv_dimension_size'],
            best_model['fc_layers'],
            best_model['fc_neurons'],
            rgb
        )

        # Move the model to the GPU
        model.cuda()

        # Find the transition matrix of the data (if needed), otherwise just train the model
        # using its existing transition matrix
        if find_the_transition_matrix:
            model, recorder = custom_train(model, train_valid_dataloaders, num_epochs=10, transition_matrix=None)
            transition_matrix = find_transition_matrix(model, train_valid_dataloaders.train, 4)

            # Once the transition matrix is found, create a new model that uses the transition matrix
            model = Net(
                input_dim_size,
                best_model['conv_layers'],
                best_model['conv_dimension_size'],
                best_model['fc_layers'],
                best_model['fc_neurons'],
                rgb
            )
            model.cuda()
            model, recorder = custom_train(model, train_valid_dataloaders, num_epochs=best_model['num_epochs'], transition_matrix=transition_matrix)
        else:
            model, recorder = custom_train(model, train_valid_dataloaders, num_epochs=best_model['num_epochs'], transition_matrix=transition_matrix)

        # Test the model on the test dataset
        test_accuracy = custom_test(data, model)
        test_accuracies.append(test_accuracy)

    if transition_matrix:
        return test_accuracies, statistics.mean(test_accuracies), statistics.pstdev(test_accuracies), transition_matrix.tolist()

    return test_accuracies, statistics.mean(test_accuracies), statistics.pstdev(test_accuracies), transition_matrix

In [None]:
import ast


def _load_or_run_stats(stats_path, dataset, best_model, find_the_transition_matrix):
    '''
    Return the statistics for one experiment, running it only when needed.

    If stats_path exists, its contents (written earlier with str()) are parsed
    and returned; if they cannot be parsed, the raw text is returned instead.
    Otherwise train_transition_test_loop is run, its result is written to
    stats_path and then returned.
    '''
    if os.path.exists(stats_path):
        with open(stats_path, 'r') as f:
            cached = f.read()
        try:
            return ast.literal_eval(cached)
        except (ValueError, SyntaxError):
            return cached

    stats = train_transition_test_loop(dataset, best_model, find_the_transition_matrix=find_the_transition_matrix)
    with open(stats_path, 'w') as f:
        f.write(str(stats))
    return stats


# Every result is bound to its name whether it was computed now or loaded from
# a previous run; previously the names were undefined (NameError at the prints
# below) whenever the stats file already existed.
cifar_stats = _load_or_run_stats('cnn_cifar_stats', 'CIFAR10', best_cifar_cnn, True)
fashion3_stats = _load_or_run_stats('cnn_fashion3_stats', 'FashionMNIST0.3', best_fashion3_cnn, False)
fashion6_stats = _load_or_run_stats('cnn_fashion6_stats', 'FashionMNIST0.6', best_fashion6_cnn, False)

# Train the two fashion datasets again to see what kind of transition matrices they produce, evaluating the effectiveness
# of creating transition matrices
fashion3_stats_tm = _load_or_run_stats('cnn_fashion3_stats_tm', 'FashionMNIST0.3', best_fashion3_cnn, True)
fashion6_stats_tm = _load_or_run_stats('cnn_fashion6_stats_tm', 'FashionMNIST0.6', best_fashion6_cnn, True)

# Runs without estimating a transition matrix
cifar_without_tm = _load_or_run_stats('cnn_cifar_without_tm', 'CIFAR10', best_cifar_cnn, False)
fashion3_without_tm = _load_or_run_stats('cnn_fashion3_without_tm', 'FashionMNIST0.3', best_fashion3_cnn, False)
fashion6_without_tm = _load_or_run_stats('cnn_fashion6_without_tm', 'FashionMNIST0.6', best_fashion6_cnn, False)

print(f'CIFAR stats: {cifar_stats}\n')
print(f'Fashion3 stats: {fashion3_stats}\n')
print(f'Fashion6 stats: {fashion6_stats}\n')
print()
print(f'Fashion3 predicted transition matrix: {fashion3_stats_tm}\n')
print(f'Fashion6 predicted transition matrix: {fashion6_stats_tm}\n')

epoch,train_loss,train_accuracy,valid_loss,valid_accuracy,time
0,1.134896,0.47125,1.101318,0.541,00:01
1,1.021765,0.5755,1.077713,0.55875,00:01
2,0.967927,0.599312,1.070238,0.57175,00:01
3,0.939797,0.617625,1.025563,0.5885,00:01
4,0.866223,0.648188,1.023313,0.603,00:01
5,0.839341,0.669625,1.034716,0.60125,00:01
6,0.743126,0.706,1.07448,0.58525,00:01
7,0.677847,0.735062,1.145095,0.58575,00:01
8,0.622993,0.772375,1.25506,0.56625,00:01
9,0.495768,0.814687,1.336233,0.5605,00:02


epoch,train_loss,train_accuracy,valid_loss,valid_accuracy,time
0,1.109755,0.49375,1.105173,0.53625,00:02
1,1.001264,0.569687,1.067709,0.559,00:03
2,0.984496,0.594375,1.064471,0.56025,00:03
3,0.935357,0.6225,1.037245,0.58225,00:03
4,0.89043,0.64625,1.013358,0.59475,00:03
5,0.850891,0.66725,1.020254,0.5915,00:03
6,0.792324,0.690562,1.074054,0.5915,00:03
7,0.738545,0.717,1.096299,0.57725,00:03
8,0.652567,0.754062,1.195201,0.5705,00:03
9,0.576711,0.792188,1.179139,0.56175,00:03


epoch,train_loss,train_accuracy,valid_loss,valid_accuracy,time
0,1.136005,0.47325,1.057651,0.57,00:02
1,1.05149,0.554187,1.076671,0.5445,00:03
2,1.017357,0.587563,1.012456,0.58525,00:03
3,0.956152,0.614563,1.036631,0.57825,00:03
4,0.892383,0.642187,1.015641,0.5925,00:03
5,0.812251,0.67675,1.043516,0.606,00:03
6,0.745752,0.70975,1.093874,0.5805,00:03
7,0.647429,0.753688,1.130713,0.6025,00:03
8,0.551017,0.799125,1.221132,0.57875,00:03
9,0.440452,0.849187,1.333732,0.5585,00:03


epoch,train_loss,train_accuracy,valid_loss,valid_accuracy,time
0,1.093088,0.486,1.086229,0.5525,00:03
1,1.029601,0.572625,1.143577,0.5285,00:03
2,0.96312,0.60825,1.047743,0.5645,00:03
3,0.908414,0.632875,1.068709,0.576,00:03
4,0.857102,0.659312,1.056841,0.587,00:03
5,0.794573,0.685375,1.106405,0.56725,00:03
6,0.717366,0.71825,1.119033,0.57,00:03
7,0.64527,0.757875,1.217345,0.563,00:03
8,0.552848,0.801188,1.250565,0.562,00:03
9,0.43864,0.843188,1.389776,0.5705,00:03


epoch,train_loss,train_accuracy,valid_loss,valid_accuracy,time
0,1.232854,0.381125,1.287032,0.443,00:03
1,1.103559,0.50425,1.059276,0.54925,00:03
2,1.025049,0.567187,1.057984,0.557,00:03
3,0.987782,0.5995,0.998341,0.59075,00:03
4,0.945059,0.617625,1.003583,0.592,00:03
5,0.942801,0.628062,1.004113,0.58725,00:03
6,0.891145,0.642687,0.955328,0.62175,00:03
7,0.847375,0.66225,0.962974,0.6175,00:03
8,0.84997,0.671188,0.993497,0.6025,00:03
9,0.791995,0.68575,1.003776,0.61525,00:03


epoch,train_loss,train_accuracy,valid_loss,valid_accuracy,time
0,1.166806,0.436125,1.127963,0.489,00:03
1,1.07123,0.545375,1.031619,0.5745,00:03
2,1.025257,0.589063,1.017924,0.589,00:03
3,0.956149,0.613437,1.027455,0.58975,00:03
4,0.902236,0.643875,1.00174,0.60175,00:03
5,0.827614,0.674875,1.072111,0.58175,00:03
6,0.742118,0.712188,1.077986,0.5855,00:03
7,0.663892,0.757187,1.101918,0.5815,00:03
8,0.529746,0.811688,1.198876,0.58,00:03
9,0.433659,0.860875,1.2885,0.56775,00:03


epoch,train_loss,train_accuracy,valid_loss,valid_accuracy,time
0,1.113526,0.483625,1.063426,0.559,00:03
1,1.018398,0.575875,0.996604,0.594,00:03
2,0.962835,0.606938,0.999244,0.596,00:03
3,0.904198,0.634938,0.998174,0.58375,00:03
4,0.845656,0.661,0.994563,0.60325,00:03
5,0.76871,0.6985,1.020141,0.59575,00:03
6,0.689073,0.7385,1.062114,0.58,00:03
7,0.587677,0.787188,1.125494,0.57125,00:03
8,0.487021,0.835187,1.226565,0.56775,00:03
9,0.348612,0.884125,1.331609,0.57375,00:03


epoch,train_loss,train_accuracy,valid_loss,valid_accuracy,time
0,1.075743,0.528063,1.05078,0.56625,00:03
1,0.991785,0.592625,1.102983,0.55875,00:03
2,0.918345,0.634562,1.020036,0.5885,00:03
3,0.84597,0.667,1.025892,0.57925,00:03
4,0.74292,0.710312,1.097191,0.56025,00:03
5,0.636541,0.76525,1.184414,0.5565,00:03
6,0.484174,0.824,1.28041,0.55325,00:03
7,0.363664,0.882563,1.438404,0.545,00:03
8,0.242909,0.926063,1.641908,0.541,00:03
9,0.167989,0.953375,1.824508,0.53975,00:03


epoch,train_loss,train_accuracy,valid_loss,valid_accuracy,time
0,1.092716,0.486562,1.040376,0.56575,00:03
1,1.018949,0.570125,1.046652,0.561,00:03
2,0.9578,0.607687,0.987815,0.599,00:03
3,0.924379,0.626875,0.976285,0.59925,00:03
4,0.866109,0.6575,0.994005,0.59675,00:03
5,0.78212,0.685562,0.997004,0.5915,00:03
6,0.721864,0.715312,1.144977,0.55625,00:03
7,0.634662,0.758062,1.064147,0.584,00:03
8,0.517229,0.808375,1.206166,0.5655,00:03
9,0.450234,0.847625,1.268425,0.57225,00:03


epoch,train_loss,train_accuracy,valid_loss,valid_accuracy,time
0,1.128369,0.47,1.129867,0.5185,00:03
1,1.034907,0.569188,1.036886,0.57075,00:03
2,0.961513,0.605375,1.036385,0.5835,00:03
3,0.891372,0.6345,1.081792,0.5655,00:03
4,0.825088,0.676813,1.043027,0.59075,00:03
5,0.73336,0.71575,1.082494,0.58925,00:03
6,0.606816,0.773125,1.156836,0.57475,00:03
7,0.491391,0.82625,1.274141,0.56075,00:03
8,0.336808,0.886625,1.438707,0.552,00:03
9,0.2327,0.929625,1.668334,0.566,00:03


epoch,train_loss,train_accuracy,valid_loss,valid_accuracy,time
0,0.772482,0.632396,0.75368,0.656458,00:03
1,0.734544,0.664479,0.818743,0.660417,00:04
2,0.706876,0.674635,0.739801,0.662708,00:04
3,0.69336,0.680781,0.750282,0.665417,00:04
4,0.675689,0.689479,0.75496,0.659375,00:04
5,0.647127,0.695938,0.791066,0.657083,00:04
6,0.615441,0.70474,0.803864,0.660208,00:04
7,0.605462,0.716042,0.89249,0.659167,00:04
8,0.570599,0.729323,0.878404,0.657917,00:04
9,0.549957,0.745677,1.026962,0.633333,00:04


epoch,train_loss,train_accuracy,valid_loss,valid_accuracy,time
0,0.773406,0.626458,0.765215,0.659375,00:03
1,0.740747,0.65974,0.748568,0.666042,00:04
2,0.723605,0.668698,0.756804,0.665417,00:04
3,0.707378,0.67401,0.743529,0.668333,00:04
4,0.690871,0.679115,0.746696,0.668542,00:04
5,0.669338,0.685417,0.784823,0.661458,00:04
6,0.657932,0.691562,0.770627,0.667292,00:04
7,0.643467,0.696094,0.849713,0.665833,00:04
8,0.638662,0.701979,0.797906,0.664375,00:04
9,0.605391,0.711458,0.841872,0.658542,00:04


epoch,train_loss,train_accuracy,valid_loss,valid_accuracy,time
0,0.775208,0.612344,0.765728,0.656667,00:03
1,0.73337,0.662135,0.739305,0.661042,00:03
2,0.71251,0.672135,0.750853,0.671042,00:04
3,0.681579,0.677969,0.761416,0.667917,00:04
4,0.675111,0.683542,0.745997,0.670417,00:04
5,0.651526,0.693177,0.822829,0.66,00:04
6,0.636758,0.700104,0.813553,0.65625,00:04
7,0.615843,0.70901,0.795014,0.66125,00:04
8,0.580771,0.720521,0.885854,0.653333,00:04
9,0.559534,0.737292,0.903768,0.651458,00:04


epoch,train_loss,train_accuracy,valid_loss,valid_accuracy,time
0,0.77574,0.636094,0.76966,0.661875,00:03
1,0.737052,0.662031,0.754947,0.670208,00:04
2,0.726143,0.669062,0.739681,0.674375,00:04
3,0.71774,0.677031,0.73358,0.674583,00:04
4,0.687545,0.680781,0.732081,0.677292,00:04
5,0.67299,0.686302,0.742644,0.677292,00:05
6,0.660155,0.692969,0.758651,0.669583,00:04
7,0.647484,0.698646,0.794734,0.667292,00:04
8,0.628901,0.704687,0.807556,0.671042,00:04
9,0.612099,0.710312,0.813823,0.671667,00:04


epoch,train_loss,train_accuracy,valid_loss,valid_accuracy,time
0,0.770609,0.621094,0.764834,0.649583,00:03
1,0.737063,0.668802,0.749379,0.657083,00:03
2,0.699921,0.678542,0.746099,0.660208,00:04
3,0.663789,0.686823,0.767048,0.66,00:04
4,0.660393,0.693542,0.762931,0.66,00:04
5,0.638593,0.701406,0.793363,0.653125,00:04
6,0.614915,0.710312,0.821219,0.653958,00:04
7,0.605854,0.719792,0.812096,0.654583,00:04
8,0.56401,0.73224,0.974169,0.635833,00:04
9,0.533884,0.747656,0.933099,0.637083,00:04


epoch,train_loss,train_accuracy,valid_loss,valid_accuracy,time
0,0.783329,0.628646,0.747473,0.664792,00:03
1,0.747186,0.659583,0.732773,0.668333,00:03
2,0.726225,0.667396,0.735984,0.669792,00:04
3,0.712435,0.674375,0.734383,0.666042,00:04
4,0.686311,0.676667,0.747444,0.668958,00:04
5,0.684802,0.680729,0.735227,0.671875,00:04
6,0.679932,0.686719,0.75877,0.670417,00:04
7,0.658245,0.691302,0.756855,0.671875,00:04
8,0.645379,0.695365,0.764112,0.67,00:04
9,0.639985,0.700729,0.769667,0.671667,00:04


epoch,train_loss,train_accuracy,valid_loss,valid_accuracy,time
0,0.778838,0.628698,0.783109,0.658125,00:03
1,0.750357,0.661823,0.742235,0.665417,00:03
2,0.71406,0.668385,0.734226,0.671667,00:03
3,0.705469,0.675208,0.746389,0.66125,00:04
4,0.68859,0.681198,0.759959,0.664583,00:04
5,0.684995,0.686927,0.736537,0.675,00:04
6,0.651087,0.693542,0.767935,0.670208,00:04
7,0.648145,0.697135,0.7875,0.670417,00:04
8,0.626675,0.706042,0.803904,0.669375,00:04
9,0.602453,0.713177,0.871947,0.669375,00:04


epoch,train_loss,train_accuracy,valid_loss,valid_accuracy,time
0,0.849266,0.539167,0.842389,0.629167,00:03
1,0.79448,0.640573,0.777764,0.650417,00:03
2,0.769474,0.649948,0.766736,0.655,00:04
3,0.763158,0.654948,0.77365,0.65375,00:04
4,0.747818,0.658333,0.754941,0.661042,00:04
5,0.737649,0.65974,0.745425,0.666042,00:04
6,0.753525,0.661979,0.743106,0.659792,00:04
7,0.738229,0.663437,0.728903,0.670208,00:04
8,0.728688,0.668437,0.727261,0.66625,00:04
9,0.726735,0.668177,0.716169,0.67,00:04


epoch,train_loss,train_accuracy,valid_loss,valid_accuracy,time
0,0.785683,0.612135,0.766661,0.658958,00:03
1,0.746635,0.662344,0.743754,0.66625,00:03
2,0.712484,0.67151,0.75337,0.6625,00:04
3,0.698971,0.676562,0.752561,0.669583,00:04
4,0.674831,0.685469,0.765212,0.668125,00:04
5,0.669677,0.688646,0.764138,0.667083,00:04
6,0.650982,0.696771,0.78187,0.667708,00:04
7,0.636964,0.702552,0.868276,0.650417,00:04
8,0.612106,0.70901,0.827496,0.660208,00:04
9,0.583842,0.721875,1.002301,0.658958,00:04


epoch,train_loss,train_accuracy,valid_loss,valid_accuracy,time
0,0.773209,0.622396,0.782609,0.64625,00:03
1,0.732563,0.667656,0.766966,0.650417,00:03
2,0.717694,0.677135,0.75151,0.654583,00:04
3,0.697176,0.681458,0.747499,0.654583,00:04
4,0.678985,0.690833,0.756046,0.650417,00:04
5,0.660995,0.693073,0.756699,0.659375,00:04
6,0.641205,0.699948,0.776055,0.654167,00:04
7,0.625489,0.706979,0.858419,0.653125,00:04
8,0.61294,0.715208,0.841115,0.647083,00:04
9,0.596711,0.726562,0.854877,0.648125,00:04


epoch,train_loss,train_accuracy,valid_loss,valid_accuracy,time
0,1.360574,0.349219,1.35385,0.376875,00:03
1,1.350541,0.375052,1.351041,0.374792,00:03
2,1.349077,0.376094,1.35486,0.366667,00:03
3,1.346637,0.381563,1.355444,0.377083,00:03
4,1.336688,0.384427,1.351369,0.375208,00:03
5,1.329663,0.387344,1.354111,0.375625,00:03
6,1.326543,0.39276,1.361283,0.361458,00:03
7,1.321741,0.397708,1.371279,0.357292,00:03
8,1.297135,0.407969,1.375758,0.359167,00:03
9,1.283555,0.418281,1.405729,0.355417,00:03


epoch,train_loss,train_accuracy,valid_loss,valid_accuracy,time
0,1.353004,0.352292,1.349942,0.381042,00:03
1,1.349156,0.373125,1.352784,0.379583,00:03
2,1.345794,0.376719,1.347477,0.380208,00:03
3,1.337794,0.3825,1.348906,0.385,00:03
4,1.334912,0.384167,1.346685,0.387917,00:03
5,1.331655,0.39125,1.348971,0.385625,00:03
6,1.322516,0.396094,1.355054,0.378125,00:03
7,1.308348,0.405625,1.363361,0.375833,00:03
8,1.285501,0.420104,1.37692,0.36625,00:03
9,1.262578,0.434844,1.394046,0.355833,00:03


epoch,train_loss,train_accuracy,valid_loss,valid_accuracy,time
0,1.348785,0.364688,1.348229,0.372708,00:03
1,1.34624,0.378854,1.347909,0.374167,00:03
2,1.341076,0.384427,1.349071,0.371667,00:03
3,1.333182,0.391406,1.353429,0.3725,00:03
4,1.32752,0.399635,1.359169,0.362917,00:03
5,1.306572,0.409219,1.377459,0.357083,00:03
6,1.287858,0.420938,1.391267,0.357292,00:03
7,1.252999,0.440885,1.40994,0.340208,00:03
8,1.215614,0.464583,1.428966,0.3475,00:03
9,1.162432,0.493958,1.467779,0.3275,00:03


epoch,train_loss,train_accuracy,valid_loss,valid_accuracy,time
0,1.354202,0.364792,1.353652,0.369792,00:03
1,1.347138,0.37849,1.351106,0.376042,00:03
2,1.343354,0.382812,1.354333,0.368333,00:03
3,1.331812,0.388333,1.356244,0.373958,00:03
4,1.329765,0.392448,1.364504,0.359167,00:03
5,1.312644,0.405052,1.369031,0.357083,00:03
6,1.283,0.422396,1.377619,0.362292,00:03
7,1.255295,0.441875,1.404061,0.353958,00:03
8,1.209245,0.466823,1.448577,0.339792,00:03
9,1.164152,0.495365,1.474558,0.326042,00:03


epoch,train_loss,train_accuracy,valid_loss,valid_accuracy,time
0,1.353096,0.365156,1.349858,0.376042,00:03
1,1.34933,0.378802,1.351786,0.373125,00:03
2,1.336848,0.388438,1.354036,0.376875,00:03
3,1.327087,0.393021,1.360002,0.3675,00:03
4,1.315048,0.400833,1.368762,0.366875,00:03
5,1.301857,0.410885,1.37636,0.359792,00:03
6,1.274271,0.431563,1.383286,0.354375,00:03
7,1.239088,0.456302,1.407707,0.338958,00:03
8,1.17268,0.489635,1.470445,0.335625,00:03
9,1.123322,0.524115,1.469648,0.3125,00:03


epoch,train_loss,train_accuracy,valid_loss,valid_accuracy,time
0,1.359221,0.348229,1.349288,0.3725,00:03
1,1.354725,0.37599,1.345925,0.384375,00:03
2,1.34486,0.378698,1.345296,0.386042,00:03
3,1.343813,0.38151,1.346207,0.384167,00:03
4,1.333347,0.384583,1.34682,0.380625,00:03
5,1.336864,0.39,1.34743,0.3775,00:03
6,1.322816,0.393646,1.354794,0.377083,00:03
7,1.31877,0.403594,1.35956,0.37875,00:03
8,1.298753,0.410469,1.368984,0.370625,00:03
9,1.263611,0.427656,1.387004,0.347083,00:03


epoch,train_loss,train_accuracy,valid_loss,valid_accuracy,time
0,1.35375,0.363802,1.3533,0.372708,00:03
1,1.346838,0.375677,1.347004,0.381667,00:03
2,1.35012,0.379844,1.350577,0.377292,00:03
3,1.343249,0.385156,1.348683,0.376667,00:03
4,1.334597,0.390312,1.357831,0.372083,00:03
5,1.317278,0.399375,1.368316,0.369375,00:03
6,1.29577,0.412656,1.379638,0.370625,00:03
7,1.266871,0.43099,1.399713,0.358333,00:03
8,1.240985,0.450312,1.423349,0.344167,00:03
9,1.179774,0.475208,1.482164,0.332292,00:03


epoch,train_loss,train_accuracy,valid_loss,valid_accuracy,time
0,1.358978,0.365312,1.350068,0.375833,00:03
1,1.346339,0.379323,1.350517,0.376042,00:03
2,1.334155,0.385938,1.352895,0.372708,00:03
3,1.330974,0.389375,1.357358,0.366458,00:03
4,1.316777,0.396042,1.367364,0.363125,00:03
5,1.299196,0.406875,1.37693,0.358333,00:03
6,1.277965,0.425833,1.3963,0.3525,00:03
7,1.243017,0.448438,1.41816,0.341875,00:03
8,1.198549,0.479427,1.455784,0.33,00:03
9,1.142542,0.515885,1.503403,0.327708,00:03


epoch,train_loss,train_accuracy,valid_loss,valid_accuracy,time
0,1.35524,0.358021,1.349386,0.372917,00:03
1,1.350443,0.376771,1.347989,0.382708,00:03
2,1.342059,0.382292,1.353544,0.377083,00:03
3,1.331009,0.386927,1.354544,0.3725,00:03
4,1.320342,0.394844,1.362773,0.370625,00:03
5,1.309088,0.40349,1.366889,0.366458,00:03
6,1.289401,0.420417,1.374058,0.358125,00:03
7,1.25635,0.437031,1.406125,0.351875,00:03
8,1.225742,0.457865,1.423898,0.329167,00:03
9,1.173126,0.485521,1.451681,0.339583,00:03


epoch,train_loss,train_accuracy,valid_loss,valid_accuracy,time
0,1.352259,0.361354,1.349916,0.383333,00:03
1,1.350355,0.379115,1.347161,0.380208,00:03
2,1.3432,0.383802,1.349179,0.3825,00:03
3,1.340418,0.389115,1.35058,0.376875,00:03
4,1.327478,0.393958,1.355215,0.375833,00:03
5,1.312054,0.404219,1.363052,0.377917,00:03
6,1.290305,0.41776,1.386116,0.3575,00:03
7,1.264244,0.437969,1.402189,0.339375,00:03
8,1.217914,0.465156,1.436907,0.345833,00:03
9,1.175491,0.49349,1.468287,0.335833,00:03


CIFAR stats: ([0.491, 0.482, 0.447], [0.247, 0.243, 0.261])



NameError: name 'fashion3_stats' is not defined