In [1]:
import os
from pathlib import Path
import sys

# Put the directory two levels above the current working directory first on
# the import path (presumably so the `src` package imported below resolves).
sys.path.insert(0, str(Path.cwd().parent.parent))

In [2]:
import numpy as np
import matplotlib.pyplot as plt

import torch
from torch import nn
from torch.nn import functional as F

from src.data_analysis.model_training import FCNArtifact, CNNArtifact

In [3]:
def num_params(model):
    """Count every parameter element exposed by ``model.parameters()``.

    Args:
        model: Any object whose ``parameters()`` method yields items that
            have a ``shape`` attribute (for example a ``torch.nn.Module``).

    Returns:
        numpy.int64: Total number of scalar elements over all parameters.
        The result is always an integer scalar, so ``.item()`` yields a
        Python ``int`` even for a model with no parameters or with
        0-dimensional parameters.
    """
    # Forcing an integer dtype matters: NumPy's default result for the
    # product/sum of an empty sequence is float64, which would silently turn
    # the count into a float.
    per_param = [np.prod(p.shape, dtype=np.int64) for p in model.parameters()]
    return np.sum(per_param, dtype=np.int64)

In [None]:
# Fully connected configuration with hidden_layer_dims of 128 and 64.
# NOTE(review): both dropout rates are 0.5, so dropout is presumably active
# for this model - confirm against FCNArtifact.
model_100k = FCNArtifact(
    # --- data / architecture ---
    dataset_name='fashion',
    hidden_layer_dims=[128, 64],
    activation_function=F.relu,
    batch_norm=True,
    dropout_rate_layers=[0.5, 0.5],
    weight_init_method='kaiming_uniform',
    # --- optimisation ---
    objective=nn.CrossEntropyLoss(),
    optimizer=torch.optim.Adam,
    optimizer_hyperparams=dict(lr=1e-3, betas=(0.9, 0.999)),
    num_epochs=40,
)
# Print the total number of parameter elements of the wrapped model.
print(num_params(model_100k.model).item())

In [10]:
# Run the 128/64 model with experiment recording enabled, flagging the record
# for the comparison against the CNN.
model_100k.runner(
    record_experiment=True,
    record_experiment_kwargs={"to_compare_with_cnn": True},
)

epoch:39 train loss:0.1925 train accuracy:0.9250: 100%|██████████| 4240/4240 [02:04<00:00, 33.92it/s]


Mean Accuracy Across Each Batch of the test set: 0.8849 ± 0.01620


In [5]:
# Fully connected configuration with hidden_layer_dims of 220 and 128.
# NOTE(review): both dropout rates are 0.5, so dropout is presumably active
# for this model - confirm against FCNArtifact.
model_200k = FCNArtifact(
    # --- data / architecture ---
    dataset_name='fashion',
    hidden_layer_dims=[220, 128],
    activation_function=F.relu,
    batch_norm=True,
    dropout_rate_layers=[0.5, 0.5],
    weight_init_method='kaiming_uniform',
    # --- optimisation ---
    objective=nn.CrossEntropyLoss(),
    optimizer=torch.optim.Adam,
    optimizer_hyperparams=dict(lr=1e-3, betas=(0.9, 0.999)),
    num_epochs=40,
)
# Print the total number of parameter elements of the wrapped model.
print(num_params(model_200k.model).item())

202974


In [6]:
# Run the 220/128 model with experiment recording enabled, flagging the record
# for the comparison against the CNN.
model_200k.runner(
    record_experiment=True,
    record_experiment_kwargs={"to_compare_with_cnn": True},
)

epoch:39 train loss:0.2587 train accuracy:0.9125: 100%|██████████| 4240/4240 [02:01<00:00, 35.02it/s]


Mean Accuracy Across Each Batch of the test set: 0.8955 ± 0.01999


In [7]:
# Fully connected configuration with hidden_layer_dims of 64 and 32.
# NOTE(review): both dropout rates are 0.5, so dropout is presumably active
# for this model - confirm against FCNArtifact.
model_50k = FCNArtifact(
    # --- data / architecture ---
    dataset_name='fashion',
    hidden_layer_dims=[64, 32],
    activation_function=F.relu,
    batch_norm=True,
    dropout_rate_layers=[0.5, 0.5],
    weight_init_method='kaiming_uniform',
    # --- optimisation ---
    objective=nn.CrossEntropyLoss(),
    optimizer=torch.optim.Adam,
    optimizer_hyperparams=dict(lr=1e-3, betas=(0.9, 0.999)),
    num_epochs=40,
)
# Print the total number of parameter elements of the wrapped model.
print(num_params(model_50k.model).item())

52842


In [8]:
# Run the 64/32 model with experiment recording enabled, flagging the record
# for the comparison against the CNN.
model_50k.runner(
    record_experiment=True,
    record_experiment_kwargs={"to_compare_with_cnn": True},
)

epoch:39 train loss:0.3670 train accuracy:0.8708: 100%|██████████| 4240/4240 [02:02<00:00, 34.59it/s]


Mean Accuracy Across Each Batch of the test set: 0.8741 ± 0.02396


In [6]:
# TODO: Determine where and what you want to record in the model experiments this time!

In [6]:
# Search space for the CNN sweep: every combination of initialisation scheme,
# learning rate and optimizer is run once per epoch setting.
weight_init_method_list = ['kaiming_uniform', 'random_normal', 'xavier_uniform']
learning_rates = [0.0001, 0.001, 0.01, 0.1]
optimizer_list = [torch.optim.SGD, torch.optim.Adam, torch.optim.RMSprop]
# Candidate values for the optimizer-specific decay/momentum terms.
momentums = [0.9, 0.95, 0.99]


def _sample_optimizer_hyperparams(optimizer, lr, rng):
    """Return keyword arguments for ``optimizer`` with randomly drawn decay terms.

    Args:
        optimizer: One of ``torch.optim.SGD``, ``torch.optim.Adam`` or
            ``torch.optim.RMSprop``.
        lr: Learning rate placed in the returned dict under ``'lr'``.
        rng: Object exposing ``choice`` (``numpy.random`` itself or a
            ``numpy.random.RandomState``).

    Returns:
        dict: ``{'lr', 'momentum'}`` for SGD, ``{'lr', 'betas'}`` for Adam and
        ``{'lr', 'alpha'}`` for RMSprop. Drawn values are plain Python floats.

    Raises:
        ValueError: If ``optimizer`` is none of the three supported classes.
    """
    if optimizer is torch.optim.SGD:
        return {'lr': lr, 'momentum': float(rng.choice(momentums))}
    if optimizer is torch.optim.Adam:
        # NOTE(review): both betas are drawn (with replacement) from
        # `momentums`, so beta2 can be as low as 0.9 - confirm this is intended.
        return {'lr': lr, 'betas': tuple(float(beta) for beta in rng.choice(momentums, 2))}
    if optimizer is torch.optim.RMSprop:
        return {'lr': lr, 'alpha': float(rng.choice(momentums))}
    # Fail loudly instead of reusing the previous iteration's hyperparameters
    # (or hitting an unbound local) when the optimizer list is extended.
    raise ValueError(f"No hyperparameter sampling rule for optimizer {optimizer!r}")


def run_all_experiments(num_epochs_list=(30,), seed=None):
    """Run one ``CNNArtifact`` experiment per point of the hyperparameter grid.

    The grid is the Cartesian product of ``num_epochs_list``,
    ``weight_init_method_list``, ``learning_rates`` and ``optimizer_list``.
    For each point the optimizer-specific decay term is drawn at random from
    ``momentums``, a ``CNNArtifact`` is built for the ``'fashion'`` dataset and
    its ``runner`` is called with ``record_experiment=True``.

    Args:
        num_epochs_list: Epoch counts to sweep over. Defaults to ``(30,)``.
        seed: Optional seed for the random hyperparameter draws. When ``None``
            (default) the global ``numpy.random`` state is used; otherwise a
            dedicated ``RandomState(seed)`` makes the draws repeatable.

    Raises:
        ValueError: If ``optimizer_list`` contains an unsupported optimizer.
    """
    rng = np.random if seed is None else np.random.RandomState(seed)
    for num_epochs in num_epochs_list:
        for weight_init_method in weight_init_method_list:
            for lr in learning_rates:
                for optimizer in optimizer_list:
                    optimizer_hyperparams = _sample_optimizer_hyperparams(optimizer, lr, rng)
                    model = CNNArtifact(
                        optimizer=optimizer,
                        objective=nn.CrossEntropyLoss(),
                        num_epochs=num_epochs,
                        weight_init_method=weight_init_method,
                        batch_norm=True,
                        optimizer_hyperparams=optimizer_hyperparams,
                        dataset_name='fashion',
                    )
                    model.runner(record_experiment=True)


In [7]:
# Launch the full CNN sweep. The recorded output below contains 36 runs of
# 30 epochs, each taking roughly 2m40s-2m55s, so this cell is slow to re-run.
run_all_experiments()

epoch:29 train loss:0.3700 train accuracy:0.8792: 100%|██████████| 3180/3180 [02:47<00:00, 19.00it/s]


Mean Accuracy Across Each Batch of the test set: 0.8641 ± 0.01862


epoch:29 train loss:0.2388 train accuracy:0.9125: 100%|██████████| 3180/3180 [02:42<00:00, 19.57it/s]


Mean Accuracy Across Each Batch of the test set: 0.9021 ± 0.02198


epoch:29 train loss:0.2841 train accuracy:0.9208: 100%|██████████| 3180/3180 [02:42<00:00, 19.57it/s]


Mean Accuracy Across Each Batch of the test set: 0.9077 ± 0.01698


epoch:29 train loss:0.1933 train accuracy:0.9333: 100%|██████████| 3180/3180 [02:39<00:00, 19.88it/s]


Mean Accuracy Across Each Batch of the test set: 0.9076 ± 0.01675


epoch:29 train loss:0.1707 train accuracy:0.9375: 100%|██████████| 3180/3180 [02:42<00:00, 19.53it/s]


Mean Accuracy Across Each Batch of the test set: 0.9128 ± 0.03840


epoch:29 train loss:0.1487 train accuracy:0.9333: 100%|██████████| 3180/3180 [02:47<00:00, 19.02it/s]


Mean Accuracy Across Each Batch of the test set: 0.9146 ± 0.02094


epoch:29 train loss:0.1858 train accuracy:0.9333: 100%|██████████| 3180/3180 [02:54<00:00, 18.21it/s]


Mean Accuracy Across Each Batch of the test set: 0.9141 ± 0.01784


epoch:29 train loss:2.2999 train accuracy:0.3167: 100%|██████████| 3180/3180 [02:47<00:00, 19.03it/s]      


Mean Accuracy Across Each Batch of the test set: 0.0991 ± 0.01972


epoch:29 train loss:0.1441 train accuracy:0.9458: 100%|██████████| 3180/3180 [02:49<00:00, 18.77it/s]


Mean Accuracy Across Each Batch of the test set: 0.9186 ± 0.02250


epoch:29 train loss:0.8043 train accuracy:0.7208: 100%|██████████| 3180/3180 [02:45<00:00, 19.17it/s]


Mean Accuracy Across Each Batch of the test set: 0.8348 ± 0.02300


epoch:29 train loss:61866317517095436288.0000 train accuracy:0.0958: 100%|██████████| 3180/3180 [02:50<00:00, 18.63it/s]


Mean Accuracy Across Each Batch of the test set: 0.0991 ± 0.02061


epoch:29 train loss:0.4545 train accuracy:0.8500: 100%|██████████| 3180/3180 [02:47<00:00, 18.95it/s]


Mean Accuracy Across Each Batch of the test set: 0.8647 ± 0.03514


epoch:29 train loss:1.1858 train accuracy:0.8583: 100%|██████████| 3180/3180 [02:46<00:00, 19.09it/s]


Mean Accuracy Across Each Batch of the test set: 0.8940 ± 0.01878


epoch:29 train loss:1.1412 train accuracy:0.8958: 100%|██████████| 3180/3180 [02:48<00:00, 18.89it/s]


Mean Accuracy Across Each Batch of the test set: 0.8769 ± 0.01943


epoch:29 train loss:1.8655 train accuracy:0.8458: 100%|██████████| 3180/3180 [02:49<00:00, 18.78it/s]


Mean Accuracy Across Each Batch of the test set: 0.8654 ± 0.02419


epoch:29 train loss:0.2956 train accuracy:0.8958: 100%|██████████| 3180/3180 [02:45<00:00, 19.25it/s]


Mean Accuracy Across Each Batch of the test set: 0.8837 ± 0.02233


epoch:29 train loss:0.2159 train accuracy:0.9042: 100%|██████████| 3180/3180 [02:43<00:00, 19.40it/s]


Mean Accuracy Across Each Batch of the test set: 0.9096 ± 0.01752


epoch:29 train loss:0.1715 train accuracy:0.9500: 100%|██████████| 3180/3180 [02:43<00:00, 19.48it/s]


Mean Accuracy Across Each Batch of the test set: 0.9035 ± 0.01795


epoch:29 train loss:0.9820 train accuracy:0.6750: 100%|██████████| 3180/3180 [02:41<00:00, 19.70it/s]


Mean Accuracy Across Each Batch of the test set: 0.7883 ± 0.02718


epoch:29 train loss:0.2447 train accuracy:0.9167: 100%|██████████| 3180/3180 [02:45<00:00, 19.20it/s]


Mean Accuracy Across Each Batch of the test set: 0.9065 ± 0.01981


epoch:29 train loss:0.2232 train accuracy:0.9333: 100%|██████████| 3180/3180 [02:44<00:00, 19.33it/s]


Mean Accuracy Across Each Batch of the test set: 0.9062 ± 0.03188


epoch:29 train loss:2.2919 train accuracy:0.0958: 100%|██████████| 3180/3180 [02:46<00:00, 19.10it/s]


Mean Accuracy Across Each Batch of the test set: 0.1172 ± 0.02078


epoch:29 train loss:0.5618 train accuracy:0.8208: 100%|██████████| 3180/3180 [02:44<00:00, 19.32it/s]


Mean Accuracy Across Each Batch of the test set: 0.8737 ± 0.01898


epoch:29 train loss:2.3055 train accuracy:0.1000: 100%|██████████| 3180/3180 [02:46<00:00, 19.10it/s]


Mean Accuracy Across Each Batch of the test set: 0.1035 ± 0.02799


epoch:29 train loss:0.4436 train accuracy:0.8375: 100%|██████████| 3180/3180 [02:44<00:00, 19.27it/s]


Mean Accuracy Across Each Batch of the test set: 0.8518 ± 0.02148


epoch:29 train loss:0.2077 train accuracy:0.9125: 100%|██████████| 3180/3180 [02:45<00:00, 19.20it/s]


Mean Accuracy Across Each Batch of the test set: 0.9022 ± 0.01690


epoch:29 train loss:0.1952 train accuracy:0.9208: 100%|██████████| 3180/3180 [02:47<00:00, 18.98it/s]


Mean Accuracy Across Each Batch of the test set: 0.9017 ± 0.02331


epoch:29 train loss:0.2922 train accuracy:0.8917: 100%|██████████| 3180/3180 [02:47<00:00, 19.01it/s]


Mean Accuracy Across Each Batch of the test set: 0.9033 ± 0.03028


epoch:29 train loss:0.1134 train accuracy:0.9542: 100%|██████████| 3180/3180 [02:46<00:00, 19.12it/s]


Mean Accuracy Across Each Batch of the test set: 0.9180 ± 0.01456


epoch:29 train loss:0.0995 train accuracy:0.9583: 100%|██████████| 3180/3180 [02:44<00:00, 19.28it/s]


Mean Accuracy Across Each Batch of the test set: 0.9131 ± 0.01875


epoch:29 train loss:0.1298 train accuracy:0.9458: 100%|██████████| 3180/3180 [02:43<00:00, 19.50it/s]


Mean Accuracy Across Each Batch of the test set: 0.9108 ± 0.02339


epoch:29 train loss:0.1381 train accuracy:0.9542: 100%|██████████| 3180/3180 [02:41<00:00, 19.71it/s]


Mean Accuracy Across Each Batch of the test set: 0.9175 ± 0.01780


epoch:29 train loss:0.1801 train accuracy:0.9292: 100%|██████████| 3180/3180 [02:41<00:00, 19.66it/s]


Mean Accuracy Across Each Batch of the test set: 0.9139 ± 0.01610


epoch:29 train loss:0.3589 train accuracy:0.8708: 100%|██████████| 3180/3180 [02:40<00:00, 19.79it/s]


Mean Accuracy Across Each Batch of the test set: 0.9076 ± 0.01770


epoch:29 train loss:0.3253 train accuracy:0.8583: 100%|██████████| 3180/3180 [02:43<00:00, 19.50it/s]


Mean Accuracy Across Each Batch of the test set: 0.8925 ± 0.02022


epoch:29 train loss:0.9245 train accuracy:0.7833: 100%|██████████| 3180/3180 [02:41<00:00, 19.64it/s]


Mean Accuracy Across Each Batch of the test set: 0.8290 ± 0.01987


In [None]:
# NOTE(review): `cnn_50k` is built again with the same settings in the next
# cell; one of the two cells can probably be dropped.
cnn_50k = CNNArtifact(
    # --- data / architecture ---
    dataset_name='fashion',
    batch_norm=True,
    weight_init_method='kaiming_uniform',
    # --- optimisation ---
    objective=nn.CrossEntropyLoss(),
    optimizer=torch.optim.Adam,
    optimizer_hyperparams=dict(lr=1e-3, betas=(0.9, 0.999)),
    num_epochs=40,
)
# Print the total number of parameter elements of the wrapped model.
print(num_params(cnn_50k.model).item())

In [4]:
# CNN configured with Adam (lr 1e-3, betas 0.9/0.999), batch norm and
# Kaiming-uniform initialisation, for 40 epochs on the 'fashion' dataset.
cnn_50k = CNNArtifact(
    dataset_name='fashion',
    weight_init_method='kaiming_uniform',
    batch_norm=True,
    num_epochs=40,
    objective=nn.CrossEntropyLoss(),
    optimizer=torch.optim.Adam,
    optimizer_hyperparams=dict(lr=1e-3, betas=(0.9, 0.999)),
)
# Print the total number of parameter elements of the wrapped model.
print(num_params(cnn_50k.model).item())

54890


In [5]:
# Run the CNN with experiment recording enabled. Unlike the FCN runs, no extra
# `record_experiment_kwargs` are passed here.
cnn_50k.runner(record_experiment=True)

  0%|          | 0/4240 [00:00<?, ?it/s]

epoch:39 train loss:0.2551 train accuracy:0.9125: 100%|██████████| 4240/4240 [03:39<00:00, 19.30it/s]


Mean Accuracy Across Each Batch of the test set: 0.9204 ± 0.02211
