In [1]:
import os
import sys
from pathlib import Path

sys.path.insert(0, str(Path(os.getcwd()).parent.parent))

import numpy as np
import matplotlib.pyplot as plt

import torch
from torch import nn
from torch.nn import functional as F

from src.data_analysis.model_training import FCNArtifact, CNNArtifact

In [2]:
def num_params(model):
    """Return the total number of scalar elements across all parameters of ``model``.

    Args:
        model: Any object whose ``parameters()`` yields tensors exposing
            ``numel()`` (e.g. a ``torch.nn.Module``).

    Returns:
        numpy.int64: The summed element count. It is always a NumPy integer,
        so existing callers that chain ``.item()`` receive a Python ``int``.
    """
    # numel() stays an exact integer for 0-dim (scalar) parameters, whereas
    # np.prod(()) evaluates to the float 1.0 and would turn the whole total
    # into a float. The explicit dtype also keeps a parameter-free model at
    # integer 0 instead of the float 0.0 that np.sum([]) produces.
    return np.sum([p.numel() for p in model.parameters()], dtype=np.int64)

In [3]:
# FCNArtifact configuration sized at roughly 100k parameters
# (hidden_layer_dims=[128, 64]). The print at the end reports the exact count
# via num_params; the recorded output for this cell is 109770.
model_100k = FCNArtifact(
    optimizer=torch.optim.Adam,
    objective=nn.CrossEntropyLoss(),
    num_epochs=50,
    hidden_layer_dims=[128, 64],
    weight_init_method='kaiming_uniform',
    batch_norm=True,
    activation_function=F.relu,
    dropout_rate_layers=[.5, .5],  # rates are 0.5, so dropout is presumably active — TODO confirm against FCNArtifact
    optimizer_hyperparams={'lr': 0.001, 'betas': (0.9, 0.999)},
    dataset_name='fashion',
)
print(num_params(model_100k.model).item())

109770


In [4]:
# FCNArtifact configuration sized at roughly 200k parameters
# (hidden_layer_dims=[220, 128]); all other settings match the other FCN cells.
# The recorded output for this cell is 202974.
model_200k = FCNArtifact(
    optimizer=torch.optim.Adam,
    objective=nn.CrossEntropyLoss(),
    num_epochs=50,
    hidden_layer_dims=[220, 128],
    weight_init_method='kaiming_uniform',
    batch_norm=True,
    activation_function=F.relu,
    dropout_rate_layers=[.5, .5],  # rates are 0.5, so dropout is presumably active — TODO confirm against FCNArtifact
    optimizer_hyperparams={'lr': 0.001, 'betas': (0.9, 0.999)},
    dataset_name='fashion',
)
print(num_params(model_200k.model).item())

202974


In [5]:
# FCNArtifact configuration sized at roughly 50k parameters
# (hidden_layer_dims=[64, 32]); all other settings match the other FCN cells.
# The recorded output for this cell is 52842.
model_50k = FCNArtifact(
    optimizer=torch.optim.Adam,
    objective=nn.CrossEntropyLoss(),
    num_epochs=50,
    hidden_layer_dims=[64, 32],
    weight_init_method='kaiming_uniform',
    batch_norm=True,
    activation_function=F.relu,
    dropout_rate_layers=[.5, .5],  # rates are 0.5, so dropout is presumably active — TODO confirm against FCNArtifact
    optimizer_hyperparams={'lr': 0.001, 'betas': (0.9, 0.999)},
    dataset_name='fashion',
)
print(num_params(model_50k.model).item())

52842


In [6]:
# Run the three FCN configurations, smallest first, with experiment recording
# enabled. to_compare_with_cnn=True is forwarded via record_experiment_kwargs;
# presumably it tags these runs for the FCN-vs-CNN comparison — TODO confirm
# in src.data_analysis.model_training.
# In the recorded output each run took about 2.5 minutes.
model_50k.runner(record_experiment=True, record_experiment_kwargs={"to_compare_with_cnn": True})
model_100k.runner(record_experiment=True, record_experiment_kwargs={"to_compare_with_cnn": True})
model_200k.runner(record_experiment=True, record_experiment_kwargs={"to_compare_with_cnn": True})

epoch:49 train loss:0.2560 train accuracy:0.9083: 100%|██████████| 5300/5300 [02:30<00:00, 35.14it/s]


Mean Accuracy Across Each Batch of the test set: 0.8802 ± 0.01578


epoch:49 train loss:0.2583 train accuracy:0.9125: 100%|██████████| 5300/5300 [02:32<00:00, 34.68it/s]


Mean Accuracy Across Each Batch of the test set: 0.8889 ± 0.02109


epoch:49 train loss:0.1479 train accuracy:0.9250: 100%|██████████| 5300/5300 [02:32<00:00, 34.69it/s]


Mean Accuracy Across Each Batch of the test set: 0.8965 ± 0.01884


In [6]:
# TODO: Determine where and what you want to record in the model experiments this time!

## Train CNNs

In [5]:
# Default sweep grid: every epoch count is paired with every learning rate.
num_epochs_list = [50, 75]
learning_rates = [0.0001, 0.0003, 0.0005, 0.0009, 0.001]

def run_all_experiments(epoch_counts=None, lrs=None):
    """Run and record one CNNArtifact experiment per (epoch count, learning rate) pair.

    Args:
        epoch_counts: Iterable of ``num_epochs`` values to sweep. Defaults to
            the module-level ``num_epochs_list`` (looked up at call time).
        lrs: Iterable of Adam learning rates to sweep. Defaults to the
            module-level ``learning_rates`` (looked up at call time).

    Returns:
        None. Each model is built, run with ``record_experiment=True`` and
        then discarded.
    """
    from itertools import product

    if epoch_counts is None:
        epoch_counts = num_epochs_list
    if lrs is None:
        lrs = learning_rates

    # product() varies the learning rate fastest, which is the same visiting
    # order as an outer loop over epochs with an inner loop over rates.
    for num_epochs, lr in product(epoch_counts, lrs):
        optimizer_hyperparams = {'lr': lr, 'betas': (0.9, 0.999)}
        model = CNNArtifact(
            optimizer=torch.optim.Adam,
            objective=nn.CrossEntropyLoss(),
            num_epochs=num_epochs,
            weight_init_method="xavier_uniform",
            batch_norm=True,
            optimizer_hyperparams=optimizer_hyperparams,
            dataset_name='fashion',
        )
        model.runner(record_experiment=True)


In [6]:
# Full sweep: 2 epoch settings x 5 learning rates = 10 training runs.
# In the recorded output each run took roughly 6-9 minutes, so expect this
# cell to run for well over an hour.
run_all_experiments()

epoch:49 train loss:0.1190 train accuracy:0.9542: 100%|██████████| 5300/5300 [06:39<00:00, 13.25it/s]


Mean Accuracy Across Each Batch of the test set: 0.9176 ± 0.02218


epoch:49 train loss:0.0739 train accuracy:0.9750: 100%|██████████| 5300/5300 [06:39<00:00, 13.28it/s]


Mean Accuracy Across Each Batch of the test set: 0.9183 ± 0.01717


epoch:49 train loss:0.0781 train accuracy:0.9667: 100%|██████████| 5300/5300 [06:56<00:00, 12.72it/s]


Mean Accuracy Across Each Batch of the test set: 0.9218 ± 0.01802


epoch:49 train loss:0.0300 train accuracy:0.9875: 100%|██████████| 5300/5300 [06:28<00:00, 13.65it/s]


Mean Accuracy Across Each Batch of the test set: 0.9229 ± 0.02101


epoch:49 train loss:0.0467 train accuracy:0.9792: 100%|██████████| 5300/5300 [06:15<00:00, 14.11it/s]


Mean Accuracy Across Each Batch of the test set: 0.9236 ± 0.01666


epoch:74 train loss:0.0872 train accuracy:0.9667: 100%|██████████| 7950/7950 [09:13<00:00, 14.36it/s]


Mean Accuracy Across Each Batch of the test set: 0.9200 ± 0.01671


epoch:74 train loss:0.0248 train accuracy:0.9875: 100%|██████████| 7950/7950 [09:10<00:00, 14.44it/s]


Mean Accuracy Across Each Batch of the test set: 0.9196 ± 0.01659


epoch:74 train loss:0.0506 train accuracy:0.9833: 100%|██████████| 7950/7950 [09:06<00:00, 14.55it/s]


Mean Accuracy Across Each Batch of the test set: 0.9219 ± 0.01947


epoch:74 train loss:0.0528 train accuracy:0.9750: 100%|██████████| 7950/7950 [09:08<00:00, 14.49it/s]


Mean Accuracy Across Each Batch of the test set: 0.9202 ± 0.01921


epoch:74 train loss:0.0618 train accuracy:0.9708: 100%|██████████| 7950/7950 [09:07<00:00, 14.53it/s]


Mean Accuracy Across Each Batch of the test set: 0.9238 ± 0.01965


In [None]:
# CNNArtifact configuration sized at roughly 10k parameters
# (conv_out_channels_list=[6, 10, 16]). batch_norm is not passed here, so
# whatever default CNNArtifact defines applies — TODO confirm.
# The recorded output for this cell is 9980.
cnn_10k = CNNArtifact(
    optimizer=torch.optim.Adam,
    objective=nn.CrossEntropyLoss(),
    num_epochs=50,
    weight_init_method='xavier_uniform',
    conv_out_channels_list=[6, 10, 16],
    optimizer_hyperparams={'lr': 0.001, 'betas': (0.9, 0.999)},
    dataset_name='fashion',
)
print(num_params(cnn_10k.model).item())

9980


In [34]:
# CNNArtifact configuration sized at roughly 20k parameters
# (conv_out_channels_list=[8, 15, 30]). batch_norm is not passed here, so
# whatever default CNNArtifact defines applies — TODO confirm.
# The recorded output for this cell is 20071.
cnn_20k = CNNArtifact(
    optimizer=torch.optim.Adam,
    objective=nn.CrossEntropyLoss(),
    num_epochs=50,
    weight_init_method='xavier_uniform',
    conv_out_channels_list=[8, 15, 30],
    optimizer_hyperparams={'lr': 0.001, 'betas': (0.9, 0.999)},
    dataset_name='fashion',
)
print(num_params(cnn_20k.model).item())

20071


In [37]:
# CNNArtifact configuration sized at roughly 50k parameters
# (conv_out_channels_list=[16, 30, 60]). batch_norm is not passed here, so
# whatever default CNNArtifact defines applies — TODO confirm.
# The recorded output for this cell is 50392.
# NOTE(review): the name cnn_50k is rebound by later cells to a differently
# configured model, so run this cell and its runner cell before those.
cnn_50k = CNNArtifact(
    optimizer=torch.optim.Adam,
    objective=nn.CrossEntropyLoss(),
    num_epochs=50,
    weight_init_method='xavier_uniform',
    conv_out_channels_list=[16, 30, 60],
    optimizer_hyperparams={'lr': 0.001, 'betas': (0.9, 0.999)},
    dataset_name='fashion',
)
print(num_params(cnn_50k.model).item())

50392


In [38]:
# Run the three size-matched CNN configurations, smallest first, with
# experiment recording enabled.
# In the recorded output the runs took about 4, 4.25 and 5.3 minutes.
cnn_10k.runner(record_experiment=True)
cnn_20k.runner(record_experiment=True)
cnn_50k.runner(record_experiment=True)

epoch:49 train loss:0.3208 train accuracy:0.8708: 100%|██████████| 5300/5300 [04:02<00:00, 21.82it/s]


Mean Accuracy Across Each Batch of the test set: 0.9002 ± 0.02288


epoch:49 train loss:0.1665 train accuracy:0.9375: 100%|██████████| 5300/5300 [04:15<00:00, 20.73it/s]


Mean Accuracy Across Each Batch of the test set: 0.9091 ± 0.01510


epoch:49 train loss:0.1111 train accuracy:0.9625: 100%|██████████| 5300/5300 [05:19<00:00, 16.59it/s]


Mean Accuracy Across Each Batch of the test set: 0.9152 ± 0.01882


In [None]:
# NOTE(review): this cell has no recorded execution and is repeated verbatim
# by the cell that follows it; one of the two is redundant.
# It rebinds cnn_50k, which an earlier cell assigned to the
# conv_out_channels_list=[16, 30, 60] model.
# conv_out_channels_list is not passed, so CNNArtifact's default applies.
cnn_50k = CNNArtifact(
    optimizer=torch.optim.Adam,
    objective=nn.CrossEntropyLoss(),
    num_epochs=40,
    weight_init_method='kaiming_uniform',
    batch_norm=True,
    optimizer_hyperparams={'lr': 0.001, 'betas': (0.9, 0.999)},
    dataset_name='fashion',
)
print(num_params(cnn_50k.model).item())

In [4]:
# CNNArtifact built without conv_out_channels_list, so CNNArtifact's default
# applies; trained for 40 epochs with kaiming_uniform init and batch_norm=True.
# The recorded output for this cell is 54890.
# NOTE(review): this rebinds cnn_50k, which an earlier cell assigned to the
# conv_out_channels_list=[16, 30, 60] model (50392 parameters).
cnn_50k = CNNArtifact(
    optimizer=torch.optim.Adam,
    objective=nn.CrossEntropyLoss(),
    num_epochs=40,
    weight_init_method='kaiming_uniform',
    batch_norm=True,
    optimizer_hyperparams={'lr': 0.001, 'betas': (0.9, 0.999)},
    dataset_name='fashion',
)
print(num_params(cnn_50k.model).item())

54890


In [5]:
# Runs whichever model cnn_50k is currently bound to (the name is assigned in
# more than one cell) with experiment recording enabled.
# The recorded run below took about 3m39s.
cnn_50k.runner(record_experiment=True)

  0%|          | 0/4240 [00:00<?, ?it/s]

epoch:39 train loss:0.2551 train accuracy:0.9125: 100%|██████████| 4240/4240 [03:39<00:00, 19.30it/s]


Mean Accuracy Across Each Batch of the test set: 0.9204 ± 0.02211
