In [1]:
import os
from pathlib import Path
import sys

sys.path.insert(0, str(Path(os.getcwd()).parent.parent))

import torch
from torch import nn
from torch.nn import functional as F

import numpy as np
import matplotlib.pyplot as plt

from src.data_analysis.model_training_helpers import ModelTrainingArtifact

In [2]:
def num_params(model):
  """Count the total number of scalar parameters in a model.

  Args:
    model: Object exposing ``parameters()`` that yields tensors supporting
      ``numel()`` (e.g. a ``torch.nn.Module``).

  Returns:
    ``np.int64`` holding the summed element count of every parameter tensor.
    A NumPy scalar (rather than a plain ``int``) is returned so existing
    callers can keep calling ``.item()`` on the result.
  """
  # Tensor.numel() is always a Python int, so the total stays integral even
  # for a model with no parameters or with 0-dim (scalar) parameters; the
  # np.prod(shape) / np.sum(list) formulation yields floats in those cases.
  return np.int64(sum(p.numel() for p in model.parameters()))


# ~100k-parameter configuration (hidden_layer_dims=[128, 64]).
model_100k = ModelTrainingArtifact(
    # --- data ---
    dataset_name='fashion',
    # --- architecture ---
    hidden_layer_dims=[128, 64],
    activation_function=F.relu,
    batch_norm=True,
    weight_init_method='kaiming_uniform',
    # NOTE(review): presumably one dropout rate per hidden layer; 0.5 means
    # dropout is active, not disabled -- confirm this is the intended setting.
    dropout_rate_layers=[0.5, 0.5],
    # --- optimisation ---
    objective=nn.CrossEntropyLoss(),
    optimizer=torch.optim.Adam,
    optimizer_hyperparams={'lr': 0.001, 'betas': (0.9, 0.999)},
    num_epochs=3,
)

# Print the parameter count, then train and evaluate
# (record_experiment=False is passed to test()).
print(num_params(model_100k.model).item())
model_100k.train()
model_100k.test(record_experiment=False)

100%|██████████| 26.4M/26.4M [00:16<00:00, 1.64MB/s]
100%|██████████| 29.5k/29.5k [00:00<00:00, 161kB/s]
100%|██████████| 4.42M/4.42M [00:01<00:00, 3.21MB/s]
100%|██████████| 5.15k/5.15k [00:00<00:00, 1.55MB/s]


109770


epoch:2 train loss:0.5311 train accuracy:0.8000: 100%|██████████| 318/318 [00:09<00:00, 33.51it/s]


Mean Accuracy Across Each Batch of the test set: 0.8534 ± 0.02185


In [4]:
# ~200k-parameter configuration (hidden_layer_dims=[220, 128]).
model_200k = ModelTrainingArtifact(
    # --- data ---
    dataset_name='fashion',
    # --- architecture ---
    hidden_layer_dims=[220, 128],
    activation_function=F.relu,
    batch_norm=True,
    weight_init_method='kaiming_uniform',
    # NOTE(review): presumably one dropout rate per hidden layer; 0.5 means
    # dropout is active, not disabled -- confirm this is the intended setting.
    dropout_rate_layers=[0.5, 0.5],
    # --- optimisation ---
    objective=nn.CrossEntropyLoss(),
    optimizer=torch.optim.Adam,
    optimizer_hyperparams={'lr': 0.001, 'betas': (0.9, 0.999)},
    num_epochs=3,
)

# Print the parameter count, then train and evaluate
# (record_experiment=False is passed to test()).
print(num_params(model_200k.model).item())

model_200k.train()
model_200k.test(record_experiment=False)

202974


epoch:2 train loss:0.3807 train accuracy:0.8542: 100%|██████████| 318/318 [00:09<00:00, 34.08it/s]]


Mean Accuracy Across Each Batch of the test set: 0.8625 ± 0.02709


In [5]:
# ~50k-parameter configuration (hidden_layer_dims=[64, 32]).
model_50k = ModelTrainingArtifact(
    # --- data ---
    dataset_name='fashion',
    # --- architecture ---
    hidden_layer_dims=[64, 32],
    activation_function=F.relu,
    batch_norm=True,
    weight_init_method='kaiming_uniform',
    # NOTE(review): presumably one dropout rate per hidden layer; 0.5 means
    # dropout is active, not disabled -- confirm this is the intended setting.
    dropout_rate_layers=[0.5, 0.5],
    # --- optimisation ---
    objective=nn.CrossEntropyLoss(),
    optimizer=torch.optim.Adam,
    optimizer_hyperparams={'lr': 0.001, 'betas': (0.9, 0.999)},
    num_epochs=3,
)

# Print the parameter count, then train and evaluate
# (record_experiment=False is passed to test()).
print(num_params(model_50k.model).item())

model_50k.train()
model_50k.test(record_experiment=False)

52842


epoch:2 train loss:0.4925 train accuracy:0.8250: 100%|██████████| 318/318 [00:09<00:00, 34.23it/s]


Mean Accuracy Across Each Batch of the test set: 0.8347 ± 0.02587


In [6]:
# TODO: Determine where and what you want to record in the model experiments this time!