This notebook is used to illustrate the example of using PyTorch for 1D classification problem with `FashionMNIST` dataset.

# Preprocessing

In [1]:
# import libraries
import functools
import logging
import os
import random
import tempfile
from datetime import datetime

# define logger
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
os.environ["RAY_DEDUP_LOGS"] = "0"

import numpy as np
import pandas as pd
import ray
import torch
from ray import train, tune
from sklearn import metrics
from torch import nn
from torch.utils import data
from torchvision import datasets, transforms

In [2]:
# define parameters
WORKDIR = os.path.dirname(os.getcwd())

test_size = 0.15
random_state = 10
image_size = 16

multiclass = True
num_cpus = 6
loss_fn = nn.CrossEntropyLoss()
epochs = 200
num_candidates = 10
early_stopping = 10

In [3]:
# fix notebook random seed
np.random.seed(random_state)
random.seed(random_state)
torch.manual_seed(random_state)

<torch._C.Generator at 0x12eb0dc50>

In [4]:
# fetch utils tools
%run utils/utils.ipynb
# fetch network pool
%run utils/classification_pipe.ipynb

Load EarlyStopper
Load DNN1DClassifier model
Load CNN1DClassifier model
Load classifier train function


# Data Downloading

In [5]:
# transformer
compose = transforms.Compose(
    [
        transforms.ToTensor(),
    ]
)
resize = transforms.Resize((image_size, image_size))

# train and val data
train_dataset = datasets.FashionMNIST(
    root="../data",
    train=True,
    download=True,
    transform=compose,
)
train_dataset = data.TensorDataset(
    resize(train_dataset.data)
    .to(torch.float32)
    .view(train_dataset.data.shape[0], 1, -1),
    train_dataset.targets,
)
# test data
test_dataset = datasets.FashionMNIST(
    root="../data",
    train=True,
    download=False,
    transform=compose,
)
test_dataset = data.TensorDataset(
    resize(test_dataset.data).to(torch.float32).view(test_dataset.data.shape[0], 1, -1),
    test_dataset.targets,
)

print(f"The shape of the train feature: {train_dataset[:][0].shape}")
print(f"The shape of the test feature: {test_dataset[:][0].shape}")

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ../data/FashionMNIST/raw/train-images-idx3-ubyte.gz


100.0%


Extracting ../data/FashionMNIST/raw/train-images-idx3-ubyte.gz to ../data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ../data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


100.0%
1.5%

Extracting ../data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to ../data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ../data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100.0%


Extracting ../data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ../data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ../data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


100.0%


Extracting ../data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to ../data/FashionMNIST/raw

The shape of the train feature: torch.Size([60000, 1, 256])
The shape of the test feature: torch.Size([60000, 1, 256])


# Model Training and Hyperparameter Tuning

## 1D DNN Model

In [6]:
# parameter grid for DNN model
config_dnn = {
    "model_parameters": {
        "hidden_layers": tune.choice(
            [
                [512, 256],
                [128, 128],
                [128, 128, 128],
                [128, 256, 128],
                [256, 512, 512, 256],
            ]
        )
    },
    "optimizer": tune.choice(["Adam", "SGD"]),
    "lr": tune.loguniform(1e-4, 1e-1),
    "batch_size": tune.choice([256, 512, 1024, 2048]),
}

In [7]:
# restart ray kernel
ray.shutdown()
ray.init(num_cpus=num_cpus, ignore_reinit_error=True)
# define tuner
tuner_dnn = tune.Tuner(
    trainable=tune.with_resources(
        trainable=functools.partial(
            train_classifier,
            network_name="DNN1DClassifier",
            train_ray=ray.put(train_dataset),
            loss_fn=loss_fn,
            val_ray=None,
            val_size=test_size,
            last_checkpoint=None,
            class_weight=False,
            num_workers=num_cpus,
            multiclass=multiclass,
            epochs=epochs,
            early_stopping=early_stopping,
            visual_batch=2000,
            verbose=0,
            random_state=random_state,
        ),
        resources={"cpu": 2},
    ),
    param_space=config_dnn,
    tune_config=tune.tune_config.TuneConfig(
        metric="loss",
        mode="min",
        scheduler=tune.schedulers.ASHAScheduler(
            max_t=1000,
            grace_period=1000,
            reduction_factor=2,
        ),
        num_samples=num_candidates,
    ),
    run_config=ray.air.config.RunConfig(
        name=f"example_fashionmnist_1d_dnn_{datetime.now().strftime('%Y%m%d')}",
        storage_path=None,
        verbose=1,
    ),
)

2024-02-27 11:07:27,919	INFO worker.py:1715 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m
[36m(func pid=22226)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/Users/ywang/ray_results/example_fashionmnist_1d_dnn_20240227/train_classifier_037cc_00000_0_batch_size=256,lr=0.0001,hidden_layers=128_128,optimizer=SGD_2024-02-27_11-07-28/checkpoint_000000)
[36m(func pid=22228)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/Users/ywang/ray_results/example_fashionmnist_1d_dnn_20240227/train_classifier_037cc_00002_2_batch_size=256,lr=0.0003,hidden_layers=512_256,optimizer=SGD_2024-02-27_11-07-28/checkpoint_000000)
[36m(func pid=22227)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/Users/ywang/ray_results/example_fashionmnist_1d_dnn_20240227/train_classifier_037cc_00001_1_batch_size=512,lr=0.0056,hidden_layers=256_512_512_256,optimizer=SGD_2024-02-27_11-07-28/checkpoint

In [8]:
# hyperparameter tuning
results_dnn = tuner_dnn.fit()

0,1
Current time:,2024-02-27 11:37:16
Running for:,00:29:47.31
Memory:,12.3/16.0 GiB

Trial name,status,loc,batch_size,lr,model_parameters/hid den_layers,optimizer,iter,total time (s),loss,accuracy,f1
train_classifier_037cc_00000,TERMINATED,127.0.0.1:22226,256,0.000115413,"[128, 128]",SGD,200,917.831,0.519137,0.831,0.83015
train_classifier_037cc_00001,TERMINATED,127.0.0.1:22227,512,0.00562296,"[256, 512, 512, 256]",SGD,60,314.805,0.299229,0.893667,0.89399
train_classifier_037cc_00002,TERMINATED,127.0.0.1:22228,256,0.000321612,"[512, 256]",SGD,200,1063.81,0.361782,0.872667,0.872657
train_classifier_037cc_00003,TERMINATED,127.0.0.1:22227,2048,0.0281712,"[128, 256, 128]",Adam,21,88.3526,0.429009,0.855778,0.858664
train_classifier_037cc_00004,TERMINATED,127.0.0.1:22227,256,0.00687984,"[128, 256, 128]",Adam,28,139.511,0.353827,0.895333,0.895473
train_classifier_037cc_00005,TERMINATED,127.0.0.1:22227,512,0.00419414,"[128, 128]",SGD,185,907.358,0.301116,0.894222,0.894291
train_classifier_037cc_00006,TERMINATED,127.0.0.1:22226,1024,0.00131832,"[128, 128]",Adam,20,95.3436,0.3031,0.894333,0.89378
train_classifier_037cc_00007,TERMINATED,127.0.0.1:22226,512,0.000207112,"[128, 128]",Adam,44,227.556,0.32592,0.886111,0.887978
train_classifier_037cc_00008,TERMINATED,127.0.0.1:22228,2048,0.00635502,"[128, 128, 128]",SGD,189,718.248,0.312733,0.890667,0.890787
train_classifier_037cc_00009,TERMINATED,127.0.0.1:22226,512,0.000990026,"[128, 256, 128]",Adam,26,176.628,0.367203,0.888444,0.888136


2024-02-27 11:37:16,070	INFO tune.py:1042 -- Total run time: 1787.34 seconds (1787.30 seconds for the tuning loop).


In [10]:
# the best result
best_result_dnn = results_dnn.get_best_result("loss", "min")
print("Best trial config: {}".format(best_result_dnn.config))
print("Best trial final validation loss: {}".format(best_result_dnn.metrics["loss"]))
print(
    "Best trial final validation accuracy: {}".format(
        best_result_dnn.metrics["accuracy"]
    )
)
print("Best trial final validation F1-score: {}".format(best_result_dnn.metrics["f1"]))

# the model with the best parameters and weights
best_model_dnn = DNN1DClassifier(
    input_size=train_dataset[0][0].shape[-1],
    output_size=torch.max(train_dataset[:][1]).item() + 1,
    **best_result_dnn.config["model_parameters"],
)
model_state_dnn = torch.load(
    os.path.join(best_result_dnn.checkpoint.to_directory(), "checkpoint.pt")
)["model_state"]
best_model_dnn.load_state_dict(model_state_dnn)
print(f"Model structure: {best_model_dnn}")

Best trial config: {'model_parameters': {'hidden_layers': [256, 512, 512, 256]}, 'optimizer': 'SGD', 'lr': 0.005622962806072959, 'batch_size': 512}
Best trial final validation loss: 0.29922914505004883
Best trial final validation accuracy: 0.8936666666666667
Best trial final validation F1-score: 0.8939898357752099
Model structure: DNN1DClassifier(
  (net): Sequential(
    (flatten): Flatten(start_dim=1, end_dim=-1)
    (linear_0): Linear(in_features=256, out_features=256, bias=True)
    (norm_0): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu_0): ReLU()
    (linear_1): Linear(in_features=256, out_features=512, bias=True)
    (norm_1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu_1): ReLU()
    (linear_2): Linear(in_features=512, out_features=512, bias=True)
    (norm_2): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu_2): ReLU()
    (linear_3): Linear(in_

In [11]:
# validation on test data
best_model_dnn.eval()
with torch.no_grad():
    test_pred_dnn = best_model_dnn(test_dataset[:][0])
    test_loss_dnn = loss_fn(test_pred_dnn, test_dataset[:][1])

if multiclass:
    test_pred_dnn = torch.argmax(test_pred_dnn, dim=1).numpy()
else:
    test_pred_dnn = test_pred_dnn.detach().numpy().flatten() > 0.5

# metrics
accuracy_test_dnn = metrics.accuracy_score(test_dataset[:][1].numpy(), test_pred_dnn)
f1_test_dnn = metrics.f1_score(
    test_dataset[:][1].numpy(),
    test_pred_dnn,
    average="weighted" if multiclass else "binary",
)

In [12]:
# remove temporary checkpoint files
!rm -rf f"{best_result_dnn.checkpoint.to_directory()}"

## 1D CNN Model

In [13]:
# parameter grid for DNN model
config_cnn = {
    "model_parameters": {
        "cnn_outputs": tune.choice(
            [
                [4, 8, 4],
                [4, 4],
                [2, 2],
            ]
        ),
        "kernel_sizes": tune.choice([2, 3, 4]),
        "max_pools": tune.choice([2, 4, 8]),
        "linear_layers": tune.choice(
            [
                [512, 256],
                [128, 128],
                [128, 128, 128],
                [128, 256, 128],
                [256, 512, 512, 256],
            ]
        ),
    },
    "optimizer": tune.choice(["Adam", "SGD"]),
    "lr": tune.loguniform(1e-4, 1e-1),
    "batch_size": tune.choice([256, 512, 1024, 2048]),
}

In [14]:
# restart ray kernel
ray.shutdown()
ray.init(num_cpus=num_cpus, ignore_reinit_error=True)
# define tuner
tuner_cnn = tune.Tuner(
    trainable=tune.with_resources(
        trainable=functools.partial(
            train_classifier,
            network_name="CNN1DClassifier",
            train_ray=ray.put(train_dataset),
            loss_fn=loss_fn,
            val_ray=None,
            val_size=test_size,
            last_checkpoint=None,
            class_weight=False,
            num_workers=num_cpus,
            multiclass=multiclass,
            epochs=epochs,
            early_stopping=early_stopping,
            visual_batch=2000,
            verbose=0,
            random_state=random_state,
        ),
        resources={"cpu": 2},
    ),
    param_space=config_cnn,
    tune_config=tune.tune_config.TuneConfig(
        metric="loss",
        mode="min",
        scheduler=tune.schedulers.ASHAScheduler(
            max_t=1000,
            grace_period=1000,
            reduction_factor=2,
        ),
        num_samples=num_candidates,
    ),
    run_config=ray.air.config.RunConfig(
        name=f"example_fashionmnist_1d_cnn_{datetime.now().strftime('%Y%m%d')}",
        storage_path=None,
        verbose=1,
    ),
)

2024-02-27 11:43:33,522	INFO worker.py:1715 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m
[36m(func pid=29900)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/Users/ywang/ray_results/example_fashionmnist_1d_cnn_20240227/train_classifier_0e370_00002_2_batch_size=256,lr=0.0009,cnn_outputs=2_2,kernel_sizes=2,linear_layers=128_128_128,max_pools=8,optim_2024-02-27_11-43-34/checkpoint_000000)
[36m(func pid=29898)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/Users/ywang/ray_results/example_fashionmnist_1d_cnn_20240227/train_classifier_0e370_00000_0_batch_size=2048,lr=0.0001,cnn_outputs=4_8_4,kernel_sizes=4,linear_layers=128_256_128,max_pools=2,op_2024-02-27_11-43-34/checkpoint_000000)
[36m(func pid=29899)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/Users/ywang/ray_results/example_fashionmnist_1d_cnn_20240227/train_classifier_0e370_00001_1_batch_size=2048,lr=0.

In [15]:
# hyperparameter tuning
results_cnn = tuner_cnn.fit()

0,1
Current time:,2024-02-27 12:25:01
Running for:,00:41:27.55
Memory:,9.3/16.0 GiB

Trial name,status,loc,batch_size,lr,model_parameters/cnn _outputs,model_parameters/ker nel_sizes,model_parameters/lin ear_layers,model_parameters/max _pools,optimizer,iter,total time (s),loss,accuracy,f1
train_classifier_0e370_00000,TERMINATED,127.0.0.1:29898,2048,0.000138258,"[4, 8, 4]",4,"[128, 256, 128]",2,SGD,200,2480.77,1.16928,0.701556,0.691865
train_classifier_0e370_00001,TERMINATED,127.0.0.1:29899,2048,0.0372007,"[4, 8, 4]",4,"[512, 256]",2,Adam,25,405.694,0.359247,0.886667,0.886107
train_classifier_0e370_00002,TERMINATED,127.0.0.1:29900,256,0.000947403,"[2, 2]",2,"[128, 128, 128]",8,Adam,43,431.408,1.02732,0.612,0.603234
train_classifier_0e370_00003,TERMINATED,127.0.0.1:29899,256,0.00028409,"[2, 2]",4,"[128, 128, 128]",4,Adam,40,407.079,0.485527,0.831889,0.831269
train_classifier_0e370_00004,TERMINATED,127.0.0.1:29900,1024,0.0300863,"[2, 2]",4,"[128, 128]",4,SGD,97,812.997,0.42561,0.848111,0.846146
train_classifier_0e370_00005,TERMINATED,127.0.0.1:29899,256,0.000101676,"[4, 4]",4,"[512, 256]",4,SGD,200,1669.54,0.669438,0.764222,0.762196
train_classifier_0e370_00006,TERMINATED,127.0.0.1:29900,2048,0.0209648,"[4, 4]",2,"[128, 128, 128]",2,Adam,18,140.194,0.44329,0.860111,0.860684
train_classifier_0e370_00007,TERMINATED,127.0.0.1:29900,512,0.00561114,"[2, 2]",3,"[512, 256]",4,Adam,24,209.45,0.484569,0.830333,0.830974
train_classifier_0e370_00008,TERMINATED,127.0.0.1:29900,256,0.0130616,"[4, 4]",4,"[128, 128, 128]",2,Adam,19,211.54,0.323325,0.896889,0.895554
train_classifier_0e370_00009,TERMINATED,127.0.0.1:29900,1024,0.000119179,"[4, 4]",3,"[256, 512, 512, 256]",2,Adam,21,177.305,0.351546,0.882,0.882021


2024-02-27 12:25:01,780	INFO tune.py:1042 -- Total run time: 2487.57 seconds (2487.54 seconds for the tuning loop).


In [16]:
# the best result
best_result_cnn = results_cnn.get_best_result("loss", "min")
print("Best trial config: {}".format(best_result_cnn.config))
print("Best trial final validation loss: {}".format(best_result_cnn.metrics["loss"]))
print(
    "Best trial final validation accuracy: {}".format(
        best_result_cnn.metrics["accuracy"]
    )
)
print("Best trial final validation F1-score: {}".format(best_result_cnn.metrics["f1"]))

# the model with the best parameters and weights
best_model_cnn = CNN1DClassifier(
    input_shape=(train_dataset[0][0].shape[-2], train_dataset[0][0].shape[-1]),
    output_size=torch.max(train_dataset[:][1]).item() + 1,
    **best_result_cnn.config["model_parameters"],
)
model_state_cnn = torch.load(
    os.path.join(best_result_cnn.checkpoint.to_directory(), "checkpoint.pt")
)["model_state"]
best_model_cnn.load_state_dict(model_state_cnn)
print(f"Model structure: {best_model_cnn}")

Best trial config: {'model_parameters': {'cnn_outputs': [4, 4], 'kernel_sizes': 4, 'max_pools': 2, 'linear_layers': [128, 128, 128]}, 'optimizer': 'Adam', 'lr': 0.013061553676457685, 'batch_size': 256}
Best trial final validation loss: 0.32332488894462585
Best trial final validation accuracy: 0.8968888888888888
Best trial final validation F1-score: 0.8955535172934407
Model structure: CNN1DClassifier(
  (net): Sequential(
    (cnn_0): Conv1d(1, 4, kernel_size=(4,), stride=(1,))
    (norm_0): BatchNorm1d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu_0): ReLU()
    (maxpool_0): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (cnn_1): Conv1d(4, 4, kernel_size=(4,), stride=(1,))
    (norm_1): BatchNorm1d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu_1): ReLU()
    (maxpool_1): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (fully_connection): Flatten(start_dim=1, end_

In [17]:
# validation on test data
best_model_cnn.eval()
with torch.no_grad():
    test_pred_cnn = best_model_cnn(test_dataset[:][0])
    test_loss_cnn = loss_fn(test_pred_cnn, test_dataset[:][1])

if multiclass:
    test_pred_cnn = torch.argmax(test_pred_cnn, dim=1).numpy()
else:
    test_pred_cnn = test_pred_cnn.detach().numpy().flatten() > 0.5

# metrics
accuracy_test_cnn = metrics.accuracy_score(test_dataset[:][1].numpy(), test_pred_cnn)
f1_test_cnn = metrics.f1_score(
    test_dataset[:][1].numpy(),
    test_pred_cnn,
    average="weighted" if multiclass else "binary",
)

In [18]:
# remove temporary checkpoint files
!rm -rf f"{best_result_cnn.checkpoint.to_directory()}"

## Model Comparison

In [19]:
pd.DataFrame(
    {
        "Model": ["DNN-1D", "CNN-1D"],
        "Loss": [test_loss_dnn, test_loss_cnn],
        "Accuracy": [accuracy_test_dnn, accuracy_test_cnn],
        "F1-Score": [f1_test_dnn, f1_test_cnn],
    }
)

Unnamed: 0,Model,Loss,Accuracy,F1-Score
0,DNN-1D,tensor(0.3261),0.8863,0.886429
1,CNN-1D,tensor(0.3686),0.8803,0.87831
