This notebook is used to illustrate the example of using PyTorch for 1D classification problem with `FashionMNIST` dataset.

# Preprocessing

In [1]:
# import libraries
import functools
import logging
import os
import random
import tempfile
from datetime import datetime

# define logger
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
os.environ["RAY_DEDUP_LOGS"] = "0"

import numpy as np
import pandas as pd
import ray
import torch
from ray import train, tune
from sklearn import metrics
from torch import nn
from torch.utils import data
from torchvision import datasets, transforms

In [2]:
# define parameters
test_size = 0.15
random_state = 10
image_size = 16

multiclass = True
num_cpus = 6
loss_fn = nn.CrossEntropyLoss()
epochs = 200
num_candidates = 10
early_stopping = 10

In [3]:
# fix notebook random seed
np.random.seed(random_state)
random.seed(random_state)
torch.manual_seed(random_state)

<torch._C.Generator at 0x12df75c50>

In [4]:
# fetch utils tools
%run utils/utils.ipynb
# fetch network pool
%run utils/model_pipe.ipynb

Load EarlyStopper
Load DNN1DNet model
Load CNN1DNet model
Load classifier train function


# Data Downloading

In [5]:
# transformer
compose = transforms.Compose(
    [
        transforms.ToTensor(),
    ]
)
resize = transforms.Resize((image_size, image_size))

# train and val data
train_dataset = datasets.FashionMNIST(
    root="../data",
    train=True,
    download=True,
    transform=compose,
)
train_dataset = data.TensorDataset(
    resize(train_dataset.data)
    .to(torch.float32)
    .view(train_dataset.data.shape[0], 1, -1),
    train_dataset.targets,
)
# test data
test_dataset = datasets.FashionMNIST(
    root="../data",
    train=True,
    download=False,
    transform=compose,
)
test_dataset = data.TensorDataset(
    resize(test_dataset.data).to(torch.float32).view(test_dataset.data.shape[0], 1, -1),
    test_dataset.targets,
)

print(f"The shape of the train feature: {train_dataset[:][0].shape}")
print(f"The shape of the test feature: {test_dataset[:][0].shape}")

The shape of the train feature: torch.Size([60000, 1, 256])
The shape of the test feature: torch.Size([60000, 1, 256])


# Model Training and Hyperparameter Tuning

## 1D DNN Model

In [6]:
# parameter grid for DNN model
config_dnn = {
    "model_parameters": {
        "hidden_layers": tune.choice(
            [
                [512, 256],
                [128, 128],
                [128, 128, 128],
                [128, 256, 128],
                [256, 512, 512, 256],
            ]
        )
    },
    "optimizer": tune.choice(["Adam", "SGD"]),
    "lr": tune.loguniform(1e-4, 1e-1),
    "batch_size": tune.choice([256, 512, 1024, 2048]),
}

In [7]:
# restart ray kernel
ray.shutdown()
ray.init(num_cpus=num_cpus, ignore_reinit_error=True)
# define tuner
tuner_dnn = tune.Tuner(
    trainable=tune.with_resources(
        trainable=functools.partial(
            train_classifier,
            network_name="DNN1DNet",
            train_ray=ray.put(train_dataset),
            loss_fn=loss_fn,
            val_ray=None,
            val_size=test_size,
            last_checkpoint=None,
            class_weight=False,
            num_workers=num_cpus,
            multiclass=multiclass,
            epochs=epochs,
            early_stopping=early_stopping,
            visual_batch=2000,
            verbose=0,
            random_state=random_state,
        ),
        resources={"cpu": 2},
    ),
    param_space=config_dnn,
    tune_config=tune.tune_config.TuneConfig(
        metric="loss",
        mode="min",
        scheduler=tune.schedulers.ASHAScheduler(
            max_t=1000,
            grace_period=1000,
            reduction_factor=2,
        ),
        num_samples=num_candidates,
    ),
    run_config=ray.air.config.RunConfig(
        name=f"example_fashionmnist_1d_dnn_{datetime.now().strftime('%Y%m%d')}",
        storage_path=None,
        verbose=1,
    ),
)

2024-03-15 15:36:58,773	INFO worker.py:1715 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m
[36m(func pid=67182)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/Users/ywang/ray_results/example_fashionmnist_1d_dnn_20240315/train_classifier_7cb61_00000_0_batch_size=256,lr=0.0001,hidden_layers=128_128,optimizer=SGD_2024-03-15_15-37-02/checkpoint_000000)
[36m(func pid=67184)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/Users/ywang/ray_results/example_fashionmnist_1d_dnn_20240315/train_classifier_7cb61_00002_2_batch_size=256,lr=0.0003,hidden_layers=512_256,optimizer=SGD_2024-03-15_15-37-02/checkpoint_000000)
[36m(func pid=67183)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/Users/ywang/ray_results/example_fashionmnist_1d_dnn_20240315/train_classifier_7cb61_00001_1_batch_size=512,lr=0.0056,hidden_layers=256_512_512_256,optimizer=SGD_2024-03-15_15-37-02/checkpoint

In [8]:
# hyperparameter tuning
results_dnn = tuner_dnn.fit()

0,1
Current time:,2024-03-15 16:07:30
Running for:,00:30:27.85
Memory:,10.4/16.0 GiB

Trial name,status,loc,batch_size,lr,model_parameters/hid den_layers,optimizer,iter,total time (s),loss,accuracy,f1
train_classifier_7cb61_00000,TERMINATED,127.0.0.1:67182,256,0.000115413,"[128, 128]",SGD,200,1067.96,0.540836,0.821778,0.820402
train_classifier_7cb61_00001,TERMINATED,127.0.0.1:67183,512,0.00562296,"[256, 512, 512, 256]",SGD,67,433.616,0.293038,0.898222,0.898002
train_classifier_7cb61_00002,TERMINATED,127.0.0.1:67184,256,0.000321612,"[512, 256]",SGD,200,1217.68,0.357406,0.879111,0.878665
train_classifier_7cb61_00003,TERMINATED,127.0.0.1:67183,2048,0.0281712,"[128, 256, 128]",Adam,21,110.118,0.328512,0.889,0.888325
train_classifier_7cb61_00004,TERMINATED,127.0.0.1:67183,256,0.00687984,"[128, 256, 128]",Adam,24,167.084,0.340751,0.895,0.893909
train_classifier_7cb61_00005,TERMINATED,127.0.0.1:67183,512,0.00419414,"[128, 128]",SGD,172,778.219,0.304289,0.891222,0.891955
train_classifier_7cb61_00006,TERMINATED,127.0.0.1:67182,1024,0.00131832,"[128, 128]",Adam,25,124.976,0.347194,0.882,0.883022
train_classifier_7cb61_00007,TERMINATED,127.0.0.1:67182,512,0.000207112,"[128, 128]",Adam,42,193.96,0.290101,0.897667,0.897152
train_classifier_7cb61_00008,TERMINATED,127.0.0.1:67184,2048,0.00635502,"[128, 128, 128]",SGD,200,604.717,0.311026,0.893222,0.89305
train_classifier_7cb61_00009,TERMINATED,127.0.0.1:67182,512,0.000990026,"[128, 256, 128]",Adam,25,114.974,0.356021,0.888,0.887323


2024-03-15 16:07:30,185	INFO tune.py:1042 -- Total run time: 1827.88 seconds (1827.84 seconds for the tuning loop).


In [10]:
# the best result
best_result_dnn = results_dnn.get_best_result("loss", "min")
print("Best trial config: {}".format(best_result_dnn.config))
print("Best trial final validation loss: {}".format(best_result_dnn.metrics["loss"]))
print(
    "Best trial final validation accuracy: {}".format(
        best_result_dnn.metrics["accuracy"]
    )
)
print("Best trial final validation F1-score: {}".format(best_result_dnn.metrics["f1"]))

# the model with the best parameters and weights
best_model_dnn = DNN1DNet(
    usage="classification",
    input_size=train_dataset[0][0].shape[-1],
    output_size=torch.max(train_dataset[:][1]).item() + 1,
    **best_result_dnn.config["model_parameters"],
)
model_state_dnn = torch.load(
    os.path.join(best_result_dnn.checkpoint.to_directory(), "checkpoint.pt")
)["model_state"]
best_model_dnn.load_state_dict(model_state_dnn)
print(f"Model structure: {best_model_dnn}")

Best trial config: {'model_parameters': {'hidden_layers': [128, 128]}, 'optimizer': 'Adam', 'lr': 0.00020711211348337457, 'batch_size': 512}
Best trial final validation loss: 0.2901012897491455
Best trial final validation accuracy: 0.8976666666666666
Best trial final validation F1-score: 0.8971520397739045
Model structure: DNN1DNet(
  (net): Sequential(
    (flatten): Flatten(start_dim=1, end_dim=-1)
    (linear_0): Linear(in_features=256, out_features=128, bias=True)
    (norm_0): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu_0): ReLU()
    (linear_1): Linear(in_features=128, out_features=128, bias=True)
    (norm_1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu_1): ReLU()
    (linear_2): Linear(in_features=128, out_features=10, bias=True)
  )
)


In [11]:
# validation on test data
best_model_dnn.eval()
with torch.no_grad():
    test_pred_dnn = best_model_dnn(test_dataset[:][0])
    test_loss_dnn = loss_fn(test_pred_dnn, test_dataset[:][1])

if multiclass:
    test_pred_dnn = torch.argmax(test_pred_dnn, dim=1).numpy()
else:
    test_pred_dnn = test_pred_dnn.detach().numpy().flatten() > 0.5

# metrics
accuracy_test_dnn = metrics.accuracy_score(test_dataset[:][1].numpy(), test_pred_dnn)
f1_test_dnn = metrics.f1_score(
    test_dataset[:][1].numpy(),
    test_pred_dnn,
    average="weighted" if multiclass else "binary",
)

In [12]:
# remove temporary checkpoint files
!rm -rf f"{best_result_dnn.checkpoint.to_directory()}"

## 1D CNN Model

In [13]:
# parameter grid for DNN model
config_cnn = {
    "model_parameters": {
        "cnn_outputs": tune.choice(
            [
                [4, 8, 4],
                [4, 4],
                [2, 2],
            ]
        ),
        "kernel_sizes": tune.choice([2, 3, 4]),
        "max_pools": tune.choice([2, 4, 8]),
        "linear_layers": tune.choice(
            [
                [512, 256],
                [128, 128],
                [128, 128, 128],
                [128, 256, 128],
                [256, 512, 512, 256],
            ]
        ),
    },
    "optimizer": tune.choice(["Adam", "SGD"]),
    "lr": tune.loguniform(1e-4, 1e-1),
    "batch_size": tune.choice([256, 512, 1024, 2048]),
}

In [14]:
# restart ray kernel
ray.shutdown()
ray.init(num_cpus=num_cpus, ignore_reinit_error=True)
# define tuner
tuner_cnn = tune.Tuner(
    trainable=tune.with_resources(
        trainable=functools.partial(
            train_classifier,
            network_name="CNN1DNet",
            train_ray=ray.put(train_dataset),
            loss_fn=loss_fn,
            val_ray=None,
            val_size=test_size,
            last_checkpoint=None,
            class_weight=False,
            num_workers=num_cpus,
            multiclass=multiclass,
            epochs=epochs,
            early_stopping=early_stopping,
            visual_batch=2000,
            verbose=0,
            random_state=random_state,
        ),
        resources={"cpu": 2},
    ),
    param_space=config_cnn,
    tune_config=tune.tune_config.TuneConfig(
        metric="loss",
        mode="min",
        scheduler=tune.schedulers.ASHAScheduler(
            max_t=1000,
            grace_period=1000,
            reduction_factor=2,
        ),
        num_samples=num_candidates,
    ),
    run_config=ray.air.config.RunConfig(
        name=f"example_fashionmnist_1d_cnn_{datetime.now().strftime('%Y%m%d')}",
        storage_path=None,
        verbose=1,
    ),
)

2024-03-15 16:08:22,672	INFO worker.py:1715 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m
[36m(func pid=74472)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/Users/ywang/ray_results/example_fashionmnist_1d_cnn_20240315/train_classifier_ddd40_00002_2_batch_size=256,lr=0.0009,cnn_outputs=2_2,kernel_sizes=2,linear_layers=128_128_128,max_pools=8,optim_2024-03-15_16-08-23/checkpoint_000000)
[36m(func pid=74470)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/Users/ywang/ray_results/example_fashionmnist_1d_cnn_20240315/train_classifier_ddd40_00000_0_batch_size=2048,lr=0.0001,cnn_outputs=4_8_4,kernel_sizes=4,linear_layers=128_256_128,max_pools=2,op_2024-03-15_16-08-23/checkpoint_000000)
[36m(func pid=74471)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/Users/ywang/ray_results/example_fashionmnist_1d_cnn_20240315/train_classifier_ddd40_00001_1_batch_size=2048,lr=0.

In [15]:
# hyperparameter tuning
results_cnn = tuner_cnn.fit()

0,1
Current time:,2024-03-15 18:22:12
Running for:,02:13:49.69
Memory:,11.7/16.0 GiB

Trial name,status,loc,batch_size,lr,model_parameters/cnn _outputs,model_parameters/ker nel_sizes,model_parameters/lin ear_layers,model_parameters/max _pools,optimizer,iter,total time (s),loss,accuracy,f1
train_classifier_ddd40_00000,TERMINATED,127.0.0.1:74470,2048,0.000138258,"[4, 8, 4]",4,"[128, 256, 128]",2,SGD,200,8025.97,1.23145,0.667111,0.653348
train_classifier_ddd40_00001,TERMINATED,127.0.0.1:74471,2048,0.0372007,"[4, 8, 4]",4,"[512, 256]",2,Adam,24,276.267,0.416302,0.858222,0.855187
train_classifier_ddd40_00002,TERMINATED,127.0.0.1:74472,256,0.000947403,"[2, 2]",2,"[128, 128, 128]",8,Adam,48,291.63,0.946958,0.644222,0.638314
train_classifier_ddd40_00003,TERMINATED,127.0.0.1:74471,256,0.00028409,"[2, 2]",4,"[128, 128, 128]",4,Adam,45,302.006,0.440523,0.848444,0.847903
train_classifier_ddd40_00004,TERMINATED,127.0.0.1:74472,1024,0.0300863,"[2, 2]",4,"[128, 128]",4,SGD,116,646.796,0.40779,0.848222,0.847077
train_classifier_ddd40_00005,TERMINATED,127.0.0.1:74471,256,0.000101676,"[4, 4]",4,"[512, 256]",4,SGD,200,7400.76,0.587314,0.799333,0.796865
train_classifier_ddd40_00006,TERMINATED,127.0.0.1:74472,2048,0.0209648,"[4, 4]",2,"[128, 128, 128]",2,Adam,18,1041.49,0.376909,0.875889,0.876323
train_classifier_ddd40_00007,TERMINATED,127.0.0.1:74472,512,0.00561114,"[2, 2]",3,"[512, 256]",4,Adam,21,1043.34,0.613091,0.789667,0.78963
train_classifier_ddd40_00008,TERMINATED,127.0.0.1:74472,256,0.0130616,"[4, 4]",4,"[128, 128, 128]",2,Adam,18,1631.74,0.304354,0.897,0.896807
train_classifier_ddd40_00009,TERMINATED,127.0.0.1:74472,1024,0.000119179,"[4, 4]",3,"[256, 512, 512, 256]",2,Adam,22,1093.27,0.424951,0.866667,0.866584


2024-03-15 18:22:12,935	INFO tune.py:1042 -- Total run time: 8029.71 seconds (8029.68 seconds for the tuning loop).


In [16]:
# the best result
best_result_cnn = results_cnn.get_best_result("loss", "min")
print("Best trial config: {}".format(best_result_cnn.config))
print("Best trial final validation loss: {}".format(best_result_cnn.metrics["loss"]))
print(
    "Best trial final validation accuracy: {}".format(
        best_result_cnn.metrics["accuracy"]
    )
)
print("Best trial final validation F1-score: {}".format(best_result_cnn.metrics["f1"]))

# the model with the best parameters and weights
best_model_cnn = CNN1DNet(
    usage="classification",
    input_shape=(train_dataset[0][0].shape[-2], train_dataset[0][0].shape[-1]),
    output_size=torch.max(train_dataset[:][1]).item() + 1,
    **best_result_cnn.config["model_parameters"],
)
model_state_cnn = torch.load(
    os.path.join(best_result_cnn.checkpoint.to_directory(), "checkpoint.pt")
)["model_state"]
best_model_cnn.load_state_dict(model_state_cnn)
print(f"Model structure: {best_model_cnn}")

Best trial config: {'model_parameters': {'cnn_outputs': [4, 4], 'kernel_sizes': 4, 'max_pools': 2, 'linear_layers': [128, 128, 128]}, 'optimizer': 'Adam', 'lr': 0.013061553676457685, 'batch_size': 256}
Best trial final validation loss: 0.3043544888496399
Best trial final validation accuracy: 0.897
Best trial final validation F1-score: 0.8968068101589336
Model structure: CNN1DNet(
  (net): Sequential(
    (cnn_0): Conv1d(1, 4, kernel_size=(4,), stride=(1,))
    (norm_0): BatchNorm1d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu_0): ReLU()
    (maxpool_0): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (cnn_1): Conv1d(4, 4, kernel_size=(4,), stride=(1,))
    (norm_1): BatchNorm1d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu_1): ReLU()
    (maxpool_1): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (fully_connection): Flatten(start_dim=1, end_dim=-1)
    (linear_0

In [17]:
# validation on test data
best_model_cnn.eval()
with torch.no_grad():
    test_pred_cnn = best_model_cnn(test_dataset[:][0])
    test_loss_cnn = loss_fn(test_pred_cnn, test_dataset[:][1])

if multiclass:
    test_pred_cnn = torch.argmax(test_pred_cnn, dim=1).numpy()
else:
    test_pred_cnn = test_pred_cnn.detach().numpy().flatten() > 0.5

# metrics
accuracy_test_cnn = metrics.accuracy_score(test_dataset[:][1].numpy(), test_pred_cnn)
f1_test_cnn = metrics.f1_score(
    test_dataset[:][1].numpy(),
    test_pred_cnn,
    average="weighted" if multiclass else "binary",
)

In [18]:
# remove temporary checkpoint files
!rm -rf f"{best_result_cnn.checkpoint.to_directory()}"

## Model Comparison

In [19]:
pd.DataFrame(
    {
        "Model": ["DNN-1D", "CNN-1D"],
        "Loss": [test_loss_dnn, test_loss_cnn],
        "Accuracy": [accuracy_test_dnn, accuracy_test_cnn],
        "F1-Score": [f1_test_dnn, f1_test_cnn],
    }
)

Unnamed: 0,Model,Loss,Accuracy,F1-Score
0,DNN-1D,tensor(0.1844),0.935133,0.934855
1,CNN-1D,tensor(0.1962),0.926133,0.925977
