# Hyperparameter Optimization (Powerspec Model)

In [1]:
import logging
import random
import os
import time
import datetime
import warnings
from itertools import product

import torch
from torch import nn, optim
from torch.optim.lr_scheduler import ExponentialLR
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import pandas as pd
import h5py
from tqdm.notebook import tqdm

from sklearn.model_selection import train_test_split, KFold

from utils import load_mel_data, count_parameters
from models import *

# Suppress all warnings for the remainder of the session
warnings.filterwarnings(action="ignore")

# Make code deterministic: seed every random number generator in use with the
# same value and force cuDNN to pick deterministic algorithms
SEED = 42
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
    _seed_fn(SEED)
del _seed_fn
torch.backends.cudnn.deterministic = True

## Config and global variables

In [2]:
# Set file paths (everything produced by this experiment lives below BASE_PATH)
BASE_PATH = os.path.join("/home/source/experiments/")
RESULTS_BASE_PATH = os.path.join(BASE_PATH, "results")
LOG_FILE_PATH = os.path.join(BASE_PATH, "exp03_train.log")
MODEL_BASE_PATH = os.path.join(BASE_PATH, 'exp03_models')
TRAIN_RESULTS_PATH = os.path.join(RESULTS_BASE_PATH, "exp03_train.csv")
TEST_RESULTS_PATH = os.path.join(RESULTS_BASE_PATH, "exp03_test.csv")

# Logging config: INFO and above is written to LOG_FILE_PATH with a
# millisecond-resolution timestamp, the module and the calling function
logging.basicConfig(level=logging.INFO,
                    filename=LOG_FILE_PATH,
                    format='%(asctime)s.%(msecs)03d %(levelname)s %(module)s - %(funcName)s: %(message)s',
                    datefmt='%Y-%m-%d %H:%M:%S')

# Set information about the dataset
HDF5_FILE_PATH = os.path.join(os.sep, 'home', 'data', "ANNOTATED_BEDTIME_TU7.hdf5")
COLNAMES = ["Time", "X", "Y", "Z", "Annotated Time in Bed"]
SAMPLE_RATE = 100
LABEL_DICT = {False: 0, True: 1}
# Subjects (HDF5 keys) that are skipped when the data is loaded
EXCLUDED_DATASETS = ["subject90067325"]

# Set information about the model, etc.
INPUT_DIM = 160
OUTPUT_DIM = 1

DROPOUT = 0.5 # https://jmlr.org/papers/v15/srivastava14a.html
BATCH_SIZE = 8
# Passed to train(); presumably the gradient clipping threshold - TODO confirm
CLIP = 1000
MAX_EPOCHS = 256
# NOTE(review): MIN_EPOCHS and REVERSE are not referenced by the code visible in this notebook
MIN_EPOCHS = 0
# Multiplicative learning rate decay (gamma of the ExponentialLR scheduler)
LR_DECAY = .9
REVERSE = False

# Combinations to test (a GLM is additionally run once, ahead of this grid)
MODELS = [MLP, RNN, LSTM]
HID_DIM = [1,2,4,8,16,32,64]
N_LAYERS = [1,2,4]
INIT_LR = [.7]

# Minimal required loss improvement: an epoch only counts as an improvement if
# its validation loss undercuts the best one so far by more than EPSILON
EPSILON = 1e-4

# Per-feature means of the input (160 values, one per input feature); passed to
# load_dataset() for normalization. The values agree with the statistics that
# load_dataset() prints for the loaded data (see the cell output further down).
means = torch.Tensor([ -44.4865,  -59.5069,  -66.6517,  -69.3142,  -73.4886,  -76.3867,
                       -78.8996,  -79.9984,  -82.2940,  -84.5616,  -85.4373,  -87.6441,
                       -89.6699,  -90.6575,  -92.5189,  -94.3737,  -95.0330,  -96.6376,
                       -98.0835,  -98.6270,  -99.1676, -100.4611, -101.6439, -102.1052,
                      -102.5688, -103.6380, -104.6083, -104.9140, -105.2301, -106.0782,
                      -106.3457, -106.5847, -106.8263, -107.0242, -107.6015, -107.7007,
                      -107.7551, -108.1157, -107.9962, -107.4323,  -43.0569,  -59.5251,
                       -67.3526,  -70.2022,  -74.8481,  -78.0362,  -80.5892,  -81.7762,
                       -83.7894,  -85.6536,  -86.4882,  -88.3413,  -89.9654,  -90.6949,
                       -92.0792,  -93.7075,  -94.1243,  -95.6003,  -96.9382,  -97.5214,
                       -98.0292,  -99.3300, -100.2594, -100.6239, -100.9032, -101.8789,
                      -103.0002, -103.3289, -103.5210, -104.2072, -104.3552, -104.5816,
                      -104.8962, -105.0697, -105.5263, -105.4751, -105.5339, -105.9084,
                      -105.7753, -105.1467,  -38.7873,  -53.8217,  -62.2838,  -65.5735,
                       -70.3005,  -73.6778,  -76.3870,  -77.4950,  -79.6522,  -81.6870,
                       -82.3242,  -84.2133,  -85.9567,  -86.7088,  -88.3639,  -90.0263,
                       -90.5861,  -92.0502,  -93.4181,  -93.8691,  -94.4211,  -95.7346,
                       -96.9019,  -97.4235,  -97.9199,  -99.0181, -100.0482, -100.3988,
                      -100.7505, -101.6511, -102.0045, -102.3397, -102.6644, -102.9065,
                      -103.5155, -103.6391, -103.7331, -104.1512, -104.0756, -103.5054,
                       -91.2349,  -99.5667, -101.7370, -100.9673, -102.9374, -103.6892,
                      -104.3837, -103.5328, -104.0643, -104.7086, -104.6865, -106.2458,
                      -107.3120, -107.8856, -109.0863, -110.7779, -111.1444, -112.5662,
                      -113.9854, -114.4069, -114.9294, -116.3273, -117.2952, -117.9790,
                      -118.3448, -119.2901, -120.3042, -120.5012, -120.7670, -121.6727,
                      -122.0382, -122.2133, -122.4182, -122.6655, -123.3417, -123.3857,
                      -123.3487, -123.7909, -123.8518, -123.2396])
# Per-feature standard deviations of the input (160 values, same order as `means`)
stds = torch.Tensor([15.5046, 15.3608, 16.0750, 16.4421, 15.7462, 15.9030, 16.2268, 16.3318,
                     16.1988, 16.0585, 15.8400, 15.5930, 15.3356, 14.9931, 14.6579, 14.3591,
                     14.0773, 13.8462, 13.6614, 13.4152, 13.1776, 12.9772, 12.7750, 12.5924,
                     12.3934, 12.2089, 12.0475, 11.9041, 11.7766, 11.6891, 11.6017, 11.5038,
                     11.4223, 11.3544, 11.3014, 11.2560, 11.2241, 11.2038, 11.1968, 11.1596,
                     18.5531, 18.8339, 19.7249, 20.1258, 19.7921, 19.5611, 19.1572, 19.1398,
                     19.2797, 18.9888, 18.9910, 18.9928, 18.3535, 18.2509, 17.5982, 17.3483,
                     16.8869, 16.5173, 16.2034, 15.8361, 15.5189, 15.3361, 14.8879, 14.8251,
                     14.5224, 14.2143, 14.0855, 13.8587, 13.6569, 13.5621, 13.4596, 13.2743,
                     13.0891, 12.9864, 12.9758, 12.8617, 12.7709, 12.6871, 12.7386, 12.6859,
                     13.0402, 13.1427, 13.3539, 13.5384, 12.9476, 12.8824, 13.0041, 13.0158,
                     13.0604, 13.1545, 13.1928, 13.2617, 13.2320, 13.0971, 12.8899, 12.7291,
                     12.5583, 12.3833, 12.2780, 12.1346, 12.0020, 11.9203, 11.8031, 11.7010,
                     11.5610, 11.4368, 11.3395, 11.2283, 11.1275, 11.0572, 10.9849, 10.9131,
                     10.8662, 10.8275, 10.7903, 10.7451, 10.7291, 10.7357, 10.7697, 10.7465,
                     16.6599, 20.7126, 25.2049, 26.0657, 25.0948, 24.5251, 23.9681, 23.7936,
                     23.6884, 23.1676, 22.8169, 22.4963, 21.6597, 20.9830, 20.1299, 19.4319,
                     18.6604, 17.9222, 17.2849, 16.6072, 15.9724, 15.4307, 14.7922, 14.2724,
                     13.6931, 13.1300, 12.6672, 12.1897, 11.7421, 11.3323, 10.9345, 10.5220,
                     10.1526,  9.8123,  9.5070,  9.2037,  8.9509,  8.7694,  8.6179,  8.4587])

## Load data

In [3]:
def load_dataset(file_path, subjects, label_dict, resampled_frequency="1min", means=None, stds=None):
    """Load, sort, pad and normalize the sequences of all given subjects.

    Every subject is loaded with ``load_mel_data``. The resulting sequences are
    ordered from longest to shortest, zero-padded to a common length and
    standardized per feature as ``(X - means) / stds``.

    Args:
        file_path: Path of the HDF5 file, forwarded to ``load_mel_data``.
        subjects: Subject keys to load.
        label_dict: Label mapping, forwarded to ``load_mel_data``.
        resampled_frequency: Resampling frequency, forwarded to ``load_mel_data``.
        means: Optional per-feature means used for the normalization. When
            ``None``, they are computed from the loaded (unpadded) data.
        stds: Optional per-feature standard deviations used for the
            normalization. When ``None``, they are computed from the loaded
            (unpadded) data.

    Returns:
        Tuple ``(X, y, lengths)``: the padded, normalized inputs and the padded
        labels (both batch-first), and a tensor holding the unpadded length of
        every sequence.

    Raises:
        ValueError: If ``subjects`` is empty.
    """
    loaded = [load_mel_data(file_path, subject, label_dict, sample_rate=SAMPLE_RATE, resampled_frequency=resampled_frequency, colnames=COLNAMES) for subject in tqdm(subjects, desc="Loading data")]
    if not loaded:
        raise ValueError("No subjects given, cannot build a dataset")
    X, y = zip(*loaded)

    lengths = [elem.shape[0] for elem in X]

    # Order the sequences from longest to shortest
    X, y, lengths = zip(*[(X[ii], y[ii], lengths[ii]) for ii in np.argsort(lengths)[::-1]])

    # Honor statistics supplied by the caller; only fall back to statistics of
    # the loaded data for whatever was not supplied
    if means is None or stds is None:
        stacked = torch.cat(X)
        if means is None:
            means = stacked.mean(axis=0)
        if stds is None:
            stds = stacked.std(axis=0)

    logging.info(f"means = {means}; stds = {stds}")
    print(f"means = {means}; stds = {stds}")

    # Per-subject share of each class, summarized as mean +/- std over subjects
    class_0, class_1 = zip(*[((elem == 0).sum().numpy()/elem.shape[0], (elem == 1).sum().numpy()/elem.shape[0]) for elem in y])
    logging.info(f"Class 0 (awake): {np.mean(class_0):.2f} +/- {np.std(class_0):.2f}; Class 1 (sleep): {np.mean(class_1):.2f} +/- {np.std(class_1):.2f}")
    print(f"Class 0 (awake): {np.mean(class_0):.2f} +/- {np.std(class_0):.2f}; Class 1 (sleep): {np.mean(class_1):.2f} +/- {np.std(class_1):.2f}")

    X, y, lengths = pad_sequence(X, batch_first=True), pad_sequence(y, batch_first=True), torch.Tensor(lengths)

    # The normalization is applied to the padded tensor, so padded positions
    # are shifted as well; use `lengths` to tell real steps from padding
    X = (X - means) / stds
    logging.info("Normalized the input of each channel")
    print("Normalized the input of each channel")

    return X, y, lengths


# Run on the first CUDA device when one is available, otherwise on the CPU
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

# Collect every subject key of the HDF5 file that is not explicitly excluded
with h5py.File(HDF5_FILE_PATH) as hdf5_file:
    subjects = [key for key in hdf5_file.keys() if key not in EXCLUDED_DATASETS]

# Load the data, cast inputs and labels to float32 and move everything to the device
X, y, lengths = load_dataset(HDF5_FILE_PATH, subjects, LABEL_DICT, means=means, stds=stds)
X = X.float().to(device)
y = y.float().to(device)
lengths = lengths.to(device)

# Inputs and labels must describe the same number of sequences
assert X.shape[0] == y.shape[0]
print(f"Loaded {X.shape[0]} sequences with input shape [{X.shape[1]} x {X.shape[2]}] and output shape [{y.shape[1]}]\n")

Loading data:   0%|          | 0/444 [00:00<?, ?it/s]

means = tensor([ -44.4865,  -59.5069,  -66.6517,  -69.3142,  -73.4886,  -76.3867,
         -78.8996,  -79.9984,  -82.2940,  -84.5616,  -85.4373,  -87.6441,
         -89.6699,  -90.6575,  -92.5189,  -94.3737,  -95.0330,  -96.6376,
         -98.0835,  -98.6270,  -99.1676, -100.4611, -101.6439, -102.1052,
        -102.5688, -103.6380, -104.6083, -104.9140, -105.2301, -106.0782,
        -106.3457, -106.5847, -106.8263, -107.0242, -107.6015, -107.7007,
        -107.7551, -108.1157, -107.9962, -107.4323,  -43.0569,  -59.5251,
         -67.3526,  -70.2022,  -74.8481,  -78.0362,  -80.5892,  -81.7762,
         -83.7894,  -85.6536,  -86.4882,  -88.3413,  -89.9654,  -90.6949,
         -92.0792,  -93.7075,  -94.1243,  -95.6003,  -96.9382,  -97.5214,
         -98.0292,  -99.3300, -100.2594, -100.6239, -100.9032, -101.8789,
        -103.0002, -103.3289, -103.5210, -104.2072, -104.3552, -104.5816,
        -104.8962, -105.0697, -105.5263, -105.4751, -105.5339, -105.9084,
        -105.7753, -105.1467, 

## Create result files

In [4]:
# (Re-)create both result files so that each one holds nothing but its CSV header
for _path, _header, _kind in (
    (TRAIN_RESULTS_PATH, "Combination,Fold,Epoch,Train Loss,Validation Loss,Hidden Dimension,Number of Layers,Initial Learning Rate,Model\n", "training"),
    (TEST_RESULTS_PATH, "Combination,Fold,Loss,Accuracy,Precision,Recall,F1 Score,Hidden Dimension,Number of Layers,Initial Learning Rate,Model,Ellapsed Time\n", "test"),
):
    with open(_path, "w") as _result_file:
        _result_file.write(_header)
    logging.info(f"Created {_kind} result file at {_path}")

## Train the models

In [None]:
# Search space: GLM is run once (with n_layers = 0 and hid_dim = 0), followed by
# every (n_layers, init_lr, hid_dim, model) combination of the configured grid
combinations = [(0, INIT_LR[0], 0, GLM)] + list(product(N_LAYERS, INIT_LR, HID_DIM, MODELS))
n_combinations = len(combinations)

# torch.save() does not create missing directories
os.makedirs(MODEL_BASE_PATH, exist_ok=True)

for combination, (n_layers, init_lr, hid_dim, model_constr) in enumerate(tqdm(combinations)):

    logging.info(f"Combination {combination}: hid_dim = {hid_dim}; n_layers = {n_layers}; init_lr = {init_lr}; device = {device}")

    # Do 10-fold cross-validation
    # NOTE(review): KFold does not shuffle and load_dataset() orders the sequences
    # by length, so the folds differ systematically in sequence length - confirm
    # that this is intended
    kf = KFold(n_splits=10)
    for fold, (train_idx, test_idx) in enumerate(kf.split(np.arange(X.size(0)))):

        # Create validation data by splitting the training indices of this fold.
        # The index VALUES have to be split (not the positions 0..len-1),
        # otherwise training/validation data overlaps with the test fold.
        train_idx, valid_idx = train_test_split(train_idx, test_size=0.2)

        # Create model and init weights
        model = model_constr(INPUT_DIM, hid_dim, OUTPUT_DIM, n_layers, dropout=DROPOUT, batch_first=True)
        # The data lives on `device`, so the model has to live there too
        # (this is a no-op if the constructor already placed it there)
        model = model.to(device)
        logging.info('Model initialized with %s trainable parameters' % count_parameters(model))

        # Init loss and optimizer
        optimizer = optim.SGD(model.parameters(), lr=init_lr) # https://arxiv.org/abs/1409.3215
        scheduler = ExponentialLR(optimizer, gamma=LR_DECAY)
        criterion = nn.BCELoss()
        logging.info(f"Start with learning rate = {init_lr} (decay = {LR_DECAY}); batch size = {BATCH_SIZE}.")

        # Create dataloaders
        train_loader = DataLoader(TensorDataset(X[train_idx], y[train_idx], lengths[train_idx]), batch_size=BATCH_SIZE, shuffle=True)
        valid_loader = DataLoader(TensorDataset(X[valid_idx], y[valid_idx], lengths[valid_idx]), batch_size=BATCH_SIZE)
        test_loader = DataLoader(TensorDataset(X[test_idx], y[test_idx], lengths[test_idx]), batch_size=BATCH_SIZE)
        logging.info(f"Use {len(train_idx)} sequences for training, {len(valid_idx)} sequences for validation and {len(test_idx)} sequences for testing.")

        # Set path and init best loss
        best_model_path = os.path.join(MODEL_BASE_PATH, f'{combination:02d}_best_{n_layers}l_{model.name}{hid_dim}_model_fold_{fold}.pt')
        best_valid_loss = float('inf')
        best_epoch = None  # epoch whose weights are stored at best_model_path
        epoch = 0

        overall_start_time = time.time()

        # Evaluate model without any training
        train_loss, _ = evaluate(model, train_loader, criterion)
        valid_loss, _ = evaluate(model, valid_loader, criterion)

        # Save losses to result file
        with open(TRAIN_RESULTS_PATH, "a") as f:
            f.write(f"{combination},{fold},{epoch},{train_loss},{valid_loss},{hid_dim},{n_layers},{init_lr},{model.name}\n")

        for epoch in range(1, MAX_EPOCHS + 1):

            train_loss = train(model, train_loader, optimizer, criterion, CLIP)
            valid_loss, _ = evaluate(model, valid_loader, criterion)

            scheduler.step()

            # Early stopping: training ends with the first epoch that fails to
            # improve the best validation loss by more than EPSILON
            if valid_loss + EPSILON < best_valid_loss:
                # Save losses to result file
                with open(TRAIN_RESULTS_PATH, "a") as f:
                    f.write(f"{combination},{fold},{epoch},{train_loss},{valid_loss},{hid_dim},{n_layers},{init_lr},{model.name}\n")

                # Update best validation loss and save model
                best_valid_loss = valid_loss
                best_epoch = epoch
                logging.info(f"Updated best validation loss to {best_valid_loss}.")
                torch.save(model.state_dict(), best_model_path)
            else:
                logging.info(f"End training after epoch {epoch} as validation loss does not further decrease.")
                logging.info(f"Best model saved at {best_model_path}")
                break

        time_diff = int(time.time() - overall_start_time)

        # Evaluate model on test set
        if best_epoch is None:
            # No epoch improved on the initial loss (e.g. the loss was NaN), so no
            # checkpoint was written in this run; do not load a missing/stale file
            logging.warning(f"No checkpoint was written for combination {combination}, fold {fold}; evaluate the current model instead.")
        else:
            logging.info(f"Load model from epoch {best_epoch} from {best_model_path}")
            model.load_state_dict(torch.load(best_model_path))

        test_loss, metrics = evaluate(model, test_loader, criterion)
        accuracy, precision, recall, f1_score = metrics

        with open(TEST_RESULTS_PATH, "a") as f:
            f.write(f"{combination},{fold},{test_loss},{accuracy},{precision},{recall},{f1_score},{hid_dim},{n_layers},{init_lr},{model.name},{time_diff}\n")

  0%|          | 0/64 [00:00<?, ?it/s]