# Hyperparameter Optimization (Powerspec Model)

In [1]:
import logging
import random
import os
import time
import datetime
import warnings
from itertools import product

import torch
from torch import nn, optim
from torch.optim.lr_scheduler import ExponentialLR
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import pandas as pd
import h5py
from tqdm.notebook import tqdm

from sklearn.model_selection import train_test_split, KFold

from utils import load_mel_data, count_parameters
from models import *

# Silence all warnings for the rest of the notebook (keeps cell output compact,
# but also hides deprecation and numerical warnings).
warnings.filterwarnings("ignore")

# Make code deterministic: seed every random number generator that is used
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
# Seed all CUDA devices, not only the current one (safe to call without CUDA)
torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.deterministic = True
# The cuDNN auto-tuner may pick different algorithms from run to run; switch it
# off explicitly so that the deterministic flag above is effective.
torch.backends.cudnn.benchmark = False

## Config and global variables

In [2]:
# Root directory below which all artefacts of this experiment are stored
BASE_PATH = "/home/source/experiments/"

# Sub-directories for the result tables and the model checkpoints
RESULTS_BASE_PATH = os.path.join(BASE_PATH, "results")
MODEL_BASE_PATH = os.path.join(BASE_PATH, "exp03_models")

# Individual files: training log and the per-epoch / per-fold result tables
LOG_FILE_PATH = os.path.join(BASE_PATH, "exp03_train.log")
TRAIN_RESULTS_PATH, TEST_RESULTS_PATH = (
    os.path.join(RESULTS_BASE_PATH, f"exp03_{split}.csv") for split in ("train", "test")
)

# Route all log records of level INFO and above to the experiment log file.
# `datefmt` has no sub-second field, so the milliseconds are appended to the
# timestamp through `%(msecs)03d` in the record format.
logging.basicConfig(
    filename=LOG_FILE_PATH,
    level=logging.INFO,
    datefmt="%Y-%m-%d %H:%M:%S",
    format="%(asctime)s.%(msecs)03d %(levelname)s %(module)s - %(funcName)s: %(message)s",
)

# --- Dataset -----------------------------------------------------------------
HDF5_FILE_PATH = os.path.join(os.sep, "home", "data", "ANNOTATED_BEDTIME_TU7.hdf5")
COLNAMES = ["Time", *"XYZ", "Annotated Time in Bed"]
SAMPLE_RATE = 100
LABEL_DICT = {flag: int(flag) for flag in (False, True)}
EXCLUDED_DATASETS = ["subject90067325"]

# --- Model dimensions --------------------------------------------------------
INPUT_DIM = 160
OUTPUT_DIM = 1

# --- Training settings -------------------------------------------------------
# Dropout rate, see https://jmlr.org/papers/v15/srivastava14a.html
DROPOUT = 0.5
BATCH_SIZE = 8
CLIP = 1000
MAX_EPOCHS = 256
MIN_EPOCHS = 0
LR_DECAY = 0.9
REVERSE = False

# Hyperparameter grid: every combination of the entries below is trained
MODELS = [MLP, RNN, LSTM]
HID_DIM = [2 ** exponent for exponent in range(7)]  # 1, 2, 4, ..., 64
N_LAYERS = [1, 2, 4]
INIT_LR = [0.7]

# Smallest decrease of the validation loss that still counts as an improvement
EPSILON = 1e-4

# Per-feature means used to standardize the model input; one value for each of
# the INPUT_DIM = 160 input features. They are passed to `load_dataset` below.
# The leading values agree with the `means = tensor([...])` output that
# `load_dataset` prints further down in this notebook.
# NOTE(review): the values look like 4 groups of 40 (presumably 4 signal
# channels x 40 spectral bands) - confirm against `load_mel_data`.
means = torch.Tensor([ -48.4207,  -65.1218,  -71.5279,  -76.1308,  -79.9645,  -83.3663,
                       -86.4398,  -89.1959,  -91.6206,  -93.0964,  -95.0577,  -97.5050,
                       -98.9434, -100.2508, -101.4559, -101.9244, -103.0111, -104.7183,
                      -105.5989, -105.6751, -106.4759, -107.9065, -108.5371, -108.4562,
                      -109.0211, -110.2759, -110.0502, -110.5009, -111.6296, -111.2670,
                      -111.6191, -111.9629, -112.2249, -113.2178, -112.7033, -112.8654,
                      -113.0358, -113.1333, -113.2129, -113.2895,  -48.2245,  -66.3324,
                       -73.2612,  -77.7942,  -81.1151,  -84.1829,  -86.7018,  -88.9712,
                       -90.9700,  -92.1339,  -93.9986,  -96.3574,  -97.8250,  -99.1140,
                      -100.1279, -100.4639, -101.3244, -103.0022, -103.9505, -104.0372,
                      -104.8954, -106.1583, -106.6984, -106.5298, -106.8966, -108.3447,
                      -108.1104, -108.5275, -109.7666, -109.1461, -109.5073, -109.7905,
                      -109.8520, -111.1019, -110.4769, -110.5905, -110.9061, -110.7892,
                      -110.9209, -111.0517,  -43.2543,  -61.6885,  -68.9491,  -73.5991,
                       -77.1016,  -80.0631,  -82.7077,  -85.1379,  -87.3117,  -88.6311,
                       -90.4376,  -92.7634,  -94.2068,  -95.5118,  -96.7122,  -97.2450,
                       -98.3478, -100.0788, -101.0015, -101.1428, -101.9611, -103.4203,
                      -104.0778, -104.0826, -104.7114, -106.0103, -105.8721, -106.3384,
                      -107.5007, -107.1565, -107.5132, -107.8857, -108.1693, -109.1731,
                      -108.7007, -108.9047, -109.0983, -109.1834, -109.2635, -109.3219,
                       -88.4789,  -95.3224,  -97.4297,  -98.6238,  -99.8170, -101.9552,
                      -103.8918, -105.9195, -107.9017, -109.0472, -110.9086, -113.2247,
                      -114.7520, -116.0630, -117.1561, -117.7766, -118.7566, -120.5036,
                      -121.3769, -121.2272, -122.1442, -123.5198, -124.1882, -124.2786,
                      -124.6115, -126.1393, -125.7334, -126.0776, -127.6043, -126.9575,
                      -127.5121, -127.9141, -127.7025, -129.1585, -128.4213, -128.5968,
                      -129.2008, -129.0018, -129.1680, -129.2950])

# Per-feature standard deviations used to standardize the model input; one
# value for each of the INPUT_DIM = 160 input features, in the same order as
# `means`. They are passed to `load_dataset` below.
stds = torch.Tensor([15.2365, 16.0121, 15.9737, 16.1929, 15.9387, 15.5965, 15.1987, 14.7376,
                     14.3288, 13.9616, 13.6932, 13.4502, 13.2141, 13.0099, 12.8077, 12.6167,
                     12.4420, 12.3011, 12.1666, 11.9817, 11.9137, 11.8341, 11.7676, 11.7024,
                     11.6222, 11.6012, 11.5212, 11.4829, 11.4804, 11.4021, 11.3955, 11.3809,
                     11.3314, 11.3852, 11.3326, 11.3130, 11.3374, 11.3191, 11.2958, 11.3150,
                     18.7619, 19.9428, 19.2959, 19.1569, 18.9576, 18.9095, 18.3317, 17.8413,
                     17.2935, 16.7246, 16.3572, 15.8488, 15.6116, 15.3324, 14.9735, 14.8424,
                     14.5291, 14.3861, 14.2124, 13.8977, 13.8832, 13.6913, 13.6424, 13.6181,
                     13.3272, 13.4469, 13.1981, 13.0259, 13.2324, 12.9835, 13.0779, 13.0635,
                     12.7663, 13.0255, 12.7892, 12.7116, 12.9838, 12.7755, 12.8132, 12.8965,
                     12.6507, 13.1202, 12.8005, 12.9337, 13.0524, 13.1663, 13.1359, 12.9216,
                     12.6842, 12.4627, 12.2747, 12.1424, 12.0269, 11.9295, 11.8228, 11.7181,
                     11.5907, 11.5112, 11.4187, 11.3004, 11.2326, 11.1814, 11.1253, 11.0703,
                     11.0036, 10.9998, 10.9487, 10.9178, 10.9446, 10.8762, 10.8651, 10.8592,
                     10.8049, 10.8777, 10.8306, 10.8438, 10.9010, 10.8696, 10.8686, 10.8816,
                     22.3002, 25.5131, 24.0863, 23.6062, 23.0077, 22.4646, 21.4507, 20.4049,
                     19.4103, 18.3997, 17.5340, 16.7272, 16.0919, 15.5063, 14.8996, 14.3464,
                     13.8004, 13.3478, 12.9319, 12.4513, 12.1094, 11.7793, 11.4736, 11.1728,
                     10.8574, 10.6516, 10.3594, 10.1412,  9.9928,  9.7343,  9.5811,  9.4065,
                      9.2308,  9.1794,  9.0196,  8.9316,  8.8628,  8.7821,  8.7035,  8.6694])

## Load data

In [3]:
def load_dataset(file_path, subjects, label_dict, resampled_frequency="1min", means=None, stds=None):
    """Load, sort, pad and standardize the feature sequences of all subjects.

    Every subject is loaded with ``load_mel_data``. The sequences are ordered
    from longest to shortest, zero-padded to a common length (batch first) and
    standardized per feature.

    Args:
        file_path: Path of the HDF5 file, passed on to ``load_mel_data``.
        subjects: Subject keys to load.
        label_dict: Label mapping, passed on to ``load_mel_data``.
        resampled_frequency: Resampling frequency, passed on to
            ``load_mel_data``.
        means: Optional per-feature means used for the standardization. When
            omitted, they are computed over all loaded (unpadded) time steps.
        stds: Optional per-feature standard deviations used for the
            standardization. When omitted, they are computed over all loaded
            (unpadded) time steps.

    Returns:
        Tuple ``(X, y, lengths)``: the padded and standardized inputs, the
        padded labels and a tensor holding the unpadded sequence lengths.
    """
    X, y = zip(*[load_mel_data(file_path, subject, label_dict, sample_rate=SAMPLE_RATE, resampled_frequency=resampled_frequency, colnames=COLNAMES) for subject in tqdm(subjects, desc="Loading data")])

    lengths = [elem.shape[0] for elem in X]

    # Order the sequences from longest to shortest
    X, y, lengths = zip(*[(X[ii], y[ii], lengths[ii]) for ii in np.argsort(lengths)[::-1]])

    # Respect statistics supplied by the caller; only derive the missing ones
    # from the data (previously the arguments were silently overwritten).
    if means is None or stds is None:
        all_steps = torch.cat(X)
        if means is None:
            means = all_steps.mean(axis=0)
        if stds is None:
            stds = all_steps.std(axis=0)

    logging.info(f"means = {means}; stds = {stds}")
    print(f"means = {means}; stds = {stds}")

    # Per-subject share of each class, summarized as mean +/- std over subjects
    class_0, class_1 = zip(*[((elem == 0).sum().numpy()/elem.shape[0], (elem == 1).sum().numpy()/elem.shape[0]) for elem in y])
    logging.info(f"Class 0 (awake): {np.mean(class_0):.2f} +/- {np.std(class_0):.2f}; Class 1 (sleep): {np.mean(class_1):.2f} +/- {np.std(class_1):.2f}")
    print(f"Class 0 (awake): {np.mean(class_0):.2f} +/- {np.std(class_0):.2f}; Class 1 (sleep): {np.mean(class_1):.2f} +/- {np.std(class_1):.2f}")

    X, y, lengths = pad_sequence(X, batch_first=True), pad_sequence(y, batch_first=True), torch.Tensor(lengths)

    # NOTE: the standardization is applied after padding, so padded time steps
    # are shifted/scaled as well and are no longer zero; consumers have to use
    # `lengths` to ignore them.
    X = (X - means) / stds
    logging.info("Normalized the input of each channel")
    print("Normalized the input of each channel")

    return X, y, lengths


# Select device (GPU if available)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Load available subjects. The file is only read here, so open it explicitly in
# read-only mode instead of relying on the h5py default, which differs between
# h5py versions.
with h5py.File(HDF5_FILE_PATH, "r") as hdf5_file:
    subjects = [subject for subject in hdf5_file.keys() if subject not in EXCLUDED_DATASETS]

# Load the data, cast it to float32 and move the complete dataset to the device
X, y, lengths = load_dataset(HDF5_FILE_PATH, subjects, LABEL_DICT, means=means, stds=stds)
X, y = X.float(), y.float()
X, y, lengths = X.to(device), y.to(device), lengths.to(device)
# Sanity check: one label sequence per input sequence
assert X.shape[0] == y.shape[0]
print(f"Loaded {X.shape[0]} sequences with input shape [{X.shape[1]} x {X.shape[2]}] and output shape [{y.shape[1]}]\n")

Loading data:   0%|          | 0/444 [00:00<?, ?it/s]

means = tensor([ -48.4207,  -65.1218,  -71.5279,  -76.1308,  -79.9645,  -83.3663,
         -86.4398,  -89.1959,  -91.6206,  -93.0964,  -95.0577,  -97.5050,
         -98.9434, -100.2508, -101.4559, -101.9244, -103.0111, -104.7183,
        -105.5989, -105.6751, -106.4759, -107.9065, -108.5371, -108.4562,
        -109.0211, -110.2759, -110.0502, -110.5009, -111.6296, -111.2670,
        -111.6191, -111.9629, -112.2249, -113.2178, -112.7033, -112.8654,
        -113.0358, -113.1333, -113.2129, -113.2895,  -48.2245,  -66.3324,
         -73.2612,  -77.7942,  -81.1151,  -84.1829,  -86.7018,  -88.9712,
         -90.9700,  -92.1339,  -93.9986,  -96.3574,  -97.8250,  -99.1140,
        -100.1279, -100.4639, -101.3244, -103.0022, -103.9505, -104.0372,
        -104.8954, -106.1583, -106.6984, -106.5298, -106.8966, -108.3447,
        -108.1104, -108.5275, -109.7666, -109.1461, -109.5073, -109.7905,
        -109.8520, -111.1019, -110.4769, -110.5905, -110.9061, -110.7892,
        -110.9209, -111.0517, 

## Create result files

In [4]:
# open(..., "w") does not create missing directories, so make sure the results
# directory exists before the (empty) result tables are written.
os.makedirs(RESULTS_BASE_PATH, exist_ok=True)

# Start a fresh training table (one row per logged epoch); an existing file is
# overwritten.
with open(TRAIN_RESULTS_PATH, "w") as f:
    f.write("Combination,Fold,Epoch,Train Loss,Validation Loss,Hidden Dimension,Number of Layers,Initial Learning Rate,Model\n")
logging.info(f"Created training result file at {TRAIN_RESULTS_PATH}")

# Start a fresh test table (one row per combination and fold); an existing file
# is overwritten.
with open(TEST_RESULTS_PATH, "w") as f:
    f.write("Combination,Fold,Loss,Accuracy,Precision,Recall,F1 Score,Hidden Dimension,Number of Layers,Initial Learning Rate,Model,Ellapsed Time\n")
logging.info(f"Created test result file at {TEST_RESULTS_PATH}")

## Train the models

In [None]:
# Hyperparameter grid: one GLM baseline (0 layers, hidden size 0) followed by
# every (n_layers, init_lr, hid_dim, model) combination of the search grid.
combinations = [(0, INIT_LR[0], 0, GLM)] + list(product(N_LAYERS, INIT_LR, HID_DIM, MODELS))
n_combinations = len(combinations)

# torch.save does not create missing directories
os.makedirs(MODEL_BASE_PATH, exist_ok=True)

for combination, (n_layers, init_lr, hid_dim, model_constr) in enumerate(tqdm(combinations)):

    logging.info(f"Combination {combination}: hid_dim = {hid_dim}; n_layers = {n_layers}; init_lr = {init_lr}; device = {device}")

    # Do 10-fold cross-validation
    # NOTE(review): the folds are contiguous index ranges and `load_dataset`
    # orders the sequences by length, so each fold covers a narrow range of
    # sequence lengths - consider KFold(shuffle=True, random_state=SEED).
    kf = KFold(n_splits=10)
    for fold, (train_idx, test_idx) in enumerate(kf.split(np.arange(X.size(0)))):

        # Create validation data: hold out 20% of this fold's training
        # sequences. The fold's own indices have to be split (not their
        # positions 0..n-1), otherwise the training/validation sets overlap
        # with the test fold.
        train_idx, valid_idx = train_test_split(train_idx, test_size=0.2)

        # Create model and init weights; the data lives on `device`, so the
        # model has to be there as well (no-op if it already is)
        model = model_constr(INPUT_DIM, hid_dim, OUTPUT_DIM, n_layers, dropout=DROPOUT, batch_first=True)
        model = model.to(device)
        logging.info('Model initialized with %s trainable parameters' % count_parameters(model))

        # Init loss and optimizer
        optimizer = optim.SGD(model.parameters(), lr=init_lr) # https://arxiv.org/abs/1409.3215
        scheduler = ExponentialLR(optimizer, gamma=LR_DECAY)
        criterion = nn.BCELoss()
        logging.info(f"Start with learning rate = {init_lr} (decay = {LR_DECAY}); batch size = {BATCH_SIZE}.")

        # Create dataloaders
        train_loader = DataLoader(TensorDataset(X[train_idx], y[train_idx], lengths[train_idx]), batch_size=BATCH_SIZE, shuffle=True)
        valid_loader = DataLoader(TensorDataset(X[valid_idx], y[valid_idx], lengths[valid_idx]), batch_size=BATCH_SIZE)
        test_loader = DataLoader(TensorDataset(X[test_idx], y[test_idx], lengths[test_idx]), batch_size=BATCH_SIZE)
        logging.info(f"Use {len(train_idx)} sequences for training, {len(valid_idx)} sequences for validation and {len(test_idx)} sequences for testing.")

        # Set path and init best loss
        best_model_path = os.path.join(MODEL_BASE_PATH, f'{combination:02d}_best_{n_layers}l_{model.name}{hid_dim}_model_fold_{fold}.pt')
        best_valid_loss = float('inf')
        best_epoch = None  # epoch of the checkpoint stored at best_model_path
        epoch = 0

        overall_start_time = time.time()

        # Evaluate model without any training
        train_loss, _ = evaluate(model, train_loader, criterion)
        valid_loss, _ = evaluate(model, valid_loader, criterion)

        # Save losses to result file
        with open(TRAIN_RESULTS_PATH, "a") as f:
            f.write(f"{combination},{fold},{epoch},{train_loss},{valid_loss},{hid_dim},{n_layers},{init_lr},{model.name}\n")

        # Train until the validation loss stops improving by more than EPSILON
        # (or MAX_EPOCHS is reached); the learning rate decays after each epoch
        for epoch in range(1, MAX_EPOCHS + 1):

            train_loss = train(model, train_loader, optimizer, criterion, CLIP)
            valid_loss, _ = evaluate(model, valid_loader, criterion)

            scheduler.step()

            if valid_loss + EPSILON < best_valid_loss:
                # Save losses to result file
                with open(TRAIN_RESULTS_PATH, "a") as f:
                    f.write(f"{combination},{fold},{epoch},{train_loss},{valid_loss},{hid_dim},{n_layers},{init_lr},{model.name}\n")

                # Update best validation loss and save model
                best_valid_loss = valid_loss
                best_epoch = epoch
                logging.info(f"Updated best validation loss to {best_valid_loss}.")
                torch.save(model.state_dict(), best_model_path)
            else:
                logging.info(f"End training after epoch {epoch} as validation loss does not further decrease.")
                logging.info(f"Best model saved at {best_model_path}")
                break

        time_diff = int(time.time() - overall_start_time)

        # Evaluate model on test set, using the best checkpoint of this fold
        if best_epoch is None:
            # No epoch ever improved on the initial best loss (e.g. the
            # validation loss is NaN), so no checkpoint of this run exists;
            # do not load a missing or stale file.
            logging.warning(f"No checkpoint saved for combination {combination}, fold {fold} (validation loss = {valid_loss}); evaluating the current weights.")
        else:
            logging.info(f"Load model from epoch {best_epoch} from {best_model_path}")
            model.load_state_dict(torch.load(best_model_path))

        test_loss, metrics = evaluate(model, test_loader, criterion)
        accuracy, precision, recall, f1_score = metrics

        with open(TEST_RESULTS_PATH, "a") as f:
            f.write(f"{combination},{fold},{test_loss},{accuracy},{precision},{recall},{f1_score},{hid_dim},{n_layers},{init_lr},{model.name},{time_diff}\n")

  0%|          | 0/64 [00:00<?, ?it/s]