# Import Packages

In [1]:
# Standard library
import os
from os.path import join
from datetime import date, datetime, timedelta

# Reduce TensorFlow's C++ log output. This must be set BEFORE TensorFlow is
# imported for the first time; setting it afterwards does not silence the
# messages emitted while the library loads.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

# Set current directory to "src" so the project modules below are importable
# and the relative paths used later in the notebook resolve from there.
os.chdir(join(os.getcwd(), os.pardir, "src"))
print(f"Current working directory: {os.getcwd()}")

# Installed packages
from joblib import load, dump
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.backend import clear_session
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# Defined modules (resolved relative to "src", hence the chdir above).
# NOTE(review): the two star imports hide where helper names such as
# handle_dir / export_json / export_history come from; consider importing
# them explicitly once the required names are confirmed.
from util.util import *
# from config.config import *
from process.dataloader import DataTL
from process.embed_gen import EmbedGenTL
from process.evaluate import *
from models import lstm_vae, cnn_vae
from models.lstm_ae import lstm_autoencoder
from models.lstm import basic_lstm
from visualize import plot

# Check GPU availability: abort early when the first GPU is not visible
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
    raise SystemError('GPU device not found')
print(f'Found GPU at: {device_name}')

Current working directory: e:\Repositories\PCovNet\src
Found GPU at: /device:GPU:0


# Process Config

In [2]:
# Experiment configuration shared by the cells below.
config = dict(
    # Experiment name; used as the prefix of the experiment directory name
    EXP_NAME="healthy_models",
    # Phase and group; together they select the subject-info CSV and the raw-data folder
    EXP_PHASE="phase1",
    EXP_GROUP="healthy",
    # NOTE(review): presumably toggles data augmentation inside DataTL — confirm
    AUGMENT=True,
    # Window length: number of timesteps fed to the VAE
    LEN_WIN=24,
    # Number of windows per sequence (the LSTM model is built with N_WIN - 1 timesteps)
    N_WIN=7,
    # Size of the VAE latent space, also the LSTM feature count
    LATENT_DIM=6,
    # Training hyper-parameters
    BATCH_SIZE=64,
    VAL_SPLIT=0.05,
    LEARNING_RATE=0.0002,
    EPOCH=1000,
    # Early-stopping patience, in epochs
    PATIENCE=10,
)



In [3]:
# Parent of the current working directory ("src"); every data and experiment
# path below is built from it.
_repo_root = join(os.getcwd(), os.pardir)

# Raw-data folder for each experiment phase
DATA_DIR_DICT = {
    phase: join(_repo_root, "data", "raw", phase)
    for phase in ("phase1", "phase2")
}

# Subject-info CSV for each "<group>_<phase>" combination
INFO_DIR_DICT = {
    key: join(_repo_root, "data", "external", f"{key}_info.csv")
    for key in ("covid_phase1", "covid_phase2",
                "healthy_phase1", "non-covid_phase1")
}

# Load the subject info that matches the configured group and phase
_info_key = f"{config['EXP_GROUP']}_{config['EXP_PHASE']}"
subject_info = pd.read_csv(INFO_DIR_DICT[_info_key])

# The experiment directory name is derived from the experiment name and the
# window settings, so runs with the same settings resolve to the same folder.
_exp_folder = f"{config['EXP_NAME']}_{config['LEN_WIN']}_{config['N_WIN']}"
config['EXP_DIR'] = join(_repo_root, "experiment", _exp_folder)
# NOTE(review): handle_dir presumably creates the directory when missing — confirm in util.util
handle_dir(config['EXP_DIR'])

# Raw-data directory for the configured phase
config['DATA_DIR'] = DATA_DIR_DICT[config['EXP_PHASE']]

# Persist the full configuration next to the experiment outputs
export_json(config, join(config['EXP_DIR'], "config.json"),
            print_json=True)

# Start a fresh log file ('w' truncates any previous log) with the config as header
with open(join(config['EXP_DIR'], "log.txt"), 'w', encoding='utf-8') as f:
    f.writelines(f"{key}: {value}\n" for key, value in config.items())
    f.write("\n\n")


config
{
    "EXP_NAME": "healthy_models",
    "EXP_PHASE": "phase1",
    "EXP_GROUP": "healthy",
    "AUGMENT": true,
    "LEN_WIN": 24,
    "N_WIN": 7,
    "LATENT_DIM": 6,
    "BATCH_SIZE": 64,
    "VAL_SPLIT": 0.05,
    "LEARNING_RATE": 0.0002,
    "EPOCH": 1000,
    "PATIENCE": 10,
    "EXP_DIR": "e:\\Repositories\\PCovNet\\src\\..\\experiment\\healthy_models_24_7",
    "DATA_DIR": "e:\\Repositories\\PCovNet\\src\\..\\data\\raw\\phase1"
}


# Import & Process Data

In [4]:
# Drop the 'Symptom Onset' column when the info file provides it.
# DataFrame.drop returns a new frame instead of modifying the existing one,
# so the result has to be bound back to the name to take effect.
if 'Symptom Onset' in subject_info.columns:
    subject_info = subject_info.drop(columns=['Symptom Onset'])

# Cache file for the processed dataset, keyed by the window settings
data_obj_path = join(config['EXP_DIR'],
                     f"{config['LEN_WIN']}-{config['N_WIN']}_data.joblib")

if not os.path.isfile(data_obj_path):
    # Prepare data (slow; the result is cached below)
    data = DataTL(config, subject_info)

    # Print data info
    data.print_info()

    # Export data object so later runs can skip the preparation step
    dump(data, data_obj_path)
else:
    # Reuse the cached dataset. joblib.load unpickles the file, so only load
    # caches produced by this notebook, never files from an untrusted source.
    data = load(data_obj_path)


100%|██████████| 63/63 [02:26<00:00,  2.32s/it]

            Subject Info
            Phase:                phase1
            Group:                healthy
            
            Dataset Shape
            VAE Train:            (55208, 24, 1)
            VAE Train Aug:        (441664, 24, 1)
            LSTM Train:           (46136, 7, 24, 1)
        





# VAE Model

#### Assign and Compile VAE Model

In [5]:
# Build the convolutional VAE: one input window of LEN_WIN timesteps, with the
# channel count taken from the last axis of the VAE training array.
vae_model = cnn_vae.VAE(
    n_timesteps=config['LEN_WIN'],
    n_channels=data.train_dataset_vae.shape[-1],
    latent_dim=config['LATENT_DIM'],
)

# Compile with an MSE loss, Adam at the configured learning rate, and MSE as metric
vae_compile_args = {
    'loss': tf.losses.MeanSquaredError(),
    'optimizer': tf.optimizers.Adam(learning_rate=config['LEARNING_RATE']),
    'metrics': [tf.metrics.MeanSquaredError()],
}
vae_model.compile(**vae_compile_args)

# Print a titled summary of the model
print("\nVAE Model Summary", "=================", sep="\n", end="\n\n")
vae_model.print_summary()



VAE Model Summary

Model: "Encoder"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 encoder_input (InputLayer)     [(None, 24, 1)]      0           []                               
                                                                                                  
 encoder1 (Conv1D)              (None, 12, 128)      512         ['encoder_input[0][0]']          
                                                                                                  
 encoder2 (Conv1D)              (None, 6, 64)        24640       ['encoder1[0][0]']               
                                                                                                  
 encoder3 (Conv1D)              (None, 3, 32)        6176        ['encoder2[0][0]']               
                                                                        

#### Train VAE Model

In [6]:
# Checkpoint prefix for the VAE weights inside the experiment directory
vae_ckpt_path = join(config['EXP_DIR'], "vae_checkpoint", "ckpt")

# Stop when val_loss has not improved for PATIENCE epochs and roll back to the
# best weights seen.
early_stopping_callback = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=config['PATIENCE'],
    restore_best_weights=True,
)
# Write weights only, and only when val_loss improves
checkpoint_callback = ModelCheckpoint(
    vae_ckpt_path,
    monitor='val_loss',
    mode='min',
    verbose=0,
    save_best_only=True,
    save_weights_only=True,
)

if os.path.isfile(vae_ckpt_path + ".index"):
    # A checkpoint already exists: restore it instead of retraining
    vae_model.load_weights(vae_ckpt_path)
    print("VAE model weights loaded from:")
    print(vae_ckpt_path)
else:
    # No checkpoint yet: train the VAE from scratch
    vae_fit_args = {
        'validation_split': config['VAL_SPLIT'],
        'batch_size': config['BATCH_SIZE'],
        'epochs': config['EPOCH'],
        'shuffle': False,
        'verbose': 1,
        'callbacks': [early_stopping_callback, checkpoint_callback],
    }
    vae_history = vae_model.fit(data.train_dataset_vae, **vae_fit_args)

    # Save the training history as CSV
    vae_history_path = join(config['EXP_DIR'], "vae_history.csv")
    export_history(vae_history, vae_history_path)

    # Save the loss curve to disk and close the figure
    plot.loss_curve(config, vae_history, ref="_VAE",
                    save_plot=True, close_plot=True)


Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000


# Generate Embeddings

In [7]:
# Cache file for the embedding generator object
vae_embed_path = join(config['EXP_DIR'], "vae_embeddings.joblib")

if os.path.isfile(vae_embed_path):
    # Cached embeddings exist: load them instead of recomputing
    embed_gen = load(vae_embed_path)
    print("Embeddings loaded from:")
    print(vae_embed_path)
else:
    # Compute the embedding dataset with the trained VAE
    embed_gen = EmbedGenTL(config, vae_model, data, verbose=True)

    # Cache the embed_gen object for later runs
    dump(embed_gen, vae_embed_path)


Calculating embeddings for train dataset... 


100%|██████████| 46136/46136 [12:52<00:00, 59.73it/s] 


# LSTM Model for Embeddings

#### Assign and Compile LSTM Model

In [8]:
# Build the LSTM autoencoder: N_WIN - 1 timesteps per sequence, with one
# feature per VAE latent dimension.
lstm_model = lstm_autoencoder(
    n_timesteps=config['N_WIN'] - 1,
    n_features=config['LATENT_DIM'],
)

# Compile with an MSE loss, Adam at the configured learning rate, and MSE as metric
lstm_compile_args = {
    'loss': tf.losses.MeanSquaredError(),
    'optimizer': tf.optimizers.Adam(learning_rate=config['LEARNING_RATE']),
    'metrics': ['mse'],
}
lstm_model.compile(**lstm_compile_args)

# Print a titled summary of the model
print("\nLSTM Model Summary", "==================", sep="\n", end="\n\n")
lstm_model.summary()



LSTM Model Summary

Model: "LSTM_Autoencoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 6, 6)]            0         
                                                                 
 encoder1 (LSTM)             (None, 6, 128)            69120     
                                                                 
 encoder2 (LSTM)             (None, 64)                49408     
                                                                 
 repeat_vec (RepeatVector)   (None, 6, 64)             0         
                                                                 
 decoder1 (LSTM)             (None, 6, 64)             33024     
                                                                 
 decoder2 (LSTM)             (None, 6, 128)            98816     
                                                                 
 reconst (TimeDistributed)   

#### Train LSTM Model

In [9]:
# Checkpoint prefix for the LSTM weights inside the experiment directory
lstm_ckpt_path = join(
    config['EXP_DIR'], "lstm_checkpoint", "ckpt")

# Callbacks for LSTM: stop after PATIENCE epochs without val_loss improvement
# (restoring the best weights) and checkpoint the best weights only.
early_stopping_callback = EarlyStopping(monitor='val_loss',
                                        patience=config['PATIENCE'],
                                        mode='min',
                                        restore_best_weights=True)
checkpoint_callback = ModelCheckpoint(lstm_ckpt_path,
                                      monitor='val_loss',
                                      verbose=0,
                                      mode='min',
                                      save_best_only=True,
                                      save_weights_only=True)

if not os.path.isfile(lstm_ckpt_path + ".index"):
    # Train LSTM. The validation fraction comes from the shared config, as in
    # the VAE training cell, so changing VAL_SPLIT affects both models.
    lstm_history = lstm_model.fit(embed_gen.x_train, embed_gen.y_train,
                                  validation_split=config['VAL_SPLIT'],
                                  batch_size=config['BATCH_SIZE'],
                                  epochs=config['EPOCH'],
                                  callbacks=[
                                      early_stopping_callback, checkpoint_callback],
                                  verbose=1)

    # Export model history (file name is prefixed with the dataset id)
    export_history(lstm_history, join(
        config['EXP_DIR'], data.id + "_lstm_history.csv"))

    # Save the loss curve to disk and close the figure
    plot.loss_curve(config, lstm_history, ref=data.id + "_LSTM", save_plot=True,
                    close_plot=True)
else:
    # A checkpoint already exists: restore it instead of retraining
    lstm_model.load_weights(lstm_ckpt_path)
    print("LSTM model weights loaded from:")
    print(lstm_ckpt_path)


Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
