In [1]:
# Include project path to available custom class at jupyter
import os
import sys
sys.path.insert(0, os.path.abspath('/home/vanessa/PycharmProjects/RecurrentNetworks/'))

# Disable warnings
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load libs standard python and custom
import numpy as np
import pandas as pd
import datetime

from network_model.custom_ensemble import CustomEnsemble
from network_model.model_class import ModelClass
from utils.experiment_processes import ExperimentProcesses
from utils.log import Log
import utils.definition_network as dn

import datetime

# LAYERS
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Embedding


Using TensorFlow backend.


<h2> Running the main LSTM topologies for multilabel dataset </h2>
<p>
    <ul> When reviewing the experiments, there were some gaps that motivated the tests below:
    <li> <b> 1.0 Train, validate and test experiment lstm_exp14_L3_N64_B20_E96 </b>: experiment with the best result multilabel dataset.
        <ol><li> <b> Test lstm_exp14_L3_N64_B20_E96 + SMHD _ * _ 2640 </b> </li>
            <li> <b> Test lstm_exp14_L3_N64_B20_E96 + SMHD _ * _ 1760 </b> </li>
        </ol>
    </li>
    <li> <b> 2.0 Train, validate and test experiment lstm_exp9_var_L3_N16_B40_E32_D0.2 </b>: it has already been tested with multi-label dataset for the best configuration using Glove6B static embeddings. However, this same model and dataset and personalized word embeddings were tested, generating the set of tests below:
        <ol><li> <b> Test lstm_exp9_var_L3_N16_B40_E32_D0.2 + word embeddings SMHD all users (SMHD _ * _ 1760) </b>, generated with algorithms (skipgram, cbow, glove) </li>
            <li> <b> Test lstm_exp9_var_L3_N16_B40_E32_D0.2 + word embeddings SMHD all users (SMHD _ * _ 2640) </b>, generated with algorithms (skipgram, cbow, glove) </li>
            <li> <b> Test lstm_exp9_var_L3_N16_B40_E32_D0.2 + word embeddings SMHD 3 pathologies (best result dataset between tests 2.1 and 2.2) </b>, generated with algorithms (skipgram, cbow, glove) </li>
            <li> <b> Test lstm_exp9_var_L3_N16_B40_E32_D0.2 + Glove6B, glorot x lecun kernel initializer </b> </li>
        </ol>
    </li>
    </ul>
</p>

In [3]:
### Checks composition of the used dataset train
file_path = "/home/vanessa/PycharmProjects/RecurrentNetworks/dataset/anx_dep_multilabel/"
dataset_train = pd.read_pickle(file_path + str("SMHD_train_2640.df"))
dataset_train.groupby('label').size()

label
anxiety               440
anxiety,depression    440
control               880
depression            440
dtype: int64

In [4]:
### Checks composition of the used dataset valid
dataset_valid = pd.read_pickle(file_path + str("SMHD_validation_2640.df"))
dataset_valid.groupby('label').size()

label
anxiety               440
anxiety,depression    440
control               880
depression            440
dtype: int64

In [5]:
### Checks composition of the used dataset test
dataset_test = pd.read_pickle(file_path + str("SMHD_test_2640.df"))
dataset_test.groupby('label').size()

label
anxiety               440
anxiety,depression    440
control               880
depression            440
dtype: int64

In [6]:
dataset = pd.concat([dataset_train, dataset_valid, dataset_test])
dataset.describe()

Unnamed: 0,label,texts
count,6600,6600
unique,4,6600
top,control,"['We get new celebrities. ', 'Wolf Children. V..."
freq,2640,1


### 1.0 Train, validate and test experiment lstm_exp14_L3_N64_B20_E96

In [7]:
def generate_lstm_exp14(exp, name_model):
    exp.pp_data.vocabulary_size = 5000
    exp.pp_data.embedding_size = 300
    exp.pp_data.max_posts = 1750
    exp.pp_data.max_terms_by_post = 300
    exp.pp_data.binary_classifier = True
    exp.pp_data.format_input_data = dn.InputData.POSTS_LIST
    exp.pp_data.remove_stopwords = False
    exp.pp_data.delete_low_tfid = False
    exp.pp_data.min_df = 0
    exp.pp_data.min_tf = 0
    exp.pp_data.random_posts = False
    exp.pp_data.random_users = False
    exp.pp_data.tokenizing_type = 'WE'
    exp.pp_data.word_embedding_custom_file = ''    
    exp.pp_data.use_embedding = dn.UseEmbedding.NONE
    exp.pp_data.embedding_type = dn.EmbeddingType.NONE
    exp.pp_data.load_dataset_type = dn.LoadDataset.TRAIN_DATA_MODEL
    exp.pp_data.type_prediction_label= dn.TypePredictionLabel.MULTI_LABEL_CATEGORICAL

    exp.use_custom_metrics = False
    exp.use_valid_set_for_train = True
    exp.valid_split_from_train_set = 0.0
    exp.imbalanced_classes = False

    we_file_name = 'ET_' + str(exp.pp_data.embedding_type.value) + '_UE_' + str(exp.pp_data.use_embedding.value) +\
                                 '_EF_' + 'glove6B300d_glorot'
    
    lstm = ModelClass(1)
    lstm.loss_function = 'binary_crossentropy'
    lstm.optmizer_function = 'adam'
    lstm.use_embedding_pre_train = exp.pp_data.use_embedding
    lstm.embed_trainable = False

    # Train
    neuronios_by_layer = [64]
    epochs = [96]
    batch_sizes = [20]
    dropouts = [0.2]

    np.random.seed(dn.SEED)
    time_ini_rep = datetime.datetime.now()

    x_train, y_train, x_valid, y_valid, num_words, embedding_matrix = exp.pp_data.load_data()
    exp.set_period_time_end(time_ini_rep, 'Load data')

    for neuronios in neuronios_by_layer:
        for batch_size in batch_sizes:
            for epoch in epochs:
                for dropout in dropouts:
                    lstm.epochs = epoch
                    lstm.batch_size = batch_size
                    lstm.patience_train = epoch/2
                    data_dim = exp.pp_data.max_terms_by_post
                    timesteps = exp.pp_data.max_posts

                    exp.experiment_name = name_model + 'lstm_exp14_L3' + '_N' + str(neuronios) + '_B' +\
                                          str(batch_size) + '_E' + str(epoch) + '_D' + str(dropout) + '_' +\
                                          we_file_name

                    lstm.model = Sequential()
                    lstm.model.add(
                            LSTM(neuronios, activation='tanh', dropout=dropout, recurrent_dropout=dropout, 
                                 return_sequences=True, stateful=True, 
                                 batch_input_shape=(batch_size, timesteps, data_dim),
                                 name='dense_1_' + name_model))
                    lstm.model.add(
                            LSTM(neuronios, activation='tanh', dropout=dropout, recurrent_dropout=dropout,
                                 return_sequences=True, stateful=True,
                                 name='dense_2_' + name_model))
                    lstm.model.add(LSTM(neuronios, activation='tanh', dropout=dropout, recurrent_dropout=dropout,
                                        stateful=True,
                                        name='dense_3_' + name_model))
                    lstm.model.add(Dense(3, activation='sigmoid',
                                         name='dense_4_' + name_model))

                    time_ini_exp = datetime.datetime.now()
                    exp.generate_model_hypeparams(lstm, x_train, y_train, x_valid, y_valid, embedding_matrix)
                    exp.set_period_time_end(time_ini_exp, 'Total experiment')

    del x_train, y_train, x_valid, y_valid, num_words, embedding_matrix

    # Test
    exp.pp_data.load_dataset_type = dn.LoadDataset.TEST_DATA_MODEL
    np.random.seed(dn.SEED)
    time_ini_rep = datetime.datetime.now()
    x_test, y_test = exp.pp_data.load_data()
    exp.set_period_time_end(time_ini_rep, 'Load data')

    for neuronios in neuronios_by_layer:
        for batch_size in batch_sizes:
            for epoch in epochs:
                for dropout in dropouts:
                    exp.experiment_name = name_model + 'lstm_exp14_L3' + '_N' + str(neuronios) + '_B' +\
                                          str(batch_size) + '_E' + str(epoch) + '_D' + str(dropout) + '_' +\
                                          we_file_name
                    lstm.epochs = epoch
                    lstm.batch_size = batch_size
                    lstm.patience_train = epoch/2

                    lstm.model = exp.load_model(dn.PATH_PROJECT + exp.experiment_name + '.h5')
                    exp.save_geral_configs()
                    exp.save_summary_model(lstm.model)
                    exp.predict_samples(lstm, x_test, y_test)
    
    del x_test, y_test, lstm, exp

#### 1.1 Test lstm_exp14_L3_N64_B20_E96 + SMHD_*_2640

In [8]:
print('Initializer experiment 1 (model SMHD_ml_gl_2640)\n'+\
      'Set: kernel_initializer=glorot_uniform=xavier_uniform, dataset=SMHD_2640 multi-label')

exp = ExperimentProcesses('lstm_exp14_L3')
exp.pp_data.set_dataset_source(dataset_name='SMHD', label_set=['control', 'anxiety', 'depression'],
                               total_registers=2640, subdirectory="anx_dep_multilabel")

generate_lstm_exp14(exp, 'exp14_ml_gl_2640')

Initializer experiment 1 (model SMHD_ml_gl_2640)
Set: kernel_initializer=glorot_uniform=xavier_uniform, dataset=SMHD_2640 multi-label
Loading data... /home/vanessa/PycharmProjects/RecurrentNetworks/tokenizers/anx_dep_multilabel/SMHD_TR_2640_VS_5000_TF_0_DF_0_RSW_F_IT_2_RP_F
Preprocess data... /home/vanessa/PycharmProjects/RecurrentNetworks/dataset/anx_dep_multilabel/SMHD_train_2640.df
Load data - Ini: 2020-03-08 13:49:08	End: 2020-03-08 13:50:19	Total: 0:01:11.455724




Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.

Training using single GPU or CPU..

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Train on 2200 samples, validate on 2200 samples
Epoch 1/96
Epoch 2/96
Epoch 3/96
Epoch 4/96
Epoch 5/96
Epoch 6/96
Epoch 7/96
Epoch 8/96
Epoch 9/96
Epoch 10/96
Epoch 11/96
Epoch 12/96
Epoch 13/96
Epoch 14/96
Epoch 15/96
Epoch 16/96
Epoch 17/96
Epoch 18/96
Epoch 19/96
Epoch 20/96

Epoch 41/96
Epoch 42/96
Epoch 43/96
Epoch 44/96
Epoch 45/96
Epoch 46/96
Epoch 47/96
Epoch 48/96
Epoch 49/96
Epoch 50/96
Epoch 51/96
Epoch 52/96
Epoch 53/96
Epoch 54/96
Epoch 55/96
Epoch 56/96
Epoch 57/96
Epoch 58/96
Epoch 59/96
Epoch 60/96
Epoch 61/96
Epoch 62/96
Epoch 63/96
Epoch 64/96
Epoch 65/96
Epoch 66/96
Epoch 67/96
Epoch 68/96
Epoch 69/96
Epoch 70/96
Epoch 71/96
Epoch 72/96
Epoch 73/96
Epoch 74/96
Epoch 75/96
Epoch 76/96
Epoch 77/96
Epoch 78/96
Epoch 79/96
Epoch 80/96
Epoch 81/96
Epoch 82/96
Epoch 83/96
Epoch 84/96
Epoch 85/96
Epoch 86/96
Epoch 87/96
Epoch 88/96
Epoch 89/96
Epoch 90/96
Epoch 91/96
Epoch 92/96
Epoch 93/96
Epoch 94/96
Epoch 95/96
Epoch 96/96
Generate Model - Ini: 2020-03-08 13:50:29	End: 2020-03-08 20:49:20	Total: 6:58:51.029581
Total experiment - Ini: 2020-03-08 13:50:29	End: 2020-03-08 20:49:20	Total: 6:58:51.302792
Loading data... /home/vanessa/PycharmProjects/RecurrentNetworks/tokenizers/anx_dep_multilabel/SMHD_TR_2640_VS_5000_TF_0_DF_0_RSW_F_IT_2_RP_F
Preproc

Load data - Ini: 2020-03-08 20:49:20	End: 2020-03-08 20:49:50	Total: 0:00:29.492355


#### 1.2 Test lstm_exp14_L3_N64_B20_E96 + SMHD_*_1760

In [9]:
print('Initializer experiment 1.1 (model SMHD_ml_gl_1760)\n'+\
      'Set: kernel_initializer=glorot_uniform=xavier_uniform, dataset=SMHD_1760 multi-label')

exp = ExperimentProcesses('lstm_exp14_L3')
exp.pp_data.set_dataset_source(dataset_name='SMHD', label_set=['control', 'anxiety', 'depression'],
                               total_registers=1760, subdirectory="anx_dep_multilabel")

generate_lstm_exp14(exp, 'exp14_ml_gl_1760')

Initializer experiment 1.1 (model SMHD_ml_gl_1760)
Set: kernel_initializer=glorot_uniform=xavier_uniform, dataset=SMHD_1760 multi-label
Loading data... /home/vanessa/PycharmProjects/RecurrentNetworks/tokenizers/anx_dep_multilabel/SMHD_TR_1760_VS_5000_TF_0_DF_0_RSW_F_IT_2_RP_F
Preprocess data...
Generate tokenizing
Load data - Ini: 2020-03-08 20:51:02	End: 2020-03-08 20:53:31	Total: 0:02:29.144501
Training using single GPU or CPU..
Train on 1760 samples, validate on 1760 samples
Epoch 1/96
Epoch 2/96
Epoch 3/96
Epoch 4/96
Epoch 5/96
Epoch 6/96
Epoch 7/96
Epoch 8/96
Epoch 9/96
Epoch 10/96
Epoch 11/96
Epoch 12/96
Epoch 13/96
Epoch 14/96
Epoch 15/96
Epoch 16/96
Epoch 17/96
Epoch 18/96
Epoch 19/96
Epoch 20/96
Epoch 21/96
Epoch 22/96
Epoch 23/96
Epoch 24/96
Epoch 25/96
Epoch 26/96
Epoch 27/96
Epoch 28/96
Epoch 29/96
Epoch 30/96
Epoch 31/96
Epoch 32/96
Epoch 33/96
Epoch 34/96
Epoch 35/96
Epoch 36/96
Epoch 37/96
Epoch 38/96
Epoch 39/96
Epoch 40/96
Epoch 41/96
Epoch 42/96
Epoch 43/96
Epoch 44/9

### 2.0 Train, validate and test experiment lstm_exp9_var_L3_N16_B40_E32_D0.2

In [8]:
def generate_model(exp, name_model, we_file_name, kernel_function='glorot_uniform'):
    exp.pp_data.vocabulary_size = 5000

    exp.pp_data.embedding_size = 300
    exp.pp_data.max_posts = 1750
    exp.pp_data.max_terms_by_post = 300
    exp.pp_data.binary_classifier = True
    exp.pp_data.format_input_data = dn.InputData.POSTS_ONLY_TEXT
    exp.pp_data.remove_stopwords = False
    exp.pp_data.delete_low_tfid = False
    exp.pp_data.min_df = 0
    exp.pp_data.min_tf = 0
    exp.pp_data.random_posts = False
    exp.pp_data.random_users = False
    exp.pp_data.tokenizing_type = 'WE'
    exp.pp_data.load_dataset_type = dn.LoadDataset.TRAIN_DATA_MODEL
    exp.pp_data.type_prediction_label= dn.TypePredictionLabel.MULTI_LABEL_CATEGORICAL

    exp.use_custom_metrics = False
    exp.use_valid_set_for_train = True
    exp.valid_split_from_train_set = 0.0
    exp.imbalanced_classes = False

    lstm = ModelClass(1)
    lstm.loss_function = 'binary_crossentropy'
    lstm.optmizer_function = 'adam'
    lstm.epochs = 15
    lstm.batch_size = 32
    lstm.patience_train = 10
    lstm.use_embedding_pre_train = exp.pp_data.use_embedding
    lstm.embed_trainable = (lstm.use_embedding_pre_train == (dn.UseEmbedding.RAND or dn.UseEmbedding.NON_STATIC))

    neuronios_by_layer = [16]
    epochs = [32]
    batch_sizes = [40]
    dropouts = [0.2]

    np.random.seed(dn.SEED)

    time_ini_rep = datetime.datetime.now()
    x_train, y_train, x_valid, y_valid, num_words, embedding_matrix = exp.pp_data.load_data()
    exp.set_period_time_end(time_ini_rep, 'Load data')

    for neuronios in neuronios_by_layer:
        for batch_size in batch_sizes:
            for epoch in epochs:
                for dropout in dropouts:
                    lstm.epochs = epoch
                    lstm.batch_size = batch_size
                    lstm.patience_train = epoch / 2
                    exp.experiment_name = name_model + '_lstm_exp9_var_L3' + '_N' + str(neuronios) + '_B' +\
                                          str(batch_size) + '_E' + str(epoch) + '_D' + str(dropout) + '_' +\
                                          we_file_name

                    lstm.model = Sequential()
                    lstm.model.add(Embedding(exp.pp_data.vocabulary_size, exp.pp_data.embedding_size,
                                             trainable=lstm.embed_trainable, name='emb_' + name_model))
                    lstm.model.add(LSTM(neuronios, kernel_initializer=kernel_function,
                                        activation='tanh', dropout=dropout, recurrent_dropout=dropout,
                                        return_sequences=True, name='dense_1_' + name_model))
                    lstm.model.add(LSTM(neuronios, kernel_initializer=kernel_function,
                                        activation='tanh', dropout=dropout, recurrent_dropout=dropout,
                                        return_sequences=True, name='dense_2_' + name_model))
                    lstm.model.add(LSTM(neuronios, kernel_initializer=kernel_function,
                                        activation='tanh', dropout=dropout, recurrent_dropout=dropout,
                                        name='dense_3_' + name_model))
                    lstm.model.add(Dense(3,
                                         activation='sigmoid',
                                         name='dense_4_' + name_model))

                    time_ini_exp = datetime.datetime.now()
                    exp.generate_model_hypeparams(lstm, x_train, y_train, x_valid, y_valid, embedding_matrix)
                    exp.set_period_time_end(time_ini_exp, 'Total experiment')

    del x_train, y_train, x_valid, y_valid, num_words, embedding_matrix

    # Test
    exp.pp_data.load_dataset_type = dn.LoadDataset.TEST_DATA_MODEL
    np.random.seed(dn.SEED)
    time_ini_rep = datetime.datetime.now()
    x_test, y_test = exp.pp_data.load_data()
    exp.set_period_time_end(time_ini_rep, 'Load data')

    for neuronios in neuronios_by_layer:
        for batch_size in batch_sizes:
            for epoch in epochs:
                for dropout in dropouts:
                    lstm.epochs = epoch
                    lstm.batch_size = batch_size
                    lstm.patience_train = epoch / 2
                    exp.experiment_name = name_model + '_lstm_exp9_var_L3' + '_N' + str(neuronios) + '_B' +\ 
                                          str(batch_size) + '_E' + str(epoch) + '_D' + str(dropout) + '_' +\ 
                                          we_file_name

                    lstm.model = exp.load_model(dn.PATH_PROJECT + exp.experiment_name + '.h5')
                    exp.save_geral_configs()
                    exp.save_summary_model(lstm.model)
                    exp.predict_samples(lstm, x_test, y_test)

    del x_test, y_test, lstm

#### 2.1 Test lstm_exp9_var_L3_N16_B40_E32_D0.2 + word embeddings SMHD all users (SMHD_*_1760)

In [13]:
print('Initializer experiment 1.1 (model SMHD_ml_gl_1760)\n'+\
      'Set: kernel_initializer=glorot_uniform=xavier_uniform, dataset=SMHD_1760 multi-label')

exp = ExperimentProcesses('lstm_exp9_var_L3')
exp.pp_data.set_dataset_source(dataset_name='SMHD', label_set=['control', 'anxiety', 'depression'],
                               total_registers=1760, subdirectory="anx_dep_multilabel")

embedding_types = [dn.EmbeddingType.WORD2VEC_CUSTOM, dn.EmbeddingType.GLOVE_CUSTOM]
use_embeddings = [dn.UseEmbedding.STATIC, dn.UseEmbedding.NON_STATIC]

for embedding_type in embedding_types:
    if embedding_type == dn.EmbeddingType.WORD2VEC_CUSTOM:
        word_embedding_custom_files = ['SMHD-Skipgram-AllUsers-300.bin', 'SMHD-CBOW-AllUsers-300.bin']
    else:
        word_embedding_custom_files = ['SMHD-glove-AllUsers-300.pkl']

    for word_embedding_custom_file in word_embedding_custom_files:
        for use_embedding in use_embeddings:
            exp.pp_data.embedding_type = embedding_type
            exp.pp_data.use_embedding = use_embedding
            exp.pp_data.word_embedding_custom_file = word_embedding_custom_file
            exp.pp_data.load_dataset_type = dn.LoadDataset.TRAIN_DATA_MODEL

            we_file_name = 'ET_' + str(embedding_type.value) + '_UE_' + str(use_embedding.value) +\
                           '_EF_' + word_embedding_custom_file.split('.')[0] + '_glove6B300d_glorot'

            generate_model(exp, 'exp9_' + we_file_name[0:13] + we_file_name[18:30] + '_1760', we_file_name)


Initializer experiment 1.1 (model SMHD_ml_gl_1760)
Set: kernel_initializer=glorot_uniform=xavier_uniform, dataset=SMHD_1760 multi-label
Loading data... /home/vanessa/PycharmProjects/RecurrentNetworks/tokenizers/anx_dep_multilabel/SMHD_TR_1760_VS_5000_TF_0_DF_0_RSW_F_IT_1_RP_F
Preprocess data... /home/vanessa/PycharmProjects/RecurrentNetworks/dataset/anx_dep_multilabel/SMHD_train_1760.df
Load data - Ini: 2020-03-09 09:09:44	End: 2020-03-09 09:10:01	Total: 0:00:17.053792
Training using single GPU or CPU..
Train on 1760 samples, validate on 1760 samples
Epoch 1/32
Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32
Epoch 12/32
Epoch 13/32
Epoch 14/32
Epoch 15/32
Epoch 16/32
Epoch 17/32
Epoch 18/32
Epoch 19/32
Epoch 20/32
Epoch 21/32
Epoch 22/32
Epoch 23/32
Epoch 24/32
Epoch 25/32
Epoch 26/32
Epoch 27/32
Epoch 28/32
Epoch 29/32
Epoch 30/32
Epoch 31/32
Epoch 32/32
Generate Model - Ini: 2020-03-09 09:10:03	End: 2020-03-09 12:51:47	T

Epoch 17/32
Epoch 18/32
Epoch 19/32
Epoch 20/32
Epoch 21/32
Epoch 22/32
Epoch 23/32
Epoch 24/32
Epoch 25/32
Epoch 26/32
Epoch 00026: early stopping
Generate Model - Ini: 2020-03-09 12:53:16	End: 2020-03-09 15:48:01	Total: 2:54:45.193328
Total experiment - Ini: 2020-03-09 12:53:16	End: 2020-03-09 15:48:01	Total: 2:54:45.193915
Loading data... /home/vanessa/PycharmProjects/RecurrentNetworks/tokenizers/anx_dep_multilabel/SMHD_TR_1760_VS_5000_TF_0_DF_0_RSW_F_IT_1_RP_F
Preprocess data... /home/vanessa/PycharmProjects/RecurrentNetworks/dataset/anx_dep_multilabel/SMHD_test_1760.df
Load data - Ini: 2020-03-09 15:48:01	End: 2020-03-09 15:48:11	Total: 0:00:09.706965
Loading data... /home/vanessa/PycharmProjects/RecurrentNetworks/tokenizers/anx_dep_multilabel/SMHD_TR_1760_VS_5000_TF_0_DF_0_RSW_F_IT_1_RP_F
Preprocess data... /home/vanessa/PycharmProjects/RecurrentNetworks/dataset/anx_dep_multilabel/SMHD_train_1760.df
Load Pre-train embeddings
Load data - Ini: 2020-03-09 15:49:14	End: 2020-03-09 15

Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32
Epoch 12/32
Epoch 13/32
Epoch 14/32
Epoch 15/32
Epoch 16/32
Epoch 17/32
Epoch 18/32
Epoch 19/32
Epoch 20/32
Epoch 21/32
Epoch 22/32
Epoch 23/32
Epoch 24/32
Epoch 00024: early stopping
Generate Model - Ini: 2020-03-10 00:48:59	End: 2020-03-10 03:32:24	Total: 2:43:25.678964
Total experiment - Ini: 2020-03-10 00:48:59	End: 2020-03-10 03:32:24	Total: 2:43:25.679532
Loading data... /home/vanessa/PycharmProjects/RecurrentNetworks/tokenizers/anx_dep_multilabel/SMHD_TR_1760_VS_5000_TF_0_DF_0_RSW_F_IT_1_RP_F
Preprocess data... /home/vanessa/PycharmProjects/RecurrentNetworks/dataset/anx_dep_multilabel/SMHD_test_1760.df
Load data - Ini: 2020-03-10 03:32:24	End: 2020-03-10 03:32:33	Total: 0:00:08.824508


#### 2.2 Test lstm_exp9_var_L3_N16_B40_E32_D0.2 + word embeddings SMHD all users (SMHD_*_2640)

In [9]:
print('Initializer experiment 2.2 (model SMHD_ml_gl_2640)\n'+\
      'Set: kernel_initializer=glorot_uniform=xavier_uniform, dataset=SMHD_2640 multi-label')

exp = ExperimentProcesses('lstm_exp9_var_L3')
exp.pp_data.set_dataset_source(dataset_name='SMHD', label_set=['control', 'anxiety', 'depression'],
                               total_registers=2640, subdirectory="anx_dep_multilabel")

embedding_types = [dn.EmbeddingType.WORD2VEC_CUSTOM, dn.EmbeddingType.GLOVE_CUSTOM]
use_embeddings = [dn.UseEmbedding.STATIC, dn.UseEmbedding.NON_STATIC]

for embedding_type in embedding_types:
    if embedding_type == dn.EmbeddingType.WORD2VEC_CUSTOM:
        word_embedding_custom_files = ['SMHD-Skipgram-AllUsers-300.bin', 'SMHD-CBOW-AllUsers-300.bin']
    else:
        word_embedding_custom_files = ['SMHD-glove-AllUsers-300.pkl']

    for word_embedding_custom_file in word_embedding_custom_files:
        for use_embedding in use_embeddings:
            exp.pp_data.embedding_type = embedding_type
            exp.pp_data.use_embedding = use_embedding
            exp.pp_data.word_embedding_custom_file = word_embedding_custom_file
            exp.pp_data.load_dataset_type = dn.LoadDataset.TRAIN_DATA_MODEL

            we_file_name = 'ET_' + str(embedding_type.value) + '_UE_' + str(use_embedding.value) +\
                           '_EF_' + word_embedding_custom_file.split('.')[0] + '_glove6B300d_glorot'

            generate_model(exp, 'exp9_' + we_file_name[0:13] + we_file_name[18:30] + '_2640', we_file_name)

Initializer experiment 2.2 (model SMHD_ml_gl_2640)
Set: kernel_initializer=glorot_uniform=xavier_uniform, dataset=SMHD_2640 multi-label
Loading data... /home/vanessa/PycharmProjects/RecurrentNetworks/tokenizers/anx_dep_multilabel/SMHD_TR_2640_VS_5000_TF_0_DF_0_RSW_F_IT_1_RP_F
Preprocess data... /home/vanessa/PycharmProjects/RecurrentNetworks/dataset/anx_dep_multilabel/SMHD_train_2640.df
Load Pre-train embeddings
Load data - Ini: 2020-03-10 14:48:34	End: 2020-03-10 14:49:01	Total: 0:00:26.907298




Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.

Training using single GPU or CPU..

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Train on 2200 samples, validate on 2200 samples
Epoch 1/32
Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32
Epoch 12/32
Epoch 13/32
Epoch 14/32
Epoch 15/32
Epoch 16/32
Epoch 17/32
Epoch 1

Epoch 30/32
Epoch 31/32
Epoch 32/32
Generate Model - Ini: 2020-03-10 22:42:19	End: 2020-03-11 03:12:06	Total: 4:29:47.202715
Total experiment - Ini: 2020-03-10 22:42:17	End: 2020-03-11 03:12:06	Total: 4:29:49.589882
Loading data... /home/vanessa/PycharmProjects/RecurrentNetworks/tokenizers/anx_dep_multilabel/SMHD_TR_2640_VS_5000_TF_0_DF_0_RSW_F_IT_1_RP_F
Preprocess data... /home/vanessa/PycharmProjects/RecurrentNetworks/dataset/anx_dep_multilabel/SMHD_test_2640.df
Load data - Ini: 2020-03-11 03:12:06	End: 2020-03-11 03:12:18	Total: 0:00:11.567169
Loading data... /home/vanessa/PycharmProjects/RecurrentNetworks/tokenizers/anx_dep_multilabel/SMHD_TR_2640_VS_5000_TF_0_DF_0_RSW_F_IT_1_RP_F
Preprocess data... /home/vanessa/PycharmProjects/RecurrentNetworks/dataset/anx_dep_multilabel/SMHD_train_2640.df
Load data - Ini: 2020-03-11 03:13:37	End: 2020-03-11 03:14:00	Total: 0:00:23.014744
Training using single GPU or CPU..
Train on 2200 samples, validate on 2200 samples
Epoch 1/32
Epoch 2/32
Epoc

Epoch 00028: early stopping
Generate Model - Ini: 2020-03-11 12:29:12	End: 2020-03-11 16:36:10	Total: 4:06:58.517568
Total experiment - Ini: 2020-03-11 12:29:12	End: 2020-03-11 16:36:10	Total: 4:06:58.518239
Loading data... /home/vanessa/PycharmProjects/RecurrentNetworks/tokenizers/anx_dep_multilabel/SMHD_TR_2640_VS_5000_TF_0_DF_0_RSW_F_IT_1_RP_F
Preprocess data... /home/vanessa/PycharmProjects/RecurrentNetworks/dataset/anx_dep_multilabel/SMHD_test_2640.df
Load data - Ini: 2020-03-11 16:36:10	End: 2020-03-11 16:36:22	Total: 0:00:11.545395


#### 2.3 Test lstm_exp9_var_L3_N16_B40_E32_D0.2 + word embeddings SMHD 3 patologias (SMHD_*_2640)

In [10]:
print('Initializer experiment 2.3 (model SMHD_ml_gl_2640)\n'+\
      'Set: kernel_initializer=glorot_uniform=xavier_uniform, dataset=SMHD_2640 multi-label')

exp = ExperimentProcesses('lstm_exp9_var_L3')
exp.pp_data.set_dataset_source(dataset_name='SMHD', label_set=['control', 'anxiety', 'depression'],
                               total_registers=2640, subdirectory="anx_dep_multilabel")

embedding_types = [dn.EmbeddingType.WORD2VEC_CUSTOM, dn.EmbeddingType.GLOVE_CUSTOM]
use_embeddings = [dn.UseEmbedding.STATIC, dn.UseEmbedding.NON_STATIC]

for embedding_type in embedding_types:
    if embedding_type == dn.EmbeddingType.WORD2VEC_CUSTOM:
        word_embedding_custom_files = ['SMHD-Skipgram-A-D-ADUsers-300.bin', 'SMHD-CBOW-A-D-ADUsers-300.bin']
    else:
        word_embedding_custom_files = ['SMHD-glove-A-D-ADUsers-300.pkl']

    for word_embedding_custom_file in word_embedding_custom_files:
        for use_embedding in use_embeddings:
            exp.pp_data.embedding_type = embedding_type
            exp.pp_data.use_embedding = use_embedding
            exp.pp_data.word_embedding_custom_file = word_embedding_custom_file
            exp.pp_data.load_dataset_type = dn.LoadDataset.TRAIN_DATA_MODEL

            we_file_name = 'ET_' + str(embedding_type.value) + '_UE_' + str(use_embedding.value) +\
                           '_EF_' + word_embedding_custom_file.split('.')[0] + '_glove6B300d_glorot'

            generate_model(exp, 'exp9_' + we_file_name[0:13] + we_file_name[18:30] + '_2640', we_file_name)

Initializer experiment 2.3 (model SMHD_ml_gl_2640)
Set: kernel_initializer=glorot_uniform=xavier_uniform, dataset=SMHD_2640 multi-label
Loading data... /home/vanessa/PycharmProjects/RecurrentNetworks/tokenizers/anx_dep_multilabel/SMHD_TR_2640_VS_5000_TF_0_DF_0_RSW_F_IT_1_RP_F
Preprocess data... /home/vanessa/PycharmProjects/RecurrentNetworks/dataset/anx_dep_multilabel/SMHD_train_2640.df
Load Pre-train embeddings
Load data - Ini: 2020-03-11 16:37:52	End: 2020-03-11 16:38:16	Total: 0:00:24.774836
Training using single GPU or CPU..
Train on 2200 samples, validate on 2200 samples
Epoch 1/32
Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32
Epoch 12/32
Epoch 13/32
Epoch 14/32
Epoch 15/32
Epoch 16/32
Epoch 17/32
Epoch 18/32
Epoch 19/32
Epoch 20/32
Epoch 21/32
Epoch 22/32
Epoch 23/32
Epoch 24/32
Epoch 25/32
Epoch 26/32
Epoch 27/32
Epoch 28/32
Epoch 29/32
Epoch 30/32
Epoch 31/32
Epoch 32/32
Generate Model - Ini: 2020-03-11 16:38:25	

Epoch 17/32
Epoch 18/32
Epoch 19/32
Epoch 20/32
Epoch 21/32
Epoch 22/32
Epoch 23/32
Epoch 24/32
Epoch 25/32
Epoch 26/32
Epoch 27/32
Epoch 28/32
Epoch 29/32
Epoch 30/32
Epoch 31/32
Epoch 32/32
Generate Model - Ini: 2020-03-11 21:14:07	End: 2020-03-12 01:52:17	Total: 4:38:09.648901
Total experiment - Ini: 2020-03-11 21:14:07	End: 2020-03-12 01:52:17	Total: 4:38:09.649495
Loading data... /home/vanessa/PycharmProjects/RecurrentNetworks/tokenizers/anx_dep_multilabel/SMHD_TR_2640_VS_5000_TF_0_DF_0_RSW_F_IT_1_RP_F
Preprocess data... /home/vanessa/PycharmProjects/RecurrentNetworks/dataset/anx_dep_multilabel/SMHD_test_2640.df
Load data - Ini: 2020-03-12 01:52:17	End: 2020-03-12 01:52:28	Total: 0:00:11.575014
Loading data... /home/vanessa/PycharmProjects/RecurrentNetworks/tokenizers/anx_dep_multilabel/SMHD_TR_2640_VS_5000_TF_0_DF_0_RSW_F_IT_1_RP_F
Preprocess data... /home/vanessa/PycharmProjects/RecurrentNetworks/dataset/anx_dep_multilabel/SMHD_train_2640.df
Load Pre-train embeddings
Load data -

Epoch 24/32
Epoch 25/32
Epoch 26/32
Epoch 27/32
Epoch 28/32
Epoch 29/32
Epoch 30/32
Epoch 31/32
Epoch 32/32
Generate Model - Ini: 2020-03-12 10:09:56	End: 2020-03-12 14:36:14	Total: 4:26:17.949636
Total experiment - Ini: 2020-03-12 10:09:44	End: 2020-03-12 14:36:14	Total: 4:26:30.194739
Loading data... /home/vanessa/PycharmProjects/RecurrentNetworks/tokenizers/anx_dep_multilabel/SMHD_TR_2640_VS_5000_TF_0_DF_0_RSW_F_IT_1_RP_F
Preprocess data... /home/vanessa/PycharmProjects/RecurrentNetworks/dataset/anx_dep_multilabel/SMHD_test_2640.df
Load data - Ini: 2020-03-12 14:36:14	End: 2020-03-12 14:36:27	Total: 0:00:13.176678
Loading data... /home/vanessa/PycharmProjects/RecurrentNetworks/tokenizers/anx_dep_multilabel/SMHD_TR_2640_VS_5000_TF_0_DF_0_RSW_F_IT_1_RP_F
Preprocess data... /home/vanessa/PycharmProjects/RecurrentNetworks/dataset/anx_dep_multilabel/SMHD_train_2640.df
Load data - Ini: 2020-03-12 14:38:05	End: 2020-03-12 14:38:30	Total: 0:00:25.123471
Training using single GPU or CPU..
Tr

#### 2.4 Test lstm_exp9_var_L3_N16_B40_E32_D0.2 +  Glove6B, glorot x lecun kernel initializer

In [10]:
print('Initializer experiment 2.3 (model SMHD_ml_gl_2640)\n'+\
      'Set: kernel_initializer=glorot_uniform=xavier_uniform, dataset=SMHD_2640 multi-label')

exp = ExperimentProcesses('lstm_exp9_var_L3')
exp.pp_data.set_dataset_source(dataset_name='SMHD', label_set=['control', 'anxiety', 'depression'],
                               total_registers=2640, subdirectory="anx_dep_multilabel")

use_embeddings = [dn.UseEmbedding.STATIC, dn.UseEmbedding.NON_STATIC]
kernel_functions = ['lecun_uniform', 'glorot_uniform']
for kernel_function in kernel_functions:
    for use_embedding in use_embeddings:
        exp.pp_data.embedding_type = dn.EmbeddingType.GLOVE_6B
        exp.pp_data.use_embedding = use_embedding
        exp.pp_data.word_embedding_custom_file = ''
        exp.pp_data.load_dataset_type = dn.LoadDataset.TRAIN_DATA_MODEL

        we_file_name = 'ET_' + str(embedding_type.value) + '_UE_' + str(use_embedding.value) +\
                       '_EF_' + word_embedding_custom_file.split('.')[0] + '_glove6B300d_' + kernel_function

        generate_model(exp, 'exp9_' + we_file_name[0:13] + we_file_name[18:30] + '_2640', we_file_name, 
                       kernel_function)

Initializer experiment 2.3 (model SMHD_ml_gl_2640)
Set: kernel_initializer=glorot_uniform=xavier_uniform, dataset=SMHD_2640 multi-label
Loading data... /home/vanessa/PycharmProjects/RecurrentNetworks/tokenizers/anx_dep_multilabel/SMHD_TR_2640_VS_5000_TF_0_DF_0_RSW_F_IT_1_RP_F
Preprocess data... /home/vanessa/PycharmProjects/RecurrentNetworks/dataset/anx_dep_multilabel/SMHD_train_2640.df
Load Pre-train embeddings
Load data - Ini: 2020-03-11 16:37:52	End: 2020-03-11 16:38:16	Total: 0:00:24.774836
Training using single GPU or CPU..
Train on 2200 samples, validate on 2200 samples
Epoch 1/32
Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32
Epoch 12/32
Epoch 13/32
Epoch 14/32
Epoch 15/32
Epoch 16/32
Epoch 17/32
Epoch 18/32
Epoch 19/32
Epoch 20/32
Epoch 21/32
Epoch 22/32
Epoch 23/32
Epoch 24/32
Epoch 25/32
Epoch 26/32
Epoch 27/32
Epoch 28/32
Epoch 29/32
Epoch 30/32
Epoch 31/32
Epoch 32/32
Generate Model - Ini: 2020-03-11 16:38:25	

Epoch 17/32
Epoch 18/32
Epoch 19/32
Epoch 20/32
Epoch 21/32
Epoch 22/32
Epoch 23/32
Epoch 24/32
Epoch 25/32
Epoch 26/32
Epoch 27/32
Epoch 28/32
Epoch 29/32
Epoch 30/32
Epoch 31/32
Epoch 32/32
Generate Model - Ini: 2020-03-11 21:14:07	End: 2020-03-12 01:52:17	Total: 4:38:09.648901
Total experiment - Ini: 2020-03-11 21:14:07	End: 2020-03-12 01:52:17	Total: 4:38:09.649495
Loading data... /home/vanessa/PycharmProjects/RecurrentNetworks/tokenizers/anx_dep_multilabel/SMHD_TR_2640_VS_5000_TF_0_DF_0_RSW_F_IT_1_RP_F
Preprocess data... /home/vanessa/PycharmProjects/RecurrentNetworks/dataset/anx_dep_multilabel/SMHD_test_2640.df
Load data - Ini: 2020-03-12 01:52:17	End: 2020-03-12 01:52:28	Total: 0:00:11.575014
Loading data... /home/vanessa/PycharmProjects/RecurrentNetworks/tokenizers/anx_dep_multilabel/SMHD_TR_2640_VS_5000_TF_0_DF_0_RSW_F_IT_1_RP_F
Preprocess data... /home/vanessa/PycharmProjects/RecurrentNetworks/dataset/anx_dep_multilabel/SMHD_train_2640.df
Load Pre-train embeddings
Load data -

Epoch 24/32
Epoch 25/32
Epoch 26/32
Epoch 27/32
Epoch 28/32
Epoch 29/32
Epoch 30/32
Epoch 31/32
Epoch 32/32
Generate Model - Ini: 2020-03-12 10:09:56	End: 2020-03-12 14:36:14	Total: 4:26:17.949636
Total experiment - Ini: 2020-03-12 10:09:44	End: 2020-03-12 14:36:14	Total: 4:26:30.194739
Loading data... /home/vanessa/PycharmProjects/RecurrentNetworks/tokenizers/anx_dep_multilabel/SMHD_TR_2640_VS_5000_TF_0_DF_0_RSW_F_IT_1_RP_F
Preprocess data... /home/vanessa/PycharmProjects/RecurrentNetworks/dataset/anx_dep_multilabel/SMHD_test_2640.df
Load data - Ini: 2020-03-12 14:36:14	End: 2020-03-12 14:36:27	Total: 0:00:13.176678
Loading data... /home/vanessa/PycharmProjects/RecurrentNetworks/tokenizers/anx_dep_multilabel/SMHD_TR_2640_VS_5000_TF_0_DF_0_RSW_F_IT_1_RP_F
Preprocess data... /home/vanessa/PycharmProjects/RecurrentNetworks/dataset/anx_dep_multilabel/SMHD_train_2640.df
Load data - Ini: 2020-03-12 14:38:05	End: 2020-03-12 14:38:30	Total: 0:00:25.123471
Training using single GPU or CPU..
Tr