In [1]:
import numpy as np
from gensim.models import Word2Vec
# from keras.callbacks import Callback, EarlyStopping
# from keras.models import Model
# from keras.layers import Input, Dense, Embedding, SpatialDropout1D, concatenate, Bidirectional, GlobalAveragePooling1D, GlobalMaxPooling1D, CuDNNGRU, CuDNNLSTM, GRU, LSTM, Reshape, TimeDistributed
from keras.preprocessing.text import Tokenizer, text_to_word_sequence
# from keras.preprocessing.sequence import pad_sequences
# from keras_self_attention import SeqSelfAttention
from sklearn.metrics import log_loss, roc_auc_score, accuracy_score, hamming_loss, f1_score
from sklearn.preprocessing import MultiLabelBinarizer
from skopt import gp_minimize
from skopt.space import Real, Integer, Categorical
from tqdm import tnrange, tqdm_notebook
from utils import process_data, multilabel_confusion_matrix, get_embedding_matrix, get_cat_labels, data_generator, get_all

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

Using TensorFlow backend.


In [2]:
class torch_tagger(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size, embedding_matrix=None):
        super().__init__()
        self.hidden_dim = hidden_dim
        if embedding_matrix is None:
            self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)
        else:
            weight = torch.FloatTensor(embedding_matrix)
            self.word_embeddings = nn.Embedding.from_pretrained(weight)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, bidirectional=True)
        self.hidden2tag = nn.Linear(hidden_dim * 2, tagset_size)

    def forward(self, sentence):
        embeds = self.word_embeddings(sentence)
        lstm_out, _ = self.lstm(embeds.view(len(sentence), 1, -1))
        tag_space = self.hidden2tag(lstm_out.view(len(sentence), -1))
        tag_scores = torch.sigmoid(tag_space)
        
        return tag_scores

In [3]:
class EarlyStopping:
    """Early stops the training if validation loss doesn't improve after a given patience."""
    def __init__(self, patience=7, verbose=False):
        """
        Args:
            patience (int): How long to wait after last time validation loss improved.
                            Default: 7
            verbose (bool): If True, prints a message for each validation loss improvement. 
                            Default: False
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.Inf

    def __call__(self, val_loss, model):

        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score:
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        '''Saves model when validation loss decrease.'''
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), 'checkpoint.pt')
        self.val_loss_min = val_loss

In [6]:
def no_pad_time_tuning(param, notes_train, labels_train, up_notes_train, up_labels_train, gold_labels_train, notes_test, labels_test, gold_labels_test, verbose=1):
    
    up = int(param['up'])
    window_size = int(param['window_size'])
    embed_size = int(param['embed_size'] * 10)
    latent_dim = int(param['latent_dim'] * 64)
    dropout_rate = param['dropout_rate']
    epochs = 30 #param['epochs']
    max_features = 60000 #param['max_features']
    category = False #param['category']
    embedding = True #param['embedding']
    model_type = 'CuDNNLSTM' #param['model_type']
    
    # upsampling
    if up > 0:
        if verbose != 0: print('upsampling for %d times...' % (up))
        notes_train = [note + up * up_note for note, up_note in zip(notes_train, up_notes_train)]
        labels_train = [label + up * up_label for label, up_label in zip(labels_train, up_labels_train)]
        if verbose != 0: print('upsampling done\n')
    notes = notes_train + notes_test
    labels = labels_train + labels_test
    gold_labels = gold_labels_train + gold_labels_test
    
    # prepare features
    if verbose != 0: print('preparing features ...')
    X_txt = [' '.join(i) for i in notes]
    X_train_txt = [' '.join(i) for i in notes_train]
    X_test_txt = [' '.join(i) for i in notes_test]
    tokenizer = Tokenizer(num_words=max_features, filters='')
    tokenizer.fit_on_texts(X_txt)
    X_seq = tokenizer.texts_to_sequences(X_txt) 
    X_train_seq = tokenizer.texts_to_sequences(X_train_txt) 
    X_test_seq = tokenizer.texts_to_sequences(X_test_txt) 
    word_index = tokenizer.word_index
    nb_words = min(max_features, len(word_index))
    if verbose != 0: print('preparing features done\n')

    # prepare embedding matrix
    if embedding:
        if verbose != 0: print('preparing embedding matrix ...')
        w2v = Word2Vec(notes, size=embed_size, window=window_size, min_count=1, workers=4)
        embedding_index = dict(zip(w2v.wv.index2word, w2v.wv.vectors))
        embedding_matrix = get_embedding_matrix(embedding_index=embedding_index, word_index=word_index, max_features=max_features, embed_size=embed_size)
        if verbose != 0: print('preparing embedding matrix done\n')
        
    # prepare targets
    if verbose != 0: print('preparing targets ...')
    if category:
        # prepare cagtegory label targets
        labels = [[set([get_cat_labels(i) for i in list(j)]) for j in k] for k in labels]
        labels_train = [[set([get_cat_labels(i) for i in list(j)]) for j in k] for k in labels_train]
        labels_test = [[set([get_cat_labels(i) for i in list(j)]) for j in k] for k in labels_test]
    all_labels = [label for notes_label in labels for label in notes_label]
    mlb = MultiLabelBinarizer()
    mlb.fit(all_labels)
    num_labels = len(mlb.classes_)
    Y_train = []
    Y_test = []
    for i in labels_train:
        l = mlb.transform(i)
        Y_train.append(l)
    for i in labels_test:
        l = mlb.transform(i)
        Y_test.append(l)
    if verbose != 0: print('preparing targets done\n')

    # model summary
    model = torch_tagger(embed_size, latent_dim, nb_words, num_labels, embedding_matrix).cuda()
    loss_function = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.1)
    early_stopping = EarlyStopping(patience=1, verbose=True)
    if verbose != 0: print('\nmodel summary:')
    if verbose != 0: print(model)

    # model training
    if verbose != 0: print('\ntraining model ...')
    for epoch in tnrange(epochs):  
        train_loss = 0.0
        model.train()
        for x, y in tqdm_notebook(zip(X_train_seq, Y_train), total=len(Y_train)):
            optimizer.zero_grad()
            sentence_in = torch.tensor(x).cuda()
            targets = torch.FloatTensor(y).cuda()
            tag_scores = model(sentence_in)
            loss = loss_function(tag_scores, targets)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        train_loss = train_loss/len(Y_train)

        valid_loss = 0.0
        model.eval()
        Y_pred = []
        for i, (x, y) in tqdm_notebook(enumerate(zip(X_test_seq[:513], Y_test[:513])), total=len(Y_test[:513])):   
            sentence_in = torch.tensor(x).cuda()
            targets = torch.FloatTensor(y).cuda()
            tag_scores = model(sentence_in)
            loss = loss_function(tag_scores, targets)
            valid_loss += loss.item()# * sentence_in.size(0)
            Y_pred.append(tag_scores.detach().cpu().numpy())
        valid_loss = valid_loss/len(Y_test[:513])
        Y_pred_concat = np.concatenate(Y_pred)
        Y_pred_ham = Y_pred_concat > 0.5
        Y_val = np.concatenate(Y_test[:513])
        roc = roc_auc_score(Y_val, Y_pred_concat, average='micro')
        loss = log_loss(Y_val, Y_pred_concat)
        ham = hamming_loss(Y_val, Y_pred_ham)
        sub = accuracy_score(Y_val, Y_pred_ham)
        f1 = f1_score(Y_val, Y_pred_ham, average='micro')
        # print training/validation statistics 
        print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(epoch + 1, train_loss, valid_loss))
        print("Adiitional val metrics: - ROC-AUC: %.6f - Log-Loss: %.6f - Hamming-Loss: %.6f - Subset-Accuracy: %.6f - F1-Score: %.6f" % (roc, loss, ham, sub, f1))
        
        early_stopping(valid_loss, model)
        if early_stopping.early_stop:
            print("Early stopping")
            break

    # confusion matrix 
    if verbose == 2: 
        cm = multilabel_confusion_matrix(Y_val, np.where(Y_pred_concat > 0.5, 1, 0))
        for i, j in zip(cm, mlb.classes_):
            print(j+':\n', i,'\n')

    # prepare gold label targets
    if verbose != 0: print('predicting gold label targets ...')
    gold_labels_pred = [{i for s in mlb.inverse_transform(y_pred>0.5) for i in s if i != 'O'} for y_pred in Y_pred]
    gmlb = MultiLabelBinarizer()
    gmlb.fit(gold_labels)
    num_gold_labels = len(gmlb.classes_)
    Y_gold_test = gmlb.transform(gold_labels_test[:513])
    Y_gold_pred = gmlb.transform(gold_labels_pred)
    if verbose != 0: print('predicting gold label targets done\n')

    # confusion matrix for gold label
    if verbose == 2: 
        gcm = multilabel_confusion_matrix(np.concatenate(Y_gold_test), np.concatenate(Y_gold_pred))
        for i, j in zip(gcm, gmlb.classes_):
            print(j+':\n', i,'\n')

    # f1 scores for gold label
    f1 = f1_score(Y_gold_test, Y_gold_pred, average='micro')
    print('Parameters: up = %d, window_size = %d, embed_size = %d, latent_dim = %d, dropout_rate = %.3f' % (up, window_size, embed_size, latent_dim, dropout_rate))
    print('F1 Scores for global labels:\nALL (average="micro"):', f1)
    
    with open("results.txt","a") as f:
        f.write('Parameters: up = %d, window_size = %d, embed_size = %d, latent_dim = %d, dropout_rate = %.3f\n' % (up, window_size, embed_size, latent_dim, dropout_rate))
        f.write('F1 Scores for global labels(average="micro"): %.3f\n' % f1)
        
    
    if verbose == 2: 
        f1_all = f1_score(Y_gold_test, Y_gold_pred, average=None)
        for i, j in zip(f1_all, gmlb.classes_):
            print(j+': '+str(i))
    
    print('\n\n')
          
    return f1

def my_bayes_opt(space):
    param = {
        'up': space[0],               # Times of upsampling for training data
        'window_size': space[1],                # Window size for word2vec
        'embed_size': space[2],                # Length of the vector that we willl get from the embedding layer
        'latent_dim': space[3],               # Hidden layers dimension 
        'dropout_rate': space[4]}#,             # Rate of the dropout layers
        #'epochs': space[0],                    # Number of epochs
        #'max_features': space[0],           # Max num of vocabulary
        #'category': space[0],               # Is categoty labels
        #'embedding': space[0],               # Using pre-made embedidng matrix as weight
        #'model_type': space[0]
        #}
    f1 = no_pad_time_tuning(param, notes_train, labels_train, up_notes_train, up_labels_train, gold_labels_train, notes_test, labels_test, gold_labels_test)
    return (-f1)     

In [7]:
if __name__ == "__main__":
    
    # loading data 
    notes_train_1, labels_train_1, up_notes_train_1, up_labels_train_1, gold_labels_train_1 = get_all('/host_home/data/i2b2/2014/training/training-RiskFactors-Complete-Set1') 
    notes_train_2, labels_train_2, up_notes_train_2, up_labels_train_2, gold_labels_train_2 = get_all('/host_home/data/i2b2/2014/training/training-RiskFactors-Complete-Set2') 

    notes_train = notes_train_1 + notes_train_2
    labels_train = labels_train_1 + labels_train_2
    up_notes_train = up_notes_train_1 + up_notes_train_2
    up_labels_train = up_labels_train_1 + up_labels_train_2
    gold_labels_train = gold_labels_train_1 + gold_labels_train_2

    notes_test, labels_test, _1, _2, gold_labels_test = get_all('/host_home/data/i2b2/2014/testing/testing-RiskFactors-Complete') 

    space = [Integer(5, 10, name='up'),
            Integer(3, 7, name='window_size'),
            Integer(2, 4, name='embed_size'),
            Integer(1, 3, name='latent_dim'),
            Real(0, 0.3, name='dropout_rate')]
            #Integer(30, 30, name='epochs'),
            #Integer(1, 60000, name='max_features'),
            #Categorical([False], name='category'),
            #Categorical([True], name='embedding'),
            #Categorical(['CuDNNLSTM'], name='model_type')]
    x0 = [7, 4, 3, 2, 0.15253569878187465]
    res = gp_minimize(my_bayes_opt, space, x0=x0, n_calls=100, verbose=True)
    
    # python command: python heart_no_pad_bayes_opt.py > result_heart_no_pad_bayes_opt.txt
    
    print(res)

HBox(children=(IntProgress(value=0, max=521), HTML(value='')))

HBox(children=(IntProgress(value=0, max=269), HTML(value='')))

HBox(children=(IntProgress(value=0, max=514), HTML(value='')))

Iteration No: 1 started. Evaluating function at provided point.
upsampling for 7 times...
upsampling done

preparing features ...
preparing features done

preparing embedding matrix ...
preparing embedding matrix done

preparing targets ...
preparing targets done


model summary:
torch_tagger(
  (word_embeddings): Embedding(44984, 30)
  (lstm): LSTM(30, 128, bidirectional=True)
  (hidden2tag): Linear(in_features=256, out_features=97, bias=True)
)

training model ...


HBox(children=(IntProgress(value=0, max=30), HTML(value='')))

HBox(children=(IntProgress(value=0, max=790), HTML(value='')))

HBox(children=(IntProgress(value=0, max=513), HTML(value='')))

Epoch: 1 	Training Loss: 0.022368 	Validation Loss: 0.012923
Adiitional val metrics: - ROC-AUC: 0.986336 - Log-Loss: 0.774920 - Hamming-Loss: 0.002058 - Subset-Accuracy: 0.885362 - F1-Score: 0.901327
Validation loss decreased (inf --> 0.012923).  Saving model ...


HBox(children=(IntProgress(value=0, max=790), HTML(value='')))

HBox(children=(IntProgress(value=0, max=513), HTML(value='')))

Epoch: 2 	Training Loss: 0.015518 	Validation Loss: 0.011680
Adiitional val metrics: - ROC-AUC: 0.988547 - Log-Loss: 0.687468 - Hamming-Loss: 0.001806 - Subset-Accuracy: 0.904391 - F1-Score: 0.914064
Validation loss decreased (0.012923 --> 0.011680).  Saving model ...


HBox(children=(IntProgress(value=0, max=790), HTML(value='')))

HBox(children=(IntProgress(value=0, max=513), HTML(value='')))

Epoch: 3 	Training Loss: 0.015897 	Validation Loss: 0.010327
Adiitional val metrics: - ROC-AUC: 0.988855 - Log-Loss: 0.647933 - Hamming-Loss: 0.001695 - Subset-Accuracy: 0.905144 - F1-Score: 0.919009
Validation loss decreased (0.011680 --> 0.010327).  Saving model ...


HBox(children=(IntProgress(value=0, max=790), HTML(value='')))

HBox(children=(IntProgress(value=0, max=513), HTML(value='')))

Epoch: 4 	Training Loss: 0.014226 	Validation Loss: 0.009761
Adiitional val metrics: - ROC-AUC: 0.989663 - Log-Loss: 0.608024 - Hamming-Loss: 0.001645 - Subset-Accuracy: 0.909576 - F1-Score: 0.921709
Validation loss decreased (0.010327 --> 0.009761).  Saving model ...


HBox(children=(IntProgress(value=0, max=790), HTML(value='')))

HBox(children=(IntProgress(value=0, max=513), HTML(value='')))

Epoch: 5 	Training Loss: 0.013841 	Validation Loss: 0.009204
Adiitional val metrics: - ROC-AUC: 0.990611 - Log-Loss: 0.579300 - Hamming-Loss: 0.001606 - Subset-Accuracy: 0.912560 - F1-Score: 0.923808
Validation loss decreased (0.009761 --> 0.009204).  Saving model ...


HBox(children=(IntProgress(value=0, max=790), HTML(value='')))

HBox(children=(IntProgress(value=0, max=513), HTML(value='')))

Epoch: 6 	Training Loss: 0.013586 	Validation Loss: 0.009110
Adiitional val metrics: - ROC-AUC: 0.990926 - Log-Loss: 0.576213 - Hamming-Loss: 0.001592 - Subset-Accuracy: 0.916986 - F1-Score: 0.924477
Validation loss decreased (0.009204 --> 0.009110).  Saving model ...


HBox(children=(IntProgress(value=0, max=790), HTML(value='')))

HBox(children=(IntProgress(value=0, max=513), HTML(value='')))

Epoch: 7 	Training Loss: 0.013398 	Validation Loss: 0.008954
Adiitional val metrics: - ROC-AUC: 0.991174 - Log-Loss: 0.561094 - Hamming-Loss: 0.001583 - Subset-Accuracy: 0.923563 - F1-Score: 0.925333
Validation loss decreased (0.009110 --> 0.008954).  Saving model ...


HBox(children=(IntProgress(value=0, max=790), HTML(value='')))

HBox(children=(IntProgress(value=0, max=513), HTML(value='')))

Epoch: 8 	Training Loss: 0.013170 	Validation Loss: 0.008990
Adiitional val metrics: - ROC-AUC: 0.991001 - Log-Loss: 0.566231 - Hamming-Loss: 0.001408 - Subset-Accuracy: 0.931919 - F1-Score: 0.933637
EarlyStopping counter: 1 out of 1
Early stopping
predicting gold label targets ...
predicting gold label targets done

Parameters: up = 7, window_size = 4, embed_size = 30, latent_dim = 128, dropout_rate = 0.153
F1 Scores for global labels:
ALL (average="micro"): 0.7711388916231912



Iteration No: 1 ended. Evaluation done at provided point.
Time taken: 762.4931
Function value obtained: -0.7711
Current minimum: -0.7711
Iteration No: 2 started. Evaluating function at random point.
upsampling for 8 times...
upsampling done

preparing features ...
preparing features done

preparing embedding matrix ...
preparing embedding matrix done

preparing targets ...
preparing targets done


model summary:
torch_tagger(
  (word_embeddings): Embedding(44984, 40)
  (lstm): LSTM(40, 64, bidirectional=True)

HBox(children=(IntProgress(value=0, max=30), HTML(value='')))

HBox(children=(IntProgress(value=0, max=790), HTML(value='')))

HBox(children=(IntProgress(value=0, max=513), HTML(value='')))

Epoch: 1 	Training Loss: 0.016671 	Validation Loss: 0.008647
Adiitional val metrics: - ROC-AUC: 0.988570 - Log-Loss: 0.558212 - Hamming-Loss: 0.001421 - Subset-Accuracy: 0.934099 - F1-Score: 0.932847
Validation loss decreased (inf --> 0.008647).  Saving model ...


HBox(children=(IntProgress(value=0, max=790), HTML(value='')))

HBox(children=(IntProgress(value=0, max=513), HTML(value='')))

Epoch: 2 	Training Loss: 0.014185 	Validation Loss: 0.008612
Adiitional val metrics: - ROC-AUC: 0.989489 - Log-Loss: 0.550260 - Hamming-Loss: 0.001508 - Subset-Accuracy: 0.931532 - F1-Score: 0.928927
Validation loss decreased (0.008647 --> 0.008612).  Saving model ...


HBox(children=(IntProgress(value=0, max=790), HTML(value='')))

HBox(children=(IntProgress(value=0, max=513), HTML(value='')))

Epoch: 3 	Training Loss: 0.014016 	Validation Loss: 0.008364
Adiitional val metrics: - ROC-AUC: 0.989819 - Log-Loss: 0.543896 - Hamming-Loss: 0.001381 - Subset-Accuracy: 0.938798 - F1-Score: 0.935247
Validation loss decreased (0.008612 --> 0.008364).  Saving model ...


HBox(children=(IntProgress(value=0, max=790), HTML(value='')))

HBox(children=(IntProgress(value=0, max=513), HTML(value='')))

Epoch: 4 	Training Loss: 0.013447 	Validation Loss: 0.007607
Adiitional val metrics: - ROC-AUC: 0.990557 - Log-Loss: 0.504366 - Hamming-Loss: 0.001332 - Subset-Accuracy: 0.940107 - F1-Score: 0.937429
Validation loss decreased (0.008364 --> 0.007607).  Saving model ...


HBox(children=(IntProgress(value=0, max=790), HTML(value='')))

HBox(children=(IntProgress(value=0, max=513), HTML(value='')))

Epoch: 5 	Training Loss: 0.013209 	Validation Loss: 0.007226
Adiitional val metrics: - ROC-AUC: 0.991396 - Log-Loss: 0.475058 - Hamming-Loss: 0.001281 - Subset-Accuracy: 0.946090 - F1-Score: 0.939936
Validation loss decreased (0.007607 --> 0.007226).  Saving model ...


HBox(children=(IntProgress(value=0, max=790), HTML(value='')))

HBox(children=(IntProgress(value=0, max=513), HTML(value='')))

Epoch: 6 	Training Loss: 0.013187 	Validation Loss: 0.007141
Adiitional val metrics: - ROC-AUC: 0.991501 - Log-Loss: 0.475944 - Hamming-Loss: 0.001316 - Subset-Accuracy: 0.942160 - F1-Score: 0.938140
Validation loss decreased (0.007226 --> 0.007141).  Saving model ...


HBox(children=(IntProgress(value=0, max=790), HTML(value='')))

HBox(children=(IntProgress(value=0, max=513), HTML(value='')))

Epoch: 7 	Training Loss: 0.013196 	Validation Loss: 0.007596
Adiitional val metrics: - ROC-AUC: 0.991080 - Log-Loss: 0.506588 - Hamming-Loss: 0.001392 - Subset-Accuracy: 0.939500 - F1-Score: 0.934742
EarlyStopping counter: 1 out of 1
Early stopping
predicting gold label targets ...
predicting gold label targets done

Parameters: up = 8, window_size = 5, embed_size = 40, latent_dim = 64, dropout_rate = 0.297
F1 Scores for global labels:
ALL (average="micro"): 0.7354877318970677



Iteration No: 2 ended. Evaluation done at random point.
Time taken: 709.7175
Function value obtained: -0.7355
Current minimum: -0.7711
Iteration No: 3 started. Evaluating function at random point.
upsampling for 8 times...
upsampling done

preparing features ...
preparing features done

preparing embedding matrix ...
preparing embedding matrix done

preparing targets ...
preparing targets done


model summary:
torch_tagger(
  (word_embeddings): Embedding(44984, 40)
  (lstm): LSTM(40, 128, bidirectional=True)
 

HBox(children=(IntProgress(value=0, max=30), HTML(value='')))

HBox(children=(IntProgress(value=0, max=790), HTML(value='')))

HBox(children=(IntProgress(value=0, max=513), HTML(value='')))

Epoch: 1 	Training Loss: 0.028099 	Validation Loss: 0.016114
Adiitional val metrics: - ROC-AUC: 0.982421 - Log-Loss: 0.890014 - Hamming-Loss: 0.003638 - Subset-Accuracy: 0.735004 - F1-Score: 0.811815
Validation loss decreased (inf --> 0.016114).  Saving model ...


HBox(children=(IntProgress(value=0, max=790), HTML(value='')))

HBox(children=(IntProgress(value=0, max=513), HTML(value='')))

Epoch: 2 	Training Loss: 0.024872 	Validation Loss: 0.018060
Adiitional val metrics: - ROC-AUC: 0.983234 - Log-Loss: 1.021701 - Hamming-Loss: 0.003782 - Subset-Accuracy: 0.733800 - F1-Score: 0.807580
EarlyStopping counter: 1 out of 1
Early stopping
predicting gold label targets ...
predicting gold label targets done

Parameters: up = 8, window_size = 6, embed_size = 40, latent_dim = 128, dropout_rate = 0.121
F1 Scores for global labels:
ALL (average="micro"): 0.6857199960027981



Iteration No: 3 ended. Evaluation done at random point.
Time taken: 209.8879
Function value obtained: -0.6857
Current minimum: -0.7711
Iteration No: 4 started. Evaluating function at random point.
upsampling for 7 times...
upsampling done

preparing features ...
preparing features done

preparing embedding matrix ...
preparing embedding matrix done

preparing targets ...
preparing targets done


model summary:
torch_tagger(
  (word_embeddings): Embedding(44984, 30)
  (lstm): LSTM(30, 64, bidirectional=True)
 

HBox(children=(IntProgress(value=0, max=30), HTML(value='')))

HBox(children=(IntProgress(value=0, max=790), HTML(value='')))

HBox(children=(IntProgress(value=0, max=513), HTML(value='')))

Epoch: 1 	Training Loss: 0.018992 	Validation Loss: 0.010172
Adiitional val metrics: - ROC-AUC: 0.986542 - Log-Loss: 0.651104 - Hamming-Loss: 0.001737 - Subset-Accuracy: 0.898859 - F1-Score: 0.916386
Validation loss decreased (inf --> 0.010172).  Saving model ...


HBox(children=(IntProgress(value=0, max=790), HTML(value='')))

HBox(children=(IntProgress(value=0, max=513), HTML(value='')))

Epoch: 2 	Training Loss: 0.013909 	Validation Loss: 0.008102
Adiitional val metrics: - ROC-AUC: 0.990140 - Log-Loss: 0.520882 - Hamming-Loss: 0.001400 - Subset-Accuracy: 0.931474 - F1-Score: 0.933702
Validation loss decreased (0.010172 --> 0.008102).  Saving model ...


HBox(children=(IntProgress(value=0, max=790), HTML(value='')))

HBox(children=(IntProgress(value=0, max=513), HTML(value='')))

Epoch: 3 	Training Loss: 0.013314 	Validation Loss: 0.008548
Adiitional val metrics: - ROC-AUC: 0.990866 - Log-Loss: 0.516490 - Hamming-Loss: 0.001407 - Subset-Accuracy: 0.934235 - F1-Score: 0.933742
EarlyStopping counter: 1 out of 1
Early stopping
predicting gold label targets ...
predicting gold label targets done

Parameters: up = 7, window_size = 7, embed_size = 30, latent_dim = 64, dropout_rate = 0.027
F1 Scores for global labels:
ALL (average="micro"): 0.7470078415187783



Iteration No: 4 ended. Evaluation done at random point.
Time taken: 293.5831
Function value obtained: -0.7470
Current minimum: -0.7711
Iteration No: 5 started. Evaluating function at random point.
upsampling for 10 times...
upsampling done

preparing features ...
preparing features done

preparing embedding matrix ...
preparing embedding matrix done

preparing targets ...
preparing targets done


model summary:
torch_tagger(
  (word_embeddings): Embedding(44984, 20)
  (lstm): LSTM(20, 192, bidirectional=True)


HBox(children=(IntProgress(value=0, max=30), HTML(value='')))

HBox(children=(IntProgress(value=0, max=790), HTML(value='')))

HBox(children=(IntProgress(value=0, max=513), HTML(value='')))

Epoch: 1 	Training Loss: 0.148219 	Validation Loss: 0.050750
Adiitional val metrics: - ROC-AUC: 0.951990 - Log-Loss: 2.912338 - Hamming-Loss: 0.001444 - Subset-Accuracy: 0.938442 - F1-Score: 0.930547
Validation loss decreased (inf --> 0.050750).  Saving model ...


HBox(children=(IntProgress(value=0, max=790), HTML(value='')))

HBox(children=(IntProgress(value=0, max=513), HTML(value='')))

Epoch: 2 	Training Loss: 0.144231 	Validation Loss: 0.047240
Adiitional val metrics: - ROC-AUC: 0.954683 - Log-Loss: 2.911936 - Hamming-Loss: 0.001469 - Subset-Accuracy: 0.935281 - F1-Score: 0.929196
Validation loss decreased (0.050750 --> 0.047240).  Saving model ...


HBox(children=(IntProgress(value=0, max=790), HTML(value='')))

HBox(children=(IntProgress(value=0, max=513), HTML(value='')))

Epoch: 3 	Training Loss: 0.144777 	Validation Loss: 0.056175
Adiitional val metrics: - ROC-AUC: 0.946450 - Log-Loss: 2.974676 - Hamming-Loss: 0.001353 - Subset-Accuracy: 0.954923 - F1-Score: 0.935716
EarlyStopping counter: 1 out of 1
Early stopping
predicting gold label targets ...
predicting gold label targets done

Parameters: up = 10, window_size = 7, embed_size = 20, latent_dim = 192, dropout_rate = 0.002
F1 Scores for global labels:
ALL (average="micro"): 0.0



Iteration No: 5 ended. Evaluation done at random point.
Time taken: 337.6461
Function value obtained: -0.0000
Current minimum: -0.7711
Iteration No: 6 started. Evaluating function at random point.
upsampling for 10 times...
upsampling done

preparing features ...


  'precision', 'predicted', average, warn_for)


preparing features done

preparing embedding matrix ...
preparing embedding matrix done

preparing targets ...
preparing targets done


model summary:
torch_tagger(
  (word_embeddings): Embedding(44984, 40)
  (lstm): LSTM(40, 64, bidirectional=True)
  (hidden2tag): Linear(in_features=128, out_features=97, bias=True)
)

training model ...


HBox(children=(IntProgress(value=0, max=30), HTML(value='')))

HBox(children=(IntProgress(value=0, max=790), HTML(value='')))

HBox(children=(IntProgress(value=0, max=513), HTML(value='')))

Epoch: 1 	Training Loss: 0.017149 	Validation Loss: 0.007977
Adiitional val metrics: - ROC-AUC: 0.988237 - Log-Loss: 0.529148 - Hamming-Loss: 0.001432 - Subset-Accuracy: 0.933648 - F1-Score: 0.932355
Validation loss decreased (inf --> 0.007977).  Saving model ...


HBox(children=(IntProgress(value=0, max=790), HTML(value='')))

HBox(children=(IntProgress(value=0, max=513), HTML(value='')))

Epoch: 2 	Training Loss: 0.014147 	Validation Loss: 0.008419
Adiitional val metrics: - ROC-AUC: 0.990216 - Log-Loss: 0.540097 - Hamming-Loss: 0.001956 - Subset-Accuracy: 0.876011 - F1-Score: 0.904792
EarlyStopping counter: 1 out of 1
Early stopping
predicting gold label targets ...
predicting gold label targets done

Parameters: up = 10, window_size = 5, embed_size = 40, latent_dim = 64, dropout_rate = 0.113
F1 Scores for global labels:
ALL (average="micro"): 0.762418334543192



Iteration No: 6 ended. Evaluation done at random point.
Time taken: 230.0612
Function value obtained: -0.7624
Current minimum: -0.7711
Iteration No: 7 started. Evaluating function at random point.
upsampling for 10 times...
upsampling done

preparing features ...
preparing features done

preparing embedding matrix ...
preparing embedding matrix done

preparing targets ...
preparing targets done


model summary:
torch_tagger(
  (word_embeddings): Embedding(44984, 30)
  (lstm): LSTM(30, 192, bidirectional=True)


HBox(children=(IntProgress(value=0, max=30), HTML(value='')))

HBox(children=(IntProgress(value=0, max=790), HTML(value='')))

HBox(children=(IntProgress(value=0, max=513), HTML(value='')))

Epoch: 1 	Training Loss: 0.073289 	Validation Loss: 0.021297
Adiitional val metrics: - ROC-AUC: 0.975961 - Log-Loss: 1.332075 - Hamming-Loss: 0.003228 - Subset-Accuracy: 0.786829 - F1-Score: 0.849224
Validation loss decreased (inf --> 0.021297).  Saving model ...


HBox(children=(IntProgress(value=0, max=790), HTML(value='')))

HBox(children=(IntProgress(value=0, max=513), HTML(value='')))

Epoch: 2 	Training Loss: 0.025119 	Validation Loss: 0.011609
Adiitional val metrics: - ROC-AUC: 0.986752 - Log-Loss: 0.756394 - Hamming-Loss: 0.001920 - Subset-Accuracy: 0.895227 - F1-Score: 0.909264
Validation loss decreased (0.021297 --> 0.011609).  Saving model ...


HBox(children=(IntProgress(value=0, max=790), HTML(value='')))

HBox(children=(IntProgress(value=0, max=513), HTML(value='')))

Epoch: 3 	Training Loss: 0.016734 	Validation Loss: 0.009559
Adiitional val metrics: - ROC-AUC: 0.989191 - Log-Loss: 0.612168 - Hamming-Loss: 0.001572 - Subset-Accuracy: 0.917672 - F1-Score: 0.925575
Validation loss decreased (0.011609 --> 0.009559).  Saving model ...


HBox(children=(IntProgress(value=0, max=790), HTML(value='')))

HBox(children=(IntProgress(value=0, max=513), HTML(value='')))

Epoch: 4 	Training Loss: 0.015467 	Validation Loss: 0.009491
Adiitional val metrics: - ROC-AUC: 0.990146 - Log-Loss: 0.605044 - Hamming-Loss: 0.001593 - Subset-Accuracy: 0.915187 - F1-Score: 0.924906
Validation loss decreased (0.009559 --> 0.009491).  Saving model ...


HBox(children=(IntProgress(value=0, max=790), HTML(value='')))

HBox(children=(IntProgress(value=0, max=513), HTML(value='')))

Epoch: 5 	Training Loss: 0.015503 	Validation Loss: 0.008939
Adiitional val metrics: - ROC-AUC: 0.989439 - Log-Loss: 0.564650 - Hamming-Loss: 0.001572 - Subset-Accuracy: 0.920271 - F1-Score: 0.925805
Validation loss decreased (0.009491 --> 0.008939).  Saving model ...


HBox(children=(IntProgress(value=0, max=790), HTML(value='')))

HBox(children=(IntProgress(value=0, max=513), HTML(value='')))

Epoch: 6 	Training Loss: 0.015289 	Validation Loss: 0.008414
Adiitional val metrics: - ROC-AUC: 0.991062 - Log-Loss: 0.538514 - Hamming-Loss: 0.001477 - Subset-Accuracy: 0.933241 - F1-Score: 0.931055
Validation loss decreased (0.008939 --> 0.008414).  Saving model ...


HBox(children=(IntProgress(value=0, max=790), HTML(value='')))

HBox(children=(IntProgress(value=0, max=513), HTML(value='')))

Epoch: 7 	Training Loss: 0.014602 	Validation Loss: 0.007986
Adiitional val metrics: - ROC-AUC: 0.991179 - Log-Loss: 0.519201 - Hamming-Loss: 0.001391 - Subset-Accuracy: 0.936202 - F1-Score: 0.934896
Validation loss decreased (0.008414 --> 0.007986).  Saving model ...


HBox(children=(IntProgress(value=0, max=790), HTML(value='')))

HBox(children=(IntProgress(value=0, max=513), HTML(value='')))

Epoch: 8 	Training Loss: 0.014494 	Validation Loss: 0.007514
Adiitional val metrics: - ROC-AUC: 0.991569 - Log-Loss: 0.497915 - Hamming-Loss: 0.001251 - Subset-Accuracy: 0.942900 - F1-Score: 0.941369
Validation loss decreased (0.007986 --> 0.007514).  Saving model ...


HBox(children=(IntProgress(value=0, max=790), HTML(value='')))

KeyboardInterrupt: 