In [1]:
import pandas as pd
import xml.etree.ElementTree as ET
import glob, os
import numpy as np
from comet_ml import Experiment, Optimizer
import pickle
import logging
import sys
from sklearn.utils import class_weight

In [2]:
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ['TF_KERAS'] = '1'

# only reserve 1 GPU

In [3]:
import tensorflow as tf
tf.version

<module 'tensorflow._api.v2.version' from '/home/anasab/tftf3.6/local/lib/python3.6/site-packages/tensorflow/_api/v2/version/__init__.py'>

In [4]:
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.layers import Dense, Dropout, Embedding, LSTM, Lambda, BatchNormalization, TimeDistributed, \
     Bidirectional, Input, concatenate, Flatten, RepeatVector, Activation, Multiply, Permute#, CuDNNLSTM
from tensorflow.keras import regularizers
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras import callbacks, optimizers
from tensorflow.keras import backend as K
from tensorflow.keras.utils import plot_model, Sequence

from nltk.tokenize import RegexpTokenizer
from nltk.corpus import stopwords

import tensorflow as tf

In [5]:
import tensorflow as tf
tf.test.is_gpu_available()

Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.


True

In [6]:
my_seed = 1234
# tf.set_random_seed(my_seed)

In [7]:
logger = logging.getLogger('training')
logger.addHandler(logging.StreamHandler(sys.stdout))
logger.setLevel(logging.DEBUG)

# Read data

In [8]:
def read_subject_writings(subject_file):
    writings = []
    with open(subject_file) as sf:
        contents = sf.read()
        root = ET.fromstring(contents)
        try:
            subject = root.findall('ID')[0].text.strip()
        except Exception:
            print('Cannot extract ID', contents[:500], '\n-------\n')        
        for w in root.iter('WRITING'):
            subject_writings = {'subject': subject}
            for title in w.findall('TITLE'):
                subject_writings['title'] = title.text
            for text in w.findall('TEXT'):
                subject_writings['text'] = text.text
            for date in w.findall('DATE'):
                subject_writings['date'] = date.text
            writings.append(subject_writings)
    return writings

In [9]:
# root_dir = '/home/anasab/' 
root_dir = '/home/anasab/'

In [10]:
datadir_T2 = root_dir + '/eRisk/data/eRisk2020_T2/eRisk2020_T2_TRAINING_DATA/'
labels_file_T2 = root_dir + '/eRisk/data/eRisk2020_T2/eRisk2020_T2_TRAINING_DATA/Depression Questionnaires_anon.txt'
nr_questions = 21

In [11]:
def read_texts(datadir_T2,
                labels_file_T2):
    writings = []
    writings_df = pd.DataFrame()
    labels_df = pd.DataFrame()

        
    for subject_file in os.listdir(datadir_T2):
        if not subject_file.startswith('subject'):
            continue
        writings.extend(read_subject_writings(os.path.join(datadir_T2, subject_file)))
    writings_df = pd.DataFrame(writings)
    
    labels_df = pd.read_csv(os.path.join(labels_file_T2), 
                                 delimiter='\s+', names=['subject'] + ['label%i' % i for i in range(nr_questions)])

    labels_df = labels_df.set_index('subject')
    
    writings_df = writings_df.join(labels_df, on='subject')
    
    return writings_df, labels_df

In [12]:
# writings_df, labels_df = read_texts(datadir_T2, labels_file_T2)
writings_df = pickle.load(open('writings_df_T2_liwc.pkl', 'rb'))

In [13]:
writings_df.groupby('subject').count()

Unnamed: 0_level_0,title,text,date,label0,label1,label2,label3,label4,label5,label6,...,feel,excl,future,nonfl,ppron,shehe,i,we,you,they
subject,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
subject1272,120,120,120,120,120,120,120,120,120,120,...,120,120,120,120,120,120,120,120,120,120
subject2341,129,129,129,129,129,129,129,129,129,129,...,129,129,129,129,129,129,129,129,129,129
subject2432,332,332,332,332,332,332,332,332,332,332,...,332,332,332,332,332,332,332,332,332,332
subject2827,663,663,663,663,663,663,663,663,663,663,...,659,659,659,659,659,659,659,659,659,659
subject2903,313,313,313,313,313,313,313,313,313,313,...,313,313,313,313,313,313,313,313,313,313
subject2961,180,180,180,180,180,180,180,180,180,180,...,180,180,180,180,180,180,180,180,180,180
subject3707,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022,...,1022,1022,1022,1022,1022,1022,1022,1022,1022,1022
subject3993,1510,1510,1510,1510,1510,1510,1510,1510,1510,1510,...,1509,1509,1509,1509,1509,1509,1509,1509,1509,1509
subject4058,1028,1028,1028,1028,1028,1028,1028,1028,1028,1028,...,1028,1028,1028,1028,1028,1028,1028,1028,1028,1028
subject436,29,29,29,29,29,29,29,29,29,29,...,29,29,29,29,29,29,29,29,29,29


In [14]:
writings_df

Unnamed: 0,subject,title,text,date,label0,label1,label2,label3,label4,label5,...,feel,excl,future,nonfl,ppron,shehe,i,we,you,they
0,subject5791,,"Great, thanks a ton!",2018-10-30 17:35:30,1,0,1,1,0,0,...,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0
1,subject5791,The search button gives me a 404 error,Just downloaded GBA4ios 2.1 and when I go to ...,2018-10-30 17:19:41,1,0,1,1,0,0,...,0.0,0.074468,0.074468,0.0,0.063830,0.0,0.063830,0.0,0.000000,0.0
2,subject5791,,Remindme! 1 week,2018-10-30 14:33:49,1,0,1,1,0,0,...,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0
3,subject5791,,Me too please,2018-10-19 18:06:38,1,0,1,1,0,0,...,0.0,0.000000,0.000000,0.0,0.333333,0.0,0.333333,0.0,0.000000,0.0
4,subject5791,,Any chance you can pm me what this spoiler is...,2018-10-19 18:04:14,1,0,1,1,0,0,...,0.0,0.027778,0.111111,0.0,0.138889,0.0,0.111111,0.0,0.027778,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10936,subject3993,Alternative Currency Being Considered in Penn...,,2009-01-07 18:41:30,0,0,0,0,0,0,...,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0
10937,subject3993,Asus' new keyboard. Oh wait... thats not a ke...,,2009-01-07 17:13:53,0,0,0,0,0,0,...,0.0,0.076923,0.076923,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0
10938,subject3993,Homeland Security USA - tripe to entertain mo...,,2009-01-07 07:09:19,0,0,0,0,0,0,...,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0
10939,subject3993,10 dead as Israeli missile hits near U.N. sch...,,2009-01-06 17:15:24,0,0,0,0,0,0,...,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.0


## Preprocess text

In [15]:
tokenizer = RegexpTokenizer(r'\w+')

def tokenize(t):
    return tokenizer.tokenize(t.lower())

In [16]:
def tokenize_fields(writings_df):
    writings_df['tokenized_title'] = writings_df['title'].apply(lambda t: tokenize(t) 
                                                                if type(t)==str and t else None)
    writings_df['title_len'] = writings_df['tokenized_title'].apply(lambda t: len(t) 
                                                                    if type(t)==list and t else None)
    writings_df['tokenized_text'] = writings_df['text'].apply(lambda t: tokenize(t) 
                                                              if type(t)==str and t else None)
    writings_df['text_len'] = writings_df['tokenized_text'].apply(lambda t: len(t) 
                                                                  if type(t)==list and t else None)
    return writings_df

In [17]:
writings_df = tokenize_fields(writings_df)

In [18]:
writings_df.text_len.describe()

count    10409.000000
mean        50.365069
std         84.811676
min          1.000000
25%          9.000000
50%         24.000000
75%         54.000000
max       1567.000000
Name: text_len, dtype: float64

In [19]:
writings_df.title_len.describe()

count    1119.000000
mean       11.246649
std         6.979392
min         1.000000
25%         6.000000
50%        10.000000
75%        15.000000
max        51.000000
Name: title_len, dtype: float64

In [20]:
writings_df.groupby('subject').count().title.describe()

count      20.000000
mean      547.050000
std       446.144828
min        29.000000
25%       180.000000
50%       327.500000
75%      1006.250000
max      1510.000000
Name: title, dtype: float64

In [21]:
writings_df.groupby('subject').count().text.describe()

count      20.000000
mean      547.050000
std       446.144828
min        29.000000
25%       180.000000
50%       327.500000
75%      1006.250000
max      1510.000000
Name: text, dtype: float64

# RoBERTa & co

In [22]:
from simpletransformers.classification import MultiLabelClassificationModel


In [23]:
seq_len = 300

In [24]:
# Aggregate by users
writings_df = writings_df.fillna(value={'text': '', 'title':''})
column_functions = {'text': lambda t: " ".join(t), 
                                        'title': lambda t: " ".join(t),
                                        'text_len': 'sum',
                                        'title_len': 'sum'}
column_functions.update({'label%i'%i: 'min' for i in range(21)})
writings_per_user_df = writings_df.groupby('subject').aggregate(column_functions)
#                                          'subset': 'min'})
# writings_per_user_df = writings_per_user_df.fillna("")
writings_per_user_df['text'] = writings_per_user_df['text'] + " " +  writings_per_user_df['title']
writings_per_user_df['text_len'] = writings_per_user_df['text_len'] + writings_per_user_df['title_len']

In [25]:
writings_per_user_df.text_len.describe()

count        20.000000
mean      26841.750000
std       33383.871337
min        1168.000000
25%        7118.750000
50%       15522.500000
75%       27742.000000
max      110673.000000
Name: text_len, dtype: float64

In [26]:
def reverse_text(text):
    return " ".join(text.split()[::-1])
def truncate_text_beginning(text, seq_len=seq_len, epsilon=20):
    truncated_tokenized = text.split()[-seq_len-epsilon:]
    return " ".join(truncated_tokenized)
# writings_per_user_df['text'] = writings_per_user_df['text'].apply(truncate_text_beginning)

In [27]:
def get_subjects_split(writings_df, train_prop=0.8, test_slice=2, nr_slices=5, valid_prop=0):
    if 'subset' in writings_df.columns:
        training_subjects = list(set(writings_df[writings_df['subset']=='train'].subject))
        test_subjects = list(set(writings_df[writings_df['subset']=='test'].subject))
    else:
        all_subjects = sorted(list(set(writings_df.subject)))
        training_subjects_size = int(len(all_subjects) * train_prop)
        test_subjects_size = len(all_subjects) - training_subjects_size
        # Cross-validation, with fixed slice as input
        test_prop = 1-train_prop
        test_slice = min(test_slice, nr_slices)
        logger.debug("start index: %f, from %f\n" % (
            len(all_subjects)*(1/nr_slices)*test_slice, test_prop*test_slice))
        start_slice = int(len(all_subjects)*(1/nr_slices)*test_slice)
        test_subjects = all_subjects[start_slice: start_slice+test_subjects_size]
        training_subjects = [s for s in all_subjects if s not in test_subjects]
    training_subjects = sorted(training_subjects) # ensuring reproducibility
    valid_subjects_size = int(len(training_subjects) * valid_prop)
    valid_subjects = training_subjects[:valid_subjects_size]
    training_subjects = training_subjects[valid_subjects_size:]
    logger.debug("%d training users, %d validation users, %d test users." % (
        len(training_subjects), 
          len(valid_subjects),
          len(test_subjects)))
    subjects_split = {'train': training_subjects, 
                      'valid': valid_subjects, 
                      'test': test_subjects}
    return subjects_split

subjects_split = get_subjects_split(writings_df, nr_slices=5, test_slice=4)

start index: 16.000000, from 0.800000

16 training users, 0 validation users, 4 test users.


In [28]:
train_df = writings_per_user_df[writings_per_user_df.index.isin(subjects_split['train'])]#[['text', 'labels']]
test_df = writings_per_user_df[writings_per_user_df.index.isin(subjects_split['test'])]#[['text', 'labels']]

In [29]:
train_df.head()

Unnamed: 0_level_0,text,title,text_len,title_len,label0,label1,label2,label3,label4,label5,...,label11,label12,label13,label14,label15,label16,label17,label18,label19,label20
subject,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
subject1272,Just waking up every morning and talking to m...,...,1168.0,12.0,1,2,2,1,0,1,...,1,2,2,2,-3,0,3,1,2,2
subject2341,"If you need to talk to someone, it's not a ba...",...,6911.0,0.0,1,2,3,2,3,2,...,2,3,3,2,-2,2,-3,2,2,0
subject2432,*kid A dataminer said that the octoling gir...,...,7188.0,63.0,1,3,3,2,3,2,...,3,1,3,2,1,2,1,2,1,0
subject2827,Im so sorry youve had a negative experience. ...,Opinions on high school anxiety ...,17140.0,130.0,1,3,3,2,3,2,...,1,2,3,1,-2,1,0,2,2,1
subject2903,"That was not at all how it was, but not gonna...",...,15350.0,12.0,0,0,1,1,0,0,...,0,0,0,1,0,0,-1,0,1,2


In [30]:
from sklearn.preprocessing import MultiLabelBinarizer

In [31]:
train_df['labels'] = train_df[['label%d'%i for i in range(21)]].values.tolist()
binarizer = MultiLabelBinarizer()
binarizer.fit([range(-3,3,1)])
# TODO: this is not the right way. it's not multilabel...?
train_df['labels'] = train_df['labels'].apply(lambda l: binarizer.transform((l,)))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
  .format(sorted(unknown, key=str)))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """


In [32]:
train_df.head()

Unnamed: 0_level_0,text,title,text_len,title_len,label0,label1,label2,label3,label4,label5,...,label12,label13,label14,label15,label16,label17,label18,label19,label20,labels
subject,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
subject1272,Just waking up every morning and talking to m...,...,1168.0,12.0,1,2,2,1,0,1,...,2,2,2,-3,0,3,1,2,2,"[[1, 0, 0, 1, 1, 1]]"
subject2341,"If you need to talk to someone, it's not a ba...",...,6911.0,0.0,1,2,3,2,3,2,...,3,3,2,-2,2,-3,2,2,0,"[[1, 1, 0, 1, 1, 1]]"
subject2432,*kid A dataminer said that the octoling gir...,...,7188.0,63.0,1,3,3,2,3,2,...,1,3,2,1,2,1,2,1,0,"[[0, 0, 0, 1, 1, 1]]"
subject2827,Im so sorry youve had a negative experience. ...,Opinions on high school anxiety ...,17140.0,130.0,1,3,3,2,3,2,...,2,3,1,-2,1,0,2,2,1,"[[0, 1, 0, 1, 1, 1]]"
subject2903,"That was not at all how it was, but not gonna...",...,15350.0,12.0,0,0,1,1,0,0,...,0,0,1,0,0,-1,0,1,2,"[[0, 0, 1, 1, 1, 1]]"


In [33]:
args = {
#    'model_type':  'roberta',
#    'model_name': 'roberta-base',
   'output_dir': 'outputs/',
   'cache_dir': 'cache/',
    'fp16': True,
   'fp16_opt_level': 'O1',
   'max_seq_length': seq_len,#256, #128,
   'train_batch_size': 8,
   'eval_batch_size': 8,
   'gradient_accumulation_steps': 1,
   'num_train_epochs': 2,
   'weight_decay': 0,
   'learning_rate': 4e-6,
   'adam_epsilon': 1e-8,
   'warmup_ratio': 0.06,
   'warmup_steps': 0,
   'max_grad_norm': 1.0,
    'logging_steps': 50,
   'evaluate_during_training': True,
   'save_steps': 2000,
   'eval_all_checkpoints': False,
    'evaluate_during_training': True,
    'evaluate_during_training_verbose': True,
    'evaluate_during_training_steps': 3,
   'use_tensorboard': True,
#     'tensorboard_dir': 'tensorboard/',
    'overwrite_output_dir': True,
   'reprocess_input_data': False,
}

In [34]:
# Create a TransformerModel
model = MultiLabelClassificationModel('roberta', 'roberta-base', args=args)

In [None]:
# Train the model
model.train_model(train_df=train_df, eval_df=test_df, text_ #acc=accuracy_score,
                                                          prec=precision_score,
                                                           f1=f1_score)#auto_weights=True)


# Extract features

In [28]:
hyperparams_features = {
    "max_features": 40000,
    # cut texts after this number of words
    # (among top max_features most common words)
    "maxlen": 512,
    "embedding_dim": 50,
    "user_level": True,
    "posts_per_user": 10,
    "batch_size": 2,
}

#### Emotions

In [29]:
def load_NRC(nrc_path):
    word_emotions = {}
    emotion_words = {}
    with open(nrc_path) as in_f:
        for line in in_f:
            line = line.strip()
            if not line:
                continue
            word, emotion, label = line.split()
            if word not in word_emotions:
                word_emotions[word] = set()
            if emotion not in emotion_words:
                emotion_words[emotion] = set()
            label = int(label)
            if label:
                word_emotions[word].add(emotion)
                emotion_words[emotion].add(word)
    return emotion_words

nrc_lexicon_path = root_dir + '/resources/NRC-Emotion-Lexicon-Wordlevel-v0.92.txt'
nrc_lexicon = load_NRC(nrc_lexicon_path)
emotions = list(nrc_lexicon.keys())


In [30]:
def encode_emotions(tokens, emotion_lexicon, emotions, relative=True):
    text_len = len(tokens)
    encoded_emotions = [0 for e in emotions]
    for i, emotion in enumerate(emotions):
        try:
            emotion_words = [t for t in tokens if t in emotion_lexicon[emotion]]
            if relative:
                encoded_emotions[i] = len(emotion_words) / len(tokens)
            else:
                encoded_emotions[i] = len(emotion_words)
        except ValueError:
            print("Emotion not found.")
    return encoded_emotions

In [31]:
from liwc_readDict import readDict

liwc = readDict(root_dir + '/resources/liwc.dic')

categories = set([c for (w,c) in liwc])
len(categories)

64

#### Personal pronouns

In [32]:
first_person_pronouns = {"i", "me", "my", "mine", "myself"}
def encode_pronouns(tokens, pronouns={"i", "me", "my", "mine", "myself"}, relative=True):
    if not tokens:
        return np.nan
    text_len = len(tokens)
    nr_pronouns = len([t for t in tokens if t in pronouns])
    if relative:
        return nr_pronouns/text_len
    else:
        return nr_pronouns

#### Stopwords

In [33]:
stopword_list = stopwords.words("english")
def encode_stopwords(tokens, stopwords=stopword_list):
    encoded_stopwords = [0 for s in stopword_list]
    if not tokens:
        return encoded_stopwords
    for i, stopword in enumerate(stopwords):
        if stopword in tokens:
            encoded_stopwords[i] += 1
    return encoded_stopwords

### BERT

In [33]:
# from easybert import Bert
# bert = Bert("https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1")

In [34]:
# x = bert.embed("A sequence of words is a sequebce.")

In [35]:
# def encode_bert(sequence):
#     return bert.embed(sequence)

## Simple transformers


In [44]:
# import simpletransformers

In [45]:
# prefix='/home/anasab/eRisk/data/'
# train_df = pd.read_csv(prefix + 'train.csv', header=None)
# train_df.head()

# eval_df = pd.read_csv(prefix + 'test.csv', header=None)
# eval_df.head()

# train_df[0] = (train_df[0] == 2).astype(int)
# eval_df[0] = (eval_df[0] == 2).astype(int)

# train_df = pd.DataFrame({
#     'text': train_df[1].replace(r'\n', ' ', regex=True),
#     'label':train_df[0]
# })

# print(train_df.head())

# eval_df = pd.DataFrame({
#     'text': eval_df[1].replace(r'\n', ' ', regex=True),
#     'label':eval_df[0]
# })

# print(eval_df.head())

In [46]:
# from simpletransformers.classification import ClassificationModel


# # Create a TransformerModel
# model = ClassificationModel('roberta', 'roberta-base')

# # Train the model
# model.train_model(train_df)

# # Evaluate the model
# result, model_outputs, wrong_predictions = model.eval_model(eval_df)

In [47]:
# from bert import albert_tokenization
# from bert import bert_tokenization

In [36]:
import tensorflow_hub as hub
# bert_path = "https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1"
bert_path = "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/1"
import bert
from bert.tokenization.bert_tokenization import FullTokenizer

W0328 12:57:56.576619 140643282044672 __init__.py:56] Some hub symbols are not available because TensorFlow version is less than 1.14


ImportError: No module named 'bert.tokenization.bert_tokenization'; 'bert.tokenization' is not a package

In [49]:
bert_layer = hub.KerasLayer(bert_path,# signature='tokens' , signature_outputs_as_dict=True,
                            trainable=False)


INFO:absl:Using /tmp/tfhub_modules to cache modules.


InvalidArgumentError: assertion failed: [0] [Op:Assert] name: EagerVariableNameReuse

In [None]:
class InputExample(object):
    """A single training/test example for simple sequence classification."""

    def __init__(self, guid, text_a, text_b=None, label=None):
        """Constructs a InputExample.
    Args:
      guid: Unique id for the example.
      text_a: string. The untokenized text of the first sequence. For single
        sequence tasks, only this sequence must be specified.
      text_b: (Optional) string. The untokenized text of the second sequence.
        Only must be specified for sequence pair tasks.
      label: (Optional) string. The label of the example. This should be
        specified for train and dev examples, but not for test examples.
    """
        self.guid = guid
        self.text_a = text_a
        self.text_b = text_b
        self.label = label

In [None]:
def encode_text_for_bert(tokenizer, example, max_seq_length=512):
    """Converts a single `InputExample` into a single `InputFeatures`."""

#     if isinstance(example, PaddingInputExample):
#         input_ids = [0] * max_seq_length
#         input_mask = [0] * max_seq_length
#         segment_ids = [0] * max_seq_length
#         label = 0
#         return input_ids, input_mask, segment_ids, label

    tokens_a = tokenizer.tokenize(example.text_a)
    if len(tokens_a) > max_seq_length - 2:
        tokens_a = tokens_a[0 : (max_seq_length - 2)]

    tokens = []
    segment_ids = []
    tokens.append("[CLS]")
    segment_ids.append(0)
    for token in tokens_a:
        tokens.append(token)
        segment_ids.append(0)
    tokens.append("[SEP]")
    segment_ids.append(0)

    input_ids = tokenizer.convert_tokens_to_ids(tokens)

    # The mask has 1 for real tokens and 0 for padding tokens. Only real
    # tokens are attended to.
    input_mask = [1] * len(input_ids)

    # Zero-pad up to the sequence length.
    while len(input_ids) < max_seq_length:
        input_ids.append(0)
        input_mask.append(0)
        segment_ids.append(0)

    assert len(input_ids) == max_seq_length
    assert len(input_mask) == max_seq_length
    assert len(segment_ids) == max_seq_length

    return input_ids, input_mask, segment_ids, example.label


In [None]:
def create_tokenizer_from_hub_module():
    """Get the vocab file and casing info from the Hub module."""
#     bert_module =  hub.Module(bert_path)
#     tokenization_info = bert_module(signature="tokenization_info", as_dict=True)
    vocab_file = bert_layer.resolved_object.vocab_file.asset_path.numpy()
    do_lower_case = bert_layer.resolved_object.do_lower_case.numpy()

#     vocab_file, do_lower_case = sess.run(
#         [
#             tokenization_info["vocab_file"],
#             tokenization_info["do_lower_case"],
#         ]
#     )

    return FullTokenizer(vocab_file=vocab_file, do_lower_case=do_lower_case)

In [None]:
# Instantiate tokenizer
# bert_tokenizer = FullTokenizer()
bert_tokenizer = create_tokenizer_from_hub_module()

encode_text_for_bert(bert_tokenizer, InputExample(None, 
                                               "Ana are mere"), 
                       hyperparams_features['maxlen'])

### tfhub albert

In [None]:
# input_ids = tf.keras.layers.Input(shape=[None], dtype=tf.int32)
# input_mask = tf.keras.layers.Input(shape=[None], dtype=tf.int32)
# sequence_mask = tf.keras.layers.Input(shape=[None], dtype=tf.int32)

# albert = hub.KerasLayer(
#     "https://tfhub.dev/google/albert_xlarge/3",
#     trainable=True,
#     signature="tokens",
#     output_key="pooled_output",
# )

# features = {
#     "input_ids": input_ids,
#     "input_mask": input_mask,
#     "segment_ids": sequence_mask,
# }
# out = albert(features)
# model = tf.keras.Model(inputs=[input_ids, input_mask, sequence_mask], outputs=out)
# model.compile("adam", loss="sparse_categorical_crossentropy")
# model.summary()

### Encode data

In [37]:
from collections import Counter
def encode_labels(labels):
    '''Convert ia to i and ib to -i'''
    encoded_labels = []
    for i, l in enumerate(labels):
        try:
            encoded_labels.append(int(l))
        except Exception as e:
            logger.debug("Encoding label %s\n" % l)
        
            if str(l)[-1] == 'a':
                encoded_labels.append(int(l[0]))
            elif str(l)[-1] == 'b':
                encoded_labels.append(-int(l[0]))
            else:
                logger.warning("Coult not encode label %s\n" % l)
    return encoded_labels

def load_erisk_data(writings_df, voc_size, emotion_lexicon, seq_len, emotions =  
                    ['anger', 'anticipation', 'disgust', 'fear', 'joy', 
                     'negative', 'positive', 'sadness', 'surprise', 'trust'],
                    liwc_categories = categories, ignore_features=[],
                    pronouns = ["i", "me", "my", "mine", "myself"],
                    train_prop=0.7, valid_prop=0.3, test_slice=2,
                    nr_slices=5,
                    min_post_len=3, min_word_len=1, 
                    user_level=True, vocabulary=None,
                   logger=logger):
    logger.debug("Loading data...\n")
    if not vocabulary:
        vocabulary = {}
        word_freqs = Counter()
        for words in writings_df.tokenized_text:
            word_freqs.update(words)
        for words in writings_df.tokenized_title:
            word_freqs.update(words)
        i = 1
        for w, f in word_freqs.most_common(voc_size-2): # keeping voc_size-1 for unk
            if len(w) < min_word_len:
                continue
            vocabulary[w] = i
            i += 1

    if 'subset' in writings_df.columns:
        training_subjects = list(set(writings_df[writings_df['subset']=='train'].subject))
        test_subjects = list(set(writings_df[writings_df['subset']=='test'].subject))
    else:
        all_subjects = sorted(list(set(writings_df.subject)))
        training_subjects_size = int(len(all_subjects) * train_prop)
        test_subjects_size = len(all_subjects) - training_subjects_size
        # Cross-validation, with fixed slice as input
        test_prop = 1-train_prop
        test_slice = min(test_slice, nr_slices)
        logger.debug("start index: %f, from %f\n" % (
            len(all_subjects)*(1/nr_slices)*test_slice, test_prop*test_slice))
        start_slice = int(len(all_subjects)*(1/nr_slices)*test_slice)
        test_subjects = all_subjects[start_slice: start_slice+test_subjects_size]
        training_subjects = [s for s in all_subjects if s not in test_subjects]
    training_subjects = sorted(training_subjects) # ensuring reproducibility
    valid_subjects_size = int(len(training_subjects) * valid_prop)
    valid_subjects = training_subjects[:valid_subjects_size]
    training_subjects = training_subjects[valid_subjects_size:]
    categories = [c for c in liwc_categories if c in writings_df.columns]
    logger.debug("%d training users, %d validation users, %d test users." % (
        len(training_subjects), 
          len(valid_subjects),
          len(test_subjects)))
    subjects_split = {'train': training_subjects, 
                      'valid': valid_subjects, 
                      'test': test_subjects}

    user_level_texts = {}
    for row in writings_df.sort_values(by='date').itertuples():
        words = []
        raw_text = ""
        if row.tokenized_title:
            words.extend(row.tokenized_title)
            raw_text += row.title
        if row.tokenized_text:
            words.extend(row.tokenized_text)
            raw_text += row.text
        if not words or len(words)<min_post_len:
            print(row.subject)
            continue
        labels = [getattr(row, 'label%d'%i) for i in range(nr_questions)]
        liwc_categs = [getattr(row, categ) for categ in categories]
        if row.subject not in user_level_texts.keys():
            user_level_texts[row.subject] = {}
            user_level_texts[row.subject]['texts'] = [words]
            user_level_texts[row.subject]['labels'] = encode_labels(labels)
            user_level_texts[row.subject]['liwc'] = [liwc_categs]
            user_level_texts[row.subject]['raw'] = [raw_text]
        else:
            user_level_texts[row.subject]['texts'].append(words)
            user_level_texts[row.subject]['liwc'].append(liwc_categs)
            user_level_texts[row.subject]['raw'].append(raw_text)

    return user_level_texts, subjects_split, vocabulary


In [38]:
user_level_data, subjects_split, vocabulary = load_erisk_data(writings_df, 
                                                            seq_len=hyperparams_features['maxlen'],
                                                            voc_size=hyperparams_features['max_features'],
                                                           emotion_lexicon=nrc_lexicon,
                                                           emotions=emotions,
                                                           user_level=hyperparams_features['user_level'],
                                                                                logger=logger,
                                                           vocabulary=pickle.load(open('vocabulary_40K_all.pkl', 'rb'))
                                                                               )

Loading data...



I0328 12:58:08.486985 140643282044672 <ipython-input-37-ed5a3d9938b1>:29] Loading data...



start index: 8.000000, from 0.600000



I0328 12:58:08.488625 140643282044672 <ipython-input-37-ed5a3d9938b1>:55] start index: 8.000000, from 0.600000



10 training users, 4 validation users, 6 test users.


I0328 12:58:08.489387 140643282044672 <ipython-input-37-ed5a3d9938b1>:67] 10 training users, 4 validation users, 6 test users.


subject3993
subject3993
subject3993
subject9798
subject9798
subject9798
subject9798
subject9798
subject9798
subject9798
subject9798
subject2903
subject9798
subject6619
subject2903
subject2903
subject9798
subject6619
subject6619
subject6619
subject6619
subject6619
subject6619
subject6619
subject6619
subject6619
subject6619
subject3993
subject5791
subject6619
subject5791
subject9798
subject6619
subject9798
subject6619
subject6619
subject6619
subject6619
subject6619
subject6619
subject5791
subject7039
subject5791
subject5791
subject6619
subject6619
subject6619
subject6619
subject6619
subject6635
subject6619
subject6635
subject6619
subject6635
subject6635
subject7039
subject3993
subject6635
subject6619
subject7039
subject9694
subject6619
subject7039
subject7039
subject7039
subject7039
subject7039
subject7039
subject7039
subject7039
subject7039
subject6619
subject7039
subject9694
subject6635
subject6635
subject7039
subject9694
subject9694
subject9694
subject9694
subject9694
subject7039
subj

subject2827
subject9798
subject9454
subject9454
subject9454
subject2961
subject2827
subject9454
subject9218
subject9218
subject9218
subject9218
subject2432
subject9454
subject9798
subject6900
subject2961
subject2961
subject2961
subject9454
subject2827
subject2827
subject9218
subject2827
subject2827
subject9218
subject2827
subject2961
subject9218
subject9798
subject5897
subject9694
subject9218
subject2827
subject9218
subject2827
subject9218
subject9694
subject2432
subject2827
subject9454
subject2432
subject2961
subject9218
subject9694
subject2961
subject2432
subject9454
subject2432
subject9694
subject9694
subject9694
subject9694
subject9694
subject9694
subject9694
subject9798
subject2827
subject2432
subject9798
subject9694
subject6900
subject9798
subject3993
subject9694
subject6900
subject6900
subject6900
subject6900
subject9798
subject3993
subject9694
subject6900
subject9694
subject9694
subject2903
subject4058
subject3707
subject3707
subject9694
subject9694
subject9694
subject9798
subj

In [39]:
user_level_data['subject6900']['labels']

[1, 1, 2, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 2, 2, 0, 0, 0, 1, 0]

In [40]:
def load_embeddings(path, embedding_dim, voc):
    # random matrix with mean value = 0
    embedding_matrix = np.random.random((len(voc)+2, embedding_dim)) - 0.5 # voc + unk + pad value(0)

    f = open(path)
    for line in f:
        values = line.split()
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        word_i = voc.get(word)
        if word_i is not None:
            embedding_matrix[word_i] = coefs
    f.close()

    print('Total %s word vectors.' % len(embedding_matrix))

 
    return embedding_matrix

pretrained_embeddings_path = root_dir + '/resources/glove.twitter.27B/glove.twitter.27B.%dd.txt' % hyperparams_features['embedding_dim']
embedding_matrix = load_embeddings(pretrained_embeddings_path, hyperparams_features['embedding_dim'], vocabulary)


Total 40000 word vectors.


## Data Generator

In [41]:
class DataGenerator(Sequence):
    'Generates data for Keras'
    def __init__(self, user_level_data, subjects_split, set_type='train', bert_tokenizer=bert_tokenizer,
                 batch_size=hyperparams_features['batch_size'], seq_len=hyperparams_features['maxlen'], 
                 voc_size=hyperparams_features['max_features'], emotion_lexicon=nrc_lexicon,
                 emotions=emotions, pronouns=["i", "me", "my", "mine", "myself"], 
                 max_posts_per_user=hyperparams_features['posts_per_user'],
                 shuffle=True):
        'Initialization'
        self.seq_len = seq_len
        self.bert_tokenizer = bert_tokenizer
        self.subjects_split = subjects_split
        self.set = set_type
        self.emotion_lexicon = emotion_lexicon
        self.batch_size = batch_size
        self.data = user_level_data
        self.emotions = emotions
        self.pronouns = pronouns
        self.shuffle = shuffle
        self.voc_size = voc_size
        self.max_posts_per_user = max_posts_per_user
        self.on_epoch_end()

    def __encode_text(self, tokens, raw_text):
        # Using voc_size-1 value for OOV token
        encoded_tokens = [vocabulary.get(w, self.voc_size-1) for w in tokens]
        encoded_emotions = encode_emotions(tokens, self.emotion_lexicon, self.emotions)
        encoded_pronouns = encode_pronouns(tokens, self.pronouns)
        encoded_stopwords = encode_stopwords(tokens)
        bert_ids, bert_masks, bert_segments, label = encode_text_for_bert(self.bert_tokenizer, InputExample(None, 
                                               raw_text), self.seq_len)
        return (encoded_tokens, encoded_emotions, encoded_pronouns, encoded_stopwords,
               bert_ids, bert_masks, bert_segments)
        
    def __len__(self):
        'Denotes the number of batches per epoch'
        return int(np.floor(len(self.subjects_split[self.set]) / self.batch_size))

    def __getitem__(self, index):
        'Generate one batch of data'
        # Generate indexes of the batch
        user_indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        # Find users
        users = [self.subjects_split[self.set][i] for i in user_indexes
                    if self.subjects_split[self.set][i] in self.data.keys()] # TODO: maybe needs a warning that user is missing

        post_indexes = {}
        # Sample post ids
        for subject in users:
            posts_len = len(self.data[subject]['texts'])
            posts_index_sample = sorted(np.random.choice(posts_len, 
                                                         min(self.max_posts_per_user, posts_len),
                                                         replace=False))
            post_indexes[subject] = posts_index_sample
        # Generate data
        X, y = self.__data_generation(users, post_indexes)

        return X, y

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(len(self.subjects_split[self.set]))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def __data_generation(self, users, post_indexes):
        'Generates data containing batch_size samples' # X : (n_samples, *dim, n_channels)
        tokens_data = []
        categ_data = []
        sparse_data = []
        subjects = []
        bert_ids_data = []
        bert_masks_data = []
        bert_segments_data = []
        labels = []

        for subject in users:
            texts = self.data[subject]['texts']
            raw_texts = self.data[subject]['raw']
            label = self.data[subject]['label']
            liwc_scores = self.data[subject]['liwc']
            
            # Sample
            texts = [texts[i] for i in post_indexes[subject]]
            liwc_selection = [liwc_scores[i] for i in post_indexes[subject]]
            raw_texts = [raw_texts[i] for i in post_indexes[subject]]
            
            all_words = [sum(texts, [])] # merge all texts in one list -- Ok, why sum?? this is wrong!!
            liwc_aggreg = [np.array(liwc_selection).mean(axis=0).tolist()]
            all_raw_texts = [" ".join(raw_texts)]
            
            for i, words in enumerate(all_words):
                encoded_tokens, encoded_emotions, encoded_pronouns, encoded_stopwords, \
                    bert_ids, bert_masks, bert_segments = self.__encode_text(words, all_raw_texts[i])
                subject_id = int(subject.split('t')[1])
                tokens_data.append(encoded_tokens)
                categ_data.append(encoded_emotions + [encoded_pronouns] + liwc_aggreg[i])
                sparse_data.append(encoded_stopwords)
                bert_ids_data.append(bert_ids)
                bert_masks_data.append(bert_masks)
                bert_segments_data.append(bert_segments)
                
                labels.append(label)
                subjects.append(subject_id)

        
        # using zeros for padding
        tokens_data_padded = sequence.pad_sequences(tokens_data, maxlen=self.seq_len)

        return ([np.array(tokens_data_padded), np.array(categ_data), np.array(sparse_data),
                 np.array(bert_ids_data), np.array(bert_masks_data), np.array(bert_segments_data),
                np.array(subjects)],
                np.array(labels))

NameError: name 'bert_tokenizer' is not defined

In [None]:
class DataGenerator(Sequence):
    'Generates data for Keras'
    def __init__(self, user_level_data, subjects_split, batch_size=hyperparams_features['batch_size'], 
                 seq_len=hyperparams_features['maxlen'], voc_size=hyperparams_features['max_features'], 
                 emotion_lexicon=nrc_lexicon, set_type='train', test_user_indexes=[0],
                 emotions=emotions, pronouns=["i", "me", "my", "mine", "myself"], 
                 max_posts_per_user=hyperparams_features['posts_per_user'],
                 bert_tokenizer=bert_tokenizer,
                 shuffle=True):
        'Initialization'
        self.seq_len = seq_len
        self.emotion_lexicon = emotion_lexicon
        self.bert_tokenizer = bert_tokenizer
        self.batch_size = batch_size
        self.data = user_level_data
        self.all_users = list(self.data.keys())
        self.emotions = emotions
        self.pronouns = pronouns
        self.set = set_type
        self.subjects_split = subjects_split
        self.shuffle = shuffle
        self.voc_size = voc_size
        self.max_posts_per_user = max_posts_per_user
        self.test_user_indexes = test_user_indexes
        self.on_epoch_end()


    def __encode_text(self, tokens, raw_text):
        # Using voc_size-1 value for OOV token
        encoded_tokens = [vocabulary.get(w, self.voc_size-1) for w in tokens]
        encoded_emotions = encode_emotions(tokens, self.emotion_lexicon, self.emotions)
        encoded_pronouns = encode_pronouns(tokens, self.pronouns)
        encoded_stopwords = encode_stopwords(tokens)
        bert_ids, bert_masks, bert_segments, label = encode_text_for_bert(self.bert_tokenizer, InputExample(None, 
                                               raw_text), self.seq_len)
        return (encoded_tokens, encoded_emotions, encoded_pronouns, encoded_stopwords,
               bert_ids, bert_masks, bert_segments)
        
    def __len__(self):
        'Denotes the number of batches per epoch'
        return int(np.floor(len(self.subjects_split[self.set]) / self.batch_size))

    def __getitem__(self, index):
        'Generate one batch of data'
        # Generate indexes of the batch
        user_indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        # Find users
        users = [self.subjects_split[self.set][i] for i in user_indexes
                    if self.subjects_split[self.set][i] in self.data.keys()] # TODO: maybe needs a warning that user is missing

        post_indexes = {}
        # Sample post ids
        for subject in users:
            posts_len = len(self.data[subject]['texts'])
            posts_index_sample = sorted(np.random.choice(posts_len, 
                                                         min(self.max_posts_per_user, posts_len),
                                                         replace=False))
            post_indexes[subject] = posts_index_sample
        # Generate data
        X, y = self.__data_generation(users, post_indexes)

        return X, y

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(len(self.subjects_split[self.set]))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def __data_generation(self, users, post_indexes):
        'Generates data containing batch_size samples' # X : (n_samples, *dim, n_channels)
        tokens_data = []
        categ_data = []
        sparse_data = []
        subjects = []
        bert_ids_data = []
        bert_masks_data = []
        bert_segments_data = []
        labels = []
        for subject in users:
            texts = self.data[subject]['texts']
            label = self.data[subject]['labels']
            raw_texts = self.data[subject]['raw']

            # Sample
            texts = [texts[i] for i in post_indexes[subject]]
            liwc_selection = [self.data[subject]['liwc'][i] for i in post_indexes[subject]]
            raw_texts = [raw_texts[i] for i in post_indexes[subject]]

            all_words = [sum(texts, [])] # merge all texts in one list
            liwc_aggreg = [np.array(liwc_selection).mean(axis=0).tolist()]
            all_raw_texts = [" ".join(raw_texts)]

            
            for i, words in enumerate(all_words):
                encoded_tokens, encoded_emotions, encoded_pronouns, encoded_stopwords, \
                    bert_ids, bert_masks, bert_segments = self.__encode_text(words, all_raw_texts[i])
                subject_id = int(subject.split('t')[1])
                tokens_data.append(encoded_tokens)
                categ_data.append(encoded_emotions + [encoded_pronouns] + liwc_aggreg[i])
                sparse_data.append(encoded_stopwords)
                labels.append(label)
                bert_ids_data.append(bert_ids)
                bert_masks_data.append(bert_masks)
                bert_segments_data.append(bert_segments)
                
                subjects.append(subject_id)

        
        # using zeros for padding
        tokens_data_padded = sequence.pad_sequences(tokens_data, maxlen=self.seq_len)

        return ([np.array(tokens_data_padded), np.array(categ_data), np.array(sparse_data),
#                 np.array(subjects),
                np.array(bert_ids_data, dtype=np.int32), 
                 np.array(bert_masks_data, dtype=np.int32), 
                 np.array(bert_segments_data, dtype=np.int32),],
                np.array(labels))
#                 np.array(labels).reshape(self.batch_size, -1, len(labels)).tolist()) # to have one array per output

In [None]:
# TODO: Don't split into the 3 sets, do leave-one-out cross-validation

In [None]:
def get_subjects_split(test_size=hyperparams_features['batch_size']):
    test_user_indexes = [np.random.randint(len(user_level_data)) for i in range(test_size)]

    subjects_split = {'test': [u for i,u in 
                               enumerate(user_level_data.keys()) if i in test_user_indexes],
                     'train': [u for i,u in 
                               enumerate(user_level_data.keys()) if i not in test_user_indexes],}
    return subjects_split

In [None]:
logger.setLevel(logging.DEBUG)

# TODO: it is skipping the last batch
x_data = {'train': [], 'test': []}
y_data = {'train': [], 'test': []}
subjects_split = get_subjects_split()
for set_type in ['train', 'test']:
    for x, y in DataGenerator(user_level_data, batch_size=hyperparams_features['batch_size'],
                            set_type=set_type,
                             subjects_split=subjects_split):
        print(x)
        x_data[set_type].append(x)
        y_data[set_type].append(y)
#         break


In [None]:
y_data['test'][0]

In [None]:
x_data['train'][0][5].shape

In [None]:
np.array([[1,2],[3,4]]).reshape(2,-1)

In [None]:
sum([len(subjects_split[s]) for s in ['train', 'test']])

In [None]:
x_data['train']

In [None]:
y_data

# Train

In [None]:
hyperparams = {
    'lstm_units': 10,
    'dense_bow_units': 20,
    'dropout': 0.0,
    'l2_dense': 0.00000011,
    'l2_embeddings': 0.000001,
    'dense_sentence_units': 100,
    'optimizer': 'adam',
    'bert_dense_units': 256,
    'decay': 0.00001,
    'lr': 0.01,
    "trainable_embeddings": False,
    "reduce_lr_factor": 0.0002,
    "reduce_lr_patience": 1000,
    "freeze_patience": 500,
    'threshold': 0.5,
    'bert_len': 768,
    'ignore_layer': ['batchnorm', 'lstm_layers', 'numerical_dense_layer', 'sparse_feat_dense_layer'],
    'norm_momentum': 0.1,

}
if not hyperparams['optimizer']:
    hyperparams['optimizer'] = optimizers.Adam(lr=hyperparams['lr'], #beta_1=0.9, beta_2=0.999, epsilon=0.0001,
                                   decay=hyperparams['decay'])

In [None]:
# class Metrics():
#     def __init__(self, threshold=0.5):
#         self.threshold=threshold
        
#     def recall_m(self, y_true, y_pred):
#             y_labels = y_true
#             y_pred = K.cast(K.greater(K.clip(y_pred, 0, 1), self.threshold), K.floatx())        
#             possible_positives = K.sum(K.round(K.clip(y_labels, 0, 1)))
#             true_positives = K.sum(K.round(K.clip(y_labels * y_pred, 0, 1)))
#             recall = true_positives / (possible_positives + K.epsilon())
#             return recall

#     def precision_m(self, y_true, y_pred):
#             y_labels = y_true
#             y_pred = K.cast(K.greater(K.clip(y_pred, 0, 1), self.threshold), K.floatx())        
#             true_positives = K.sum(K.round(K.clip(y_labels * y_pred, 0, 1)))
#             predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
#             precision = true_positives / (predicted_positives + K.epsilon())
#             return precision

#     def f1_m(self, y_true, y_pred):
#         precision = self.precision_m(y_true, y_pred)
#         recall = self.recall_m(y_true, y_pred)
#         return 2*((precision*recall)/(precision+recall+K.epsilon()))

# def binary_crossentropy_custom(y_true, y_pred):
#     y_labels = y_true
#     return K.binary_crossentropy(y_labels, 
#                                  y_pred)

# metrics_class = Metrics(threshold=hyperparams['threshold'])

In [None]:
def build_model(hyperparams, hyperparams_features, embedding_matrix, emotions, stopwords_list,
                liwc_categories, nr_classes,
               ignore_layer=[]):

    tokens_features = Input(shape=(hyperparams_features['maxlen'],), name='word_seq')
    embedding_layer = Embedding(hyperparams_features['max_features'], 
                                hyperparams_features['embedding_dim'], 
                                input_length=hyperparams_features['maxlen'],
                                embeddings_regularizer=regularizers.l2(hyperparams['l2_embeddings']),
                                weights=[embedding_matrix], 
                                trainable=hyperparams['trainable_embeddings'],
                               name='embeddings_layer')(
        tokens_features)
#     if 'batchnorm' not in ignore_layer:
#         embedding_layer_norm = BatchNormalization(axis=-1, momentum=hyperparams['norm_momentum'],
#                                                      name='embeddings_layer_norm')(embedding_layer)
#     lstm_layers = Bidirectional(LSTM(hyperparams['lstm_units']))(embedding_layer)


    lstm_layers = LSTM(hyperparams['lstm_units'], 
                           return_sequences='attention' not in ignore_layer,
                      name='LSTM_layer')(embedding_layer)
    
    # Attention
    if 'attention' not in ignore_layer:
        attention = Dense(1, activation='tanh', name='attention')(lstm_layers)
        attention = Flatten()(attention)
        attention = Activation('softmax')(attention)
        attention = RepeatVector(hyperparams['lstm_units'])(attention)
        attention = Permute([2, 1])(attention)

        sent_representation = Multiply()([lstm_layers, attention])
        sent_representation = Lambda(lambda xin: K.sum(xin, axis=1), 
                                     output_shape=(hyperparams['lstm_units'],)
                                    )(sent_representation)

        
    else:
        sent_representation = lstm_layers
        
    
    sent_representation = Dropout(hyperparams['dropout'], name='lstm_att_dropout')(sent_representation)
    if hyperparams['dense_sentence_units']:
        sent_representation = Dense(units=hyperparams['dense_sentence_units'],
                                   name='dense_sent_representation')(sent_representation)
    numerical_features = Input(shape=(len(emotions) + 1 + len(liwc_categories),), name='numeric_input') # emotions and pronouns
    
    in_id_bert = Input(shape=(hyperparams_features['maxlen'],), dtype='int32', name="input_ids_bert")
    in_mask_bert = Input(shape=(hyperparams_features['maxlen'],), dtype='int32', name="input_masks_bert")
    in_segment_bert = Input(shape=(hyperparams_features['maxlen'],), dtype='int32', name="segment_ids_bert")
#     bert_layer = hub.Module(
#         "https://tfhub.dev/google/albert_xlarge/3",
#         bert_path, trainable=True,
#         trainable=False,
#         signature="tokens",
#         signature_outputs_as_dict=True,
#         output_key="pooled_output",
#     )

#     bert_layer = hub.Module(
# #         "https://tfhub.dev/google/albert_xlarge/3",
#         bert_path, trainable=True,
# #         trainable=False,
# #         signature="tokens",
# #         signature_outputs_as_dict=True,
#         output_key="pooled_output",
#     )

    albert = hub.KerasLayer(
        "https://tfhub.dev/google/albert_base/3",
        trainable=False,
        signature="tokens",
        output_key="pooled_output",
    )

    bert_features = {
        "input_ids": in_id_bert,
        "input_mask": in_mask_bert,
        "segment_ids": in_segment_bert,
    }
    bert_output = albert(bert_features)
#     bert_output = albert([in_id_bert, in_mask_bert, in_segment_bert])['pooled_output']  # TODO: can also be 'mean'. Check BertLayer
    dense_layer_bert = Dense(units=hyperparams['bert_dense_units'],
                        kernel_regularizer=regularizers.l2(hyperparams['l2_dense']),
                        name='bert_dense_layer',
                       )(bert_output)
    sparse_features = Input(shape=(len(stopwords_list),), name='sparse_input') # stopwords

    if hyperparams['dense_bow_units']:
        dense_layer_sparse = Dense(units=hyperparams['dense_bow_units'],
                              name='sparse_feat_dense_layer',
                                kernel_regularizer=regularizers.l2(hyperparams['l2_dense']),
                              )(sparse_features)
    else:
        dense_layer_sparse = sparse_features
    
    if 'batchnorm' not in ignore_layer:
        numerical_features_norm = BatchNormalization(axis=1, momentum=hyperparams['norm_momentum'],
                                                     name='numerical_features_norm')(numerical_features)
        sent_representation_norm = BatchNormalization(axis=1, momentum=hyperparams['norm_momentum'],
                                                      name='sent_repr_norm')(sent_representation)

        dense_layer_sparse_norm = BatchNormalization(axis=1, momentum=hyperparams['norm_momentum'],
                                                         name='sparse_features_norm')(dense_layer_sparse)
        dense_layer_bert_norm = BatchNormalization(axis=1, momentum=hyperparams['norm_momentum'],
                                                         name='bert_features_norm')(dense_layer_bert)
        
    subjects = Input(shape=(1,), name='subjects')
    

    all_layers = {
        'lstm_layers': sent_representation,
        'numerical_dense_layer': numerical_features,
        'sparse_feat_dense_layer': dense_layer_sparse,
        'bert_layer': dense_layer_bert
    }
    if 'batchnorm' not in ignore_layer:
        all_layers = {
            'lstm_layers': sent_representation_norm,
            'numerical_dense_layer': numerical_features_norm,
            'sparse_feat_dense_layer': dense_layer_sparse_norm,
            'bert_layer': dense_layer_bert_norm
        }
    layers_to_merge = []
    for n, l in all_layers.items():
        if n in ignore_layer:
            continue
        layers_to_merge.append(l)
        
    if len(layers_to_merge) == 1:
        merged_layers = layers_to_merge[0]
    else:
        merged_layers = concatenate(layers_to_merge)
    output_layers = []
    for label in range(nr_classes):
        output_layer = Dense(1, activation='softmax',
                         name='output_layer%d' % label,
                        kernel_regularizer=regularizers.l2(hyperparams['l2_dense']))(merged_layers)
        output_layers.append(output_layer)

    # Compile model
    model = Model(inputs=[tokens_features, numerical_features, sparse_features,
                         in_id_bert, in_mask_bert, in_segment_bert], 
                  outputs=output_layers)

    model.compile(hyperparams['optimizer'], {'output_layer%d'%i: 
                                             'mean_squared_error' for i in range(nr_classes)},
                  metrics={'output_layer%d' % label: 
                           ['accuracy', 'mean_squared_error'] for label in range(nr_classes)})
    return model



In [None]:
hub.KerasLayer

In [507]:
model = build_model(hyperparams, hyperparams_features, embedding_matrix, emotions, stopword_list,
                    liwc_categories=[c for c in categories if c in writings_df.columns],
                    nr_classes=nr_questions,
                   ignore_layer=hyperparams['ignore_layer'])
model.summary()

Model: "model_4"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_ids_bert (InputLayer)     [(None, 512)]        0                                            
__________________________________________________________________________________________________
input_masks_bert (InputLayer)   [(None, 512)]        0                                            
__________________________________________________________________________________________________
segment_ids_bert (InputLayer)   [(None, 512)]        0                                            
__________________________________________________________________________________________________
keras_layer_7 (KerasLayer)      (None, 768)          11812272    input_ids_bert[0][0]             
                                                                 input_masks_bert[0][0]     

In [508]:
def train_model(model, 
                data_generator_train, data_generator_valid,
                epochs, start_epoch=0, workers=4,
                callback_list = [],
                model_path='/tmp/model',
               verbose=1):
    logging.info('Train...')
    experiment.log_parameter('callbacks', callbacks)

    history = model.fit_generator(data_generator_train,
#               batch_size=batch_size,
#                 steps_per_epoch=steps_per_epoch,
              epochs=epochs, initial_epoch=start_epoch, 
              validation_data=data_generator_valid,
                        verbose=verbose,
#               validation_split=0.3,
                       workers=workers,
            callbacks = [
                callbacks.ModelCheckpoint(filepath='%s_best' % model_path, verbose=1, 
                                          save_best_only=True),
                callbacks.EarlyStopping(patience=500), *callback_list
            ])
    model.save(model_path)
    experiment.log_parameter('model_path', model_path)
    return model, history

In [509]:
experiment = Experiment(api_key="eoBdVyznAhfg3bK9pZ58ZSXfv",
                        project_name="mental", workspace="ananana", disabled=False)

experiment.log_parameters(hyperparams_features)

experiment.log_parameter('emotion_lexicon', nrc_lexicon_path)
experiment.log_parameter('emotions', emotions)
experiment.log_parameter('embeddings_path', pretrained_embeddings_path)

experiment.add_tag('T2')
experiment.log_parameters(hyperparams)

COMET INFO: ----------------------------
COMET INFO: Comet.ml Experiment Summary:
COMET INFO:   Data:
COMET INFO:     url: https://www.comet.ml/ananana/mental/99e269aaa3a94ad8b84a9645288489cf
COMET INFO:   Metrics [count] (min, max):
COMET INFO:     batch_loss [803]                                      : (16.0, 63.000038146972656)
COMET INFO:     batch_output_layer0_accuracy [803]                    : (0.0714285746216774, 0.6190476417541504)
COMET INFO:     batch_output_layer0_loss [803]                        : (0.761904776096344, 3.0)
COMET INFO:     batch_output_layer0_mean_squared_error [803]          : (0.761904776096344, 3.0)
COMET INFO:     batch_output_layer10_accuracy [803]                   : (0.0714285746216774, 0.6190476417541504)
COMET INFO:     batch_output_layer10_loss [803]                       : (0.761904776096344, 3.0)
COMET INFO:     batch_output_layer10_mean_squared_error [803]         : (0.761904776096344, 3.0)
COMET INFO:     batch_output_layer11_accuracy [803]  

COMET INFO:     output_layer12_mean_squared_error [803]               : (1.3531745672225952, 1.5555557012557983)
COMET INFO:     output_layer13_accuracy [803]                         : (0.2460317462682724, 0.2976190745830536)
COMET INFO:     output_layer13_loss [803]                             : (1.3531745672225952, 1.5555557012557983)
COMET INFO:     output_layer13_mean_squared_error [803]               : (1.3531745672225952, 1.5555557012557983)
COMET INFO:     output_layer14_accuracy [803]                         : (0.2460317462682724, 0.2976190745830536)
COMET INFO:     output_layer14_loss [803]                             : (1.3531745672225952, 1.5555557012557983)
COMET INFO:     output_layer14_mean_squared_error [803]               : (1.3531745672225952, 1.5555557012557983)
COMET INFO:     output_layer15_accuracy [803]                         : (0.2460317462682724, 0.2976190745830536)
COMET INFO:     output_layer15_loss [803]                             : (1.3531745672225952, 1.5

COMET INFO:     val_output_layer0_accuracy [803]                      : (0.2460317462682724, 0.3174603283405304)
COMET INFO:     val_output_layer0_loss [803]                          : (1.29365074634552, 1.5476192235946655)
COMET INFO:     val_output_layer0_mean_squared_error [803]            : (1.29365074634552, 1.5476192235946655)
COMET INFO:     val_output_layer10_accuracy [803]                     : (0.2460317462682724, 0.3174603283405304)
COMET INFO:     val_output_layer10_loss [803]                         : (1.29365074634552, 1.5476192235946655)
COMET INFO:     val_output_layer10_mean_squared_error [803]           : (1.29365074634552, 1.5476192235946655)
COMET INFO:     val_output_layer11_accuracy [803]                     : (0.2460317462682724, 0.3174603283405304)
COMET INFO:     val_output_layer11_loss [803]                         : (1.29365074634552, 1.5476192235946655)
COMET INFO:     val_output_layer11_mean_squared_error [803]           : (1.29365074634552, 1.5476192235946

COMET INFO:     validate_batch_output_layer12_mean_squared_error [803]: (0.6904761791229248, 2.0)
COMET INFO:     validate_batch_output_layer13_accuracy [803]          : (0.1428571492433548, 0.4285714626312256)
COMET INFO:     validate_batch_output_layer13_loss [803]              : (0.6904761791229248, 2.0)
COMET INFO:     validate_batch_output_layer13_mean_squared_error [803]: (0.6904761791229248, 2.0)
COMET INFO:     validate_batch_output_layer14_accuracy [803]          : (0.1428571492433548, 0.4285714626312256)
COMET INFO:     validate_batch_output_layer14_loss [803]              : (0.6904761791229248, 2.0)
COMET INFO:     validate_batch_output_layer14_mean_squared_error [803]: (0.6904761791229248, 2.0)
COMET INFO:     validate_batch_output_layer15_accuracy [803]          : (0.1428571492433548, 0.4285714626312256)
COMET INFO:     validate_batch_output_layer15_loss [803]              : (0.6904761791229248, 2.0)
COMET INFO:     validate_batch_output_layer15_mean_squared_error [803]: (

In [510]:
subjects_split = get_subjects_split(test_size=10)
data_generator_train = DataGenerator(user_level_data, set_type='train', 
                                     subjects_split=subjects_split)
data_generator_valid = DataGenerator(user_level_data, set_type='test',  
                                     subjects_split=subjects_split)
model, history = train_model(model, data_generator_train, data_generator_valid,
           epochs=1000, start_epoch=0,
                      callback_list = [],
                      model_path='models/bert_t21', workers=1)

Epoch 1/1000
Epoch 00001: val_loss improved from inf to 27.87505, saving model to models/bert_t21_best
INFO:tensorflow:Assets written to: models/bert_t21_best/assets


INFO:tensorflow:Assets written to: models/bert_t21_best/assets


Epoch 2/1000
Epoch 00002: val_loss improved from 27.87505 to 27.87504, saving model to models/bert_t21_best
INFO:tensorflow:Assets written to: models/bert_t21_best/assets


INFO:tensorflow:Assets written to: models/bert_t21_best/assets


Epoch 3/1000
Epoch 00003: val_loss did not improve from 27.87504


Epoch 4/1000
Epoch 00004: val_loss did not improve from 27.87504
Epoch 5/1000
Epoch 00005: val_loss did not improve from 27.87504
Epoch 6/1000


Epoch 00006: val_loss improved from 27.87504 to 27.87504, saving model to models/bert_t21_best
INFO:tensorflow:Assets written to: models/bert_t21_best/assets


INFO:tensorflow:Assets written to: models/bert_t21_best/assets


Epoch 7/1000
Epoch 00007: val_loss did not improve from 27.87504


Epoch 8/1000
Epoch 00008: val_loss did not improve from 27.87504
Epoch 9/1000
Epoch 00009: val_loss did not improve from 27.87504
Epoch 10/1000


Epoch 00010: val_loss did not improve from 27.87504
Epoch 11/1000
Epoch 00011: val_loss did not improve from 27.87504
Epoch 12/1000


Epoch 00012: val_loss did not improve from 27.87504
Epoch 13/1000
Epoch 00013: val_loss did not improve from 27.87504
Epoch 14/1000


Epoch 00014: val_loss did not improve from 27.87504
Epoch 15/1000
Epoch 00015: val_loss did not improve from 27.87504
Epoch 16/1000


Epoch 00016: val_loss did not improve from 27.87504
Epoch 17/1000
Epoch 00017: val_loss did not improve from 27.87504
Epoch 18/1000


Epoch 00018: val_loss did not improve from 27.87504
Epoch 19/1000
Epoch 00019: val_loss did not improve from 27.87504
Epoch 20/1000


Epoch 00020: val_loss did not improve from 27.87504
Epoch 21/1000
Epoch 00021: val_loss did not improve from 27.87504
Epoch 22/1000


Epoch 00022: val_loss improved from 27.87504 to 27.87504, saving model to models/bert_t21_best
INFO:tensorflow:Assets written to: models/bert_t21_best/assets


INFO:tensorflow:Assets written to: models/bert_t21_best/assets


Epoch 23/1000
Epoch 00023: val_loss improved from 27.87504 to 27.87504, saving model to models/bert_t21_best
INFO:tensorflow:Assets written to: models/bert_t21_best/assets


INFO:tensorflow:Assets written to: models/bert_t21_best/assets


Epoch 24/1000
Epoch 00024: val_loss did not improve from 27.87504


Epoch 25/1000
Epoch 00025: val_loss did not improve from 27.87504
Epoch 26/1000
Epoch 00026: val_loss did not improve from 27.87504
Epoch 27/1000


Epoch 00027: val_loss did not improve from 27.87504
Epoch 28/1000
Epoch 00028: val_loss improved from 27.87504 to 27.87504, saving model to models/bert_t21_best
INFO:tensorflow:Assets written to: models/bert_t21_best/assets


INFO:tensorflow:Assets written to: models/bert_t21_best/assets


Epoch 29/1000
Epoch 00029: val_loss did not improve from 27.87504


Epoch 30/1000
Epoch 00030: val_loss did not improve from 27.87504
Epoch 31/1000
Epoch 00031: val_loss did not improve from 27.87504
Epoch 32/1000


Epoch 00032: val_loss improved from 27.87504 to 27.87503, saving model to models/bert_t21_best
INFO:tensorflow:Assets written to: models/bert_t21_best/assets


INFO:tensorflow:Assets written to: models/bert_t21_best/assets


Epoch 33/1000
Epoch 00033: val_loss did not improve from 27.87503


Epoch 34/1000
Epoch 00034: val_loss did not improve from 27.87503
Epoch 35/1000
Epoch 00035: val_loss did not improve from 27.87503
Epoch 36/1000


Epoch 00036: val_loss did not improve from 27.87503
Epoch 37/1000
Epoch 00037: val_loss did not improve from 27.87503
Epoch 38/1000


Epoch 00038: val_loss did not improve from 27.87503
Epoch 39/1000
Epoch 00039: val_loss did not improve from 27.87503
Epoch 40/1000


Epoch 00040: val_loss did not improve from 27.87503
Epoch 41/1000
Epoch 00041: val_loss did not improve from 27.87503
Epoch 42/1000


Epoch 00042: val_loss improved from 27.87503 to 27.87503, saving model to models/bert_t21_best
INFO:tensorflow:Assets written to: models/bert_t21_best/assets


INFO:tensorflow:Assets written to: models/bert_t21_best/assets


Epoch 43/1000
Epoch 00043: val_loss did not improve from 27.87503


Epoch 44/1000
Epoch 00044: val_loss did not improve from 27.87503
Epoch 45/1000
Epoch 00045: val_loss did not improve from 27.87503
Epoch 46/1000


Epoch 00046: val_loss did not improve from 27.87503
Epoch 47/1000
Epoch 00047: val_loss did not improve from 27.87503
Epoch 48/1000


Epoch 00048: val_loss did not improve from 27.87503
Epoch 49/1000
Epoch 00049: val_loss did not improve from 27.87503
Epoch 50/1000


Epoch 00050: val_loss did not improve from 27.87503
Epoch 51/1000
Epoch 00051: val_loss did not improve from 27.87503
Epoch 52/1000


Epoch 00052: val_loss did not improve from 27.87503
Epoch 53/1000
Epoch 00053: val_loss improved from 27.87503 to 27.87503, saving model to models/bert_t21_best
INFO:tensorflow:Assets written to: models/bert_t21_best/assets


INFO:tensorflow:Assets written to: models/bert_t21_best/assets


Epoch 54/1000
Epoch 00054: val_loss did not improve from 27.87503


Epoch 55/1000
Epoch 00055: val_loss did not improve from 27.87503
Epoch 56/1000
Epoch 00056: val_loss did not improve from 27.87503
Epoch 57/1000


Epoch 00057: val_loss did not improve from 27.87503
Epoch 58/1000
Epoch 00058: val_loss did not improve from 27.87503
Epoch 59/1000


Epoch 00059: val_loss did not improve from 27.87503
Epoch 60/1000
Epoch 00060: val_loss did not improve from 27.87503
Epoch 61/1000


Epoch 00061: val_loss did not improve from 27.87503
Epoch 62/1000
Epoch 00062: val_loss did not improve from 27.87503
Epoch 63/1000


Epoch 00063: val_loss did not improve from 27.87503
Epoch 64/1000
Epoch 00064: val_loss improved from 27.87503 to 27.87502, saving model to models/bert_t21_best
INFO:tensorflow:Assets written to: models/bert_t21_best/assets


INFO:tensorflow:Assets written to: models/bert_t21_best/assets


Epoch 65/1000
Epoch 00065: val_loss did not improve from 27.87502


Epoch 66/1000
Epoch 00066: val_loss did not improve from 27.87502
Epoch 67/1000
Epoch 00067: val_loss improved from 27.87502 to 27.87502, saving model to models/bert_t21_best
INFO:tensorflow:Assets written to: models/bert_t21_best/assets


INFO:tensorflow:Assets written to: models/bert_t21_best/assets


Epoch 68/1000
Epoch 00068: val_loss did not improve from 27.87502


Epoch 69/1000
Epoch 00069: val_loss did not improve from 27.87502
Epoch 70/1000
Epoch 00070: val_loss did not improve from 27.87502
Epoch 71/1000


Epoch 00071: val_loss improved from 27.87502 to 27.87502, saving model to models/bert_t21_best
INFO:tensorflow:Assets written to: models/bert_t21_best/assets


INFO:tensorflow:Assets written to: models/bert_t21_best/assets


Epoch 72/1000
Epoch 00072: val_loss did not improve from 27.87502


Epoch 73/1000
Epoch 00073: val_loss did not improve from 27.87502
Epoch 74/1000
Epoch 00074: val_loss did not improve from 27.87502
Epoch 75/1000


Epoch 00075: val_loss did not improve from 27.87502
Epoch 76/1000
Epoch 00076: val_loss improved from 27.87502 to 27.87502, saving model to models/bert_t21_best
INFO:tensorflow:Assets written to: models/bert_t21_best/assets


INFO:tensorflow:Assets written to: models/bert_t21_best/assets


Epoch 77/1000
Epoch 00077: val_loss did not improve from 27.87502


Epoch 78/1000
Epoch 00078: val_loss did not improve from 27.87502
Epoch 79/1000
Epoch 00079: val_loss did not improve from 27.87502
Epoch 80/1000


Epoch 00080: val_loss improved from 27.87502 to 27.87502, saving model to models/bert_t21_best
INFO:tensorflow:Assets written to: models/bert_t21_best/assets


INFO:tensorflow:Assets written to: models/bert_t21_best/assets


Epoch 81/1000
Epoch 00081: val_loss did not improve from 27.87502


Epoch 82/1000
Epoch 00082: val_loss did not improve from 27.87502
Epoch 83/1000
Epoch 00083: val_loss did not improve from 27.87502
Epoch 84/1000


Epoch 00084: val_loss did not improve from 27.87502
Epoch 85/1000
Epoch 00085: val_loss did not improve from 27.87502
Epoch 86/1000


Epoch 00086: val_loss did not improve from 27.87502
Epoch 87/1000
Epoch 00087: val_loss did not improve from 27.87502
Epoch 88/1000


Epoch 00088: val_loss did not improve from 27.87502
Epoch 89/1000
Epoch 00089: val_loss did not improve from 27.87502
Epoch 90/1000


Epoch 00090: val_loss did not improve from 27.87502
Epoch 91/1000
Epoch 00091: val_loss did not improve from 27.87502
Epoch 92/1000


Epoch 00092: val_loss did not improve from 27.87502
Epoch 93/1000
Epoch 00093: val_loss improved from 27.87502 to 27.87501, saving model to models/bert_t21_best
INFO:tensorflow:Assets written to: models/bert_t21_best/assets


INFO:tensorflow:Assets written to: models/bert_t21_best/assets


Epoch 94/1000
Epoch 00094: val_loss did not improve from 27.87501


Epoch 95/1000
Epoch 00095: val_loss did not improve from 27.87501
Epoch 96/1000
Epoch 00096: val_loss did not improve from 27.87501
Epoch 97/1000


Epoch 00097: val_loss did not improve from 27.87501
Epoch 98/1000
Epoch 00098: val_loss improved from 27.87501 to 27.87501, saving model to models/bert_t21_best
INFO:tensorflow:Assets written to: models/bert_t21_best/assets


INFO:tensorflow:Assets written to: models/bert_t21_best/assets


Epoch 99/1000
Epoch 00099: val_loss did not improve from 27.87501


Epoch 100/1000
Epoch 00100: val_loss did not improve from 27.87501
Epoch 101/1000
Epoch 00101: val_loss did not improve from 27.87501
Epoch 102/1000


Epoch 00102: val_loss did not improve from 27.87501
Epoch 103/1000
Epoch 00103: val_loss did not improve from 27.87501
Epoch 104/1000


Epoch 00104: val_loss did not improve from 27.87501
Epoch 105/1000
Epoch 00105: val_loss did not improve from 27.87501
Epoch 106/1000


Epoch 00106: val_loss did not improve from 27.87501
Epoch 107/1000
Epoch 00107: val_loss did not improve from 27.87501
Epoch 108/1000


Epoch 00108: val_loss did not improve from 27.87501
Epoch 109/1000
Epoch 00109: val_loss did not improve from 27.87501
Epoch 110/1000


Epoch 00110: val_loss did not improve from 27.87501
Epoch 111/1000
Epoch 00111: val_loss did not improve from 27.87501
Epoch 112/1000


Epoch 00112: val_loss did not improve from 27.87501
Epoch 113/1000
Epoch 00113: val_loss did not improve from 27.87501
Epoch 114/1000


Epoch 00114: val_loss did not improve from 27.87501
Epoch 115/1000
Epoch 00115: val_loss did not improve from 27.87501
Epoch 116/1000


Epoch 00116: val_loss improved from 27.87501 to 27.87501, saving model to models/bert_t21_best
INFO:tensorflow:Assets written to: models/bert_t21_best/assets


INFO:tensorflow:Assets written to: models/bert_t21_best/assets


Epoch 117/1000
Epoch 00117: val_loss did not improve from 27.87501


Epoch 118/1000
Epoch 00118: val_loss did not improve from 27.87501
Epoch 119/1000
Epoch 00119: val_loss did not improve from 27.87501
Epoch 120/1000


Epoch 00120: val_loss did not improve from 27.87501
Epoch 121/1000
Epoch 00121: val_loss did not improve from 27.87501
Epoch 122/1000


Epoch 00122: val_loss did not improve from 27.87501
Epoch 123/1000
Epoch 00123: val_loss did not improve from 27.87501
Epoch 124/1000


Epoch 00124: val_loss did not improve from 27.87501
Epoch 125/1000
Epoch 00125: val_loss did not improve from 27.87501
Epoch 126/1000


Epoch 00126: val_loss did not improve from 27.87501
Epoch 127/1000
Epoch 00127: val_loss improved from 27.87501 to 27.87501, saving model to models/bert_t21_best
INFO:tensorflow:Assets written to: models/bert_t21_best/assets


INFO:tensorflow:Assets written to: models/bert_t21_best/assets


Epoch 128/1000
Epoch 00128: val_loss did not improve from 27.87501


Epoch 129/1000
Epoch 00129: val_loss did not improve from 27.87501
Epoch 130/1000
Epoch 00130: val_loss did not improve from 27.87501
Epoch 131/1000


Epoch 00131: val_loss did not improve from 27.87501
Epoch 132/1000
Epoch 00132: val_loss did not improve from 27.87501
Epoch 133/1000


Epoch 00133: val_loss did not improve from 27.87501
Epoch 134/1000
Epoch 00134: val_loss did not improve from 27.87501
Epoch 135/1000


Epoch 00135: val_loss did not improve from 27.87501
Epoch 136/1000
Epoch 00136: val_loss improved from 27.87501 to 27.87500, saving model to models/bert_t21_best
INFO:tensorflow:Assets written to: models/bert_t21_best/assets


INFO:tensorflow:Assets written to: models/bert_t21_best/assets


Epoch 137/1000
Epoch 00137: val_loss did not improve from 27.87500


Epoch 138/1000
Epoch 00138: val_loss did not improve from 27.87500
Epoch 139/1000
Epoch 00139: val_loss did not improve from 27.87500
Epoch 140/1000


Epoch 00140: val_loss did not improve from 27.87500
Epoch 141/1000
Epoch 00141: val_loss did not improve from 27.87500
Epoch 142/1000


Epoch 00142: val_loss did not improve from 27.87500
Epoch 143/1000
Epoch 00143: val_loss did not improve from 27.87500
Epoch 144/1000


Epoch 00144: val_loss did not improve from 27.87500
Epoch 145/1000
Epoch 00145: val_loss did not improve from 27.87500
Epoch 146/1000


Epoch 00146: val_loss did not improve from 27.87500
Epoch 147/1000
Epoch 00147: val_loss did not improve from 27.87500
Epoch 148/1000


Epoch 00148: val_loss did not improve from 27.87500
Epoch 149/1000
Epoch 00149: val_loss did not improve from 27.87500
Epoch 150/1000


Epoch 00150: val_loss did not improve from 27.87500
Epoch 151/1000
Epoch 00151: val_loss did not improve from 27.87500
Epoch 152/1000


Epoch 00152: val_loss did not improve from 27.87500
Epoch 153/1000
Epoch 00153: val_loss did not improve from 27.87500
Epoch 154/1000


Epoch 00154: val_loss did not improve from 27.87500
Epoch 155/1000
Epoch 00155: val_loss did not improve from 27.87500
Epoch 156/1000


Epoch 00156: val_loss did not improve from 27.87500
Epoch 157/1000
Epoch 00157: val_loss did not improve from 27.87500
Epoch 158/1000


Epoch 00158: val_loss did not improve from 27.87500
Epoch 159/1000
Epoch 00159: val_loss did not improve from 27.87500
Epoch 160/1000


Epoch 00160: val_loss did not improve from 27.87500
Epoch 161/1000
Epoch 00161: val_loss did not improve from 27.87500
Epoch 162/1000


Epoch 00162: val_loss did not improve from 27.87500
Epoch 163/1000
Epoch 00163: val_loss did not improve from 27.87500
Epoch 164/1000


Epoch 00164: val_loss did not improve from 27.87500
Epoch 165/1000
Epoch 00165: val_loss did not improve from 27.87500
Epoch 166/1000


Epoch 00166: val_loss did not improve from 27.87500
Epoch 167/1000
Epoch 00167: val_loss did not improve from 27.87500
Epoch 168/1000


Epoch 00168: val_loss did not improve from 27.87500
Epoch 169/1000
Epoch 00169: val_loss did not improve from 27.87500
Epoch 170/1000


Epoch 00170: val_loss improved from 27.87500 to 27.87500, saving model to models/bert_t21_best
INFO:tensorflow:Assets written to: models/bert_t21_best/assets


INFO:tensorflow:Assets written to: models/bert_t21_best/assets


Epoch 171/1000
Epoch 00171: val_loss did not improve from 27.87500


Epoch 172/1000
Epoch 00172: val_loss did not improve from 27.87500
Epoch 173/1000
Epoch 00173: val_loss did not improve from 27.87500
Epoch 174/1000


Epoch 00174: val_loss did not improve from 27.87500
Epoch 175/1000
Epoch 00175: val_loss did not improve from 27.87500
Epoch 176/1000


Epoch 00176: val_loss did not improve from 27.87500
Epoch 177/1000
Epoch 00177: val_loss did not improve from 27.87500
Epoch 178/1000


Epoch 00178: val_loss improved from 27.87500 to 27.87500, saving model to models/bert_t21_best
INFO:tensorflow:Assets written to: models/bert_t21_best/assets


INFO:tensorflow:Assets written to: models/bert_t21_best/assets


Epoch 179/1000
Epoch 00179: val_loss did not improve from 27.87500


Epoch 180/1000
Epoch 00180: val_loss did not improve from 27.87500
Epoch 181/1000
Epoch 00181: val_loss did not improve from 27.87500
Epoch 182/1000


Epoch 00182: val_loss did not improve from 27.87500
Epoch 183/1000
Epoch 00183: val_loss did not improve from 27.87500
Epoch 184/1000


Epoch 00184: val_loss did not improve from 27.87500
Epoch 185/1000
Epoch 00185: val_loss did not improve from 27.87500
Epoch 186/1000


Epoch 00186: val_loss did not improve from 27.87500
Epoch 187/1000
Epoch 00187: val_loss did not improve from 27.87500
Epoch 188/1000


Epoch 00188: val_loss did not improve from 27.87500
Epoch 189/1000
Epoch 00189: val_loss did not improve from 27.87500
Epoch 190/1000


Epoch 00190: val_loss did not improve from 27.87500
Epoch 191/1000
Epoch 00191: val_loss did not improve from 27.87500
Epoch 192/1000


Epoch 00192: val_loss did not improve from 27.87500
Epoch 193/1000
Epoch 00193: val_loss did not improve from 27.87500
Epoch 194/1000


Epoch 00194: val_loss did not improve from 27.87500
Epoch 195/1000
Epoch 00195: val_loss did not improve from 27.87500
Epoch 196/1000


Epoch 00196: val_loss did not improve from 27.87500
Epoch 197/1000
Epoch 00197: val_loss did not improve from 27.87500
Epoch 198/1000


Epoch 00198: val_loss did not improve from 27.87500
Epoch 199/1000
Epoch 00199: val_loss did not improve from 27.87500
Epoch 200/1000


Epoch 00200: val_loss did not improve from 27.87500
Epoch 201/1000
Epoch 00201: val_loss did not improve from 27.87500
Epoch 202/1000


Epoch 00202: val_loss did not improve from 27.87500
Epoch 203/1000
Epoch 00203: val_loss did not improve from 27.87500
Epoch 204/1000


Epoch 00204: val_loss did not improve from 27.87500
Epoch 205/1000
Epoch 00205: val_loss did not improve from 27.87500
Epoch 206/1000


Epoch 00206: val_loss did not improve from 27.87500
Epoch 207/1000
Epoch 00207: val_loss did not improve from 27.87500
Epoch 208/1000


Epoch 00208: val_loss did not improve from 27.87500
Epoch 209/1000
Epoch 00209: val_loss did not improve from 27.87500
Epoch 210/1000


Epoch 00210: val_loss did not improve from 27.87500
Epoch 211/1000
Epoch 00211: val_loss did not improve from 27.87500
Epoch 212/1000


Epoch 00212: val_loss did not improve from 27.87500
Epoch 213/1000
Epoch 00213: val_loss did not improve from 27.87500
Epoch 214/1000


Epoch 00214: val_loss did not improve from 27.87500
Epoch 215/1000
Epoch 00215: val_loss did not improve from 27.87500
Epoch 216/1000


Epoch 00216: val_loss did not improve from 27.87500
Epoch 217/1000
Epoch 00217: val_loss did not improve from 27.87500
Epoch 218/1000


Epoch 00218: val_loss did not improve from 27.87500
Epoch 219/1000
Epoch 00219: val_loss did not improve from 27.87500
Epoch 220/1000


Epoch 00220: val_loss did not improve from 27.87500
Epoch 221/1000
Epoch 00221: val_loss did not improve from 27.87500
Epoch 222/1000


Epoch 00222: val_loss did not improve from 27.87500
Epoch 223/1000
Epoch 00223: val_loss did not improve from 27.87500
Epoch 224/1000


Epoch 00224: val_loss did not improve from 27.87500
Epoch 225/1000
Epoch 00225: val_loss did not improve from 27.87500
Epoch 226/1000


Epoch 00226: val_loss did not improve from 27.87500
Epoch 227/1000
Epoch 00227: val_loss did not improve from 27.87500
Epoch 228/1000


Epoch 00228: val_loss did not improve from 27.87500
Epoch 229/1000
Epoch 00229: val_loss did not improve from 27.87500
Epoch 230/1000


Epoch 00230: val_loss did not improve from 27.87500
Epoch 231/1000
Epoch 00231: val_loss did not improve from 27.87500
Epoch 232/1000


Epoch 00232: val_loss did not improve from 27.87500
Epoch 233/1000
Epoch 00233: val_loss did not improve from 27.87500
Epoch 234/1000


Epoch 00234: val_loss did not improve from 27.87500
Epoch 235/1000
Epoch 00235: val_loss did not improve from 27.87500
Epoch 236/1000


Epoch 00236: val_loss did not improve from 27.87500
Epoch 237/1000
Epoch 00237: val_loss did not improve from 27.87500
Epoch 238/1000


Epoch 00238: val_loss did not improve from 27.87500
Epoch 239/1000
Epoch 00239: val_loss did not improve from 27.87500
Epoch 240/1000


Epoch 00240: val_loss did not improve from 27.87500
Epoch 241/1000
Epoch 00241: val_loss did not improve from 27.87500
Epoch 242/1000


Epoch 00242: val_loss did not improve from 27.87500
Epoch 243/1000
Epoch 00243: val_loss did not improve from 27.87500
Epoch 244/1000


Epoch 00244: val_loss did not improve from 27.87500
Epoch 245/1000
Epoch 00245: val_loss did not improve from 27.87500
Epoch 246/1000


Epoch 00246: val_loss did not improve from 27.87500
Epoch 247/1000
Epoch 00247: val_loss did not improve from 27.87500
Epoch 248/1000


Epoch 00248: val_loss did not improve from 27.87500
Epoch 249/1000
Epoch 00249: val_loss did not improve from 27.87500
Epoch 250/1000


Epoch 00250: val_loss did not improve from 27.87500
Epoch 251/1000
Epoch 00251: val_loss did not improve from 27.87500
Epoch 252/1000


Epoch 00252: val_loss did not improve from 27.87500
Epoch 253/1000
Epoch 00253: val_loss did not improve from 27.87500
Epoch 254/1000


Epoch 00254: val_loss did not improve from 27.87500
Epoch 255/1000
Epoch 00255: val_loss did not improve from 27.87500
Epoch 256/1000


Epoch 00256: val_loss did not improve from 27.87500
Epoch 257/1000
Epoch 00257: val_loss did not improve from 27.87500
Epoch 258/1000


Epoch 00258: val_loss did not improve from 27.87500
Epoch 259/1000
Epoch 00259: val_loss did not improve from 27.87500
Epoch 260/1000


Epoch 00260: val_loss did not improve from 27.87500
Epoch 261/1000
Epoch 00261: val_loss did not improve from 27.87500
Epoch 262/1000


Epoch 00262: val_loss did not improve from 27.87500
Epoch 263/1000
Epoch 00263: val_loss did not improve from 27.87500
Epoch 264/1000


Epoch 00264: val_loss did not improve from 27.87500
Epoch 265/1000
Epoch 00265: val_loss did not improve from 27.87500
Epoch 266/1000


Epoch 00266: val_loss did not improve from 27.87500
Epoch 267/1000
Epoch 00267: val_loss did not improve from 27.87500
Epoch 268/1000


Epoch 00268: val_loss did not improve from 27.87500
Epoch 269/1000
Epoch 00269: val_loss did not improve from 27.87500
Epoch 270/1000


Epoch 00270: val_loss did not improve from 27.87500
Epoch 271/1000
Epoch 00271: val_loss did not improve from 27.87500
Epoch 272/1000


Epoch 00272: val_loss did not improve from 27.87500
Epoch 273/1000
Epoch 00273: val_loss did not improve from 27.87500
Epoch 274/1000


Epoch 00274: val_loss did not improve from 27.87500
Epoch 275/1000
Epoch 00275: val_loss did not improve from 27.87500
Epoch 276/1000


Epoch 00276: val_loss did not improve from 27.87500
Epoch 277/1000
Epoch 00277: val_loss did not improve from 27.87500
Epoch 278/1000


Epoch 00278: val_loss did not improve from 27.87500
Epoch 279/1000
Epoch 00279: val_loss did not improve from 27.87500
Epoch 280/1000


Epoch 00280: val_loss did not improve from 27.87500
Epoch 281/1000
Epoch 00281: val_loss did not improve from 27.87500
Epoch 282/1000


Epoch 00282: val_loss did not improve from 27.87500
Epoch 283/1000
Epoch 00283: val_loss did not improve from 27.87500
Epoch 284/1000


Epoch 00284: val_loss did not improve from 27.87500
Epoch 285/1000
Epoch 00285: val_loss did not improve from 27.87500
Epoch 286/1000


Epoch 00286: val_loss did not improve from 27.87500
Epoch 287/1000
Epoch 00287: val_loss did not improve from 27.87500
Epoch 288/1000


Epoch 00288: val_loss did not improve from 27.87500
Epoch 289/1000
Epoch 00289: val_loss did not improve from 27.87500
Epoch 290/1000


Epoch 00290: val_loss did not improve from 27.87500
Epoch 291/1000
Epoch 00291: val_loss did not improve from 27.87500
Epoch 292/1000


Epoch 00292: val_loss did not improve from 27.87500
Epoch 293/1000
Epoch 00293: val_loss did not improve from 27.87500
Epoch 294/1000


Epoch 00294: val_loss did not improve from 27.87500
Epoch 295/1000
Epoch 00295: val_loss did not improve from 27.87500
Epoch 296/1000


Epoch 00296: val_loss did not improve from 27.87500
Epoch 297/1000
Epoch 00297: val_loss did not improve from 27.87500
Epoch 298/1000


Epoch 00298: val_loss did not improve from 27.87500
Epoch 299/1000
Epoch 00299: val_loss did not improve from 27.87500
Epoch 300/1000


Epoch 00300: val_loss did not improve from 27.87500
Epoch 301/1000
Epoch 00301: val_loss did not improve from 27.87500
Epoch 302/1000


Epoch 00302: val_loss did not improve from 27.87500
Epoch 303/1000
Epoch 00303: val_loss did not improve from 27.87500
Epoch 304/1000


Epoch 00304: val_loss did not improve from 27.87500
Epoch 305/1000
Epoch 00305: val_loss did not improve from 27.87500
Epoch 306/1000


Epoch 00306: val_loss did not improve from 27.87500
Epoch 307/1000
Epoch 00307: val_loss did not improve from 27.87500
Epoch 308/1000


Epoch 00308: val_loss did not improve from 27.87500
Epoch 309/1000
Epoch 00309: val_loss did not improve from 27.87500
Epoch 310/1000


Epoch 00310: val_loss did not improve from 27.87500
Epoch 311/1000
Epoch 00311: val_loss did not improve from 27.87500
Epoch 312/1000


Epoch 00312: val_loss did not improve from 27.87500
Epoch 313/1000
Epoch 00313: val_loss did not improve from 27.87500
Epoch 314/1000


Epoch 00314: val_loss did not improve from 27.87500
Epoch 315/1000
Epoch 00315: val_loss did not improve from 27.87500
Epoch 316/1000


Epoch 00316: val_loss did not improve from 27.87500
Epoch 317/1000
Epoch 00317: val_loss did not improve from 27.87500
Epoch 318/1000


Epoch 00318: val_loss did not improve from 27.87500
Epoch 319/1000
Epoch 00319: val_loss did not improve from 27.87500
Epoch 320/1000


Epoch 00320: val_loss did not improve from 27.87500
Epoch 321/1000
Epoch 00321: val_loss did not improve from 27.87500
Epoch 322/1000


Epoch 00322: val_loss did not improve from 27.87500
Epoch 323/1000
Epoch 00323: val_loss did not improve from 27.87500
Epoch 324/1000


Epoch 00324: val_loss did not improve from 27.87500
Epoch 325/1000
Epoch 00325: val_loss did not improve from 27.87500
Epoch 326/1000


Epoch 00326: val_loss did not improve from 27.87500
Epoch 327/1000
Epoch 00327: val_loss did not improve from 27.87500
Epoch 328/1000


Epoch 00328: val_loss did not improve from 27.87500
Epoch 329/1000
Epoch 00329: val_loss did not improve from 27.87500
Epoch 330/1000


Epoch 00330: val_loss did not improve from 27.87500
Epoch 331/1000
Epoch 00331: val_loss did not improve from 27.87500
Epoch 332/1000


Epoch 00332: val_loss did not improve from 27.87500
Epoch 333/1000
Epoch 00333: val_loss did not improve from 27.87500
Epoch 334/1000


Epoch 00334: val_loss did not improve from 27.87500
Epoch 335/1000
Epoch 00335: val_loss did not improve from 27.87500
Epoch 336/1000


Epoch 00336: val_loss did not improve from 27.87500
Epoch 337/1000
Epoch 00337: val_loss did not improve from 27.87500
Epoch 338/1000


Epoch 00338: val_loss did not improve from 27.87500
Epoch 339/1000
Epoch 00339: val_loss did not improve from 27.87500
Epoch 340/1000


Epoch 00340: val_loss did not improve from 27.87500
Epoch 341/1000
Epoch 00341: val_loss did not improve from 27.87500
Epoch 342/1000


Epoch 00342: val_loss did not improve from 27.87500
Epoch 343/1000
Epoch 00343: val_loss did not improve from 27.87500
Epoch 344/1000


Epoch 00344: val_loss did not improve from 27.87500
Epoch 345/1000
Epoch 00345: val_loss did not improve from 27.87500
Epoch 346/1000


Epoch 00346: val_loss did not improve from 27.87500
Epoch 347/1000
Epoch 00347: val_loss did not improve from 27.87500
Epoch 348/1000


Epoch 00348: val_loss did not improve from 27.87500
Epoch 349/1000
Epoch 00349: val_loss did not improve from 27.87500
Epoch 350/1000


Epoch 00350: val_loss did not improve from 27.87500
Epoch 351/1000
Epoch 00351: val_loss did not improve from 27.87500
Epoch 352/1000


Epoch 00352: val_loss did not improve from 27.87500
Epoch 353/1000
Epoch 00353: val_loss did not improve from 27.87500
Epoch 354/1000


Epoch 00354: val_loss did not improve from 27.87500
Epoch 355/1000
Epoch 00355: val_loss did not improve from 27.87500
Epoch 356/1000


Epoch 00356: val_loss did not improve from 27.87500
Epoch 357/1000
Epoch 00357: val_loss did not improve from 27.87500
Epoch 358/1000


Epoch 00358: val_loss did not improve from 27.87500
Epoch 359/1000
Epoch 00359: val_loss did not improve from 27.87500
Epoch 360/1000


Epoch 00360: val_loss did not improve from 27.87500
Epoch 361/1000
Epoch 00361: val_loss did not improve from 27.87500
Epoch 362/1000


Epoch 00362: val_loss did not improve from 27.87500
Epoch 363/1000
Epoch 00363: val_loss did not improve from 27.87500
Epoch 364/1000


Epoch 00364: val_loss did not improve from 27.87500
Epoch 365/1000
Epoch 00365: val_loss did not improve from 27.87500
Epoch 366/1000


Epoch 00366: val_loss did not improve from 27.87500
Epoch 367/1000
Epoch 00367: val_loss did not improve from 27.87500
Epoch 368/1000


Epoch 00368: val_loss did not improve from 27.87500
Epoch 369/1000
Epoch 00369: val_loss did not improve from 27.87500
Epoch 370/1000


Epoch 00370: val_loss did not improve from 27.87500
Epoch 371/1000
Epoch 00371: val_loss did not improve from 27.87500
Epoch 372/1000


Epoch 00372: val_loss did not improve from 27.87500
Epoch 373/1000
Epoch 00373: val_loss did not improve from 27.87500
Epoch 374/1000


Epoch 00374: val_loss did not improve from 27.87500
Epoch 375/1000
Epoch 00375: val_loss did not improve from 27.87500
Epoch 376/1000


Epoch 00376: val_loss did not improve from 27.87500
Epoch 377/1000
Epoch 00377: val_loss did not improve from 27.87500
Epoch 378/1000


Epoch 00378: val_loss did not improve from 27.87500
Epoch 379/1000
Epoch 00379: val_loss did not improve from 27.87500
Epoch 380/1000


Epoch 00380: val_loss did not improve from 27.87500
Epoch 381/1000
Epoch 00381: val_loss did not improve from 27.87500
Epoch 382/1000


Epoch 00382: val_loss did not improve from 27.87500
Epoch 383/1000
Epoch 00383: val_loss did not improve from 27.87500
Epoch 384/1000


Epoch 00384: val_loss did not improve from 27.87500
Epoch 385/1000
Epoch 00385: val_loss did not improve from 27.87500
Epoch 386/1000


Epoch 00386: val_loss improved from 27.87500 to 27.87500, saving model to models/bert_t21_best
INFO:tensorflow:Assets written to: models/bert_t21_best/assets


INFO:tensorflow:Assets written to: models/bert_t21_best/assets


Epoch 387/1000
Epoch 00387: val_loss did not improve from 27.87500


Epoch 388/1000
Epoch 00388: val_loss did not improve from 27.87500
Epoch 389/1000
Epoch 00389: val_loss did not improve from 27.87500
Epoch 390/1000


Epoch 00390: val_loss did not improve from 27.87500
Epoch 391/1000
Epoch 00391: val_loss did not improve from 27.87500
Epoch 392/1000


Epoch 00392: val_loss did not improve from 27.87500
Epoch 393/1000
Epoch 00393: val_loss did not improve from 27.87500
Epoch 394/1000


Epoch 00394: val_loss did not improve from 27.87500
Epoch 395/1000
Epoch 00395: val_loss did not improve from 27.87500
Epoch 396/1000


Epoch 00396: val_loss did not improve from 27.87500
Epoch 397/1000
Epoch 00397: val_loss did not improve from 27.87500
Epoch 398/1000


Epoch 00398: val_loss did not improve from 27.87500
Epoch 399/1000
Epoch 00399: val_loss did not improve from 27.87500
Epoch 400/1000


Epoch 00400: val_loss did not improve from 27.87500
Epoch 401/1000
Epoch 00401: val_loss did not improve from 27.87500
Epoch 402/1000


Epoch 00402: val_loss did not improve from 27.87500
Epoch 403/1000
Epoch 00403: val_loss did not improve from 27.87500
Epoch 404/1000


Epoch 00404: val_loss did not improve from 27.87500
Epoch 405/1000
Epoch 00405: val_loss did not improve from 27.87500
Epoch 406/1000


Epoch 00406: val_loss did not improve from 27.87500
Epoch 407/1000
Epoch 00407: val_loss did not improve from 27.87500
Epoch 408/1000


Epoch 00408: val_loss did not improve from 27.87500
Epoch 409/1000
Epoch 00409: val_loss did not improve from 27.87500
Epoch 410/1000


Epoch 00410: val_loss did not improve from 27.87500
Epoch 411/1000
Epoch 00411: val_loss did not improve from 27.87500
Epoch 412/1000


Epoch 00412: val_loss did not improve from 27.87500
Epoch 413/1000
Epoch 00413: val_loss did not improve from 27.87500
Epoch 414/1000


Epoch 00414: val_loss did not improve from 27.87500
Epoch 415/1000
Epoch 00415: val_loss did not improve from 27.87500
Epoch 416/1000


Epoch 00416: val_loss did not improve from 27.87500
Epoch 417/1000
Epoch 00417: val_loss did not improve from 27.87500
Epoch 418/1000


Epoch 00418: val_loss did not improve from 27.87500
Epoch 419/1000
Epoch 00419: val_loss did not improve from 27.87500
Epoch 420/1000


Epoch 00420: val_loss did not improve from 27.87500
Epoch 421/1000
Epoch 00421: val_loss did not improve from 27.87500
Epoch 422/1000


Epoch 00422: val_loss did not improve from 27.87500
Epoch 423/1000
Epoch 00423: val_loss did not improve from 27.87500
Epoch 424/1000


Epoch 00424: val_loss did not improve from 27.87500
Epoch 425/1000
Epoch 00425: val_loss did not improve from 27.87500
Epoch 426/1000


Epoch 00426: val_loss did not improve from 27.87500
Epoch 427/1000
Epoch 00427: val_loss did not improve from 27.87500
Epoch 428/1000


Epoch 00428: val_loss did not improve from 27.87500
Epoch 429/1000
Epoch 00429: val_loss did not improve from 27.87500
Epoch 430/1000


Epoch 00430: val_loss did not improve from 27.87500
Epoch 431/1000
Epoch 00431: val_loss did not improve from 27.87500
Epoch 432/1000


Epoch 00432: val_loss did not improve from 27.87500
Epoch 433/1000
Epoch 00433: val_loss did not improve from 27.87500
Epoch 434/1000


Epoch 00434: val_loss did not improve from 27.87500
Epoch 435/1000
Epoch 00435: val_loss did not improve from 27.87500
Epoch 436/1000


Epoch 00436: val_loss did not improve from 27.87500
Epoch 437/1000
Epoch 00437: val_loss did not improve from 27.87500
Epoch 438/1000


Epoch 00438: val_loss did not improve from 27.87500
Epoch 439/1000
Epoch 00439: val_loss did not improve from 27.87500
Epoch 440/1000


Epoch 00440: val_loss did not improve from 27.87500
Epoch 441/1000
Epoch 00441: val_loss did not improve from 27.87500
Epoch 442/1000


Epoch 00442: val_loss did not improve from 27.87500
Epoch 443/1000
Epoch 00443: val_loss did not improve from 27.87500
Epoch 444/1000


Epoch 00444: val_loss did not improve from 27.87500
Epoch 445/1000
Epoch 00445: val_loss did not improve from 27.87500
Epoch 446/1000


Epoch 00446: val_loss did not improve from 27.87500
Epoch 447/1000
Epoch 00447: val_loss did not improve from 27.87500
Epoch 448/1000


Epoch 00448: val_loss did not improve from 27.87500
Epoch 449/1000
Epoch 00449: val_loss did not improve from 27.87500
Epoch 450/1000


Epoch 00450: val_loss did not improve from 27.87500
Epoch 451/1000
Epoch 00451: val_loss did not improve from 27.87500
Epoch 452/1000


Epoch 00452: val_loss did not improve from 27.87500
Epoch 453/1000
Epoch 00453: val_loss did not improve from 27.87500
Epoch 454/1000


Epoch 00454: val_loss did not improve from 27.87500
Epoch 455/1000
Epoch 00455: val_loss did not improve from 27.87500
Epoch 456/1000


Epoch 00456: val_loss did not improve from 27.87500
Epoch 457/1000
Epoch 00457: val_loss did not improve from 27.87500
Epoch 458/1000


Epoch 00458: val_loss did not improve from 27.87500
Epoch 459/1000
Epoch 00459: val_loss did not improve from 27.87500
Epoch 460/1000


Epoch 00460: val_loss did not improve from 27.87500
Epoch 461/1000
Epoch 00461: val_loss did not improve from 27.87500
Epoch 462/1000


Epoch 00462: val_loss did not improve from 27.87500
Epoch 463/1000
Epoch 00463: val_loss did not improve from 27.87500
Epoch 464/1000


Epoch 00464: val_loss did not improve from 27.87500
Epoch 465/1000
Epoch 00465: val_loss did not improve from 27.87500
Epoch 466/1000


Epoch 00466: val_loss did not improve from 27.87500
Epoch 467/1000
Epoch 00467: val_loss did not improve from 27.87500
Epoch 468/1000


Epoch 00468: val_loss did not improve from 27.87500
Epoch 469/1000
Epoch 00469: val_loss did not improve from 27.87500
Epoch 470/1000


Epoch 00470: val_loss did not improve from 27.87500
Epoch 471/1000
Epoch 00471: val_loss did not improve from 27.87500
Epoch 472/1000


Epoch 00472: val_loss did not improve from 27.87500
Epoch 473/1000
Epoch 00473: val_loss did not improve from 27.87500
Epoch 474/1000


Epoch 00474: val_loss did not improve from 27.87500
Epoch 475/1000
Epoch 00475: val_loss did not improve from 27.87500
Epoch 476/1000


Epoch 00476: val_loss did not improve from 27.87500
Epoch 477/1000
Epoch 00477: val_loss did not improve from 27.87500
Epoch 478/1000


Epoch 00478: val_loss did not improve from 27.87500
Epoch 479/1000
Epoch 00479: val_loss did not improve from 27.87500
Epoch 480/1000


Epoch 00480: val_loss did not improve from 27.87500
Epoch 481/1000
Epoch 00481: val_loss did not improve from 27.87500
Epoch 482/1000


Epoch 00482: val_loss did not improve from 27.87500
Epoch 483/1000
Epoch 00483: val_loss did not improve from 27.87500
Epoch 484/1000


Epoch 00484: val_loss did not improve from 27.87500
Epoch 485/1000
Epoch 00485: val_loss did not improve from 27.87500
Epoch 486/1000


Epoch 00486: val_loss did not improve from 27.87500
Epoch 487/1000
Epoch 00487: val_loss did not improve from 27.87500
Epoch 488/1000


Epoch 00488: val_loss did not improve from 27.87500
Epoch 489/1000
Epoch 00489: val_loss did not improve from 27.87500
Epoch 490/1000


Epoch 00490: val_loss did not improve from 27.87500
Epoch 491/1000
Epoch 00491: val_loss did not improve from 27.87500
Epoch 492/1000


Epoch 00492: val_loss did not improve from 27.87500
Epoch 493/1000
Epoch 00493: val_loss did not improve from 27.87500
Epoch 494/1000


Epoch 00494: val_loss did not improve from 27.87500
Epoch 495/1000
Epoch 00495: val_loss did not improve from 27.87500
Epoch 496/1000


Epoch 00496: val_loss did not improve from 27.87500
Epoch 497/1000
Epoch 00497: val_loss did not improve from 27.87500
Epoch 498/1000


Epoch 00498: val_loss did not improve from 27.87500
Epoch 499/1000
Epoch 00499: val_loss did not improve from 27.87500
Epoch 500/1000


Epoch 00500: val_loss did not improve from 27.87500
Epoch 501/1000
Epoch 00501: val_loss did not improve from 27.87500
Epoch 502/1000


Epoch 00502: val_loss did not improve from 27.87500
Epoch 503/1000
Epoch 00503: val_loss did not improve from 27.87500
Epoch 504/1000


Epoch 00504: val_loss did not improve from 27.87500
Epoch 505/1000
Epoch 00505: val_loss did not improve from 27.87500
Epoch 506/1000


Epoch 00506: val_loss did not improve from 27.87500
Epoch 507/1000
Epoch 00507: val_loss did not improve from 27.87500
Epoch 508/1000


Epoch 00508: val_loss did not improve from 27.87500
Epoch 509/1000
Epoch 00509: val_loss did not improve from 27.87500
Epoch 510/1000


Epoch 00510: val_loss did not improve from 27.87500
Epoch 511/1000
Epoch 00511: val_loss did not improve from 27.87500
Epoch 512/1000


Epoch 00512: val_loss did not improve from 27.87500
Epoch 513/1000
Epoch 00513: val_loss did not improve from 27.87500
Epoch 514/1000


Epoch 00514: val_loss did not improve from 27.87500
Epoch 515/1000
Epoch 00515: val_loss did not improve from 27.87500
Epoch 516/1000


Epoch 00516: val_loss did not improve from 27.87500
Epoch 517/1000
Epoch 00517: val_loss did not improve from 27.87500
Epoch 518/1000


Epoch 00518: val_loss did not improve from 27.87500
Epoch 519/1000
Epoch 00519: val_loss did not improve from 27.87500
Epoch 520/1000


Epoch 00520: val_loss did not improve from 27.87500
Epoch 521/1000
Epoch 00521: val_loss did not improve from 27.87500
Epoch 522/1000


Epoch 00522: val_loss did not improve from 27.87500
Epoch 523/1000
Epoch 00523: val_loss did not improve from 27.87500
Epoch 524/1000


Epoch 00524: val_loss did not improve from 27.87500
Epoch 525/1000
Epoch 00525: val_loss did not improve from 27.87500
Epoch 526/1000


Epoch 00526: val_loss did not improve from 27.87500
Epoch 527/1000
Epoch 00527: val_loss did not improve from 27.87500
Epoch 528/1000


Epoch 00528: val_loss did not improve from 27.87500
Epoch 529/1000
Epoch 00529: val_loss did not improve from 27.87500
Epoch 530/1000


Epoch 00530: val_loss did not improve from 27.87500
Epoch 531/1000
Epoch 00531: val_loss did not improve from 27.87500
Epoch 532/1000


Epoch 00532: val_loss did not improve from 27.87500
Epoch 533/1000
Epoch 00533: val_loss did not improve from 27.87500
Epoch 534/1000


Epoch 00534: val_loss did not improve from 27.87500
Epoch 535/1000
Epoch 00535: val_loss did not improve from 27.87500
Epoch 536/1000


Epoch 00536: val_loss did not improve from 27.87500
Epoch 537/1000
Epoch 00537: val_loss did not improve from 27.87500
Epoch 538/1000


Epoch 00538: val_loss did not improve from 27.87500
Epoch 539/1000
Epoch 00539: val_loss did not improve from 27.87500
Epoch 540/1000


Epoch 00540: val_loss did not improve from 27.87500
Epoch 541/1000
Epoch 00541: val_loss did not improve from 27.87500
Epoch 542/1000


Epoch 00542: val_loss did not improve from 27.87500
Epoch 543/1000
Epoch 00543: val_loss did not improve from 27.87500
Epoch 544/1000


Epoch 00544: val_loss did not improve from 27.87500
Epoch 545/1000
Epoch 00545: val_loss did not improve from 27.87500
Epoch 546/1000


Epoch 00546: val_loss did not improve from 27.87500
Epoch 547/1000
Epoch 00547: val_loss did not improve from 27.87500
Epoch 548/1000


Epoch 00548: val_loss did not improve from 27.87500
Epoch 549/1000
Epoch 00549: val_loss did not improve from 27.87500
Epoch 550/1000


Epoch 00550: val_loss did not improve from 27.87500
Epoch 551/1000
Epoch 00551: val_loss did not improve from 27.87500
Epoch 552/1000


Epoch 00552: val_loss did not improve from 27.87500
Epoch 553/1000
Epoch 00553: val_loss did not improve from 27.87500
Epoch 554/1000


Epoch 00554: val_loss did not improve from 27.87500
Epoch 555/1000
Epoch 00555: val_loss did not improve from 27.87500
Epoch 556/1000


Epoch 00556: val_loss did not improve from 27.87500
Epoch 557/1000
Epoch 00557: val_loss did not improve from 27.87500
Epoch 558/1000


Epoch 00558: val_loss did not improve from 27.87500
Epoch 559/1000
Epoch 00559: val_loss did not improve from 27.87500
Epoch 560/1000


Epoch 00560: val_loss did not improve from 27.87500
Epoch 561/1000
Epoch 00561: val_loss did not improve from 27.87500
Epoch 562/1000


Epoch 00562: val_loss did not improve from 27.87500
Epoch 563/1000
Epoch 00563: val_loss did not improve from 27.87500
Epoch 564/1000


Epoch 00564: val_loss did not improve from 27.87500
Epoch 565/1000
Epoch 00565: val_loss did not improve from 27.87500
Epoch 566/1000


Epoch 00566: val_loss did not improve from 27.87500
Epoch 567/1000
Epoch 00567: val_loss did not improve from 27.87500
Epoch 568/1000


Epoch 00568: val_loss did not improve from 27.87500
Epoch 569/1000
Epoch 00569: val_loss did not improve from 27.87500
Epoch 570/1000


Epoch 00570: val_loss did not improve from 27.87500
Epoch 571/1000
Epoch 00571: val_loss did not improve from 27.87500
Epoch 572/1000


Epoch 00572: val_loss did not improve from 27.87500
Epoch 573/1000
Epoch 00573: val_loss did not improve from 27.87500
Epoch 574/1000


Epoch 00574: val_loss did not improve from 27.87500
Epoch 575/1000
Epoch 00575: val_loss did not improve from 27.87500
Epoch 576/1000


Epoch 00576: val_loss did not improve from 27.87500
Epoch 577/1000
Epoch 00577: val_loss did not improve from 27.87500
Epoch 578/1000


Epoch 00578: val_loss did not improve from 27.87500
Epoch 579/1000
Epoch 00579: val_loss did not improve from 27.87500
Epoch 580/1000


Epoch 00580: val_loss did not improve from 27.87500
Epoch 581/1000
Epoch 00581: val_loss did not improve from 27.87500
Epoch 582/1000


Epoch 00582: val_loss did not improve from 27.87500
Epoch 583/1000
Epoch 00583: val_loss did not improve from 27.87500
Epoch 584/1000


Epoch 00584: val_loss did not improve from 27.87500
Epoch 585/1000
Epoch 00585: val_loss did not improve from 27.87500
Epoch 586/1000


Epoch 00586: val_loss did not improve from 27.87500
Epoch 587/1000
Epoch 00587: val_loss did not improve from 27.87500
Epoch 588/1000


Epoch 00588: val_loss did not improve from 27.87500
Epoch 589/1000
Epoch 00589: val_loss did not improve from 27.87500
Epoch 590/1000


Epoch 00590: val_loss did not improve from 27.87500
Epoch 591/1000
Epoch 00591: val_loss did not improve from 27.87500
Epoch 592/1000


Epoch 00592: val_loss did not improve from 27.87500
Epoch 593/1000
Epoch 00593: val_loss did not improve from 27.87500
Epoch 594/1000


Epoch 00594: val_loss did not improve from 27.87500
Epoch 595/1000
Epoch 00595: val_loss did not improve from 27.87500
Epoch 596/1000


Epoch 00596: val_loss did not improve from 27.87500
Epoch 597/1000
Epoch 00597: val_loss did not improve from 27.87500
Epoch 598/1000


Epoch 00598: val_loss did not improve from 27.87500
Epoch 599/1000
Epoch 00599: val_loss did not improve from 27.87500
Epoch 600/1000


Epoch 00600: val_loss did not improve from 27.87500
Epoch 601/1000
Epoch 00601: val_loss did not improve from 27.87500
Epoch 602/1000


Epoch 00602: val_loss did not improve from 27.87500
Epoch 603/1000
Epoch 00603: val_loss did not improve from 27.87500
Epoch 604/1000


Epoch 00604: val_loss did not improve from 27.87500
Epoch 605/1000
Epoch 00605: val_loss did not improve from 27.87500
Epoch 606/1000


Epoch 00606: val_loss did not improve from 27.87500
Epoch 607/1000
Epoch 00607: val_loss did not improve from 27.87500
Epoch 608/1000


Epoch 00608: val_loss did not improve from 27.87500
Epoch 609/1000
Epoch 00609: val_loss did not improve from 27.87500
Epoch 610/1000


Epoch 00610: val_loss did not improve from 27.87500
Epoch 611/1000
Epoch 00611: val_loss did not improve from 27.87500
Epoch 612/1000


Epoch 00612: val_loss did not improve from 27.87500
Epoch 613/1000
Epoch 00613: val_loss did not improve from 27.87500
Epoch 614/1000


Epoch 00614: val_loss did not improve from 27.87500
Epoch 615/1000
Epoch 00615: val_loss did not improve from 27.87500
Epoch 616/1000


Epoch 00616: val_loss did not improve from 27.87500
Epoch 617/1000
Epoch 00617: val_loss did not improve from 27.87500
Epoch 618/1000


Epoch 00618: val_loss did not improve from 27.87500
Epoch 619/1000
Epoch 00619: val_loss did not improve from 27.87500
Epoch 620/1000


Epoch 00620: val_loss did not improve from 27.87500
Epoch 621/1000
Epoch 00621: val_loss did not improve from 27.87500
Epoch 622/1000


Epoch 00622: val_loss did not improve from 27.87500
Epoch 623/1000
Epoch 00623: val_loss did not improve from 27.87500
Epoch 624/1000


Epoch 00624: val_loss did not improve from 27.87500
Epoch 625/1000
Epoch 00625: val_loss did not improve from 27.87500
Epoch 626/1000


Epoch 00626: val_loss did not improve from 27.87500
Epoch 627/1000
Epoch 00627: val_loss did not improve from 27.87500
Epoch 628/1000


Epoch 00628: val_loss did not improve from 27.87500
Epoch 629/1000
Epoch 00629: val_loss did not improve from 27.87500
Epoch 630/1000


Epoch 00630: val_loss did not improve from 27.87500
Epoch 631/1000
Epoch 00631: val_loss did not improve from 27.87500
Epoch 632/1000


Epoch 00632: val_loss did not improve from 27.87500
Epoch 633/1000
Epoch 00633: val_loss did not improve from 27.87500
Epoch 634/1000


Epoch 00634: val_loss did not improve from 27.87500
Epoch 635/1000
Epoch 00635: val_loss did not improve from 27.87500
Epoch 636/1000


Epoch 00636: val_loss did not improve from 27.87500
Epoch 637/1000
Epoch 00637: val_loss did not improve from 27.87500
Epoch 638/1000


Epoch 00638: val_loss did not improve from 27.87500
Epoch 639/1000
Epoch 00639: val_loss did not improve from 27.87500
Epoch 640/1000


Epoch 00640: val_loss did not improve from 27.87500
Epoch 641/1000
Epoch 00641: val_loss did not improve from 27.87500
Epoch 642/1000


Epoch 00642: val_loss did not improve from 27.87500
Epoch 643/1000
Epoch 00643: val_loss did not improve from 27.87500
Epoch 644/1000


Epoch 00644: val_loss did not improve from 27.87500
Epoch 645/1000
Epoch 00645: val_loss did not improve from 27.87500
Epoch 646/1000


Epoch 00646: val_loss did not improve from 27.87500
Epoch 647/1000
Epoch 00647: val_loss did not improve from 27.87500
Epoch 648/1000


Epoch 00648: val_loss did not improve from 27.87500
Epoch 649/1000
Epoch 00649: val_loss did not improve from 27.87500
Epoch 650/1000


Epoch 00650: val_loss did not improve from 27.87500
Epoch 651/1000
Epoch 00651: val_loss did not improve from 27.87500
Epoch 652/1000


Epoch 00652: val_loss did not improve from 27.87500
Epoch 653/1000
Epoch 00653: val_loss did not improve from 27.87500
Epoch 654/1000


Epoch 00654: val_loss did not improve from 27.87500
Epoch 655/1000
Epoch 00655: val_loss did not improve from 27.87500
Epoch 656/1000


Epoch 00656: val_loss did not improve from 27.87500
Epoch 657/1000
Epoch 00657: val_loss did not improve from 27.87500
Epoch 658/1000


Epoch 00658: val_loss did not improve from 27.87500
Epoch 659/1000
Epoch 00659: val_loss did not improve from 27.87500
Epoch 660/1000


Epoch 00660: val_loss did not improve from 27.87500
Epoch 661/1000
Epoch 00661: val_loss did not improve from 27.87500
Epoch 662/1000


Epoch 00662: val_loss did not improve from 27.87500
Epoch 663/1000
Epoch 00663: val_loss did not improve from 27.87500
Epoch 664/1000


Epoch 00664: val_loss did not improve from 27.87500
Epoch 665/1000
Epoch 00665: val_loss did not improve from 27.87500
Epoch 666/1000


Epoch 00666: val_loss did not improve from 27.87500
Epoch 667/1000
Epoch 00667: val_loss did not improve from 27.87500
Epoch 668/1000


Epoch 00668: val_loss did not improve from 27.87500
Epoch 669/1000
Epoch 00669: val_loss did not improve from 27.87500
Epoch 670/1000


Epoch 00670: val_loss did not improve from 27.87500
Epoch 671/1000
Epoch 00671: val_loss did not improve from 27.87500
Epoch 672/1000


Epoch 00672: val_loss did not improve from 27.87500
Epoch 673/1000
Epoch 00673: val_loss did not improve from 27.87500
Epoch 674/1000


Epoch 00674: val_loss did not improve from 27.87500
Epoch 675/1000
Epoch 00675: val_loss did not improve from 27.87500
Epoch 676/1000


Epoch 00676: val_loss did not improve from 27.87500
Epoch 677/1000
Epoch 00677: val_loss did not improve from 27.87500
Epoch 678/1000


Epoch 00678: val_loss did not improve from 27.87500
Epoch 679/1000
Epoch 00679: val_loss did not improve from 27.87500
Epoch 680/1000


Epoch 00680: val_loss did not improve from 27.87500
Epoch 681/1000
Epoch 00681: val_loss did not improve from 27.87500
Epoch 682/1000


Epoch 00682: val_loss did not improve from 27.87500
Epoch 683/1000
Epoch 00683: val_loss did not improve from 27.87500
Epoch 684/1000


Epoch 00684: val_loss did not improve from 27.87500
Epoch 685/1000
Epoch 00685: val_loss did not improve from 27.87500
Epoch 686/1000


Epoch 00686: val_loss did not improve from 27.87500
Epoch 687/1000
Epoch 00687: val_loss did not improve from 27.87500
Epoch 688/1000


Epoch 00688: val_loss did not improve from 27.87500
Epoch 689/1000
Epoch 00689: val_loss did not improve from 27.87500
Epoch 690/1000


Epoch 00690: val_loss did not improve from 27.87500
Epoch 691/1000
Epoch 00691: val_loss did not improve from 27.87500
Epoch 692/1000


Epoch 00692: val_loss did not improve from 27.87500
Epoch 693/1000
Epoch 00693: val_loss did not improve from 27.87500
Epoch 694/1000


Epoch 00694: val_loss did not improve from 27.87500
Epoch 695/1000
Epoch 00695: val_loss did not improve from 27.87500
Epoch 696/1000


Epoch 00696: val_loss did not improve from 27.87500
Epoch 697/1000
Epoch 00697: val_loss did not improve from 27.87500
Epoch 698/1000


Epoch 00698: val_loss did not improve from 27.87500
Epoch 699/1000
Epoch 00699: val_loss did not improve from 27.87500
Epoch 700/1000


Epoch 00700: val_loss did not improve from 27.87500
Epoch 701/1000
Epoch 00701: val_loss did not improve from 27.87500
Epoch 702/1000


Epoch 00702: val_loss did not improve from 27.87500
Epoch 703/1000
Epoch 00703: val_loss did not improve from 27.87500
Epoch 704/1000


Epoch 00704: val_loss did not improve from 27.87500
Epoch 705/1000
Epoch 00705: val_loss did not improve from 27.87500
Epoch 706/1000


Epoch 00706: val_loss did not improve from 27.87500
Epoch 707/1000
Epoch 00707: val_loss did not improve from 27.87500
Epoch 708/1000


Epoch 00708: val_loss did not improve from 27.87500
Epoch 709/1000
Epoch 00709: val_loss did not improve from 27.87500
Epoch 710/1000


Epoch 00710: val_loss did not improve from 27.87500
Epoch 711/1000
Epoch 00711: val_loss did not improve from 27.87500
Epoch 712/1000


Epoch 00712: val_loss did not improve from 27.87500
Epoch 713/1000
Epoch 00713: val_loss did not improve from 27.87500
Epoch 714/1000


Epoch 00714: val_loss did not improve from 27.87500
Epoch 715/1000
Epoch 00715: val_loss did not improve from 27.87500
Epoch 716/1000


Epoch 00716: val_loss did not improve from 27.87500
Epoch 717/1000
Epoch 00717: val_loss did not improve from 27.87500
Epoch 718/1000


Epoch 00718: val_loss did not improve from 27.87500
Epoch 719/1000
Epoch 00719: val_loss did not improve from 27.87500
Epoch 720/1000


Epoch 00720: val_loss did not improve from 27.87500
Epoch 721/1000
Epoch 00721: val_loss did not improve from 27.87500
Epoch 722/1000


Epoch 00722: val_loss did not improve from 27.87500
Epoch 723/1000
Epoch 00723: val_loss did not improve from 27.87500
Epoch 724/1000


Epoch 00724: val_loss did not improve from 27.87500
Epoch 725/1000
Epoch 00725: val_loss did not improve from 27.87500
Epoch 726/1000


Epoch 00726: val_loss did not improve from 27.87500
Epoch 727/1000
Epoch 00727: val_loss did not improve from 27.87500
Epoch 728/1000


Epoch 00728: val_loss did not improve from 27.87500
Epoch 729/1000
Epoch 00729: val_loss did not improve from 27.87500
Epoch 730/1000


Epoch 00730: val_loss did not improve from 27.87500
Epoch 731/1000
Epoch 00731: val_loss did not improve from 27.87500
Epoch 732/1000


Epoch 00732: val_loss did not improve from 27.87500
Epoch 733/1000
Epoch 00733: val_loss did not improve from 27.87500
Epoch 734/1000


Epoch 00734: val_loss did not improve from 27.87500
Epoch 735/1000
Epoch 00735: val_loss did not improve from 27.87500
Epoch 736/1000


Epoch 00736: val_loss did not improve from 27.87500
Epoch 737/1000
Epoch 00737: val_loss did not improve from 27.87500
Epoch 738/1000


Epoch 00738: val_loss did not improve from 27.87500
Epoch 739/1000
Epoch 00739: val_loss did not improve from 27.87500
Epoch 740/1000


Epoch 00740: val_loss did not improve from 27.87500
Epoch 741/1000
Epoch 00741: val_loss did not improve from 27.87500
Epoch 742/1000


Epoch 00742: val_loss did not improve from 27.87500
Epoch 743/1000
Epoch 00743: val_loss did not improve from 27.87500
Epoch 744/1000


Epoch 00744: val_loss did not improve from 27.87500
Epoch 745/1000
Epoch 00745: val_loss did not improve from 27.87500
Epoch 746/1000


Epoch 00746: val_loss did not improve from 27.87500
Epoch 747/1000
Epoch 00747: val_loss did not improve from 27.87500
Epoch 748/1000


Epoch 00748: val_loss did not improve from 27.87500
Epoch 749/1000
Epoch 00749: val_loss did not improve from 27.87500
Epoch 750/1000


Epoch 00750: val_loss did not improve from 27.87500
Epoch 751/1000
Epoch 00751: val_loss did not improve from 27.87500
Epoch 752/1000


Epoch 00752: val_loss did not improve from 27.87500
Epoch 753/1000
Epoch 00753: val_loss did not improve from 27.87500
Epoch 754/1000


Epoch 00754: val_loss did not improve from 27.87500
Epoch 755/1000
Epoch 00755: val_loss did not improve from 27.87500
Epoch 756/1000


Epoch 00756: val_loss did not improve from 27.87500
Epoch 757/1000
Epoch 00757: val_loss did not improve from 27.87500
Epoch 758/1000


Epoch 00758: val_loss did not improve from 27.87500
Epoch 759/1000
Epoch 00759: val_loss did not improve from 27.87500
Epoch 760/1000


Epoch 00760: val_loss did not improve from 27.87500
Epoch 761/1000
Epoch 00761: val_loss did not improve from 27.87500
Epoch 762/1000


Epoch 00762: val_loss did not improve from 27.87500
Epoch 763/1000
Epoch 00763: val_loss did not improve from 27.87500
Epoch 764/1000


Epoch 00764: val_loss did not improve from 27.87500
Epoch 765/1000
Epoch 00765: val_loss did not improve from 27.87500
Epoch 766/1000


Epoch 00766: val_loss did not improve from 27.87500
Epoch 767/1000
Epoch 00767: val_loss did not improve from 27.87500
Epoch 768/1000


Epoch 00768: val_loss did not improve from 27.87500
Epoch 769/1000
Epoch 00769: val_loss did not improve from 27.87500
Epoch 770/1000


Epoch 00770: val_loss did not improve from 27.87500
Epoch 771/1000
Epoch 00771: val_loss did not improve from 27.87500
Epoch 772/1000


Epoch 00772: val_loss did not improve from 27.87500
Epoch 773/1000
Epoch 00773: val_loss did not improve from 27.87500
Epoch 774/1000


Epoch 00774: val_loss did not improve from 27.87500
Epoch 775/1000
Epoch 00775: val_loss did not improve from 27.87500
Epoch 776/1000


Epoch 00776: val_loss did not improve from 27.87500
Epoch 777/1000
Epoch 00777: val_loss did not improve from 27.87500
Epoch 778/1000


Epoch 00778: val_loss did not improve from 27.87500
Epoch 779/1000
Epoch 00779: val_loss did not improve from 27.87500
Epoch 780/1000


Epoch 00780: val_loss did not improve from 27.87500
Epoch 781/1000
Epoch 00781: val_loss did not improve from 27.87500
Epoch 782/1000


Epoch 00782: val_loss did not improve from 27.87500
Epoch 783/1000
Epoch 00783: val_loss did not improve from 27.87500
Epoch 784/1000


Epoch 00784: val_loss did not improve from 27.87500
Epoch 785/1000
Epoch 00785: val_loss did not improve from 27.87500
Epoch 786/1000


Epoch 00786: val_loss did not improve from 27.87500
Epoch 787/1000
Epoch 00787: val_loss did not improve from 27.87500
Epoch 788/1000


Epoch 00788: val_loss did not improve from 27.87500
Epoch 789/1000
Epoch 00789: val_loss did not improve from 27.87500
Epoch 790/1000


Epoch 00790: val_loss did not improve from 27.87500
Epoch 791/1000
Epoch 00791: val_loss did not improve from 27.87500
Epoch 792/1000


Epoch 00792: val_loss did not improve from 27.87500
Epoch 793/1000
Epoch 00793: val_loss did not improve from 27.87500
Epoch 794/1000


Epoch 00794: val_loss did not improve from 27.87500
Epoch 795/1000
Epoch 00795: val_loss did not improve from 27.87500
Epoch 796/1000


Epoch 00796: val_loss did not improve from 27.87500
Epoch 797/1000
Epoch 00797: val_loss did not improve from 27.87500
Epoch 798/1000


Epoch 00798: val_loss did not improve from 27.87500
Epoch 799/1000
Epoch 00799: val_loss did not improve from 27.87500
Epoch 800/1000


Epoch 00800: val_loss did not improve from 27.87500
Epoch 801/1000
Epoch 00801: val_loss did not improve from 27.87500
Epoch 802/1000


Epoch 00802: val_loss did not improve from 27.87500
Epoch 803/1000
Epoch 00803: val_loss did not improve from 27.87500
Epoch 804/1000


Epoch 00804: val_loss did not improve from 27.87500
Epoch 805/1000
Epoch 00805: val_loss did not improve from 27.87500
Epoch 806/1000


Epoch 00806: val_loss did not improve from 27.87500
Epoch 807/1000
Epoch 00807: val_loss did not improve from 27.87500
Epoch 808/1000


Epoch 00808: val_loss did not improve from 27.87500
Epoch 809/1000
Epoch 00809: val_loss did not improve from 27.87500
Epoch 810/1000


Epoch 00810: val_loss did not improve from 27.87500
Epoch 811/1000
Epoch 00811: val_loss did not improve from 27.87500
Epoch 812/1000


Epoch 00812: val_loss did not improve from 27.87500
Epoch 813/1000
Epoch 00813: val_loss did not improve from 27.87500
Epoch 814/1000


Epoch 00814: val_loss did not improve from 27.87500
Epoch 815/1000
Epoch 00815: val_loss did not improve from 27.87500
Epoch 816/1000


Epoch 00816: val_loss did not improve from 27.87500
Epoch 817/1000
Epoch 00817: val_loss did not improve from 27.87500
Epoch 818/1000


Epoch 00818: val_loss did not improve from 27.87500
Epoch 819/1000
Epoch 00819: val_loss did not improve from 27.87500
Epoch 820/1000


Epoch 00820: val_loss did not improve from 27.87500
Epoch 821/1000
Epoch 00821: val_loss did not improve from 27.87500
Epoch 822/1000


Epoch 00822: val_loss did not improve from 27.87500
Epoch 823/1000
Epoch 00823: val_loss did not improve from 27.87500
Epoch 824/1000


Epoch 00824: val_loss did not improve from 27.87500
Epoch 825/1000
Epoch 00825: val_loss did not improve from 27.87500
Epoch 826/1000


Epoch 00826: val_loss did not improve from 27.87500
Epoch 827/1000
Epoch 00827: val_loss did not improve from 27.87500
Epoch 828/1000


Epoch 00828: val_loss did not improve from 27.87500
Epoch 829/1000
Epoch 00829: val_loss did not improve from 27.87500
Epoch 830/1000


Epoch 00830: val_loss did not improve from 27.87500
Epoch 831/1000
Epoch 00831: val_loss did not improve from 27.87500
Epoch 832/1000


Epoch 00832: val_loss did not improve from 27.87500
Epoch 833/1000
Epoch 00833: val_loss did not improve from 27.87500
Epoch 834/1000


Epoch 00834: val_loss did not improve from 27.87500
Epoch 835/1000
Epoch 00835: val_loss did not improve from 27.87500
Epoch 836/1000


Epoch 00836: val_loss did not improve from 27.87500
Epoch 837/1000
Epoch 00837: val_loss did not improve from 27.87500
Epoch 838/1000


Epoch 00838: val_loss did not improve from 27.87500
Epoch 839/1000
Epoch 00839: val_loss did not improve from 27.87500
Epoch 840/1000


Epoch 00840: val_loss did not improve from 27.87500
Epoch 841/1000
Epoch 00841: val_loss did not improve from 27.87500
Epoch 842/1000


Epoch 00842: val_loss did not improve from 27.87500
Epoch 843/1000
Epoch 00843: val_loss did not improve from 27.87500
Epoch 844/1000


Epoch 00844: val_loss did not improve from 27.87500
Epoch 845/1000
Epoch 00845: val_loss did not improve from 27.87500
Epoch 846/1000


Epoch 00846: val_loss did not improve from 27.87500
Epoch 847/1000
Epoch 00847: val_loss did not improve from 27.87500
Epoch 848/1000


Epoch 00848: val_loss did not improve from 27.87500
Epoch 849/1000
Epoch 00849: val_loss did not improve from 27.87500
Epoch 850/1000


Epoch 00850: val_loss did not improve from 27.87500
Epoch 851/1000
Epoch 00851: val_loss did not improve from 27.87500
Epoch 852/1000


Epoch 00852: val_loss did not improve from 27.87500
Epoch 853/1000
Epoch 00853: val_loss did not improve from 27.87500
Epoch 854/1000


Epoch 00854: val_loss did not improve from 27.87500
Epoch 855/1000
Epoch 00855: val_loss did not improve from 27.87500
Epoch 856/1000


Epoch 00856: val_loss did not improve from 27.87500
Epoch 857/1000
Epoch 00857: val_loss did not improve from 27.87500
Epoch 858/1000


Epoch 00858: val_loss did not improve from 27.87500
Epoch 859/1000
Epoch 00859: val_loss did not improve from 27.87500
Epoch 860/1000


Epoch 00860: val_loss did not improve from 27.87500
Epoch 861/1000
Epoch 00861: val_loss did not improve from 27.87500
Epoch 862/1000


Epoch 00862: val_loss did not improve from 27.87500
Epoch 863/1000
Epoch 00863: val_loss did not improve from 27.87500
Epoch 864/1000


Epoch 00864: val_loss did not improve from 27.87500
Epoch 865/1000
Epoch 00865: val_loss did not improve from 27.87500
Epoch 866/1000


Epoch 00866: val_loss did not improve from 27.87500
Epoch 867/1000
Epoch 00867: val_loss did not improve from 27.87500
Epoch 868/1000


Epoch 00868: val_loss did not improve from 27.87500
Epoch 869/1000
Epoch 00869: val_loss did not improve from 27.87500
Epoch 870/1000


Epoch 00870: val_loss did not improve from 27.87500
Epoch 871/1000
Epoch 00871: val_loss did not improve from 27.87500
Epoch 872/1000


Epoch 00872: val_loss did not improve from 27.87500
Epoch 873/1000
Epoch 00873: val_loss did not improve from 27.87500
Epoch 874/1000


Epoch 00874: val_loss did not improve from 27.87500
Epoch 875/1000
Epoch 00875: val_loss did not improve from 27.87500
Epoch 876/1000


Epoch 00876: val_loss did not improve from 27.87500
Epoch 877/1000
Epoch 00877: val_loss did not improve from 27.87500
Epoch 878/1000


Epoch 00878: val_loss did not improve from 27.87500
Epoch 879/1000
Epoch 00879: val_loss did not improve from 27.87500
Epoch 880/1000


Epoch 00880: val_loss did not improve from 27.87500
Epoch 881/1000
Epoch 00881: val_loss did not improve from 27.87500
Epoch 882/1000


Epoch 00882: val_loss did not improve from 27.87500
Epoch 883/1000
Epoch 00883: val_loss did not improve from 27.87500
Epoch 884/1000


Epoch 00884: val_loss did not improve from 27.87500
Epoch 885/1000
Epoch 00885: val_loss did not improve from 27.87500
Epoch 886/1000


Epoch 00886: val_loss did not improve from 27.87500
INFO:tensorflow:Assets written to: models/bert_t21/assets


INFO:tensorflow:Assets written to: models/bert_t21/assets


In [511]:
subjects_split

{'test': ['subject9798',
  'subject7039',
  'subject436',
  'subject6619',
  'subject4058',
  'subject5897',
  'subject2432',
  'subject9218'],
 'train': ['subject3993',
  'subject6635',
  'subject2903',
  'subject5791',
  'subject9694',
  'subject2341',
  'subject3707',
  'subject1272',
  'subject9454',
  'subject2961',
  'subject2827',
  'subject6900']}

In [None]:
for d in DataGenerator(user_level_data, set_type='train', 
                                     subjects_split=subjects_split):
    print(d[0])
    break

In [None]:
tf.test.is_gpu_available()

# SVM

In [1]:
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score


In [2]:
writings_df = writings_df.fillna(value={'text': '', 'title':''})
column_functions = {'text': lambda t: " ".join(t), 
                                        'title': lambda t: " ".join(t),
                                        'text_len': 'sum',
                                        'title_len': 'sum',
                                        'tokenized_text': 'sum',
                                        'tokenized_title': 'sum'
                     }
column_functions.update({'label%i'%i: 'min' for i in range(21)})
column_functions.update({c: 'mean' for c in list(categories) + emotions + ["pronouns"]})
writings_per_user_df = writings_df.groupby('subject').aggregate(column_functions)

NameError: name 'writings_df' is not defined

In [36]:
writings_per_user_df.head()

Unnamed: 0_level_0,text,title,text_len,title_len,tokenized_text,tokenized_title,label0,label1,label2,label3,...,anticipation,disgust,fear,joy,negative,positive,sadness,surprise,trust,pronouns
subject,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
subject1272,Just waking up every morning and talking to m...,...,1156.0,12.0,"[just, waking, up, every, morning, and, talkin...","[thank, you, trump, very, cool, facts, finally...",1,2,2,1,...,0.024889,0.030528,0.025334,0.025333,0.055499,0.053095,0.019931,0.015196,0.037916,0.061013
subject2341,"If you need to talk to someone, it's not a ba...",...,6911.0,0.0,"[if, you, need, to, talk, to, someone, it, s, ...",[],1,2,3,2,...,0.034064,0.009286,0.028078,0.031474,0.041094,0.05856,0.029255,0.023046,0.045392,0.041899
subject2432,*kid A dataminer said that the octoling gir...,...,7125.0,63.0,"[kid, a, dataminer, said, that, the, octoling,...","[what, s, the, point, okay, what, based, on, y...",1,3,3,2,...,0.014419,0.01144,0.016223,0.016127,0.03091,0.044589,0.016004,0.010126,0.025531,0.036645
subject2827,Im so sorry youve had a negative experience. ...,Opinions on high school anxiety ...,17010.0,130.0,"[im, so, sorry, youve, had, a, negative, exper...","[opinions, on, high, school, anxiety, need, ad...",1,3,3,2,...,0.039748,0.012513,0.019591,0.043596,0.027667,0.071131,0.023701,0.021699,0.043266,0.051753
subject2903,"That was not at all how it was, but not gonna...",...,15338.0,12.0,"[that, was, not, at, all, how, it, was, but, n...","[reddit, which, current, bands, artists, are, ...",0,0,1,1,...,0.021783,0.008517,0.015851,0.023864,0.023307,0.047234,0.012222,0.010885,0.029729,0.041074


### Universal sentence encoder

In [1]:
import tensorflow as tf
import tensorflow_hub as hub
# import sentencepiece
# import tensorflow_text


ImportError: Traceback (most recent call last):
  File "/usr/local/tensorflow/python3.5/1.13.1/lib/python3.5/site-packages/tensorflow/python/pywrap_tensorflow.py", line 58, in <module>
    from tensorflow.python.pywrap_tensorflow_internal import *
  File "/usr/local/tensorflow/python3.5/1.13.1/lib/python3.5/site-packages/tensorflow/python/pywrap_tensorflow_internal.py", line 28, in <module>
    _pywrap_tensorflow_internal = swig_import_helper()
  File "/usr/local/tensorflow/python3.5/1.13.1/lib/python3.5/site-packages/tensorflow/python/pywrap_tensorflow_internal.py", line 24, in swig_import_helper
    _mod = imp.load_module('_pywrap_tensorflow_internal', fp, pathname, description)
  File "/home/anasab/tfvenv/lib/python3.5/imp.py", line 242, in load_module
    return load_dynamic(name, filename, file)
  File "/home/anasab/tfvenv/lib/python3.5/imp.py", line 342, in load_dynamic
    return _load(spec)
ImportError: libcublas.so.10.0: cannot open shared object file: No such file or directory


Failed to load the native TensorFlow runtime.

See https://www.tensorflow.org/install/errors

for some common reasons and solutions.  Include the entire stack trace
above this error message when asking for help.

In [None]:
import tensorflow as tf
import tensorflow_hub as hub
module_url = "https://tfhub.dev/google/universal-sentence-encoder-large/3"
# Import the Universal Sentence Encoder's TF Hub module
embed = hub.Module(module_url)

In [3]:
embed_layer = hub.KerasLayer("https://tfhub.dev/google/universal-sentence-encoder-large/4",input_shape=[],dtype=tf.string,trainable=True)

model = tf.keras.Sequential()
model.add(embed_layer)
model.add(tf.keras.layers.Dense(16, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

model.summary()

model.compile(optimizer='adam',
          loss='binary_crossentropy',
          metrics=['accuracy'])

history = model.fit(train_dataset,
                validation_data=validation_dataset,
                epochs=30,
                verbose=1
               )

InvalidArgumentError: assertion failed: [0] [Op:Assert] name: EagerVariableNameReuse

In [17]:
tf.version

<module 'tensorflow_core._api.v2.version' from '/home/anasab/tfvenv/lib/python3.5/site-packages/tensorflow_core/_api/v2/version/__init__.py'>

In [6]:
g = tf.Graph()
with g.as_default():
    use = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")
    # use = tf.saved_model.load("https://tfhub.dev/google/universal-sentence-encoder-multilingual/3")
    # hub.Module("https://tfhub.dev/google/universal-sentence-encoder-multilingual-large/3")

W0328 14:03:56.196056 139845059770112 deprecation.py:506] From /home/anasab/tfvenv/lib/python3.5/site-packages/tensorflow_core/python/ops/resource_variable_ops.py:1786: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.
Instructions for updating:
If using Keras pass *_constraint arguments to layers.


In [4]:
tf.compat.v1.disable_eager_execution()

module_url = "https://tfhub.dev/google/universal-sentence-encoder/4" #@param ["https://tfhub.dev/google/universal-sentence-encoder/4", "https://tfhub.dev/google/universal-sentence-encoder-large/5"]
model = hub.load(module_url)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


Instructions for updating:
If using Keras pass *_constraint arguments to layers.


In [5]:
with tf.compat.v1.Session() as session:
    session.run([tf.compat.v1.global_variables_initializer(), tf.compat.v1.tables_initializer()])
    message_embeddings = session.run(embed(messages))

InternalError: Dst tensor is not initialized.
	 [[node RestoreV2 (defined at /home/anasab/tftf3.6/local/lib/python3.6/site-packages/tensorflow_hub/module_v2.py:95) ]]

Original stack trace for 'RestoreV2':
  File "/usr/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/usr/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/anasab/tftf3.6/local/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/anasab/tftf3.6/local/lib/python3.6/site-packages/traitlets/config/application.py", line 664, in launch_instance
    app.start()
  File "/home/anasab/tftf3.6/local/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 583, in start
    self.io_loop.start()
  File "/home/anasab/tftf3.6/local/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 149, in start
    self.asyncio_loop.run_forever()
  File "/usr/lib/python3.6/asyncio/base_events.py", line 442, in run_forever
    self._run_once()
  File "/usr/lib/python3.6/asyncio/base_events.py", line 1462, in _run_once
    handle._run()
  File "/usr/lib/python3.6/asyncio/events.py", line 145, in _run
    self._callback(*self._args)
  File "/home/anasab/tftf3.6/local/lib/python3.6/site-packages/tornado/ioloop.py", line 690, in <lambda>
    lambda f: self._run_callback(functools.partial(callback, future))
  File "/home/anasab/tftf3.6/local/lib/python3.6/site-packages/tornado/ioloop.py", line 743, in _run_callback
    ret = callback()
  File "/home/anasab/tftf3.6/local/lib/python3.6/site-packages/tornado/gen.py", line 787, in inner
    self.run()
  File "/home/anasab/tftf3.6/local/lib/python3.6/site-packages/tornado/gen.py", line 748, in run
    yielded = self.gen.send(value)
  File "/home/anasab/tftf3.6/local/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 361, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "/home/anasab/tftf3.6/local/lib/python3.6/site-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/home/anasab/tftf3.6/local/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 268, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "/home/anasab/tftf3.6/local/lib/python3.6/site-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/home/anasab/tftf3.6/local/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 541, in execute_request
    user_expressions, allow_stdin,
  File "/home/anasab/tftf3.6/local/lib/python3.6/site-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/home/anasab/tftf3.6/local/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 300, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/anasab/tftf3.6/local/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/anasab/tftf3.6/local/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2858, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/home/anasab/tftf3.6/local/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2886, in _run_cell
    return runner(coro)
  File "/home/anasab/tftf3.6/local/lib/python3.6/site-packages/IPython/core/async_helpers.py", line 68, in _pseudo_sync_runner
    coro.send(None)
  File "/home/anasab/tftf3.6/local/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3063, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/anasab/tftf3.6/local/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3254, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):
  File "/home/anasab/tftf3.6/local/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3331, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-4-33c15096ee91>", line 4, in <module>
    model = hub.load(module_url)
  File "/home/anasab/tftf3.6/local/lib/python3.6/site-packages/tensorflow_hub/module_v2.py", line 95, in load
    obj = tf_v1.saved_model.load_v2(module_path, tags=tags)
  File "/home/anasab/tftf3.6/local/lib/python3.6/site-packages/tensorflow_core/python/saved_model/load.py", line 528, in load
    return load_internal(export_dir, tags)
  File "/home/anasab/tftf3.6/local/lib/python3.6/site-packages/tensorflow_core/python/saved_model/load.py", line 552, in load_internal
    export_dir)
  File "/home/anasab/tftf3.6/local/lib/python3.6/site-packages/tensorflow_core/python/saved_model/load.py", line 128, in __init__
    self._restore_checkpoint()
  File "/home/anasab/tftf3.6/local/lib/python3.6/site-packages/tensorflow_core/python/saved_model/load.py", line 280, in _restore_checkpoint
    load_status = saver.restore(variables_path)
  File "/home/anasab/tftf3.6/local/lib/python3.6/site-packages/tensorflow_core/python/training/tracking/util.py", line 1283, in restore
    checkpoint=checkpoint, proto_id=0).restore(self._graph_view.root)
  File "/home/anasab/tftf3.6/local/lib/python3.6/site-packages/tensorflow_core/python/training/tracking/base.py", line 209, in restore
    restore_ops = trackable._restore_from_checkpoint_position(self)  # pylint: disable=protected-access
  File "/home/anasab/tftf3.6/local/lib/python3.6/site-packages/tensorflow_core/python/training/tracking/base.py", line 908, in _restore_from_checkpoint_position
    tensor_saveables, python_saveables))
  File "/home/anasab/tftf3.6/local/lib/python3.6/site-packages/tensorflow_core/python/training/tracking/util.py", line 289, in restore_saveables
    validated_saveables).restore(self.save_path_tensor)
  File "/home/anasab/tftf3.6/local/lib/python3.6/site-packages/tensorflow_core/python/training/saving/functional_saver.py", line 255, in restore
    restore_ops.update(saver.restore(file_prefix))
  File "/home/anasab/tftf3.6/local/lib/python3.6/site-packages/tensorflow_core/python/training/saving/functional_saver.py", line 95, in restore
    file_prefix, tensor_names, tensor_slices, tensor_dtypes)
  File "/home/anasab/tftf3.6/local/lib/python3.6/site-packages/tensorflow_core/python/ops/gen_io_ops.py", line 1506, in restore_v2
    name=name)
  File "/home/anasab/tftf3.6/local/lib/python3.6/site-packages/tensorflow_core/python/framework/op_def_library.py", line 742, in _apply_op_helper
    attrs=attr_protos, op_def=op_def)
  File "/home/anasab/tftf3.6/local/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py", line 3322, in _create_op_internal
    op_def=op_def)
  File "/home/anasab/tftf3.6/local/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py", line 1756, in __init__
    self._traceback = tf_stack.extract_stack()


In [None]:
message_embeddings

In [14]:
embed = hub.Module("../resources/sentence_wise_email/module/module_useT")

RuntimeError: variable_scope module_2/ was unused but the corresponding name_scope was already taken.

In [12]:
with tf.compat.v1.Session() as sess:
    model(["The cat is on the mat"])

AssertionError: 

In [58]:
# features = [np.random.rand(75) for i in range(20)]
features = writings_per_user_df[list(categories) + emotions + ["pronouns"]]

def cross_validation(folds=2):
    svmmodels= {}
    total_score = 0
    for l in range(21):
#         print("Classifier for label", l)
        labels = writings_per_user_df['label%d' % l].values
        svmmodels[l] = SVC(kernel='rbf', C=5)
        cvscores = cross_val_score(svmmodels[l], features, labels, cv=folds)
#         print(sum(cvscores)/folds, cvscores)
        total_score += sum(cvscores)/folds
    return total_score/21

print(cross_validation())

0.44047619047619047




In [425]:
def results_for_label(features, l, train_examples=5):
    labels = writings_per_user_df['label%d' % l].values
    svmmodel=SVC()
    svmmodel.fit(features[:train_examples], labels[:train_examples])
    predictions = svmmodel.predict(features[train_examples:])
    print(l, predictions, labels[train_examples:], labels[:train_examples])
    return labels[train_examples:]==predictions

In [426]:
features

Unnamed: 0_level_0,funct,article,affect,negemo,sad,cogmech,inhib,bio,body,achieve,...,anticipation,disgust,fear,joy,negative,positive,sadness,surprise,trust,pronouns
subject,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
subject1272,0.590753,0.042808,0.078767,0.036815,0.004281,0.189212,0.001712,0.035959,0.015411,0.013699,...,0.024889,0.030528,0.025334,0.025333,0.055499,0.053095,0.019931,0.015196,0.037916,0.061013
subject2341,0.60628,0.038489,0.087976,0.035595,0.007524,0.216032,0.003907,0.013312,0.003762,0.017364,...,0.034064,0.009286,0.028078,0.031474,0.041094,0.05856,0.029255,0.023046,0.045392,0.041899
subject2432,0.536311,0.067613,0.053005,0.020033,0.002365,0.164997,0.005147,0.013495,0.005287,0.017947,...,0.014419,0.01144,0.016223,0.016127,0.03091,0.044589,0.016004,0.010126,0.025531,0.036645
subject2827,0.612485,0.038448,0.106301,0.034597,0.010385,0.205893,0.007001,0.021587,0.004201,0.025088,...,0.039748,0.012513,0.019591,0.043596,0.027667,0.071131,0.023701,0.021699,0.043266,0.051753
subject2903,0.59798,0.058176,0.055244,0.019023,0.003388,0.189186,0.003453,0.026254,0.00899,0.013876,...,0.021783,0.008517,0.015851,0.023864,0.023307,0.047234,0.012222,0.010885,0.029729,0.041074
subject2961,0.611006,0.038209,0.084884,0.034421,0.008577,0.21232,0.004456,0.035869,0.008355,0.022502,...,0.027696,0.013048,0.017931,0.022866,0.028927,0.044044,0.019895,0.010373,0.027395,0.079129
subject3707,0.472825,0.040254,0.056373,0.026718,0.00637,0.158801,0.005783,0.032637,0.004418,0.017728,...,0.033694,0.006842,0.027104,0.018208,0.036167,0.042436,0.017377,0.011779,0.029148,0.025308
subject3993,0.509612,0.062823,0.047135,0.014031,0.002421,0.161933,0.005494,0.018872,0.004479,0.018782,...,0.0267,0.008983,0.015668,0.022874,0.03198,0.056786,0.015183,0.012096,0.036448,0.035235
subject4058,0.569715,0.084511,0.057204,0.028526,0.003428,0.172183,0.006208,0.008912,0.002323,0.017557,...,0.025554,0.010193,0.024997,0.019391,0.038248,0.052755,0.017248,0.015714,0.036149,0.030001
subject436,0.565337,0.055708,0.0674,0.035763,0.000688,0.191884,0.004127,0.028886,0.001376,0.013067,...,0.028608,0.002323,0.028324,0.039499,0.033384,0.057559,0.024337,0.00544,0.055317,0.063032


In [427]:
cumresults = []
for l in range(21):
    results = results_for_label(features, l)
    cumresults.append(results)

nrusers = len(cumresults[0])
nrques = 21
correct_per_user = {u: 0 for u in range(nrusers)}
for q, ques in enumerate(cumresults):
    for u, answ in enumerate(cumresults[q]):
        if answ:
            correct_per_user[u] += 1

for u in correct_per_user:
    print("u", u, correct_per_user[u]/nrques)
print("AHR", sum(correct_per_user.values())/nrusers/nrques)

0 [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] [1 1 0 0 2 1 2 0 0 1 1 2 1 0 1] [1 1 1 1 0]
1 [2 3 3 3 3 3 3 3 2 3 3 2 2 3 3] [1 3 0 0 1 0 3 0 1 1 1 2 3 0 1] [2 2 3 3 0]
2 [3 3 3 3 3 3 3 3 3 3 3 3 3 3 3] [1 0 0 0 2 1 3 0 1 2 0 1 3 2 2] [2 3 3 3 1]
3 [2 2 2 2 2 2 2 2 2 2 2 2 2 2 2] [1 1 0 1 3 1 2 1 1 1 1 2 2 0 1] [1 2 2 2 1]
4 [3 3 3 3 3 3 3 3 3 3 3 3 3 3 3] [0 0 0 0 2 0 1 0 1 0 0 1 1 1 3] [0 3 3 3 0]
5 [2 2 2 2 2 2 2 2 2 2 2 2 2 2 2] [0 0 0 0 0 0 0 0 0 0 0 0 3 0 0] [1 2 2 2 0]
6 [3 2 2 2 2 2 2 2 3 2 2 3 3 2 2] [1 0 0 0 3 2 2 0 3 1 2 3 3 0 1] [3 3 2 2 1]
7 [3 2 2 2 2 2 2 2 2 2 2 3 3 2 2] [1 1 0 0 3 1 2 0 1 1 0 2 3 0 1] [2 3 2 3 0]
8 [1 2 2 2 2 2 2 2 1 2 2 1 1 2 2] [1 1 0 1 1 0 1 0 1 1 0 1 2 0 0] [1 1 2 2 0]
9 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [1 2 0 0 2 0 0 0 0 1 0 2 3 0 0] [3 0 1 2 0]
10 [0 1 1 1 1 1 1 1 0 0 1 0 1 1 1] [1 3 0 0 1 0 0 0 2 1 0 2 2 1 2] [3 0 1 1 0]
11 [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] [1 0 0 0 3 1 3 1 1 1 1 3 3 0 3] [1 2 3 1 0]
12 [2 2 2 2 2 2 2 2 2 2 2 2 2 2 2] [1 0 0 0 2 0 2 0 3 0 0 1 3 

# Extra analysis


### Extract LIWC

In [409]:
def merge_tokens(row):
    tokens = []
    if row.tokenized_text:
        tokens += row.tokenized_text
    if row.tokenized_title:
        tokens += row.tokenized_title
    return tokens
writings_df['all_tokens'] = writings_df.apply (lambda row: merge_tokens(row), axis=1)

In [None]:
# TODO: include the title
def extract_emotions(tokens, emotion, relative=True):
    if not tokens:
        return None
    emotion_words = [t for t in tokens 
                     if t in nrc_lexicon[emotion]]
    if relative:
        return len(emotion_words) / len(tokens)
    else:
        return len(emotion_words)
    
    return encoded_emotions

from functools import partial
for emotion in emotions:
    writings_df[emotion] = writings_df['all_tokens'].apply(partial(extract_emotions, emotion=emotion, 
                                                                   relative=True))

In [None]:
writings_df['pronouns'] = writings_df['all_tokens'].apply(partial(encode_pronouns, relative=True))

In [None]:
writings_df[['label%i'%i for i in range(21)] + ['text', 'pronouns', 'text_len'] + emotions].corr('spearman')

In [None]:
# writings_df['label15'] = writings_df['label15'].apply(lambda l: encode_labels([l])[0])

In [None]:
# writings_df['label17'] = writings_df['label17'].apply(lambda l: encode_labels([l])[0])

In [None]:
writings_df.groupby('subject').mean()[
    ['label%i'%i for i in range(21)] + ['pronouns', 'text_len'] + emotions].corr(
    'spearman')[['pronouns', 'text_len'] + emotions]

In [None]:
writings_df.corrwith?

In [405]:
from liwc_readDict import readDict

liwc = readDict('/home/anasab/resources/liwc.dic')
categories = [c for (w,c) in liwc]
set(categories)
liwc_dict = {}
for (w, c) in liwc:
    if c not in liwc_dict:
        liwc_dict[c] = []
    liwc_dict[c].append(w)

In [413]:
writings_per_user_df['all_tokens'] = writings_per_user_df.apply (lambda row: merge_tokens(row), axis=1)

In [414]:
def encode_liwc_categories(tokens, category_words, relative=True):
    category_cnt = 0
    if not tokens:
        return None
    text_len = len(tokens)
    for t in tokens:
        for word in category_words:
            if t==word or (word[-1]=='*' and t.startswith(word[:-1])) \
            or (t==word.split("'")[0]):
                category_cnt += 1
                break # one token cannot belong to more than one word in the category
    if relative:
        return category_cnt/text_len
    else:
        return category_cnt

In [415]:
%%time
from functools import partial
# for categ in ['negemo', 'posemo', 'affect', 'sad', 'anx', 'pronoun']:#liwc_dict.keys():
for categ in liwc_dict.keys():
#     if categ in writings_df.columns:
#         continue
    print("Encoding for category %s..." % categ)
    writings_per_user_df[categ] = writings_per_user_df['all_tokens'].apply(partial(encode_liwc_categories, 
                                                                   category_words=liwc_dict[categ], 
                                                                   relative=True))


Encoding for category funct...
Encoding for category article...
Encoding for category affect...
Encoding for category negemo...
Encoding for category sad...
Encoding for category cogmech...
Encoding for category inhib...
Encoding for category bio...
Encoding for category body...
Encoding for category achieve...
Encoding for category health...
Encoding for category sexual...
Encoding for category adverb...
Encoding for category preps...
Encoding for category space...
Encoding for category relativ...
Encoding for category time...
Encoding for category work...
Encoding for category certain...
Encoding for category assent...
Encoding for category anger...
Encoding for category posemo...
Encoding for category insight...
Encoding for category verb...
Encoding for category past...
Encoding for category money...
Encoding for category percept...
Encoding for category social...
Encoding for category friend...
Encoding for category motion...
Encoding for category cause...
Encoding for category le

In [None]:
relevant_categs = ['posemo', 'negemo', 'anx', 'sad', 'affect', 'feel', 'social', 'health', 
                   'sexual', 'present', 'cogmech', 'inhib']
writings_df.groupby('subject').mean()[
    ['label%i'%i for i in range(21)] + relevant_categs].corr(
    'spearman')[relevant_categs]

In [None]:
list(writings_df.groupby('subject').min()[
    ['label%i'%i for i in range(21)] + list(liwc_dict.keys())].corr()[list(liwc_dict.keys())].mean().sort_values().index)

In [None]:
pickle.dump(writings_df, open('writings_df_T2_liwc.pkl', 'wb+'))