In [173]:
# import
import keras
import sys
import numpy as np
import string
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import SimpleRNN, Dense
from keras.utils import np_utils, plot_model
from keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn import model_selection
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, precision_recall_fscore_support
import h5py as h5py

In [174]:
# if we are doeing binary classification. That means say if a token is a named entity or not
BINARY = False

# number of epochs for training
epochs = 10 

# the english side of the corpus
en_corpus_file = "corpus-en.txt"

# the ewondo side of the corpus
ewo_corpus_file = "corpus-ewo.txt"

# name of the file to same the model 
best_model_file = "best-model-conll.hdfs"

# The maximal number of phrases to use
max_nb_of_phrases =  -1

# the maximal number of duplicates for each word in the corpus
duplication = 1

# wether we are using only the vocabulary, ro redundancy
is_only_vocab = True

# if word should be shuffle or not
shuffle = is_only_vocab

# normalization strategy
# log, max, mean_log, 
# None: for no normalization
normalization_strategy = "mean_log"

# if we are using the Zennaki et al. signature
is_zennaki = False

# the number of neurons in the first layer
h1_size = 640

# number of neurons in the second layer
h2_size = 160  

In [175]:
def getTag(aString):
    """
        convert a string to a tag
    """
    tag = "O"
    if BINARY:
        if aString != "O":
            return "NE"
    else:
        tag = aString
    return tag
     

In [176]:
def load_corpus(file, max_nb_of_phrases):
    """
    Load a corpus stored in a file
    Input:
        - file: the name of the file of the corpus
        - max_nb_of_phases: maximal number of phrases to load
    
    Return:
        - a DataFrame representing the corpus
        - the number of phrases in the corpus
    """
    nb_of_phrases = 0
    dataset = {"word": [], "ne-tag": []}
    with open(file) as f:
        prev_line = None
        for cpt, line in enumerate(f):
            if cpt == 0:
                continue
            if nb_of_phrases == max_nb_of_phrases:
                break;

            l = line.strip()
            if len(l) == 0 and len(prev_line) != 0:
                nb_of_phrases += 1
                dataset["word"].append(line)
                dataset["ne-tag"].append(None)
            else:
                l = l.split("\t")
                if l[0] not in string.punctuation:
                    dataset["word"].append(l[0])
                    dataset["ne-tag"].append(ne_type(l[1]))
            prev_line = line.strip()
        
    return pd.DataFrame(dataset), nb_of_phrases+1

In [177]:
def log_normalization(fingerprints):
    fingerprints[fingerprints > 0] = 1 + np.log(fingerprints[fingerprints > 0])
    return fingerprints

In [178]:
def max_normalization(fingerprints):
    maxis = fingerprints.max(axis = 1)
    fingerprints[fingerprints > 0] = fingerprints[fingerprints > 0].apply(lambda row: 0.5 + 0.5 * row / maxis[row.index])
    return fingerprints

In [179]:
def mean_log_normalization(fingerprints):
    means = fingerprints.mean(axis=1)
    fingerprints[fingerprints > 0] = fingerprints[fingerprints > 0].apply(lambda row: (1 + np.log(row)) / 1 + np.log(means[row.index]))
    return fingerprints

In [180]:
def normalize(fingerprints):
    if normalization_strategy == "log":
        return log_normalization(fingerprints)
    elif normalization_strategy == "max":
        return max_normalization(fingerprints)
    elif normalization_strategy == "mean_log":
        return mean_log_normalization(fingerprints)
    elif normalization_strategy == "log_inv":
        fp = log_normalization(fingerprints)
        fp = 1 / (1 + fp)
        return fp
    elif normalization_strategy == "max_inv":
        fp = max_normalization(fingerprints)
        fp = 1 / (1 + fp)
        return fp
    elif normalization_strategy == "mean_log_inv":
        fp = mean_log_normalization(fingerprints)
        fp = 1 / (1 + fp)
        return fp
    elif normalization_strategy == "tf":
        fp = fingerprints
        fp[fp > 0] = 1 / fp[fp > 0]
        return fp
    else:
        return fingerprints

In [181]:
def corpus_fingerprint(aDataframe, nb_of_biphrases):
    """
    Create the distributionnal signature of each word in the corpus
    Input:
        -aDataFrame: the corpus DataFrame
        -nb_of_biphrases: number of phrases in the corpus
    Return:
        a DataFrame: corpus fingerprint, the columns are the words in the corpus
    """
    print("Normalization strategy:", normalization_strategy)
    tf = {}
    fingerprints = {}
    current_bi_phrase_index = 0
    nb_word_in_corpus = aDataframe[aDataframe.word != "\n"].word.size
    words_in_current_phrase = []
    for index, row in aDataframe.iterrows():
        if current_bi_phrase_index > nb_of_biphrases:
            break
            
        word = row['word']
        
        if word != "\n":
            words_in_current_phrase.append(word)
            if word not in fingerprints:
                fingerprints[word] = np.zeros(nb_of_biphrases, dtype=np.float32)
                tf[word] = 0
            tf[word] += 1
            fingerprints[word][current_bi_phrase_index] = 1
        else:
            nb_word_in_current_phrase = len(words_in_current_phrase)
            current_bi_phrase_index += 1
            words_in_current_phrase = []
        
    if not is_zennaki:
        for word in fingerprints:
            for i in range(nb_of_biphrases):
                if fingerprints[word][i] != 0:
                    fingerprints[word][i] = nb_word_in_corpus / tf[word]
    ret = pd.DataFrame(fingerprints)
        
    return normalize(ret)

In [182]:
def corpus2trainingdata(aDataframe, fingerprintsDataFrame):
    """
    Convert corpus to training data => numpy array
    
    Input:
        -aDataFrame: Corpus dataframe
        -fingerprintsDataFrame: distributionnal signature of words in the corpus
    Return:
        (X, y): X is the array of words (signature) in the corpus and y is the corresponding labels (NE tags)
    """
    X = np.zeros((aDataframe.shape[0], fingerprintsDataFrame.shape[0]), dtype=np.int8)
    y = np.zeros(aDataframe.shape[0], dtype=np.int8)
    i = 0
    for row in aDataframe.iterrows():
        X[i] = fingerprintsDataFrame[row[1]['word']].values
        y[i] = tag2int[getTag(row[1]['ne-tag'])]
        i += 1
    return X, y

In [183]:
# A utility function to convert NE tags
def ne_type(aType):
    aType = aType.lower()
    if 'per' in aType:
        t =  'NE' if BINARY else 'PER' 
    elif 'loc' in aType:
        t =  'NE' if BINARY else 'LOC'
    elif 'org' in aType:
        t =  'NE' if BINARY else 'ORG'
    elif 'hour' in aType:
        t =  'NE' if BINARY else 'MISC'
    elif aType != 'o' and len(aType) > 0 :
        t =  'NE' if BINARY else 'MISC'
    else:
        t = 'O'
    return t

In [184]:
def P_R_F1(y_pred, y_true, neg_class):
    same = y_pred[y_true==y_pred]
    tp = same[same != neg_class].size
    nb_of_pos_exple = y_true[y_true != neg_class].size
    nb_of_pos_pred = y_pred[y_pred != neg_class].size
    p = r = f1 = 0
    try:
        p = np.round(tp*100/nb_of_pos_pred, 2)
    except ZeroDivisionError:
        print("number of correct positive predictions is 0")
        
    try:
        r = np.round(tp*100/nb_of_pos_exple, 2)
    except ZeroDivisionError:
        print("number of position exple is 0")
        
    try:
        f1 = np.round(2*r*p/(r+p), 2)
    except ZeroDivisionError:
        print("Recall and precision are 0")

    return p, r, f1

In [185]:
def shuffle_data(X, y):
    indices = [i for i in  range(X.shape[0])]
    np.random.shuffle(indices)
    return X[indices], y[indices]

In [186]:
def create_model(input_dim, output_dim):
    model = Sequential()
    model.add(Dense(h1_size, input_dim=input_dim, activation='sigmoid', name="hidden1"))
    model.add(Dense(h2_size, activation='sigmoid', name="hidden2"))
    if BINARY:
        model.add(Dense(1, activation='sigmoid', name="outputlayer"))
        model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['binary_accuracy'])
    else:
        model.add(Dense(output_dim, activation='softmax', name="outputlayer"))
        model.compile(loss='categorical_crossentropy', optimizer="rmsprop", metrics=['accuracy'])
    model.summary()
    return model

In [187]:
def train_model(model, X_train, y_train, X_val, y_val, epochs=epochs):
    # stop learning if the error is the same between two consecutive epochs
    early_stop = EarlyStopping(patience=20, verbose=2)
    
    # saving best model
    best_model_cp = ModelCheckpoint(best_model_file, save_best_only=True, verbose=1)
    
    model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=epochs, verbose=0, shuffle=shuffle, callbacks=[best_model_cp, early_stop])
    
    #loading and returning the best model
    return keras.models.load_model(best_model_file)

In [188]:
def predict(model, X, y, binary=BINARY):
    if BINARY:
        y_pred = np.round(model.predict(X))
        y_true = y
    else:
        predictions = model.predict(X)
        y_pred = np.array([np.argmax(p) for p in predictions])
        y_true = np.array([np.argmax(t) for t in y ])
    return y_true, y_pred

In [189]:
def model_performance(y_true, y_pred):
    return P_R_F1(y_pred, y_true, tag2int['O']) #precision, recall, f1-score

In [190]:
def model_performace_by_tag(y_true, y_pred, tag):
    p, r, f1 = 0, 0, 0
    
    eq = y_pred[y_pred==y_true]
    correctly_pred = eq[eq==tag].size
    try:
        p = np.round(100 * correctly_pred / y_pred[y_pred==tag].size, 2)
    except ZeroDivisionError:
        pass
    
    try:
        r = np.round(100 * correctly_pred / y_true[y_true==tag].size, 2)
    except ZeroDivisionError:
        pass
    
    try:
        f1 = np.round(2 * r * p / (r + p), 2)
    except ZeroDivisionError:
        pass
    
    return p, r, f1

In [191]:
def algoEval(X_train, y_train, X_val, y_val, X_ewo, y_ewo, epochs=epochs, model=None):
    """
    Train a model on (X, y) and validate on (X_val, y_val) then project on (X_ewo)
    """
    test_precision, train_precision, ewo_precision = [], [], []
    test_recall, train_recall, ewo_recall = [], [], []
    test_fscore, train_fscore, ewo_fscore = [], [], []
    
    test_result_by_tag = {}
    train_result_by_tag = {}
    ewo_result_by_tag = {}
    for t in tagSet:
        f1_key = "F1-"+t
        p_key = "P-"+t
        r_key = "R-"+t
        train_result_by_tag[f1_key], train_result_by_tag[p_key], train_result_by_tag[r_key] = [], [], []
        test_result_by_tag[f1_key], test_result_by_tag[p_key], test_result_by_tag[r_key] = [], [], []
        ewo_result_by_tag[f1_key], ewo_result_by_tag[p_key], ewo_result_by_tag[r_key] = [], [], []

    m = train_model(model, X_train, y_train, X_val, y_val, epochs=epochs)
        
    y_true, y_pred = predict(m, X_train, y_train)
    p_train, r_train, f1_train = model_performance(y_true, y_pred)
        
    y_true_val, y_pred_val = predict(m, X_val, y_val)
    p_val, r_val, f1_val = model_performance(y_true_val, y_pred_val)
        
    y_true_ewo, y_pred_ewo = predict(m, X_ewo, y_ewo) 
    p_ewo, r_ewo, f1_ewo = model_performance(y_true_ewo, y_pred_ewo)
        
    for t in range(len(int2tag)):
        f1_key = "F1-" + int2tag[t]
        p_key = "P-" + int2tag[t]
        r_key = "R-" + int2tag[t]
            
        p, r, f1 = model_performace_by_tag(y_true, y_pred, t)
        train_result_by_tag[p_key].append(p)
        train_result_by_tag[r_key].append(r)
        train_result_by_tag[f1_key].append(f1)
            
        p, r, f1 = model_performace_by_tag(y_true_val, y_pred_val, t)
        test_result_by_tag[p_key].append(p)
        test_result_by_tag[r_key].append(r)
        test_result_by_tag[f1_key].append(f1)
            
        p, r, f1 = model_performace_by_tag(y_true_ewo, y_pred_ewo, t)
        ewo_result_by_tag[p_key].append(p)
        ewo_result_by_tag[r_key].append(r)
        ewo_result_by_tag[f1_key].append(f1)
                
    test_precision.append(p_val)
    train_precision.append(p_train)
    ewo_precision.append(p_ewo)
        
    test_recall.append(r_val)
    train_recall.append(r_train)
    ewo_recall.append(r_ewo)
        
    test_fscore.append(f1_val)
    train_fscore.append(f1_train)
    ewo_fscore.append(f1_ewo)
    return pd.DataFrame({
        'P_val': test_precision, 
        'P_train': train_precision, 
        'P_ewo': ewo_precision, 'R_val': test_recall, 'R_train': train_recall, 
        'R_ewo': ewo_recall, 'F1-val': test_fscore, 'F1-train': train_fscore, 'F1-ewo': ewo_fscore}), pd.DataFrame(train_result_by_tag), pd.DataFrame(test_result_by_tag), pd.DataFrame(ewo_result_by_tag)

In [192]:
def algoCrossVal(X, y, X_ewo, y_ewo, k = 10, repeat=1): 
    """
    Traing a model with k-fold cross validation
    We train the model `repeat` times to check it's stability
    """
    block_size = int(X.shape[0] / k)   
    output = None
    model = None
    train_by_tags, test_by_tags, ewo_by_tags = None, None, None
    for it in range(repeat):
        print("AlgoCrossValIter -", it+1)
        model = create_model(X.shape[1], len(tagSet))
        results = None
        train_by_tagsTmp, test_by_tagsTmp, ewo_by_tagsTmp = None, None, None
        for i in range(k):
            X_val, y_val = X[i*block_size:i*block_size+block_size], y[i*block_size:i*block_size+block_size]
            X_train = np.concatenate((X[0:i*block_size], X[i*block_size+block_size:]))
            y_train = np.concatenate((y[0:i*block_size], y[i*block_size+block_size:]))

            X_train = X_train.reshape(X_train.shape[0], X_train.shape[1])
            X_val = X_val.reshape(X_val.shape[0], X_val.shape[1])

            result, train_by_tag, test_by_tag, ewo_by_tag = algoEval(X_train, y_train, X_val, y_val, X_ewo, y_ewo, model=model)
            if results is None:
                results = result.copy()
                train_by_tagsTmp, test_by_tagsTmp, ewo_by_tagsTmp = train_by_tag.copy(), test_by_tag.copy(), ewo_by_tag.copy()
            else:
                results = pd.concat([results, result], ignore_index=True)
                train_by_tagsTmp = pd.concat([train_by_tagsTmp, train_by_tag], ignore_index=True)
                test_by_tagsTmp = pd.concat([test_by_tagsTmp, test_by_tag], ignore_index=True)
                ewo_by_tagsTmp = pd.concat([ewo_by_tagsTmp, ewo_by_tag], ignore_index=True)
        
        if output is None:
            output = results.mean(axis=0).to_frame()
            train_by_tags = train_by_tagsTmp.mean(axis=0).to_frame()
            test_by_tags = test_by_tagsTmp.mean(axis=0).to_frame()
            ewo_by_tags = ewo_by_tagsTmp.mean(axis=0).to_frame()
        else:
            output = pd.concat([output, results.mean(axis=0).to_frame()], axis=1)
            train_by_tags = pd.concat([train_by_tags, train_by_tagsTmp.mean(axis=0).to_frame()], axis=1)
            test_by_tags = pd.concat([test_by_tags, test_by_tagsTmp.mean(axis=0).to_frame()], axis=1)
            ewo_by_tags = pd.concat([ewo_by_tags, ewo_by_tagsTmp.mean(axis=0).to_frame()], axis=1)

    return output, train_by_tags, test_by_tags, ewo_by_tags, model

In [193]:
en_corpus, en_nb_of_phrases = load_corpus(en_corpus_file, max_nb_of_phrases)

In [194]:
nb_word_in_corpus = en_corpus[en_corpus.word != "\n"].word.size
print("Nb word in corpus", nb_word_in_corpus)

Nb word in corpus 4170


In [195]:
en_corpus.head()
en_corpus.loc[en_corpus['ne-tag'] == 'ORG']

Unnamed: 0,word,ne-tag
1335,Sadducees,ORG


In [196]:
tagSet = en_corpus["ne-tag"].dropna().unique()
if BINARY:
    tagSet = ['NE', 'O']
tag2int = {j: i for i, j in enumerate(tagSet)}
int2tag = {i: j for i, j in enumerate(tagSet)}
print(tag2int)

{'O': 0, 'MISC': 1, 'PER': 2, 'LOC': 3, 'ORG': 4}


In [197]:
en_nb_of_phrases

210

In [198]:
en_corpus.describe()

Unnamed: 0,word,ne-tag
count,4379,4170
unique,904,5
top,the,O
freq,313,3779


In [199]:
en_corpus.head(10)

Unnamed: 0,word,ne-tag
0,The,O
1,Promise,O
2,of,O
3,the,O
4,Holy,MISC
5,Spirit,MISC
6,\n,
7,In,O
8,the,O
9,first,O


In [200]:
for tag in tagSet:
    print("{0} % = {1} %".format(tag, np.round(en_corpus[en_corpus['ne-tag']==tag].shape[0] * 100 / en_corpus[en_corpus['ne-tag']!='\n'].shape[0], 2)))

O % = 86.3 %
MISC % = 2.4 %
PER % = 5.59 %
LOC % = 0.91 %
ORG % = 0.02 %


In [201]:
for tag in tagSet:
    print("{0} % = {1} %".format(tag, np.round(en_corpus[en_corpus['ne-tag']==tag].word.unique().shape[0] * 100 / en_corpus[en_corpus['ne-tag']!='\n'].word.unique().shape[0], 2)))

O % = 89.16 %
MISC % = 1.88 %
PER % = 8.96 %
LOC % = 1.99 %
ORG % = 0.11 %


In [202]:
en_corpus[en_corpus.word == "\n"].shape

(209, 2)

In [203]:
print("Nb of bi-phrases", en_nb_of_phrases)

Nb of bi-phrases 210


In [204]:
en_fingerprints = corpus_fingerprint(en_corpus, en_nb_of_phrases)

Normalization strategy: mean_log


In [205]:
en_fingerprints.head(10)

Unnamed: 0,The,Promise,of,the,Holy,Spirit,In,first,book,O,...,considered,dream,She,save,fulfill,Immanuel,us),woke,sleep,knew
0,8.680833,11.078728,5.842286,5.332525,8.370678,8.439671,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,6.599773,0.0,0.0,10.554217,11.65283,11.65283,11.65283,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,6.292995,9.331148,9.400141,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,7.800138,7.290377,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,7.449357,6.939595,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,6.071361,9.109513,9.178506,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,8.554736,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,6.10994,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,0.0,0.0,6.847793,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,7.408932,6.899171,9.937324,10.006316,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [206]:
(4170 / nb_word_in_corpus)

1.0

In [207]:
en_corpus.shape

(4379, 2)

In [208]:
en_fingerprints['you'].values.shape

(210,)

In [209]:
en_corpus[en_corpus.word != "\n"].shape

(4170, 2)

In [210]:
if is_only_vocab:
    text = list(en_corpus[en_corpus.word != "\n"].word.unique())
else:
    text = list(en_corpus[en_corpus.word != "\n"].word)
en_vocab = pd.DataFrame({'text': text})
en_vocab.describe()

Unnamed: 0,text
count,903
unique,903
top,estate
freq,1


In [211]:
if is_only_vocab:
    X = np.zeros((en_vocab.shape[0] * duplication, en_nb_of_phrases))
    target = np.zeros((en_vocab.shape[0] * duplication))
    p=0
    for i, row in en_vocab.iterrows():
        c = row.text
        for j in range(duplication):
            X[p] = en_fingerprints[c.split(" ")[0]]
            target[p] = tag2int[getTag(en_corpus[en_corpus.word == c.split(" ")[-1:][0]]['ne-tag'].iloc[0])]
            p+=1
    X, target = shuffle_data(X, target)
    print(X.shape, en_fingerprints.shape, target.shape)

(903, 210) (210, 903) (903,)


In [212]:
en_vocab[-20:]

Unnamed: 0,text
883,Eliud
884,Eleazar
885,Matthan
886,husband
887,fourteen
888,unwilling
889,shame
890,resolved
891,divorce
892,quietly


In [213]:
if not is_only_vocab:
    X, target = corpus2trainingdata(en_corpus[en_corpus.word != "\n"], en_fingerprints)

In [214]:
if shuffle:
    X, target = shuffle_data(X, target)

In [215]:
y = target.copy()
y[0:100]
if not BINARY:
    y = np_utils.to_categorical(y, len(tagSet))
y.shape

(903, 5)

In [216]:
from sklearn.decomposition import PCA

def visualize(X, y):
    pca = PCA(n_components=2)
    X_embeded = pca.fit_transform(X)
    plt.figure(figsize=(5, 5))
    plt.scatter(X_embeded[:, 0], X_embeded[:, 1], c=y)
    plt.legend()
    plt.show()

In [217]:
# visualize(X, target)

In [218]:
X_train, X_val, y_train, y_val = model_selection.train_test_split(X, y, test_size=0.33)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1])
X_val = X_val.reshape(X_val.shape[0], X_val.shape[1])
print("X_train.shape =", X_train.shape)
print("y_train.shape =", y_train.shape)
print("X_val.shape =", X_val.shape)
print("y_val.shape =", y_val.shape)

tTarget = np.array([np.argmax(yy) for yy in y_train])
vTarget = np.array([np.argmax(yy) for yy in y_val])

for tag in tagSet:
    print("{0} % in training data = {1} %".format(tag, np.round(tTarget[tTarget==tag2int[tag]].size * 100 / tTarget.shape[0], 2)))
    print("{0} % in validation data = {1} %".format(tag, np.round(vTarget[vTarget==tag2int[tag]].size * 100 / vTarget.shape[0], 2)))

X_train.shape = (605, 210)
y_train.shape = (605, 5)
X_val.shape = (298, 210)
y_val.shape = (298, 5)
O % in training data = 89.42 %
O % in validation data = 85.91 %
MISC % in training data = 0.83 %
MISC % in validation data = 2.01 %
PER % in training data = 7.77 %
PER % in validation data = 10.07 %
LOC % in training data = 1.98 %
LOC % in validation data = 1.68 %
ORG % in training data = 0.0 %
ORG % in validation data = 0.34 %


In [219]:
ewo_corpus, ewo_nb_of_phrases = load_corpus(ewo_corpus_file, max_nb_of_phrases)

In [220]:
ewo_corpus.loc[ewo_corpus['ne-tag'] == 'PER']

Unnamed: 0,word,ne-tag
6,Teofil,PER
15,Yesus,PER
86,Yohannes,PER
104,Yesus,PER
230,Yesus,PER
...,...,...
3676,Maria,PER
3697,Yesus,PER
3740,Emmanuel,PER
3750,Yosef,PER


In [221]:
ewo_nb_of_phrases

210

In [222]:
for tag in tagSet:
    print("{0} % = {1} %".format(tag, np.round(ewo_corpus[ewo_corpus['ne-tag']==tag].shape[0] * 100 / ewo_corpus[ewo_corpus['ne-tag']!='\n'].shape[0], 2)))

O % = 84.15 %
MISC % = 2.54 %
PER % = 6.69 %
LOC % = 1.03 %
ORG % = 0.05 %


In [223]:
for tag in tagSet:
    print("{0} % = {1} %".format(tag, np.round(ewo_corpus[ewo_corpus['ne-tag']==tag].word.unique().shape[0] * 100 / ewo_corpus[ewo_corpus['ne-tag']!='\n'].word.unique().shape[0], 2)))

O % = 89.94 %
MISC % = 1.17 %
PER % = 8.3 %
LOC % = 1.86 %
ORG % = 0.2 %


In [224]:
ewo_corpus.describe()

Unnamed: 0,word,ne-tag
count,3779,3570
unique,1024,5
top,\n,O
freq,209,3180


In [225]:
ewo_corpus.head()

Unnamed: 0,word,ne-tag
0,Mfufub,MISC
1,Nsisim,MISC
2,ayi,O
3,sò,O
4,\n,


In [226]:
ewo_fingerprints = corpus_fingerprint(ewo_corpus, en_nb_of_phrases)

Normalization strategy: mean_log


In [227]:
if is_only_vocab:
    text = list(ewo_corpus[ewo_corpus.word != "\n"].word.unique())
else:
    text = list(ewo_corpus[ewo_corpus.word != "\n"].word)
ewo_vocab = pd.DataFrame({"text":text})

In [228]:
if is_only_vocab:
    X_ewo = np.zeros((ewo_vocab.shape[0] * duplication, en_nb_of_phrases))
    ewo_target = np.zeros((ewo_vocab.shape[0] * duplication))
    p=0
    for i, row in ewo_vocab.iterrows():
        c = row.text
        for j in range(duplication):
            X_ewo[p] = ewo_fingerprints[c.split(" ")[0]]
            ewo_target[p] = tag2int[getTag(ewo_corpus[ewo_corpus.word == c.split(" ")[-1:][0]]['ne-tag'].iloc[0])]
            p+=1
    X_ewo, ewo_target = shuffle_data(X_ewo, ewo_target)

In [229]:
ewo_vocab[-10:]

Unnamed: 0,text
1013,sik
1014,Ntud
1015,bëyole
1016,Emmanuel
1017,Avëbë
1018,angavëbë
1019,oyò
1020,angabende
1021,anganòṅ
1022,angayole


In [230]:
if not is_only_vocab:
    X_ewo, ewo_target = corpus2trainingdata(ewo_corpus[ewo_corpus.word != "\n"], ewo_fingerprints)

In [231]:
if shuffle:
    X_ewo, ewo_target = shuffle_data(X_ewo, ewo_target)

In [232]:
y_ewo = ewo_target.copy()
print(y_ewo.shape, len(ewo_vocab))

(1023,) 1023


In [233]:
X_ewo.shape

(1023, 210)

In [234]:
y_ewo = ewo_target.copy()
y_ewo[:20]
if not BINARY:
    y_ewo = np_utils.to_categorical(y_ewo)

In [235]:
X_ewo = X_ewo.reshape((X_ewo.shape[0], en_nb_of_phrases))

In [236]:
# model = create_model(X.shape[1], len(tagSet))
# resultEval, train_by_tag, test_by_tag, ewo_by_tag = algoEval(X_train, y_train, X_val, y_val, X_ewo, y_ewo, model=model)

In [237]:
# resultEval

In [238]:
# train_by_tag

In [239]:
# test_by_tag

In [240]:
# ewo_by_tag

In [241]:
# resultEval.mean()

In [242]:
# resultEval.std()

In [243]:
resultCrossVal, trainByTagResult, testByTagResult, ewoByTagResult, model = algoCrossVal(X, y, X_ewo, y_ewo, repeat=10)

AlgoCrossValIter - 1
Model: "sequential_21"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.25042, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.25042 to 0.23072, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.23072

Epoch 00004: val_loss did not improve from 0.23072

Epoch 00005: val_loss did not improve from 0.23072

Epoch 00006: 




Epoch 00002: val_loss did not improve from 0.17474

Epoch 00003: val_loss did not improve from 0.17474

Epoch 00004: val_loss did not improve from 0.17474

Epoch 00005: val_loss did not improve from 0.17474

Epoch 00006: val_loss did not improve from 0.17474

Epoch 00007: val_loss did not improve from 0.17474

Epoch 00008: val_loss did not improve from 0.17474

Epoch 00009: val_loss did not improve from 0.17474

Epoch 00010: val_loss did not improve from 0.17474





Epoch 00001: val_loss improved from inf to 0.15540, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.15540 to 0.13522, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.13522

Epoch 00004: val_loss did not improve from 0.13522

Epoch 00005: val_loss did not improve from 0.13522

Epoch 00006: val_loss did not improve from 0.13522

Epoch 00007: val_loss did not improve from 0.13522

Epoch 00008: val_loss did not improve from 0.13522

Epoch 00009: val_loss did not improve from 0.13522

Epoch 00010: val_loss did not improve from 0.13522





Epoch 00001: val_loss improved from inf to 0.09829, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09829

Epoch 00003: val_loss did not improve from 0.09829

Epoch 00004: val_loss did not improve from 0.09829

Epoch 00005: val_loss did not improve from 0.09829

Epoch 00006: val_loss did not improve from 0.09829

Epoch 00007: val_loss did not improve from 0.09829

Epoch 00008: val_loss did not improve from 0.09829

Epoch 00009: val_loss did not improve from 0.09829

Epoch 00010: val_loss did not improve from 0.09829





Epoch 00001: val_loss improved from inf to 0.09952, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09952

Epoch 00003: val_loss did not improve from 0.09952

Epoch 00004: val_loss did not improve from 0.09952

Epoch 00005: val_loss did not improve from 0.09952

Epoch 00006: val_loss did not improve from 0.09952

Epoch 00007: val_loss did not improve from 0.09952

Epoch 00008: val_loss did not improve from 0.09952

Epoch 00009: val_loss did not improve from 0.09952

Epoch 00010: val_loss did not improve from 0.09952





Epoch 00001: val_loss improved from inf to 0.09671, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09671

Epoch 00003: val_loss did not improve from 0.09671

Epoch 00004: val_loss did not improve from 0.09671

Epoch 00005: val_loss did not improve from 0.09671

Epoch 00006: val_loss did not improve from 0.09671

Epoch 00007: val_loss did not improve from 0.09671

Epoch 00008: val_loss did not improve from 0.09671

Epoch 00009: val_loss did not improve from 0.09671

Epoch 00010: val_loss did not improve from 0.09671





Epoch 00001: val_loss improved from inf to 0.11358, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.11358

Epoch 00003: val_loss did not improve from 0.11358

Epoch 00004: val_loss did not improve from 0.11358

Epoch 00005: val_loss did not improve from 0.11358

Epoch 00006: val_loss did not improve from 0.11358

Epoch 00007: val_loss did not improve from 0.11358

Epoch 00008: val_loss did not improve from 0.11358

Epoch 00009: val_loss did not improve from 0.11358

Epoch 00010: val_loss did not improve from 0.11358





Epoch 00001: val_loss improved from inf to 0.04422, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04422

Epoch 00003: val_loss did not improve from 0.04422

Epoch 00004: val_loss did not improve from 0.04422

Epoch 00005: val_loss did not improve from 0.04422

Epoch 00006: val_loss did not improve from 0.04422

Epoch 00007: val_loss did not improve from 0.04422

Epoch 00008: val_loss did not improve from 0.04422

Epoch 00009: val_loss did not improve from 0.04422

Epoch 00010: val_loss did not improve from 0.04422

Epoch 00001: val_loss improved from inf to 0.07577, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07577

Epoch 00003: val_loss did not improve from 0.07577

Epoch 00004: val_loss did not improve from 0.07577

Epoch 00005: val_loss did not improve from 0.07577

Epoch 00006: val_loss did not improve from 0.07577

Epoch 00007: val_loss did not improve from 0.07577

Epoch 00008: val_loss did not improve




Epoch 00001: val_loss improved from inf to 0.04071, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04071

Epoch 00003: val_loss did not improve from 0.04071

Epoch 00004: val_loss did not improve from 0.04071

Epoch 00005: val_loss did not improve from 0.04071

Epoch 00006: val_loss did not improve from 0.04071

Epoch 00007: val_loss did not improve from 0.04071

Epoch 00008: val_loss did not improve from 0.04071

Epoch 00009: val_loss did not improve from 0.04071

Epoch 00010: val_loss did not improve from 0.04071




AlgoCrossValIter - 2
Model: "sequential_22"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.23907, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.23907

Epoch 00003: val_loss improved from 0.23907 to 0.20286, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.20286

Epoch 00005: val_loss did not improve from 0.20286

Epoch 00006: 




Epoch 00001: val_loss improved from inf to 0.24000, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.24000 to 0.23393, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.23393

Epoch 00004: val_loss did not improve from 0.23393

Epoch 00005: val_loss did not improve from 0.23393

Epoch 00006: val_loss did not improve from 0.23393

Epoch 00007: val_loss did not improve from 0.23393

Epoch 00008: val_loss did not improve from 0.23393

Epoch 00009: val_loss did not improve from 0.23393

Epoch 00010: val_loss did not improve from 0.23393





Epoch 00001: val_loss improved from inf to 0.13167, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.13167

Epoch 00003: val_loss did not improve from 0.13167

Epoch 00004: val_loss did not improve from 0.13167

Epoch 00005: val_loss did not improve from 0.13167

Epoch 00006: val_loss did not improve from 0.13167

Epoch 00007: val_loss did not improve from 0.13167

Epoch 00008: val_loss did not improve from 0.13167

Epoch 00009: val_loss did not improve from 0.13167

Epoch 00010: val_loss did not improve from 0.13167





Epoch 00001: val_loss improved from inf to 0.18803, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.18803

Epoch 00003: val_loss improved from 0.18803 to 0.16696, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.16696

Epoch 00005: val_loss did not improve from 0.16696

Epoch 00006: val_loss did not improve from 0.16696

Epoch 00007: val_loss did not improve from 0.16696

Epoch 00008: val_loss did not improve from 0.16696

Epoch 00009: val_loss did not improve from 0.16696

Epoch 00010: val_loss did not improve from 0.16696





Epoch 00001: val_loss improved from inf to 0.11817, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.11817

Epoch 00003: val_loss did not improve from 0.11817

Epoch 00004: val_loss did not improve from 0.11817

Epoch 00005: val_loss did not improve from 0.11817

Epoch 00006: val_loss did not improve from 0.11817

Epoch 00007: val_loss did not improve from 0.11817

Epoch 00008: val_loss did not improve from 0.11817

Epoch 00009: val_loss did not improve from 0.11817

Epoch 00010: val_loss did not improve from 0.11817





Epoch 00001: val_loss improved from inf to 0.10813, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10813

Epoch 00003: val_loss did not improve from 0.10813

Epoch 00004: val_loss did not improve from 0.10813

Epoch 00005: val_loss did not improve from 0.10813

Epoch 00006: val_loss did not improve from 0.10813

Epoch 00007: val_loss did not improve from 0.10813

Epoch 00008: val_loss did not improve from 0.10813

Epoch 00009: val_loss did not improve from 0.10813

Epoch 00010: val_loss did not improve from 0.10813





Epoch 00001: val_loss improved from inf to 0.10466, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10466

Epoch 00003: val_loss did not improve from 0.10466

Epoch 00004: val_loss did not improve from 0.10466

Epoch 00005: val_loss did not improve from 0.10466

Epoch 00006: val_loss did not improve from 0.10466

Epoch 00007: val_loss did not improve from 0.10466

Epoch 00008: val_loss did not improve from 0.10466

Epoch 00009: val_loss did not improve from 0.10466

Epoch 00010: val_loss did not improve from 0.10466





Epoch 00001: val_loss improved from inf to 0.05553, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.05553 to 0.05345, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.05345

Epoch 00004: val_loss did not improve from 0.05345

Epoch 00005: val_loss did not improve from 0.05345

Epoch 00006: val_loss did not improve from 0.05345

Epoch 00007: val_loss did not improve from 0.05345

Epoch 00008: val_loss did not improve from 0.05345

Epoch 00009: val_loss did not improve from 0.05345

Epoch 00010: val_loss did not improve from 0.05345

Epoch 00001: val_loss improved from inf to 0.06420, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06420

Epoch 00003: val_loss did not improve from 0.06420

Epoch 00004: val_loss did not improve from 0.06420

Epoch 00005: val_loss did not improve from 0.06420

Epoch 00006: val_loss did not improve from 0.06420

Epoch 00007: val_loss did not improve from 0.0




Epoch 00001: val_loss improved from inf to 0.05336, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05336

Epoch 00003: val_loss did not improve from 0.05336

Epoch 00004: val_loss did not improve from 0.05336

Epoch 00005: val_loss did not improve from 0.05336

Epoch 00006: val_loss did not improve from 0.05336

Epoch 00007: val_loss did not improve from 0.05336

Epoch 00008: val_loss did not improve from 0.05336

Epoch 00009: val_loss did not improve from 0.05336

Epoch 00010: val_loss did not improve from 0.05336




AlgoCrossValIter - 3
Model: "sequential_23"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.30190, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.30190

Epoch 00003: val_loss improved from 0.30190 to 0.24213, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.24213

Epoch 00005: val_loss did not improve from 0.24213

Epoch 00006: 




Epoch 00001: val_loss improved from inf to 0.17897, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.17897

Epoch 00003: val_loss did not improve from 0.17897

Epoch 00004: val_loss did not improve from 0.17897

Epoch 00005: val_loss did not improve from 0.17897

Epoch 00006: val_loss did not improve from 0.17897

Epoch 00007: val_loss did not improve from 0.17897

Epoch 00008: val_loss did not improve from 0.17897

Epoch 00009: val_loss did not improve from 0.17897

Epoch 00010: val_loss did not improve from 0.17897





Epoch 00001: val_loss improved from inf to 0.12827, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.12827

Epoch 00003: val_loss did not improve from 0.12827

Epoch 00004: val_loss did not improve from 0.12827

Epoch 00005: val_loss did not improve from 0.12827

Epoch 00006: val_loss did not improve from 0.12827

Epoch 00007: val_loss did not improve from 0.12827

Epoch 00008: val_loss did not improve from 0.12827

Epoch 00009: val_loss did not improve from 0.12827

Epoch 00010: val_loss did not improve from 0.12827





Epoch 00001: val_loss improved from inf to 0.10730, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10730

Epoch 00003: val_loss did not improve from 0.10730

Epoch 00004: val_loss did not improve from 0.10730

Epoch 00005: val_loss did not improve from 0.10730

Epoch 00006: val_loss did not improve from 0.10730

Epoch 00007: val_loss did not improve from 0.10730

Epoch 00008: val_loss did not improve from 0.10730

Epoch 00009: val_loss did not improve from 0.10730

Epoch 00010: val_loss did not improve from 0.10730





Epoch 00001: val_loss improved from inf to 0.10559, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.10559 to 0.10059, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.10059

Epoch 00004: val_loss did not improve from 0.10059

Epoch 00005: val_loss did not improve from 0.10059

Epoch 00006: val_loss did not improve from 0.10059

Epoch 00007: val_loss did not improve from 0.10059

Epoch 00008: val_loss did not improve from 0.10059

Epoch 00009: val_loss did not improve from 0.10059

Epoch 00010: val_loss did not improve from 0.10059





Epoch 00001: val_loss improved from inf to 0.11469, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.11469

Epoch 00003: val_loss did not improve from 0.11469

Epoch 00004: val_loss did not improve from 0.11469

Epoch 00005: val_loss did not improve from 0.11469

Epoch 00006: val_loss did not improve from 0.11469

Epoch 00007: val_loss did not improve from 0.11469

Epoch 00008: val_loss did not improve from 0.11469

Epoch 00009: val_loss did not improve from 0.11469

Epoch 00010: val_loss did not improve from 0.11469





Epoch 00001: val_loss improved from inf to 0.10460, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10460

Epoch 00003: val_loss did not improve from 0.10460

Epoch 00004: val_loss did not improve from 0.10460

Epoch 00005: val_loss did not improve from 0.10460

Epoch 00006: val_loss did not improve from 0.10460

Epoch 00007: val_loss did not improve from 0.10460

Epoch 00008: val_loss did not improve from 0.10460

Epoch 00009: val_loss did not improve from 0.10460

Epoch 00010: val_loss did not improve from 0.10460





Epoch 00001: val_loss improved from inf to 0.07346, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.07346 to 0.06770, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.06770

Epoch 00004: val_loss improved from 0.06770 to 0.06237, saving model to best-model-conll.hdfs

Epoch 00005: val_loss did not improve from 0.06237

Epoch 00006: val_loss did not improve from 0.06237

Epoch 00007: val_loss did not improve from 0.06237

Epoch 00008: val_loss did not improve from 0.06237

Epoch 00009: val_loss did not improve from 0.06237

Epoch 00010: val_loss did not improve from 0.06237

Epoch 00001: val_loss improved from inf to 0.07368, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07368

Epoch 00003: val_loss did not improve from 0.07368

Epoch 00004: val_loss did not improve from 0.07368

Epoch 00005: val_loss did not improve from 0.07368

Epoch 00006: val_loss did not improve from 0.07368

Epo




Epoch 00001: val_loss improved from inf to 0.04539, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04539

Epoch 00003: val_loss did not improve from 0.04539

Epoch 00004: val_loss did not improve from 0.04539

Epoch 00005: val_loss did not improve from 0.04539

Epoch 00006: val_loss did not improve from 0.04539

Epoch 00007: val_loss did not improve from 0.04539

Epoch 00008: val_loss did not improve from 0.04539

Epoch 00009: val_loss did not improve from 0.04539

Epoch 00010: val_loss did not improve from 0.04539




AlgoCrossValIter - 4
Model: "sequential_24"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.35599, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.35599 to 0.22507, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.22507 to 0.20881, saving model to best-model-conll.hdfs

Epoch 00004: val_loss improved from 0.20881 to 0.20590, saving model to be




Epoch 00001: val_loss improved from inf to 0.18318, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.18318

Epoch 00003: val_loss did not improve from 0.18318

Epoch 00004: val_loss did not improve from 0.18318

Epoch 00005: val_loss did not improve from 0.18318

Epoch 00006: val_loss did not improve from 0.18318

Epoch 00007: val_loss did not improve from 0.18318

Epoch 00008: val_loss did not improve from 0.18318

Epoch 00009: val_loss did not improve from 0.18318

Epoch 00010: val_loss did not improve from 0.18318





Epoch 00001: val_loss improved from inf to 0.10872, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10872

Epoch 00003: val_loss did not improve from 0.10872

Epoch 00004: val_loss did not improve from 0.10872

Epoch 00005: val_loss did not improve from 0.10872

Epoch 00006: val_loss did not improve from 0.10872

Epoch 00007: val_loss did not improve from 0.10872

Epoch 00008: val_loss did not improve from 0.10872

Epoch 00009: val_loss did not improve from 0.10872

Epoch 00010: val_loss did not improve from 0.10872





Epoch 00001: val_loss improved from inf to 0.12137, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.12137

Epoch 00003: val_loss did not improve from 0.12137

Epoch 00004: val_loss did not improve from 0.12137

Epoch 00005: val_loss did not improve from 0.12137

Epoch 00006: val_loss did not improve from 0.12137

Epoch 00007: val_loss did not improve from 0.12137

Epoch 00008: val_loss did not improve from 0.12137

Epoch 00009: val_loss did not improve from 0.12137

Epoch 00010: val_loss did not improve from 0.12137





Epoch 00001: val_loss improved from inf to 0.11322, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.11322

Epoch 00003: val_loss did not improve from 0.11322

Epoch 00004: val_loss did not improve from 0.11322

Epoch 00005: val_loss did not improve from 0.11322

Epoch 00006: val_loss did not improve from 0.11322

Epoch 00007: val_loss did not improve from 0.11322

Epoch 00008: val_loss did not improve from 0.11322

Epoch 00009: val_loss did not improve from 0.11322

Epoch 00010: val_loss did not improve from 0.11322





Epoch 00001: val_loss improved from inf to 0.10644, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10644

Epoch 00003: val_loss did not improve from 0.10644

Epoch 00004: val_loss did not improve from 0.10644

Epoch 00005: val_loss did not improve from 0.10644

Epoch 00006: val_loss did not improve from 0.10644

Epoch 00007: val_loss did not improve from 0.10644

Epoch 00008: val_loss did not improve from 0.10644

Epoch 00009: val_loss did not improve from 0.10644

Epoch 00010: val_loss did not improve from 0.10644





Epoch 00001: val_loss improved from inf to 0.10549, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10549

Epoch 00003: val_loss did not improve from 0.10549

Epoch 00004: val_loss did not improve from 0.10549

Epoch 00005: val_loss did not improve from 0.10549

Epoch 00006: val_loss did not improve from 0.10549

Epoch 00007: val_loss did not improve from 0.10549

Epoch 00008: val_loss did not improve from 0.10549

Epoch 00009: val_loss did not improve from 0.10549

Epoch 00010: val_loss did not improve from 0.10549





Epoch 00001: val_loss improved from inf to 0.04865, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.04865 to 0.04859, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.04859

Epoch 00004: val_loss did not improve from 0.04859

Epoch 00005: val_loss did not improve from 0.04859

Epoch 00006: val_loss did not improve from 0.04859

Epoch 00007: val_loss did not improve from 0.04859

Epoch 00008: val_loss did not improve from 0.04859

Epoch 00009: val_loss did not improve from 0.04859

Epoch 00010: val_loss did not improve from 0.04859

Epoch 00001: val_loss improved from inf to 0.06518, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06518

Epoch 00003: val_loss did not improve from 0.06518

Epoch 00004: val_loss did not improve from 0.06518

Epoch 00005: val_loss did not improve from 0.06518

Epoch 00006: val_loss did not improve from 0.06518

Epoch 00007: val_loss did not improve from 0.0




Epoch 00001: val_loss improved from inf to 0.04832, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04832

Epoch 00003: val_loss did not improve from 0.04832

Epoch 00004: val_loss did not improve from 0.04832

Epoch 00005: val_loss did not improve from 0.04832

Epoch 00006: val_loss did not improve from 0.04832

Epoch 00007: val_loss did not improve from 0.04832

Epoch 00008: val_loss did not improve from 0.04832

Epoch 00009: val_loss did not improve from 0.04832

Epoch 00010: val_loss did not improve from 0.04832




AlgoCrossValIter - 5
Model: "sequential_25"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.25224, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.25224 to 0.22601, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.22601

Epoch 00004: val_loss did not improve from 0.22601

Epoch 00005: val_loss did not improve from 0.22601

Epoch 00006: 




Epoch 00002: val_loss did not improve from 0.18073

Epoch 00003: val_loss did not improve from 0.18073

Epoch 00004: val_loss did not improve from 0.18073

Epoch 00005: val_loss did not improve from 0.18073

Epoch 00006: val_loss did not improve from 0.18073

Epoch 00007: val_loss did not improve from 0.18073

Epoch 00008: val_loss did not improve from 0.18073

Epoch 00009: val_loss did not improve from 0.18073

Epoch 00010: val_loss did not improve from 0.18073





Epoch 00001: val_loss improved from inf to 0.11092, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.11092

Epoch 00003: val_loss did not improve from 0.11092

Epoch 00004: val_loss did not improve from 0.11092

Epoch 00005: val_loss did not improve from 0.11092

Epoch 00006: val_loss did not improve from 0.11092

Epoch 00007: val_loss did not improve from 0.11092

Epoch 00008: val_loss did not improve from 0.11092

Epoch 00009: val_loss did not improve from 0.11092

Epoch 00010: val_loss did not improve from 0.11092





Epoch 00001: val_loss improved from inf to 0.08711, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08711

Epoch 00003: val_loss did not improve from 0.08711

Epoch 00004: val_loss did not improve from 0.08711

Epoch 00005: val_loss did not improve from 0.08711

Epoch 00006: val_loss did not improve from 0.08711

Epoch 00007: val_loss did not improve from 0.08711

Epoch 00008: val_loss did not improve from 0.08711

Epoch 00009: val_loss did not improve from 0.08711

Epoch 00010: val_loss did not improve from 0.08711





Epoch 00001: val_loss improved from inf to 0.10984, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10984

Epoch 00003: val_loss did not improve from 0.10984

Epoch 00004: val_loss did not improve from 0.10984

Epoch 00005: val_loss did not improve from 0.10984

Epoch 00006: val_loss did not improve from 0.10984

Epoch 00007: val_loss did not improve from 0.10984

Epoch 00008: val_loss did not improve from 0.10984

Epoch 00009: val_loss did not improve from 0.10984

Epoch 00010: val_loss did not improve from 0.10984





Epoch 00001: val_loss improved from inf to 0.11318, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.11318

Epoch 00003: val_loss did not improve from 0.11318

Epoch 00004: val_loss did not improve from 0.11318

Epoch 00005: val_loss did not improve from 0.11318

Epoch 00006: val_loss did not improve from 0.11318

Epoch 00007: val_loss did not improve from 0.11318

Epoch 00008: val_loss did not improve from 0.11318

Epoch 00009: val_loss did not improve from 0.11318

Epoch 00010: val_loss did not improve from 0.11318





Epoch 00001: val_loss improved from inf to 0.11424, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.11424

Epoch 00003: val_loss did not improve from 0.11424

Epoch 00004: val_loss did not improve from 0.11424

Epoch 00005: val_loss did not improve from 0.11424

Epoch 00006: val_loss did not improve from 0.11424

Epoch 00007: val_loss did not improve from 0.11424

Epoch 00008: val_loss did not improve from 0.11424

Epoch 00009: val_loss did not improve from 0.11424

Epoch 00010: val_loss did not improve from 0.11424





Epoch 00001: val_loss improved from inf to 0.05356, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05356

Epoch 00003: val_loss did not improve from 0.05356

Epoch 00004: val_loss did not improve from 0.05356

Epoch 00005: val_loss did not improve from 0.05356

Epoch 00006: val_loss did not improve from 0.05356

Epoch 00007: val_loss did not improve from 0.05356

Epoch 00008: val_loss did not improve from 0.05356

Epoch 00009: val_loss did not improve from 0.05356

Epoch 00010: val_loss did not improve from 0.05356

Epoch 00001: val_loss improved from inf to 0.06664, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06664

Epoch 00003: val_loss did not improve from 0.06664

Epoch 00004: val_loss did not improve from 0.06664

Epoch 00005: val_loss did not improve from 0.06664

Epoch 00006: val_loss did not improve from 0.06664

Epoch 00007: val_loss did not improve from 0.06664

Epoch 00008: val_loss did not improve




Epoch 00001: val_loss improved from inf to 0.04204, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04204

Epoch 00003: val_loss did not improve from 0.04204

Epoch 00004: val_loss did not improve from 0.04204

Epoch 00005: val_loss did not improve from 0.04204

Epoch 00006: val_loss did not improve from 0.04204

Epoch 00007: val_loss did not improve from 0.04204

Epoch 00008: val_loss did not improve from 0.04204

Epoch 00009: val_loss did not improve from 0.04204

Epoch 00010: val_loss did not improve from 0.04204




AlgoCrossValIter - 6
Model: "sequential_26"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.72878, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.72878 to 0.32068, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.32068 to 0.20240, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.20240

Epoch 00005: val_loss 




Epoch 00001: val_loss improved from inf to 0.20865, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.20865 to 0.20350, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.20350

Epoch 00004: val_loss did not improve from 0.20350

Epoch 00005: val_loss did not improve from 0.20350

Epoch 00006: val_loss did not improve from 0.20350

Epoch 00007: val_loss did not improve from 0.20350

Epoch 00008: val_loss did not improve from 0.20350

Epoch 00009: val_loss did not improve from 0.20350

Epoch 00010: val_loss did not improve from 0.20350





Epoch 00001: val_loss improved from inf to 0.12301, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.12301

Epoch 00003: val_loss did not improve from 0.12301

Epoch 00004: val_loss did not improve from 0.12301

Epoch 00005: val_loss did not improve from 0.12301

Epoch 00006: val_loss did not improve from 0.12301

Epoch 00007: val_loss did not improve from 0.12301

Epoch 00008: val_loss did not improve from 0.12301

Epoch 00009: val_loss did not improve from 0.12301

Epoch 00010: val_loss did not improve from 0.12301





Epoch 00001: val_loss improved from inf to 0.10322, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10322

Epoch 00003: val_loss did not improve from 0.10322

Epoch 00004: val_loss did not improve from 0.10322

Epoch 00005: val_loss did not improve from 0.10322

Epoch 00006: val_loss did not improve from 0.10322

Epoch 00007: val_loss did not improve from 0.10322

Epoch 00008: val_loss did not improve from 0.10322

Epoch 00009: val_loss did not improve from 0.10322

Epoch 00010: val_loss did not improve from 0.10322





Epoch 00001: val_loss improved from inf to 0.10376, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10376

Epoch 00003: val_loss did not improve from 0.10376

Epoch 00004: val_loss did not improve from 0.10376

Epoch 00005: val_loss did not improve from 0.10376

Epoch 00006: val_loss did not improve from 0.10376

Epoch 00007: val_loss did not improve from 0.10376

Epoch 00008: val_loss did not improve from 0.10376

Epoch 00009: val_loss did not improve from 0.10376

Epoch 00010: val_loss did not improve from 0.10376





Epoch 00001: val_loss improved from inf to 0.10147, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10147

Epoch 00003: val_loss did not improve from 0.10147

Epoch 00004: val_loss did not improve from 0.10147

Epoch 00005: val_loss did not improve from 0.10147

Epoch 00006: val_loss did not improve from 0.10147

Epoch 00007: val_loss did not improve from 0.10147

Epoch 00008: val_loss did not improve from 0.10147

Epoch 00009: val_loss did not improve from 0.10147

Epoch 00010: val_loss did not improve from 0.10147





Epoch 00001: val_loss improved from inf to 0.10661, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10661

Epoch 00003: val_loss did not improve from 0.10661

Epoch 00004: val_loss did not improve from 0.10661

Epoch 00005: val_loss did not improve from 0.10661

Epoch 00006: val_loss did not improve from 0.10661

Epoch 00007: val_loss did not improve from 0.10661

Epoch 00008: val_loss did not improve from 0.10661

Epoch 00009: val_loss did not improve from 0.10661

Epoch 00010: val_loss did not improve from 0.10661





Epoch 00001: val_loss improved from inf to 0.05380, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05380

Epoch 00003: val_loss did not improve from 0.05380

Epoch 00004: val_loss did not improve from 0.05380

Epoch 00005: val_loss did not improve from 0.05380

Epoch 00006: val_loss did not improve from 0.05380

Epoch 00007: val_loss did not improve from 0.05380

Epoch 00008: val_loss did not improve from 0.05380

Epoch 00009: val_loss did not improve from 0.05380

Epoch 00010: val_loss did not improve from 0.05380

Epoch 00001: val_loss improved from inf to 0.06367, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06367

Epoch 00003: val_loss did not improve from 0.06367

Epoch 00004: val_loss did not improve from 0.06367

Epoch 00005: val_loss did not improve from 0.06367

Epoch 00006: val_loss did not improve from 0.06367

Epoch 00007: val_loss did not improve from 0.06367

Epoch 00008: val_loss did not improve




Epoch 00001: val_loss improved from inf to 0.06249, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06249

Epoch 00003: val_loss did not improve from 0.06249

Epoch 00004: val_loss did not improve from 0.06249

Epoch 00005: val_loss did not improve from 0.06249

Epoch 00006: val_loss did not improve from 0.06249

Epoch 00007: val_loss did not improve from 0.06249

Epoch 00008: val_loss did not improve from 0.06249

Epoch 00009: val_loss did not improve from 0.06249

Epoch 00010: val_loss did not improve from 0.06249




AlgoCrossValIter - 7
Model: "sequential_27"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.28338, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.28338 to 0.21029, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.21029

Epoch 00004: val_loss did not improve from 0.21029

Epoch 00005: val_loss did not improve from 0.21029

Epoch 00006: 




Epoch 00001: val_loss improved from inf to 0.17984, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.17984

Epoch 00003: val_loss did not improve from 0.17984

Epoch 00004: val_loss did not improve from 0.17984

Epoch 00005: val_loss did not improve from 0.17984

Epoch 00006: val_loss did not improve from 0.17984

Epoch 00007: val_loss did not improve from 0.17984

Epoch 00008: val_loss did not improve from 0.17984

Epoch 00009: val_loss did not improve from 0.17984

Epoch 00010: val_loss did not improve from 0.17984





Epoch 00001: val_loss improved from inf to 0.11549, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.11549

Epoch 00003: val_loss did not improve from 0.11549

Epoch 00004: val_loss did not improve from 0.11549

Epoch 00005: val_loss did not improve from 0.11549

Epoch 00006: val_loss did not improve from 0.11549

Epoch 00007: val_loss did not improve from 0.11549

Epoch 00008: val_loss did not improve from 0.11549

Epoch 00009: val_loss did not improve from 0.11549

Epoch 00010: val_loss did not improve from 0.11549





Epoch 00001: val_loss improved from inf to 0.09467, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09467

Epoch 00003: val_loss did not improve from 0.09467

Epoch 00004: val_loss did not improve from 0.09467

Epoch 00005: val_loss did not improve from 0.09467

Epoch 00006: val_loss did not improve from 0.09467

Epoch 00007: val_loss did not improve from 0.09467

Epoch 00008: val_loss did not improve from 0.09467

Epoch 00009: val_loss did not improve from 0.09467

Epoch 00010: val_loss did not improve from 0.09467





Epoch 00001: val_loss improved from inf to 0.09568, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09568

Epoch 00003: val_loss did not improve from 0.09568

Epoch 00004: val_loss did not improve from 0.09568

Epoch 00005: val_loss did not improve from 0.09568

Epoch 00006: val_loss did not improve from 0.09568

Epoch 00007: val_loss did not improve from 0.09568

Epoch 00008: val_loss did not improve from 0.09568

Epoch 00009: val_loss did not improve from 0.09568

Epoch 00010: val_loss did not improve from 0.09568





Epoch 00001: val_loss improved from inf to 0.09026, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09026

Epoch 00003: val_loss did not improve from 0.09026

Epoch 00004: val_loss did not improve from 0.09026

Epoch 00005: val_loss did not improve from 0.09026

Epoch 00006: val_loss did not improve from 0.09026

Epoch 00007: val_loss did not improve from 0.09026

Epoch 00008: val_loss did not improve from 0.09026

Epoch 00009: val_loss did not improve from 0.09026

Epoch 00010: val_loss did not improve from 0.09026





Epoch 00001: val_loss improved from inf to 0.12259, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.12259

Epoch 00003: val_loss did not improve from 0.12259

Epoch 00004: val_loss did not improve from 0.12259

Epoch 00005: val_loss did not improve from 0.12259

Epoch 00006: val_loss did not improve from 0.12259

Epoch 00007: val_loss did not improve from 0.12259

Epoch 00008: val_loss did not improve from 0.12259

Epoch 00009: val_loss did not improve from 0.12259

Epoch 00010: val_loss did not improve from 0.12259





Epoch 00001: val_loss improved from inf to 0.04323, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04323

Epoch 00003: val_loss did not improve from 0.04323

Epoch 00004: val_loss did not improve from 0.04323

Epoch 00005: val_loss did not improve from 0.04323

Epoch 00006: val_loss did not improve from 0.04323

Epoch 00007: val_loss did not improve from 0.04323

Epoch 00008: val_loss did not improve from 0.04323

Epoch 00009: val_loss did not improve from 0.04323

Epoch 00010: val_loss did not improve from 0.04323

Epoch 00001: val_loss improved from inf to 0.06691, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06691

Epoch 00003: val_loss did not improve from 0.06691

Epoch 00004: val_loss did not improve from 0.06691

Epoch 00005: val_loss did not improve from 0.06691

Epoch 00006: val_loss did not improve from 0.06691

Epoch 00007: val_loss did not improve from 0.06691

Epoch 00008: val_loss did not improve




Epoch 00001: val_loss improved from inf to 0.05002, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.05002 to 0.04883, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.04883

Epoch 00004: val_loss did not improve from 0.04883

Epoch 00005: val_loss did not improve from 0.04883

Epoch 00006: val_loss did not improve from 0.04883

Epoch 00007: val_loss did not improve from 0.04883

Epoch 00008: val_loss did not improve from 0.04883

Epoch 00009: val_loss did not improve from 0.04883

Epoch 00010: val_loss did not improve from 0.04883




AlgoCrossValIter - 8
Model: "sequential_28"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.56571, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.56571 to 0.22250, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.22250 to 0.22007, saving model to best-model-conll.hdfs

Epoch 00004: val_loss improved from 0.22007 to 0.21000, saving model to be




Epoch 00001: val_loss improved from inf to 0.19235, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.19235

Epoch 00003: val_loss did not improve from 0.19235

Epoch 00004: val_loss did not improve from 0.19235

Epoch 00005: val_loss did not improve from 0.19235

Epoch 00006: val_loss did not improve from 0.19235

Epoch 00007: val_loss did not improve from 0.19235

Epoch 00008: val_loss did not improve from 0.19235

Epoch 00009: val_loss did not improve from 0.19235

Epoch 00010: val_loss did not improve from 0.19235





Epoch 00001: val_loss improved from inf to 0.11330, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.11330

Epoch 00003: val_loss did not improve from 0.11330

Epoch 00004: val_loss did not improve from 0.11330

Epoch 00005: val_loss did not improve from 0.11330

Epoch 00006: val_loss did not improve from 0.11330

Epoch 00007: val_loss did not improve from 0.11330

Epoch 00008: val_loss did not improve from 0.11330

Epoch 00009: val_loss did not improve from 0.11330

Epoch 00010: val_loss did not improve from 0.11330





Epoch 00001: val_loss improved from inf to 0.12812, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.12812 to 0.12101, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.12101

Epoch 00004: val_loss did not improve from 0.12101

Epoch 00005: val_loss did not improve from 0.12101

Epoch 00006: val_loss did not improve from 0.12101

Epoch 00007: val_loss did not improve from 0.12101

Epoch 00008: val_loss did not improve from 0.12101

Epoch 00009: val_loss did not improve from 0.12101

Epoch 00010: val_loss did not improve from 0.12101





Epoch 00001: val_loss improved from inf to 0.09325, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09325

Epoch 00003: val_loss did not improve from 0.09325

Epoch 00004: val_loss did not improve from 0.09325

Epoch 00005: val_loss did not improve from 0.09325

Epoch 00006: val_loss did not improve from 0.09325

Epoch 00007: val_loss did not improve from 0.09325

Epoch 00008: val_loss did not improve from 0.09325

Epoch 00009: val_loss did not improve from 0.09325

Epoch 00010: val_loss did not improve from 0.09325





Epoch 00001: val_loss improved from inf to 0.14544, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.14544 to 0.10911, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.10911

Epoch 00004: val_loss did not improve from 0.10911

Epoch 00005: val_loss did not improve from 0.10911

Epoch 00006: val_loss did not improve from 0.10911

Epoch 00007: val_loss did not improve from 0.10911

Epoch 00008: val_loss did not improve from 0.10911

Epoch 00009: val_loss did not improve from 0.10911

Epoch 00010: val_loss did not improve from 0.10911





Epoch 00001: val_loss improved from inf to 0.10768, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10768

Epoch 00003: val_loss did not improve from 0.10768

Epoch 00004: val_loss did not improve from 0.10768

Epoch 00005: val_loss did not improve from 0.10768

Epoch 00006: val_loss did not improve from 0.10768

Epoch 00007: val_loss did not improve from 0.10768

Epoch 00008: val_loss did not improve from 0.10768

Epoch 00009: val_loss did not improve from 0.10768

Epoch 00010: val_loss did not improve from 0.10768





Epoch 00001: val_loss improved from inf to 0.05018, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05018

Epoch 00003: val_loss did not improve from 0.05018

Epoch 00004: val_loss did not improve from 0.05018

Epoch 00005: val_loss did not improve from 0.05018

Epoch 00006: val_loss did not improve from 0.05018

Epoch 00007: val_loss did not improve from 0.05018

Epoch 00008: val_loss did not improve from 0.05018

Epoch 00009: val_loss did not improve from 0.05018

Epoch 00010: val_loss did not improve from 0.05018

Epoch 00001: val_loss improved from inf to 0.08091, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08091

Epoch 00003: val_loss did not improve from 0.08091

Epoch 00004: val_loss did not improve from 0.08091

Epoch 00005: val_loss did not improve from 0.08091

Epoch 00006: val_loss did not improve from 0.08091

Epoch 00007: val_loss did not improve from 0.08091

Epoch 00008: val_loss did not improve




Epoch 00001: val_loss improved from inf to 0.06783, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.06783 to 0.06045, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.06045

Epoch 00004: val_loss did not improve from 0.06045

Epoch 00005: val_loss did not improve from 0.06045

Epoch 00006: val_loss did not improve from 0.06045

Epoch 00007: val_loss did not improve from 0.06045

Epoch 00008: val_loss did not improve from 0.06045

Epoch 00009: val_loss did not improve from 0.06045

Epoch 00010: val_loss did not improve from 0.06045




AlgoCrossValIter - 9
Model: "sequential_29"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.24366, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.24366 to 0.24244, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.24244

Epoch 00004: val_loss improved from 0.24244 to 0.21771, saving model to best-model-conll.hdfs

Epoch 00005: val_loss 




Epoch 00001: val_loss improved from inf to 0.16627, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.16627

Epoch 00003: val_loss did not improve from 0.16627

Epoch 00004: val_loss did not improve from 0.16627

Epoch 00005: val_loss did not improve from 0.16627

Epoch 00006: val_loss did not improve from 0.16627

Epoch 00007: val_loss did not improve from 0.16627

Epoch 00008: val_loss did not improve from 0.16627

Epoch 00009: val_loss did not improve from 0.16627

Epoch 00010: val_loss did not improve from 0.16627





Epoch 00001: val_loss improved from inf to 0.10422, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10422

Epoch 00003: val_loss did not improve from 0.10422

Epoch 00004: val_loss did not improve from 0.10422

Epoch 00005: val_loss did not improve from 0.10422

Epoch 00006: val_loss did not improve from 0.10422

Epoch 00007: val_loss did not improve from 0.10422

Epoch 00008: val_loss did not improve from 0.10422

Epoch 00009: val_loss did not improve from 0.10422

Epoch 00010: val_loss did not improve from 0.10422





Epoch 00001: val_loss improved from inf to 0.11350, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.11350

Epoch 00003: val_loss did not improve from 0.11350

Epoch 00004: val_loss did not improve from 0.11350

Epoch 00005: val_loss did not improve from 0.11350

Epoch 00006: val_loss did not improve from 0.11350

Epoch 00007: val_loss did not improve from 0.11350

Epoch 00008: val_loss did not improve from 0.11350

Epoch 00009: val_loss did not improve from 0.11350

Epoch 00010: val_loss did not improve from 0.11350





Epoch 00001: val_loss improved from inf to 0.11455, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.11455

Epoch 00003: val_loss did not improve from 0.11455

Epoch 00004: val_loss did not improve from 0.11455

Epoch 00005: val_loss did not improve from 0.11455

Epoch 00006: val_loss did not improve from 0.11455

Epoch 00007: val_loss did not improve from 0.11455

Epoch 00008: val_loss did not improve from 0.11455

Epoch 00009: val_loss did not improve from 0.11455

Epoch 00010: val_loss did not improve from 0.11455





Epoch 00001: val_loss improved from inf to 0.10047, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10047

Epoch 00003: val_loss did not improve from 0.10047

Epoch 00004: val_loss did not improve from 0.10047

Epoch 00005: val_loss did not improve from 0.10047

Epoch 00006: val_loss did not improve from 0.10047

Epoch 00007: val_loss did not improve from 0.10047

Epoch 00008: val_loss did not improve from 0.10047

Epoch 00009: val_loss did not improve from 0.10047

Epoch 00010: val_loss did not improve from 0.10047





Epoch 00001: val_loss improved from inf to 0.10173, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10173

Epoch 00003: val_loss did not improve from 0.10173

Epoch 00004: val_loss did not improve from 0.10173

Epoch 00005: val_loss did not improve from 0.10173

Epoch 00006: val_loss did not improve from 0.10173

Epoch 00007: val_loss did not improve from 0.10173

Epoch 00008: val_loss did not improve from 0.10173

Epoch 00009: val_loss did not improve from 0.10173

Epoch 00010: val_loss did not improve from 0.10173





Epoch 00001: val_loss improved from inf to 0.04088, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04088

Epoch 00003: val_loss did not improve from 0.04088

Epoch 00004: val_loss did not improve from 0.04088

Epoch 00005: val_loss did not improve from 0.04088

Epoch 00006: val_loss did not improve from 0.04088

Epoch 00007: val_loss did not improve from 0.04088

Epoch 00008: val_loss did not improve from 0.04088

Epoch 00009: val_loss did not improve from 0.04088

Epoch 00010: val_loss did not improve from 0.04088

Epoch 00001: val_loss improved from inf to 0.06786, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06786

Epoch 00003: val_loss did not improve from 0.06786

Epoch 00004: val_loss did not improve from 0.06786

Epoch 00005: val_loss did not improve from 0.06786

Epoch 00006: val_loss did not improve from 0.06786

Epoch 00007: val_loss did not improve from 0.06786

Epoch 00008: val_loss did not improve




Epoch 00001: val_loss improved from inf to 0.04098, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04098

Epoch 00003: val_loss did not improve from 0.04098

Epoch 00004: val_loss did not improve from 0.04098

Epoch 00005: val_loss did not improve from 0.04098

Epoch 00006: val_loss did not improve from 0.04098

Epoch 00007: val_loss did not improve from 0.04098

Epoch 00008: val_loss did not improve from 0.04098

Epoch 00009: val_loss did not improve from 0.04098

Epoch 00010: val_loss did not improve from 0.04098




AlgoCrossValIter - 10
Model: "sequential_30"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.27203, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.27203

Epoch 00003: val_loss improved from 0.27203 to 0.23500, saving model to best-model-conll.hdfs

Epoch 00004: val_loss improved from 0.23500 to 0.21253, saving model to best-model-conll.hdfs

Epoch 00005: val_loss




Epoch 00001: val_loss improved from inf to 0.18409, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.18409

Epoch 00003: val_loss did not improve from 0.18409

Epoch 00004: val_loss did not improve from 0.18409

Epoch 00005: val_loss did not improve from 0.18409

Epoch 00006: val_loss did not improve from 0.18409

Epoch 00007: val_loss did not improve from 0.18409

Epoch 00008: val_loss did not improve from 0.18409

Epoch 00009: val_loss did not improve from 0.18409

Epoch 00010: val_loss did not improve from 0.18409





Epoch 00001: val_loss improved from inf to 0.10303, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10303

Epoch 00003: val_loss did not improve from 0.10303

Epoch 00004: val_loss did not improve from 0.10303

Epoch 00005: val_loss did not improve from 0.10303

Epoch 00006: val_loss did not improve from 0.10303

Epoch 00007: val_loss did not improve from 0.10303

Epoch 00008: val_loss did not improve from 0.10303

Epoch 00009: val_loss did not improve from 0.10303

Epoch 00010: val_loss did not improve from 0.10303





Epoch 00001: val_loss improved from inf to 0.12327, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.12327

Epoch 00003: val_loss did not improve from 0.12327

Epoch 00004: val_loss did not improve from 0.12327

Epoch 00005: val_loss did not improve from 0.12327

Epoch 00006: val_loss did not improve from 0.12327

Epoch 00007: val_loss did not improve from 0.12327

Epoch 00008: val_loss did not improve from 0.12327

Epoch 00009: val_loss did not improve from 0.12327

Epoch 00010: val_loss did not improve from 0.12327





Epoch 00001: val_loss improved from inf to 0.14613, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.14613 to 0.14395, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.14395

Epoch 00004: val_loss did not improve from 0.14395

Epoch 00005: val_loss did not improve from 0.14395

Epoch 00006: val_loss did not improve from 0.14395

Epoch 00007: val_loss did not improve from 0.14395

Epoch 00008: val_loss did not improve from 0.14395

Epoch 00009: val_loss did not improve from 0.14395

Epoch 00010: val_loss did not improve from 0.14395





Epoch 00001: val_loss improved from inf to 0.10607, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10607

Epoch 00003: val_loss did not improve from 0.10607

Epoch 00004: val_loss did not improve from 0.10607

Epoch 00005: val_loss did not improve from 0.10607

Epoch 00006: val_loss did not improve from 0.10607

Epoch 00007: val_loss did not improve from 0.10607

Epoch 00008: val_loss did not improve from 0.10607

Epoch 00009: val_loss did not improve from 0.10607

Epoch 00010: val_loss did not improve from 0.10607





Epoch 00001: val_loss improved from inf to 0.11538, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.11538

Epoch 00003: val_loss did not improve from 0.11538

Epoch 00004: val_loss did not improve from 0.11538

Epoch 00005: val_loss did not improve from 0.11538

Epoch 00006: val_loss did not improve from 0.11538

Epoch 00007: val_loss did not improve from 0.11538

Epoch 00008: val_loss did not improve from 0.11538

Epoch 00009: val_loss did not improve from 0.11538

Epoch 00010: val_loss did not improve from 0.11538





Epoch 00001: val_loss improved from inf to 0.05689, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05689

Epoch 00003: val_loss did not improve from 0.05689

Epoch 00004: val_loss did not improve from 0.05689

Epoch 00005: val_loss did not improve from 0.05689

Epoch 00006: val_loss did not improve from 0.05689

Epoch 00007: val_loss did not improve from 0.05689

Epoch 00008: val_loss did not improve from 0.05689

Epoch 00009: val_loss did not improve from 0.05689

Epoch 00010: val_loss did not improve from 0.05689

Epoch 00001: val_loss improved from inf to 0.06783, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06783

Epoch 00003: val_loss did not improve from 0.06783

Epoch 00004: val_loss did not improve from 0.06783

Epoch 00005: val_loss did not improve from 0.06783

Epoch 00006: val_loss did not improve from 0.06783

Epoch 00007: val_loss did not improve from 0.06783

Epoch 00008: val_loss did not improve




Epoch 00001: val_loss improved from inf to 0.06358, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.06358 to 0.05871, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.05871

Epoch 00004: val_loss did not improve from 0.05871

Epoch 00005: val_loss did not improve from 0.05871

Epoch 00006: val_loss did not improve from 0.05871

Epoch 00007: val_loss did not improve from 0.05871

Epoch 00008: val_loss did not improve from 0.05871

Epoch 00009: val_loss did not improve from 0.05871

Epoch 00010: val_loss did not improve from 0.05871




In [244]:
normalization_strategy

'mean_log'

In [245]:
resultCrossVal.to_csv("results.csv")
resultCrossVal

Unnamed: 0,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9
P_val,78.282,75.075,79.71,79.154,76.961,80.975,83.369,79.989,82.291,81.156
P_train,77.042,84.914,84.127,86.251,76.95,86.88,85.426,88.491,86.356,86.243
P_ewo,66.003,70.388,70.997,74.374,66.537,73.928,71.995,76.143,74.662,73.364
R_val,69.843,70.026,82.712,70.804,72.027,70.249,77.247,63.888,70.638,72.954
R_train,73.344,72.893,82.292,76.2,73.376,75.981,76.772,72.639,74.374,78.362
R_ewo,61.759,61.574,69.445,63.797,61.018,63.425,64.075,59.353,62.036,65.186
F1-val,80.521111,79.398889,80.621,73.67,82.453333,73.768,79.883,69.878,75.264,75.51
F1-train,82.86,75.831,82.792,79.549,83.132222,80.14,80.283,77.336,78.578,80.879
F1-ewo,70.371111,63.555,69.735,67.471,70.072222,67.342,67.178,64.467,65.752,67.812


In [246]:
resultCrossVal.mean(axis=1).to_frame()

Unnamed: 0,0
P_val,79.6962
P_train,84.268
P_ewo,71.8391
R_val,72.0388
R_train,75.6233
R_ewo,63.1668
F1-val,77.096733
F1-train,80.138022
F1-ewo,67.375533


In [247]:
resultCrossVal.std(axis=1).to_frame()

Unnamed: 0,0
P_val,2.476679
P_train,4.005486
P_ewo,3.409437
R_val,4.98753
R_train,3.01697
R_ewo,2.783715
F1-val,4.037503
F1-train,2.430602
F1-ewo,2.303243


In [248]:
# trainByTagResult.to_csv("results/train-by-tag.csv")
# trainByTagResult

In [249]:
# trainByTagResult.mean(axis=1).to_frame()

In [250]:
# trainByTagResult.std(axis=1).to_frame()

In [251]:
# testByTagResult.to_csv("results/test-by-tag.csv")
# testByTagResult

In [252]:
# testByTagResult.mean(axis=1).to_frame()

In [253]:
# testByTagResult.std(axis=1).to_frame()

In [254]:
# ewoByTagResult.to_csv("results/ewo-by-tag.csv")

In [255]:
# ewoByTagResult = pd.read_csv("results/ewo-by-tag.csv", index_col=0)
# ewoByTagResult

In [256]:
# ewoByTagResult.mean(axis=1).to_frame()

In [257]:
# ewoByTagResult.std(axis=1).to_frame()

In [258]:
# columns = en_fingerprints.columns

# print("Pred", "Real", "Freq", "Word", sep="\t")
# for c in columns:
#     prediction = model.predict(en_fingerprints[c].values.reshape((1, 210)))
#     pred_tag = int2tag[np.argmax(prediction)]
#     real_tag = en_corpus[en_corpus.word == c].iloc[0]['ne-tag']
    
#     if pred_tag != real_tag:
#         print(pred_tag, real_tag, en_fingerprints[c].max(), c, sep="\t")