In [325]:
# import
import keras
import sys
import numpy as np
import string
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import SimpleRNN, Dense
from keras.utils import np_utils, plot_model
from keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn import model_selection
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, precision_recall_fscore_support
import h5py as h5py

In [326]:
# if we are doeing binary classification. That means say if a token is a named entity or not
BINARY = False

# number of epochs for training
epochs = 10 

# the english side of the corpus
en_corpus_file = "corpus-en.txt"

# the ewondo side of the corpus
ewo_corpus_file = "corpus-ewo.txt"

# name of the file to same the model 
best_model_file = "best-model-conll.hdfs"

# The maximal number of phrases to use
max_nb_of_phrases =  -1

# the maximal number of duplicates for each word in the corpus
duplication = 1

# wether we are using only the vocabulary, ro redundancy
is_only_vocab = True

# if word should be shuffle or not
shuffle = is_only_vocab

# the number of neurons in the first layer
h1_size = 640

# number of neurons in the second layer
h2_size = 160  

In [327]:
def getTag(aString):
    """
        convert a string to a tag
    """
    tag = "O"
    if BINARY:
        if aString != "O":
            return "NE"
    else:
        tag = aString
    return tag
     

In [328]:
def load_corpus(file, max_nb_of_phrases):
    """
    Load a corpus stored in a file
    Input:
        - file: the name of the file of the corpus
        - max_nb_of_phases: maximal number of phrases to load
    
    Return:
        - a DataFrame representing the corpus
        - the number of phrases in the corpus
    """
    nb_of_phrases = 0
    dataset = {"word": [], "ne-tag": []}
    with open(file) as f:
        prev_line = None
        for cpt, line in enumerate(f):
            if cpt == 0:
                continue
            if nb_of_phrases == max_nb_of_phrases:
                break;

            l = line.strip()
            if len(l) == 0 and len(prev_line) != 0:
                nb_of_phrases += 1
                dataset["word"].append(line)
                dataset["ne-tag"].append(None)
            else:
                l = l.split("\t")
                if l[0] not in string.punctuation:
                    dataset["word"].append(l[0])
                    dataset["ne-tag"].append(ne_type(l[1]))
            prev_line = line.strip()
        
    return pd.DataFrame(dataset), nb_of_phrases+1

In [329]:
def corpus_fingerprint(aDataframe, nb_of_biphrases):
    """
    Create the distributionnal signature of each word in the corpus
    Input:
        -aDataFrame: the corpus DataFrame
        -nb_of_biphrases: number of phrases in the corpus
    Return:
        a DataFrame: corpus fingerprint, the columns are the words in the corpus
    """
    fingerprints = {}
    current_bi_phrase_index = 0
    nb_word_in_corpus = aDataframe[aDataframe.word != "\n"].word.size
    words_in_current_phrase = []
    for index, row in aDataframe.iterrows():
        if current_bi_phrase_index > nb_of_biphrases:
            break
            
        word = row['word']
        
        if word != "\n":
            words_in_current_phrase.append(word)
            if word not in fingerprints:
                fingerprints[word] = np.zeros(nb_of_biphrases, dtype=np.float32)
            fingerprints[word][current_bi_phrase_index] += 1
        else:
            nb_word_in_current_phrase = len(words_in_current_phrase)
#             for w in words_in_current_phrase:
#                 fingerprints[w][current_bi_phrase_index] = nb_word_in_corpus / fingerprints[w][current_bi_phrase_index]                
            current_bi_phrase_index += 1
            words_in_current_phrase = []
    for word in fingerprints:
        for i in range(nb_of_biphrases):
            if fingerprints[word][i] != 0:
                fingerprints[word][i] = nb_word_in_corpus / fingerprints[word][i]
#         fingerprints[word][nb_of_biphrases] = nb_word_in_corpus / aDataframe[aDataframe.word == word].word.size
        
    return pd.DataFrame(fingerprints)

In [330]:
def corpus2trainingdata(aDataframe, fingerprintsDataFrame):
    """
    Convert corpus to training data => numpy array
    
    Input:
        -aDataFrame: Corpus dataframe
        -fingerprintsDataFrame: distributionnal signature of words in the corpus
    Return:
        (X, y): X is the array of words (signature) in the corpus and y is the corresponding labels (NE tags)
    """
    X = np.zeros((aDataframe.shape[0], fingerprintsDataFrame.shape[0]), dtype=np.int8)
    y = np.zeros(aDataframe.shape[0], dtype=np.int8)
    i = 0
    for row in aDataframe.iterrows():
        X[i] = fingerprintsDataFrame[row[1]['word']].values
        y[i] = tag2int[getTag(row[1]['ne-tag'])]
        i += 1
    return X, y

In [331]:
def train_test_split(X, y, test_size = 0.33):
    total = X.shape[0]
    train_length = round(total * (1 - test_size)) 
    return X[:train_length], X[train_length:], y[:train_length], y[train_length:]

In [332]:
# A utility function to convert NE tags
def ne_type(aType):
    aType = aType.lower()
    if 'per' in aType:
        t =  'NE' if BINARY else 'PER' 
    elif 'loc' in aType:
        t =  'NE' if BINARY else 'LOC'
    elif 'org' in aType:
        t =  'NE' if BINARY else 'ORG'
    elif 'hour' in aType:
        t =  'NE' if BINARY else 'MISC'
    elif aType != 'o' and len(aType) > 0 :
        t =  'NE' if BINARY else 'MISC'
    else:
        t = 'O'
    return t

In [333]:
def compute_performance(y_true, y_pred, words=None, BINARY=False):
    """
    Return the precision, recall, f1-score, accuracy and a dataframe comparing model predictions to ground truth
    """
    if BINARY:
        p = precision_score(y_true, y_pred, pos_label=tag2int['NE'])
        r = recall_score(y_true, y_pred, pos_label=tag2int['NE'])
        f1 = f1_score(y_true, y_pred, pos_label=tag2int['NE'])
        acc = accuracy_score(y_true, y_pred)
    else:
        p = precision_score(y_pred, y_true, average='macro')
        r = recall_score(y_pred, y_true, average='macro')
        f1 = f1_score(y_pred, y_true, average='macro')
        acc = accuracy_score(y_pred, y_true)
    if words is None:
        model_output_vs = pd.DataFrame({'y_true': [int2tag[i] for i in y_true], 'y_pred': [int2tag[i] for i in y_pred]})
    else:
        model_output_vs = pd.DataFrame({'word': words, 'y_true': [int2tag[i] for i in y_true], 'y_pred': [int2tag[i] for i in y_pred]})

    return p, r, f1, acc, model_output_vs

In [334]:
def P_R_F1(y_pred, y_true, neg_class):
    same = y_pred[y_true==y_pred]
    tp = same[same != neg_class].size
    nb_of_pos_exple = y_true[y_true != neg_class].size
    nb_of_pos_pred = y_pred[y_pred != neg_class].size
    p = r = f1 = 0
    try:
        p = np.round(tp*100/nb_of_pos_pred, 2)
    except ZeroDivisionError:
        print("number of correct positive predictions is 0")
        
    try:
        r = np.round(tp*100/nb_of_pos_exple, 2)
    except ZeroDivisionError:
        print("number of position exple is 0")
        
    try:
        f1 = np.round(2*r*p/(r+p), 2)
    except ZeroDivisionError:
        print("Recall and precision are 0")

    return p, r, f1

In [335]:
def shuffle_data(X, y):
    indices = [i for i in  range(X.shape[0])]
    np.random.shuffle(indices)
    return X[indices], y[indices]

In [336]:
def create_model(input_dim, output_dim):
    model = Sequential()
    model.add(Dense(h1_size, input_dim=input_dim, activation='sigmoid', name="hidden1"))
    model.add(Dense(h2_size, activation='sigmoid', name="hidden2"))
    if BINARY:
        model.add(Dense(1, activation='sigmoid', name="outputlayer"))
        model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['binary_accuracy'])
    else:
        model.add(Dense(output_dim, activation='softmax', name="outputlayer"))
        model.compile(loss='categorical_crossentropy', optimizer="rmsprop", metrics=['accuracy'])
    model.summary()
    return model

In [337]:
def train_model(model, X_train, y_train, X_val, y_val, epochs=epochs):
    # stop learning if the error is the same between two consecutive epochs
    early_stop = EarlyStopping(patience=20, verbose=2)
    
    # saving best model
    best_model_cp = ModelCheckpoint(best_model_file, save_best_only=True, verbose=1)
    
    model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=epochs, verbose=0, shuffle=shuffle, callbacks=[best_model_cp, early_stop])
    
    #loading and returning the best model
    return keras.models.load_model(best_model_file)

In [338]:
def predict(model, X, y, binary=BINARY):
    if BINARY:
        y_pred = np.round(model.predict(X))
        y_true = y
    else:
        predictions = model.predict(X)
        y_pred = np.array([np.argmax(p) for p in predictions])
        y_true = np.array([np.argmax(t) for t in y ])
    return y_true, y_pred

In [339]:
def model_performance(y_true, y_pred):
    return P_R_F1(y_pred, y_true, tag2int['O']) #precision, recall, f1-score

In [340]:
def model_performace_by_tag(y_true, y_pred, tag):
    p, r, f1 = 0, 0, 0
    
    eq = y_pred[y_pred==y_true]
    correctly_pred = eq[eq==tag].size
    try:
        p = np.round(100 * correctly_pred / y_pred[y_pred==tag].size, 2)
    except ZeroDivisionError:
        pass
    
    try:
        r = np.round(100 * correctly_pred / y_true[y_true==tag].size, 2)
    except ZeroDivisionError:
        pass
    
    try:
        f1 = np.round(2 * r * p / (r + p), 2)
    except ZeroDivisionError:
        pass
    
    return p, r, f1

In [341]:
def algoEval(X_train, y_train, X_val, y_val, X_ewo, y_ewo, epochs=epochs, model=None):
    """
    Train a model on (X, y) and validate on (X_val, y_val) then project on (X_ewo)
    """
    test_precision, train_precision, ewo_precision = [], [], []
    test_recall, train_recall, ewo_recall = [], [], []
    test_fscore, train_fscore, ewo_fscore = [], [], []
    
    test_result_by_tag = {}
    train_result_by_tag = {}
    ewo_result_by_tag = {}
    for t in tagSet:
        f1_key = "F1-"+t
        p_key = "P-"+t
        r_key = "R-"+t
        train_result_by_tag[f1_key], train_result_by_tag[p_key], train_result_by_tag[r_key] = [], [], []
        test_result_by_tag[f1_key], test_result_by_tag[p_key], test_result_by_tag[r_key] = [], [], []
        ewo_result_by_tag[f1_key], ewo_result_by_tag[p_key], ewo_result_by_tag[r_key] = [], [], []

    m = train_model(model, X_train, y_train, X_val, y_val, epochs=epochs)
        
    y_true, y_pred = predict(m, X_train, y_train)
    p_train, r_train, f1_train = model_performance(y_true, y_pred)
        
    y_true_val, y_pred_val = predict(m, X_val, y_val)
    p_val, r_val, f1_val = model_performance(y_true_val, y_pred_val)
        
    y_true_ewo, y_pred_ewo = predict(m, X_ewo, y_ewo) 
    p_ewo, r_ewo, f1_ewo = model_performance(y_true_ewo, y_pred_ewo)
        
    for t in range(len(int2tag)):
        f1_key = "F1-" + int2tag[t]
        p_key = "P-" + int2tag[t]
        r_key = "R-" + int2tag[t]
            
        p, r, f1 = model_performace_by_tag(y_true, y_pred, t)
        train_result_by_tag[p_key].append(p)
        train_result_by_tag[r_key].append(r)
        train_result_by_tag[f1_key].append(f1)
            
        p, r, f1 = model_performace_by_tag(y_true_val, y_pred_val, t)
        test_result_by_tag[p_key].append(p)
        test_result_by_tag[r_key].append(r)
        test_result_by_tag[f1_key].append(f1)
            
        p, r, f1 = model_performace_by_tag(y_true_ewo, y_pred_ewo, t)
        ewo_result_by_tag[p_key].append(p)
        ewo_result_by_tag[r_key].append(r)
        ewo_result_by_tag[f1_key].append(f1)
                
    test_precision.append(p_val)
    train_precision.append(p_train)
    ewo_precision.append(p_ewo)
        
    test_recall.append(r_val)
    train_recall.append(r_train)
    ewo_recall.append(r_ewo)
        
    test_fscore.append(f1_val)
    train_fscore.append(f1_train)
    ewo_fscore.append(f1_ewo)
    return pd.DataFrame({
        'P_test': test_precision, 
        'P_train': train_precision, 
        'P_ewo': ewo_precision, 'R_test': test_recall, 'R_train': train_recall, 
        'R_ewo': ewo_recall, 'F1-test': test_fscore, 'F1-train': train_fscore, 'F1-ewo': ewo_fscore}), pd.DataFrame(train_result_by_tag), pd.DataFrame(test_result_by_tag), pd.DataFrame(ewo_result_by_tag)

In [342]:
def algoCrossVal(X, y, X_ewo, y_ewo, k = 10, repeat=1): 
    """
    Traing a model with k-fold cross validation
    """
    block_size = int(X.shape[0] / k)   
    output = None
    model = None
    train_by_tags, test_by_tags, ewo_by_tags = None, None, None
    for it in range(repeat):
        print("AlgoCrossValIter -", it+1)
        model = create_model(X.shape[1], len(tagSet))
        results = None
        train_by_tagsTmp, test_by_tagsTmp, ewo_by_tagsTmp = None, None, None
        for i in range(k):
            X_val, y_val = X[i*block_size:i*block_size+block_size], y[i*block_size:i*block_size+block_size]
            X_train = np.concatenate((X[0:i*block_size], X[i*block_size+block_size:]))
            y_train = np.concatenate((y[0:i*block_size], y[i*block_size+block_size:]))

            X_train = X_train.reshape(X_train.shape[0], X_train.shape[1])
            X_val = X_val.reshape(X_val.shape[0], X_val.shape[1])

            result, train_by_tag, test_by_tag, ewo_by_tag = algoEval(X_train, y_train, X_val, y_val, X_ewo, y_ewo, model=model)
            if results is None:
                results = result.copy()
                train_by_tagsTmp, test_by_tagsTmp, ewo_by_tagsTmp = train_by_tag.copy(), test_by_tag.copy(), ewo_by_tag.copy()
            else:
                results = pd.concat([results, result], ignore_index=True)
                train_by_tagsTmp = pd.concat([train_by_tagsTmp, train_by_tag], ignore_index=True)
                test_by_tagsTmp = pd.concat([test_by_tagsTmp, test_by_tag], ignore_index=True)
                ewo_by_tagsTmp = pd.concat([ewo_by_tagsTmp, ewo_by_tag], ignore_index=True)
        
        if output is None:
            output = results.mean(axis=0).to_frame()
            train_by_tags = train_by_tagsTmp.mean(axis=0).to_frame()
            test_by_tags = test_by_tagsTmp.mean(axis=0).to_frame()
            ewo_by_tags = ewo_by_tagsTmp.mean(axis=0).to_frame()
        else:
            output = pd.concat([output, results.mean(axis=0).to_frame()], axis=1)
            train_by_tags = pd.concat([train_by_tags, train_by_tagsTmp.mean(axis=0).to_frame()], axis=1)
            test_by_tags = pd.concat([test_by_tags, test_by_tagsTmp.mean(axis=0).to_frame()], axis=1)
            ewo_by_tags = pd.concat([ewo_by_tags, ewo_by_tagsTmp.mean(axis=0).to_frame()], axis=1)

    return output, train_by_tags, test_by_tags, ewo_by_tags, model

In [343]:
en_corpus, en_nb_of_phrases = load_corpus(en_corpus_file, max_nb_of_phrases)

In [344]:
en_corpus.head()
en_corpus.loc[en_corpus['ne-tag'] == 'ORG']

Unnamed: 0,word,ne-tag
1335,Sadducees,ORG


In [345]:
tagSet = en_corpus["ne-tag"].dropna().unique()
if BINARY:
    tagSet = ['NE', 'O']
tag2int = {j: i for i, j in enumerate(tagSet)}
int2tag = {i: j for i, j in enumerate(tagSet)}
print(tag2int)

{'O': 0, 'MISC': 1, 'PER': 2, 'LOC': 3, 'ORG': 4}


In [346]:
en_nb_of_phrases

210

In [347]:
en_corpus.describe()

Unnamed: 0,word,ne-tag
count,4379,4170
unique,904,5
top,the,O
freq,313,3779


In [348]:
en_corpus.head(10)

Unnamed: 0,word,ne-tag
0,The,O
1,Promise,O
2,of,O
3,the,O
4,Holy,MISC
5,Spirit,MISC
6,\n,
7,In,O
8,the,O
9,first,O


In [349]:
for tag in tagSet:
    print("{0} % = {1} %".format(tag, np.round(en_corpus[en_corpus['ne-tag']==tag].shape[0] * 100 / en_corpus[en_corpus['ne-tag']!='\n'].shape[0], 2)))

O % = 86.3 %
MISC % = 2.4 %
PER % = 5.59 %
LOC % = 0.91 %
ORG % = 0.02 %


In [350]:
for tag in tagSet:
    print("{0} % = {1} %".format(tag, np.round(en_corpus[en_corpus['ne-tag']==tag].word.unique().shape[0] * 100 / en_corpus[en_corpus['ne-tag']!='\n'].word.unique().shape[0], 2)))

O % = 89.16 %
MISC % = 1.88 %
PER % = 8.96 %
LOC % = 1.99 %
ORG % = 0.11 %


In [351]:
en_corpus[en_corpus.word == "\n"].shape

(209, 2)

In [352]:
print("Nb of bi-phrases", en_nb_of_phrases)
en_fingerprints = corpus_fingerprint(en_corpus, en_nb_of_phrases)

Nb of bi-phrases 210


In [353]:
en_fingerprints.head(5)

Unnamed: 0,The,Promise,of,the,Holy,Spirit,In,first,book,O,...,considered,dream,She,save,fulfill,Immanuel,us),woke,sleep,knew
0,4170.0,4170.0,4170.0,4170.0,4170.0,4170.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,4170.0,0.0,0.0,4170.0,4170.0,4170.0,4170.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,1390.0,4170.0,4170.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,4170.0,4170.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,4170.0,2085.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [354]:
en_fingerprints['you'].values.shape

(210,)

In [355]:
en_corpus[en_corpus.word != "\n"].shape

(4170, 2)

In [356]:
if is_only_vocab:
    text = list(en_corpus[en_corpus.word != "\n"].word.unique())
else:
    text = list(en_corpus[en_corpus.word != "\n"].word)
en_vocab = pd.DataFrame({'text': text})
en_vocab.describe()

Unnamed: 0,text
count,903
unique,903
top,rather
freq,1


In [357]:
if is_only_vocab:
    X = np.zeros((en_vocab.shape[0] * duplication, en_nb_of_phrases))
    target = np.zeros((en_vocab.shape[0] * duplication))
    p=0
    for i, row in en_vocab.iterrows():
        c = row.text
        for j in range(duplication):
            X[p] = en_fingerprints[c.split(" ")[0]]
            target[p] = tag2int[getTag(en_corpus[en_corpus.word == c.split(" ")[-1:][0]]['ne-tag'].iloc[0])]
            p+=1
    X, target = shuffle_data(X, target)
    print(X.shape, en_fingerprints.shape, target.shape)

(903, 210) (210, 903) (903,)


In [358]:
en_vocab[-20:]

Unnamed: 0,text
883,Eliud
884,Eleazar
885,Matthan
886,husband
887,fourteen
888,unwilling
889,shame
890,resolved
891,divorce
892,quietly


In [359]:
if not is_only_vocab:
    X, target = corpus2trainingdata(en_corpus[en_corpus.word != "\n"], en_fingerprints)

In [360]:
if shuffle:
    X, target = shuffle_data(X, target)

In [361]:
y = target.copy()
y[0:100]
if not BINARY:
    y = np_utils.to_categorical(y, len(tagSet))
y.shape

(903, 5)

In [362]:
from sklearn.decomposition import PCA

def visualize(X, y):
    pca = PCA(n_components=2)
    X_embeded = pca.fit_transform(X)
    plt.figure(figsize=(5, 5))
    plt.scatter(X_embeded[:, 0], X_embeded[:, 1], c=y)
    plt.legend()
    plt.show()

In [363]:
# visualize(X, target)

In [364]:
X_train, X_val, y_train, y_val = model_selection.train_test_split(X, y, test_size=0.33)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1])
X_val = X_val.reshape(X_val.shape[0], X_val.shape[1])
print("X_train.shape =", X_train.shape)
print("y_train.shape =", y_train.shape)
print("X_val.shape =", X_val.shape)
print("y_val.shape =", y_val.shape)

tTarget = np.array([np.argmax(yy) for yy in y_train])
vTarget = np.array([np.argmax(yy) for yy in y_val])

for tag in tagSet:
    print("{0} % in training data = {1} %".format(tag, np.round(tTarget[tTarget==tag2int[tag]].size * 100 / tTarget.shape[0], 2)))
    print("{0} % in validation data = {1} %".format(tag, np.round(vTarget[vTarget==tag2int[tag]].size * 100 / vTarget.shape[0], 2)))

X_train.shape = (605, 210)
y_train.shape = (605, 5)
X_val.shape = (298, 210)
y_val.shape = (298, 5)
O % in training data = 86.28 %
O % in validation data = 92.28 %
MISC % in training data = 1.49 %
MISC % in validation data = 0.67 %
PER % in training data = 9.75 %
PER % in validation data = 6.04 %
LOC % in training data = 2.31 %
LOC % in validation data = 1.01 %
ORG % in training data = 0.17 %
ORG % in validation data = 0.0 %


In [365]:
ewo_corpus, ewo_nb_of_phrases = load_corpus(ewo_corpus_file, max_nb_of_phrases)

In [366]:
ewo_corpus.loc[ewo_corpus['ne-tag'] == 'PER']

Unnamed: 0,word,ne-tag
6,Teofil,PER
15,Yesus,PER
86,Yohannes,PER
104,Yesus,PER
230,Yesus,PER
...,...,...
3676,Maria,PER
3697,Yesus,PER
3740,Emmanuel,PER
3750,Yosef,PER


In [367]:
ewo_nb_of_phrases

210

In [368]:
for tag in tagSet:
    print("{0} % = {1} %".format(tag, np.round(ewo_corpus[ewo_corpus['ne-tag']==tag].shape[0] * 100 / ewo_corpus[ewo_corpus['ne-tag']!='\n'].shape[0], 2)))

O % = 84.15 %
MISC % = 2.54 %
PER % = 6.69 %
LOC % = 1.03 %
ORG % = 0.05 %


In [369]:
for tag in tagSet:
    print("{0} % = {1} %".format(tag, np.round(ewo_corpus[ewo_corpus['ne-tag']==tag].word.unique().shape[0] * 100 / ewo_corpus[ewo_corpus['ne-tag']!='\n'].word.unique().shape[0], 2)))

O % = 89.94 %
MISC % = 1.17 %
PER % = 8.3 %
LOC % = 1.86 %
ORG % = 0.2 %


In [370]:
ewo_corpus.describe()

Unnamed: 0,word,ne-tag
count,3779,3570
unique,1024,5
top,\n,O
freq,209,3180


In [371]:
ewo_corpus.head()

Unnamed: 0,word,ne-tag
0,Mfufub,MISC
1,Nsisim,MISC
2,ayi,O
3,sò,O
4,\n,


In [372]:
ewo_fingerprints = corpus_fingerprint(ewo_corpus, en_nb_of_phrases)

In [373]:
if is_only_vocab:
    text = list(ewo_corpus[ewo_corpus.word != "\n"].word.unique())
else:
    text = list(ewo_corpus[ewo_corpus.word != "\n"].word)
ewo_vocab = pd.DataFrame({"text":text})

In [374]:
if is_only_vocab:
    X_ewo = np.zeros((ewo_vocab.shape[0] * duplication, en_nb_of_phrases))
    ewo_target = np.zeros((ewo_vocab.shape[0] * duplication))
    p=0
    for i, row in ewo_vocab.iterrows():
        c = row.text
        for j in range(duplication):
            X_ewo[p] = ewo_fingerprints[c.split(" ")[0]]
            ewo_target[p] = tag2int[getTag(ewo_corpus[ewo_corpus.word == c.split(" ")[-1:][0]]['ne-tag'].iloc[0])]
            p+=1
    X_ewo, ewo_target = shuffle_data(X_ewo, ewo_target)

In [375]:
ewo_vocab[-10:]

Unnamed: 0,text
1013,sik
1014,Ntud
1015,bëyole
1016,Emmanuel
1017,Avëbë
1018,angavëbë
1019,oyò
1020,angabende
1021,anganòṅ
1022,angayole


In [376]:
if not is_only_vocab:
    X_ewo, ewo_target = corpus2trainingdata(ewo_corpus[ewo_corpus.word != "\n"], ewo_fingerprints)

In [377]:
if shuffle:
    X_ewo, ewo_target = shuffle_data(X_ewo, ewo_target)

In [378]:
y_ewo = ewo_target.copy()
print(y_ewo.shape, len(ewo_vocab))

(1023,) 1023


In [379]:
X_ewo.shape

(1023, 210)

In [380]:
y_ewo = ewo_target.copy()
y_ewo[:20]
if not BINARY:
    y_ewo = np_utils.to_categorical(y_ewo)

In [381]:
X_ewo = X_ewo.reshape((X_ewo.shape[0], en_nb_of_phrases))

In [382]:
# model = create_model(X.shape[1], len(tagSet))
# resultEval, train_by_tag, test_by_tag, ewo_by_tag = algoEval(X_train, y_train, X_val, y_val, X_ewo, y_ewo, model=model)

In [383]:
# resultEval

In [384]:
# train_by_tag

In [385]:
# test_by_tag

In [386]:
# ewo_by_tag

In [387]:
# resultEval.mean()

In [388]:
# resultEval.std()

In [310]:
resultCrossVal, trainByTagResult, testByTagResult, ewoByTagResult, model = algoCrossVal(X, y, X_ewo, y_ewo, repeat=10)

AlgoCrossValIter - 1
Model: "sequential_31"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.55556, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.55556 to 0.54094, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.54094 to 0.44100, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.44100

Epoch 00005: val_loss 




Epoch 00001: val_loss improved from inf to 0.15547, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.15547 to 0.07253, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.07253

Epoch 00004: val_loss did not improve from 0.07253

Epoch 00005: val_loss did not improve from 0.07253

Epoch 00006: val_loss did not improve from 0.07253

Epoch 00007: val_loss did not improve from 0.07253

Epoch 00008: val_loss did not improve from 0.07253

Epoch 00009: val_loss did not improve from 0.07253

Epoch 00010: val_loss did not improve from 0.07253





Epoch 00001: val_loss improved from inf to 0.07330, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07330

Epoch 00003: val_loss did not improve from 0.07330

Epoch 00004: val_loss did not improve from 0.07330

Epoch 00005: val_loss did not improve from 0.07330

Epoch 00006: val_loss did not improve from 0.07330

Epoch 00007: val_loss did not improve from 0.07330

Epoch 00008: val_loss did not improve from 0.07330

Epoch 00009: val_loss did not improve from 0.07330

Epoch 00010: val_loss did not improve from 0.07330





Epoch 00001: val_loss improved from inf to 0.12933, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.12933

Epoch 00003: val_loss did not improve from 0.12933

Epoch 00004: val_loss did not improve from 0.12933

Epoch 00005: val_loss did not improve from 0.12933

Epoch 00006: val_loss did not improve from 0.12933

Epoch 00007: val_loss did not improve from 0.12933

Epoch 00008: val_loss did not improve from 0.12933

Epoch 00009: val_loss did not improve from 0.12933

Epoch 00010: val_loss did not improve from 0.12933





Epoch 00001: val_loss improved from inf to 0.05774, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05774

Epoch 00003: val_loss did not improve from 0.05774

Epoch 00004: val_loss did not improve from 0.05774

Epoch 00005: val_loss did not improve from 0.05774

Epoch 00006: val_loss did not improve from 0.05774

Epoch 00007: val_loss did not improve from 0.05774

Epoch 00008: val_loss did not improve from 0.05774

Epoch 00009: val_loss did not improve from 0.05774

Epoch 00010: val_loss did not improve from 0.05774





Epoch 00001: val_loss improved from inf to 0.06496, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.06496 to 0.06457, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.06457

Epoch 00004: val_loss did not improve from 0.06457

Epoch 00005: val_loss did not improve from 0.06457

Epoch 00006: val_loss did not improve from 0.06457

Epoch 00007: val_loss did not improve from 0.06457

Epoch 00008: val_loss did not improve from 0.06457

Epoch 00009: val_loss did not improve from 0.06457

Epoch 00010: val_loss did not improve from 0.06457

Epoch 00001: val_loss improved from inf to 0.03705, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.03705 to 0.03108, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.03108

Epoch 00004: val_loss did not improve from 0.03108

Epoch 00005: val_loss did not improve from 0.03108

Epoch 00006: val_loss did not improve from 0.03108

Epo




Epoch 00001: val_loss improved from inf to 0.11610, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.11610

Epoch 00003: val_loss did not improve from 0.11610

Epoch 00004: val_loss did not improve from 0.11610

Epoch 00005: val_loss did not improve from 0.11610

Epoch 00006: val_loss did not improve from 0.11610

Epoch 00007: val_loss did not improve from 0.11610

Epoch 00008: val_loss did not improve from 0.11610

Epoch 00009: val_loss did not improve from 0.11610

Epoch 00010: val_loss did not improve from 0.11610

Epoch 00001: val_loss improved from inf to 0.10420, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10420

Epoch 00003: val_loss did not improve from 0.10420

Epoch 00004: val_loss did not improve from 0.10420

Epoch 00005: val_loss did not improve from 0.10420

Epoch 00006: val_loss did not improve from 0.10420

Epoch 00007: val_loss did not improve from 0.10420

Epoch 00008: val_loss did not improve




Epoch 00001: val_loss improved from inf to 0.07788, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07788

Epoch 00003: val_loss did not improve from 0.07788

Epoch 00004: val_loss improved from 0.07788 to 0.07625, saving model to best-model-conll.hdfs

Epoch 00005: val_loss did not improve from 0.07625

Epoch 00006: val_loss did not improve from 0.07625

Epoch 00007: val_loss did not improve from 0.07625

Epoch 00008: val_loss did not improve from 0.07625

Epoch 00009: val_loss did not improve from 0.07625

Epoch 00010: val_loss did not improve from 0.07625





Epoch 00001: val_loss improved from inf to 0.08130, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08130

Epoch 00003: val_loss did not improve from 0.08130

Epoch 00004: val_loss did not improve from 0.08130

Epoch 00005: val_loss did not improve from 0.08130

Epoch 00006: val_loss did not improve from 0.08130

Epoch 00007: val_loss did not improve from 0.08130

Epoch 00008: val_loss did not improve from 0.08130

Epoch 00009: val_loss did not improve from 0.08130

Epoch 00010: val_loss did not improve from 0.08130





Epoch 00001: val_loss improved from inf to 0.13400, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.13400

Epoch 00003: val_loss did not improve from 0.13400

Epoch 00004: val_loss did not improve from 0.13400

Epoch 00005: val_loss did not improve from 0.13400

Epoch 00006: val_loss did not improve from 0.13400

Epoch 00007: val_loss did not improve from 0.13400

Epoch 00008: val_loss did not improve from 0.13400

Epoch 00009: val_loss did not improve from 0.13400

Epoch 00010: val_loss did not improve from 0.13400





Epoch 00001: val_loss improved from inf to 0.06349, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06349

Epoch 00003: val_loss did not improve from 0.06349

Epoch 00004: val_loss did not improve from 0.06349

Epoch 00005: val_loss did not improve from 0.06349

Epoch 00006: val_loss did not improve from 0.06349

Epoch 00007: val_loss did not improve from 0.06349

Epoch 00008: val_loss did not improve from 0.06349

Epoch 00009: val_loss did not improve from 0.06349

Epoch 00010: val_loss did not improve from 0.06349





Epoch 00001: val_loss improved from inf to 0.08019, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.08019 to 0.05703, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.05703

Epoch 00004: val_loss did not improve from 0.05703

Epoch 00005: val_loss did not improve from 0.05703

Epoch 00006: val_loss did not improve from 0.05703

Epoch 00007: val_loss did not improve from 0.05703

Epoch 00008: val_loss did not improve from 0.05703

Epoch 00009: val_loss did not improve from 0.05703

Epoch 00010: val_loss did not improve from 0.05703

Epoch 00001: val_loss improved from inf to 0.03460, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.03460

Epoch 00003: val_loss did not improve from 0.03460

Epoch 00004: val_loss did not improve from 0.03460

Epoch 00005: val_loss did not improve from 0.03460

Epoch 00006: val_loss did not improve from 0.03460

Epoch 00007: val_loss did not improve from 0.0




Epoch 00001: val_loss improved from inf to 0.11626, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.11626

Epoch 00003: val_loss did not improve from 0.11626

Epoch 00004: val_loss did not improve from 0.11626

Epoch 00005: val_loss did not improve from 0.11626

Epoch 00006: val_loss did not improve from 0.11626

Epoch 00007: val_loss did not improve from 0.11626

Epoch 00008: val_loss did not improve from 0.11626

Epoch 00009: val_loss did not improve from 0.11626

Epoch 00010: val_loss did not improve from 0.11626

Epoch 00001: val_loss improved from inf to 0.09598, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09598

Epoch 00003: val_loss did not improve from 0.09598

Epoch 00004: val_loss did not improve from 0.09598

Epoch 00005: val_loss did not improve from 0.09598

Epoch 00006: val_loss did not improve from 0.09598

Epoch 00007: val_loss did not improve from 0.09598

Epoch 00008: val_loss did not improve




Epoch 00001: val_loss improved from inf to 0.07657, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07657

Epoch 00003: val_loss did not improve from 0.07657

Epoch 00004: val_loss did not improve from 0.07657

Epoch 00005: val_loss did not improve from 0.07657

Epoch 00006: val_loss did not improve from 0.07657

Epoch 00007: val_loss did not improve from 0.07657

Epoch 00008: val_loss did not improve from 0.07657

Epoch 00009: val_loss did not improve from 0.07657

Epoch 00010: val_loss did not improve from 0.07657





Epoch 00001: val_loss improved from inf to 0.06115, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06115

Epoch 00003: val_loss did not improve from 0.06115

Epoch 00004: val_loss did not improve from 0.06115

Epoch 00005: val_loss did not improve from 0.06115

Epoch 00006: val_loss did not improve from 0.06115

Epoch 00007: val_loss did not improve from 0.06115

Epoch 00008: val_loss did not improve from 0.06115

Epoch 00009: val_loss did not improve from 0.06115

Epoch 00010: val_loss did not improve from 0.06115





Epoch 00001: val_loss improved from inf to 0.12084, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.12084

Epoch 00003: val_loss did not improve from 0.12084

Epoch 00004: val_loss did not improve from 0.12084

Epoch 00005: val_loss did not improve from 0.12084

Epoch 00006: val_loss did not improve from 0.12084

Epoch 00007: val_loss did not improve from 0.12084

Epoch 00008: val_loss did not improve from 0.12084

Epoch 00009: val_loss did not improve from 0.12084

Epoch 00010: val_loss did not improve from 0.12084





Epoch 00001: val_loss improved from inf to 0.04899, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04899

Epoch 00003: val_loss did not improve from 0.04899

Epoch 00004: val_loss did not improve from 0.04899

Epoch 00005: val_loss did not improve from 0.04899

Epoch 00006: val_loss did not improve from 0.04899

Epoch 00007: val_loss did not improve from 0.04899

Epoch 00008: val_loss did not improve from 0.04899

Epoch 00009: val_loss did not improve from 0.04899

Epoch 00010: val_loss did not improve from 0.04899





Epoch 00001: val_loss improved from inf to 0.06382, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06382

Epoch 00003: val_loss did not improve from 0.06382

Epoch 00004: val_loss did not improve from 0.06382

Epoch 00005: val_loss did not improve from 0.06382

Epoch 00006: val_loss did not improve from 0.06382

Epoch 00007: val_loss did not improve from 0.06382

Epoch 00008: val_loss did not improve from 0.06382

Epoch 00009: val_loss did not improve from 0.06382

Epoch 00010: val_loss did not improve from 0.06382





Epoch 00001: val_loss improved from inf to 0.04395, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.04395 to 0.03103, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.03103

Epoch 00004: val_loss did not improve from 0.03103

Epoch 00005: val_loss did not improve from 0.03103

Epoch 00006: val_loss did not improve from 0.03103

Epoch 00007: val_loss did not improve from 0.03103

Epoch 00008: val_loss did not improve from 0.03103

Epoch 00009: val_loss did not improve from 0.03103

Epoch 00010: val_loss did not improve from 0.03103





Epoch 00001: val_loss improved from inf to 0.04511, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04511

Epoch 00003: val_loss did not improve from 0.04511

Epoch 00004: val_loss did not improve from 0.04511

Epoch 00005: val_loss did not improve from 0.04511

Epoch 00006: val_loss did not improve from 0.04511

Epoch 00007: val_loss did not improve from 0.04511

Epoch 00008: val_loss did not improve from 0.04511

Epoch 00009: val_loss did not improve from 0.04511

Epoch 00010: val_loss did not improve from 0.04511

Epoch 00001: val_loss improved from inf to 0.12163, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.12163

Epoch 00003: val_loss did not improve from 0.12163

Epoch 00004: val_loss did not improve from 0.12163

Epoch 00005: val_loss did not improve from 0.12163

Epoch 00006: val_loss did not improve from 0.12163

Epoch 00007: val_loss did not improve from 0.12163

Epoch 00008: val_loss did not improve



AlgoCrossValIter - 4
Model: "sequential_34"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.57408, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.57408

Epoch 00003: val_loss improved from 0.57408 to 0.49558, saving model to best-model-conll.hdfs

Epoch 00004: val_loss improved from 0.49558 to 0.44804, saving model to best-model-conll.hdfs

Epoch 00005: val_loss 




Epoch 00001: val_loss improved from inf to 0.10512, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10512

Epoch 00003: val_loss did not improve from 0.10512

Epoch 00004: val_loss improved from 0.10512 to 0.10138, saving model to best-model-conll.hdfs

Epoch 00005: val_loss did not improve from 0.10138

Epoch 00006: val_loss did not improve from 0.10138

Epoch 00007: val_loss did not improve from 0.10138

Epoch 00008: val_loss did not improve from 0.10138

Epoch 00009: val_loss did not improve from 0.10138

Epoch 00010: val_loss did not improve from 0.10138





Epoch 00001: val_loss improved from inf to 0.06598, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06598

Epoch 00003: val_loss did not improve from 0.06598

Epoch 00004: val_loss did not improve from 0.06598

Epoch 00005: val_loss did not improve from 0.06598

Epoch 00006: val_loss did not improve from 0.06598

Epoch 00007: val_loss did not improve from 0.06598

Epoch 00008: val_loss did not improve from 0.06598

Epoch 00009: val_loss did not improve from 0.06598

Epoch 00010: val_loss did not improve from 0.06598





Epoch 00001: val_loss improved from inf to 0.16399, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.16399

Epoch 00003: val_loss improved from 0.16399 to 0.15565, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.15565

Epoch 00005: val_loss did not improve from 0.15565

Epoch 00006: val_loss did not improve from 0.15565

Epoch 00007: val_loss did not improve from 0.15565

Epoch 00008: val_loss did not improve from 0.15565

Epoch 00009: val_loss did not improve from 0.15565

Epoch 00010: val_loss did not improve from 0.15565





Epoch 00001: val_loss improved from inf to 0.05626, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05626

Epoch 00003: val_loss did not improve from 0.05626

Epoch 00004: val_loss did not improve from 0.05626

Epoch 00005: val_loss did not improve from 0.05626

Epoch 00006: val_loss did not improve from 0.05626

Epoch 00007: val_loss did not improve from 0.05626

Epoch 00008: val_loss did not improve from 0.05626

Epoch 00009: val_loss did not improve from 0.05626

Epoch 00010: val_loss did not improve from 0.05626





Epoch 00001: val_loss improved from inf to 0.05715, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05715

Epoch 00003: val_loss did not improve from 0.05715

Epoch 00004: val_loss did not improve from 0.05715

Epoch 00005: val_loss did not improve from 0.05715

Epoch 00006: val_loss did not improve from 0.05715

Epoch 00007: val_loss did not improve from 0.05715

Epoch 00008: val_loss did not improve from 0.05715

Epoch 00009: val_loss did not improve from 0.05715

Epoch 00010: val_loss did not improve from 0.05715

Epoch 00001: val_loss improved from inf to 0.03333, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.03333

Epoch 00003: val_loss improved from 0.03333 to 0.03209, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.03209

Epoch 00005: val_loss did not improve from 0.03209

Epoch 00006: val_loss did not improve from 0.03209

Epoch 00007: val_loss did not improve from 0.0




Epoch 00001: val_loss improved from inf to 0.04646, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04646

Epoch 00003: val_loss did not improve from 0.04646

Epoch 00004: val_loss did not improve from 0.04646

Epoch 00005: val_loss did not improve from 0.04646

Epoch 00006: val_loss did not improve from 0.04646

Epoch 00007: val_loss did not improve from 0.04646

Epoch 00008: val_loss did not improve from 0.04646

Epoch 00009: val_loss did not improve from 0.04646

Epoch 00010: val_loss did not improve from 0.04646

Epoch 00001: val_loss improved from inf to 0.13988, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.13988

Epoch 00003: val_loss did not improve from 0.13988

Epoch 00004: val_loss did not improve from 0.13988

Epoch 00005: val_loss did not improve from 0.13988

Epoch 00006: val_loss did not improve from 0.13988

Epoch 00007: val_loss did not improve from 0.13988

Epoch 00008: val_loss did not improve




Epoch 00001: val_loss improved from inf to 0.09761, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09761

Epoch 00003: val_loss did not improve from 0.09761

Epoch 00004: val_loss did not improve from 0.09761

Epoch 00005: val_loss did not improve from 0.09761

Epoch 00006: val_loss did not improve from 0.09761

Epoch 00007: val_loss did not improve from 0.09761

Epoch 00008: val_loss did not improve from 0.09761

Epoch 00009: val_loss did not improve from 0.09761

Epoch 00010: val_loss did not improve from 0.09761
AlgoCrossValIter - 5
Model: "sequential_35"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
o




Epoch 00001: val_loss improved from inf to 0.08157, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08157

Epoch 00003: val_loss improved from 0.08157 to 0.08059, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.08059

Epoch 00005: val_loss did not improve from 0.08059

Epoch 00006: val_loss did not improve from 0.08059

Epoch 00007: val_loss did not improve from 0.08059

Epoch 00008: val_loss did not improve from 0.08059

Epoch 00009: val_loss did not improve from 0.08059

Epoch 00010: val_loss did not improve from 0.08059





Epoch 00001: val_loss improved from inf to 0.07634, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07634

Epoch 00003: val_loss did not improve from 0.07634

Epoch 00004: val_loss did not improve from 0.07634

Epoch 00005: val_loss did not improve from 0.07634

Epoch 00006: val_loss did not improve from 0.07634

Epoch 00007: val_loss did not improve from 0.07634

Epoch 00008: val_loss did not improve from 0.07634

Epoch 00009: val_loss did not improve from 0.07634

Epoch 00010: val_loss did not improve from 0.07634





Epoch 00001: val_loss improved from inf to 0.10627, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10627

Epoch 00003: val_loss did not improve from 0.10627

Epoch 00004: val_loss did not improve from 0.10627

Epoch 00005: val_loss did not improve from 0.10627

Epoch 00006: val_loss did not improve from 0.10627

Epoch 00007: val_loss did not improve from 0.10627

Epoch 00008: val_loss did not improve from 0.10627

Epoch 00009: val_loss did not improve from 0.10627

Epoch 00010: val_loss did not improve from 0.10627





Epoch 00001: val_loss improved from inf to 0.07735, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.07735 to 0.06628, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.06628

Epoch 00004: val_loss did not improve from 0.06628

Epoch 00005: val_loss did not improve from 0.06628

Epoch 00006: val_loss did not improve from 0.06628

Epoch 00007: val_loss did not improve from 0.06628

Epoch 00008: val_loss did not improve from 0.06628

Epoch 00009: val_loss did not improve from 0.06628

Epoch 00010: val_loss did not improve from 0.06628





Epoch 00001: val_loss improved from inf to 0.06131, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06131

Epoch 00003: val_loss did not improve from 0.06131

Epoch 00004: val_loss did not improve from 0.06131

Epoch 00005: val_loss did not improve from 0.06131

Epoch 00006: val_loss did not improve from 0.06131

Epoch 00007: val_loss did not improve from 0.06131

Epoch 00008: val_loss did not improve from 0.06131

Epoch 00009: val_loss did not improve from 0.06131

Epoch 00010: val_loss did not improve from 0.06131





Epoch 00001: val_loss improved from inf to 0.03007, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.03007 to 0.02753, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.02753

Epoch 00004: val_loss did not improve from 0.02753

Epoch 00005: val_loss did not improve from 0.02753

Epoch 00006: val_loss did not improve from 0.02753

Epoch 00007: val_loss did not improve from 0.02753

Epoch 00008: val_loss did not improve from 0.02753

Epoch 00009: val_loss did not improve from 0.02753

Epoch 00010: val_loss did not improve from 0.02753





Epoch 00001: val_loss improved from inf to 0.04730, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04730

Epoch 00003: val_loss did not improve from 0.04730

Epoch 00004: val_loss did not improve from 0.04730

Epoch 00005: val_loss did not improve from 0.04730

Epoch 00006: val_loss did not improve from 0.04730

Epoch 00007: val_loss did not improve from 0.04730

Epoch 00008: val_loss did not improve from 0.04730

Epoch 00009: val_loss did not improve from 0.04730

Epoch 00010: val_loss did not improve from 0.04730

Epoch 00001: val_loss improved from inf to 0.14343, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.14343

Epoch 00003: val_loss improved from 0.14343 to 0.13549, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.13549

Epoch 00005: val_loss did not improve from 0.13549

Epoch 00006: val_loss did not improve from 0.13549

Epoch 00007: val_loss did not improve from 0.1




Epoch 00001: val_loss improved from inf to 0.09396, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09396

Epoch 00003: val_loss did not improve from 0.09396

Epoch 00004: val_loss did not improve from 0.09396

Epoch 00005: val_loss did not improve from 0.09396

Epoch 00006: val_loss did not improve from 0.09396

Epoch 00007: val_loss did not improve from 0.09396

Epoch 00008: val_loss did not improve from 0.09396

Epoch 00009: val_loss did not improve from 0.09396

Epoch 00010: val_loss did not improve from 0.09396





Epoch 00001: val_loss improved from inf to 0.13781, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.13781 to 0.07376, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.07376

Epoch 00004: val_loss did not improve from 0.07376

Epoch 00005: val_loss did not improve from 0.07376

Epoch 00006: val_loss did not improve from 0.07376

Epoch 00007: val_loss did not improve from 0.07376

Epoch 00008: val_loss did not improve from 0.07376

Epoch 00009: val_loss did not improve from 0.07376

Epoch 00010: val_loss did not improve from 0.07376





Epoch 00001: val_loss improved from inf to 0.12217, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.12217

Epoch 00003: val_loss did not improve from 0.12217

Epoch 00004: val_loss did not improve from 0.12217

Epoch 00005: val_loss did not improve from 0.12217

Epoch 00006: val_loss did not improve from 0.12217

Epoch 00007: val_loss did not improve from 0.12217

Epoch 00008: val_loss did not improve from 0.12217

Epoch 00009: val_loss did not improve from 0.12217

Epoch 00010: val_loss did not improve from 0.12217





Epoch 00001: val_loss improved from inf to 0.05437, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05437

Epoch 00003: val_loss did not improve from 0.05437

Epoch 00004: val_loss did not improve from 0.05437

Epoch 00005: val_loss did not improve from 0.05437

Epoch 00006: val_loss did not improve from 0.05437

Epoch 00007: val_loss did not improve from 0.05437

Epoch 00008: val_loss did not improve from 0.05437

Epoch 00009: val_loss did not improve from 0.05437

Epoch 00010: val_loss did not improve from 0.05437





Epoch 00001: val_loss improved from inf to 0.05405, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05405

Epoch 00003: val_loss did not improve from 0.05405

Epoch 00004: val_loss did not improve from 0.05405

Epoch 00005: val_loss did not improve from 0.05405

Epoch 00006: val_loss did not improve from 0.05405

Epoch 00007: val_loss did not improve from 0.05405

Epoch 00008: val_loss did not improve from 0.05405

Epoch 00009: val_loss did not improve from 0.05405

Epoch 00010: val_loss did not improve from 0.05405

Epoch 00001: val_loss improved from inf to 0.03359, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.03359

Epoch 00003: val_loss did not improve from 0.03359

Epoch 00004: val_loss did not improve from 0.03359

Epoch 00005: val_loss did not improve from 0.03359

Epoch 00006: val_loss did not improve from 0.03359

Epoch 00007: val_loss did not improve from 0.03359

Epoch 00008: val_loss did not improve




Epoch 00001: val_loss improved from inf to 0.10008, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10008

Epoch 00003: val_loss did not improve from 0.10008

Epoch 00004: val_loss did not improve from 0.10008

Epoch 00005: val_loss did not improve from 0.10008

Epoch 00006: val_loss did not improve from 0.10008

Epoch 00007: val_loss did not improve from 0.10008

Epoch 00008: val_loss did not improve from 0.10008

Epoch 00009: val_loss did not improve from 0.10008

Epoch 00010: val_loss did not improve from 0.10008
AlgoCrossValIter - 7
Model: "sequential_37"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
o




Epoch 00001: val_loss improved from inf to 0.12230, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.12230

Epoch 00003: val_loss improved from 0.12230 to 0.07619, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.07619

Epoch 00005: val_loss did not improve from 0.07619

Epoch 00006: val_loss did not improve from 0.07619

Epoch 00007: val_loss did not improve from 0.07619

Epoch 00008: val_loss did not improve from 0.07619

Epoch 00009: val_loss did not improve from 0.07619

Epoch 00010: val_loss did not improve from 0.07619





Epoch 00001: val_loss improved from inf to 0.09037, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.09037 to 0.07728, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.07728

Epoch 00004: val_loss did not improve from 0.07728

Epoch 00005: val_loss did not improve from 0.07728

Epoch 00006: val_loss did not improve from 0.07728

Epoch 00007: val_loss did not improve from 0.07728

Epoch 00008: val_loss did not improve from 0.07728

Epoch 00009: val_loss did not improve from 0.07728

Epoch 00010: val_loss did not improve from 0.07728





Epoch 00001: val_loss improved from inf to 0.10668, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10668

Epoch 00003: val_loss did not improve from 0.10668

Epoch 00004: val_loss did not improve from 0.10668

Epoch 00005: val_loss did not improve from 0.10668

Epoch 00006: val_loss did not improve from 0.10668

Epoch 00007: val_loss did not improve from 0.10668

Epoch 00008: val_loss did not improve from 0.10668

Epoch 00009: val_loss did not improve from 0.10668

Epoch 00010: val_loss did not improve from 0.10668





Epoch 00001: val_loss improved from inf to 0.05447, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.05447 to 0.05057, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.05057

Epoch 00004: val_loss did not improve from 0.05057

Epoch 00005: val_loss did not improve from 0.05057

Epoch 00006: val_loss did not improve from 0.05057

Epoch 00007: val_loss did not improve from 0.05057

Epoch 00008: val_loss did not improve from 0.05057

Epoch 00009: val_loss did not improve from 0.05057

Epoch 00010: val_loss did not improve from 0.05057

Epoch 00001: val_loss improved from inf to 0.05304, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05304

Epoch 00003: val_loss did not improve from 0.05304

Epoch 00004: val_loss did not improve from 0.05304

Epoch 00005: val_loss did not improve from 0.05304

Epoch 00006: val_loss did not improve from 0.05304

Epoch 00007: val_loss did not improve from 0.0




Epoch 00001: val_loss improved from inf to 0.05965, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.05965 to 0.02985, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.02985

Epoch 00004: val_loss did not improve from 0.02985

Epoch 00005: val_loss did not improve from 0.02985

Epoch 00006: val_loss did not improve from 0.02985

Epoch 00007: val_loss did not improve from 0.02985

Epoch 00008: val_loss did not improve from 0.02985

Epoch 00009: val_loss did not improve from 0.02985

Epoch 00010: val_loss did not improve from 0.02985

Epoch 00001: val_loss improved from inf to 0.04543, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04543

Epoch 00003: val_loss did not improve from 0.04543

Epoch 00004: val_loss did not improve from 0.04543

Epoch 00005: val_loss did not improve from 0.04543

Epoch 00006: val_loss did not improve from 0.04543

Epoch 00007: val_loss did not improve from 0.0




Epoch 00001: val_loss improved from inf to 0.10559, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.10559 to 0.09334, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.09334

Epoch 00004: val_loss did not improve from 0.09334

Epoch 00005: val_loss did not improve from 0.09334

Epoch 00006: val_loss did not improve from 0.09334

Epoch 00007: val_loss did not improve from 0.09334

Epoch 00008: val_loss improved from 0.09334 to 0.09312, saving model to best-model-conll.hdfs

Epoch 00009: val_loss did not improve from 0.09312

Epoch 00010: val_loss did not improve from 0.09312





Epoch 00001: val_loss improved from inf to 0.06828, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06828

Epoch 00003: val_loss did not improve from 0.06828

Epoch 00004: val_loss did not improve from 0.06828

Epoch 00005: val_loss did not improve from 0.06828

Epoch 00006: val_loss did not improve from 0.06828

Epoch 00007: val_loss did not improve from 0.06828

Epoch 00008: val_loss did not improve from 0.06828

Epoch 00009: val_loss did not improve from 0.06828

Epoch 00010: val_loss did not improve from 0.06828





Epoch 00001: val_loss improved from inf to 0.13733, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.13733

Epoch 00003: val_loss did not improve from 0.13733

Epoch 00004: val_loss did not improve from 0.13733

Epoch 00005: val_loss did not improve from 0.13733

Epoch 00006: val_loss did not improve from 0.13733

Epoch 00007: val_loss did not improve from 0.13733

Epoch 00008: val_loss did not improve from 0.13733

Epoch 00009: val_loss did not improve from 0.13733

Epoch 00010: val_loss did not improve from 0.13733





Epoch 00001: val_loss improved from inf to 0.05391, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05391

Epoch 00003: val_loss did not improve from 0.05391

Epoch 00004: val_loss did not improve from 0.05391

Epoch 00005: val_loss did not improve from 0.05391

Epoch 00006: val_loss did not improve from 0.05391

Epoch 00007: val_loss did not improve from 0.05391

Epoch 00008: val_loss did not improve from 0.05391

Epoch 00009: val_loss did not improve from 0.05391

Epoch 00010: val_loss did not improve from 0.05391





Epoch 00001: val_loss improved from inf to 0.06785, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.06785 to 0.06337, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.06337

Epoch 00004: val_loss did not improve from 0.06337

Epoch 00005: val_loss did not improve from 0.06337

Epoch 00006: val_loss did not improve from 0.06337

Epoch 00007: val_loss did not improve from 0.06337

Epoch 00008: val_loss did not improve from 0.06337

Epoch 00009: val_loss did not improve from 0.06337

Epoch 00010: val_loss did not improve from 0.06337





Epoch 00001: val_loss improved from inf to 0.03067, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.03067

Epoch 00003: val_loss did not improve from 0.03067

Epoch 00004: val_loss did not improve from 0.03067

Epoch 00005: val_loss did not improve from 0.03067

Epoch 00006: val_loss did not improve from 0.03067

Epoch 00007: val_loss did not improve from 0.03067

Epoch 00008: val_loss did not improve from 0.03067

Epoch 00009: val_loss did not improve from 0.03067

Epoch 00010: val_loss did not improve from 0.03067

Epoch 00001: val_loss improved from inf to 0.04653, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.04653 to 0.04650, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.04650

Epoch 00004: val_loss did not improve from 0.04650

Epoch 00005: val_loss did not improve from 0.04650

Epoch 00006: val_loss did not improve from 0.04650

Epoch 00007: val_loss did not improve from 0.0




Epoch 00001: val_loss improved from inf to 0.12052, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.12052

Epoch 00003: val_loss did not improve from 0.12052

Epoch 00004: val_loss did not improve from 0.12052

Epoch 00005: val_loss did not improve from 0.12052

Epoch 00006: val_loss did not improve from 0.12052

Epoch 00007: val_loss did not improve from 0.12052

Epoch 00008: val_loss did not improve from 0.12052

Epoch 00009: val_loss did not improve from 0.12052

Epoch 00010: val_loss did not improve from 0.12052

Epoch 00001: val_loss improved from inf to 0.10111, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10111

Epoch 00003: val_loss did not improve from 0.10111

Epoch 00004: val_loss did not improve from 0.10111

Epoch 00005: val_loss did not improve from 0.10111

Epoch 00006: val_loss did not improve from 0.10111

Epoch 00007: val_loss did not improve from 0.10111

Epoch 00008: val_loss did not improve




Epoch 00001: val_loss improved from inf to 0.10499, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.10499 to 0.08099, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.08099

Epoch 00004: val_loss did not improve from 0.08099

Epoch 00005: val_loss did not improve from 0.08099

Epoch 00006: val_loss did not improve from 0.08099

Epoch 00007: val_loss did not improve from 0.08099

Epoch 00008: val_loss did not improve from 0.08099

Epoch 00009: val_loss did not improve from 0.08099

Epoch 00010: val_loss did not improve from 0.08099





Epoch 00001: val_loss improved from inf to 0.09576, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09576

Epoch 00003: val_loss improved from 0.09576 to 0.08727, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.08727

Epoch 00005: val_loss did not improve from 0.08727

Epoch 00006: val_loss did not improve from 0.08727

Epoch 00007: val_loss did not improve from 0.08727

Epoch 00008: val_loss did not improve from 0.08727

Epoch 00009: val_loss did not improve from 0.08727

Epoch 00010: val_loss did not improve from 0.08727





Epoch 00001: val_loss improved from inf to 0.13175, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.13175

Epoch 00003: val_loss did not improve from 0.13175

Epoch 00004: val_loss did not improve from 0.13175

Epoch 00005: val_loss did not improve from 0.13175

Epoch 00006: val_loss did not improve from 0.13175

Epoch 00007: val_loss did not improve from 0.13175

Epoch 00008: val_loss did not improve from 0.13175

Epoch 00009: val_loss did not improve from 0.13175

Epoch 00010: val_loss did not improve from 0.13175





Epoch 00001: val_loss improved from inf to 0.04905, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04905

Epoch 00003: val_loss did not improve from 0.04905

Epoch 00004: val_loss did not improve from 0.04905

Epoch 00005: val_loss did not improve from 0.04905

Epoch 00006: val_loss did not improve from 0.04905

Epoch 00007: val_loss did not improve from 0.04905

Epoch 00008: val_loss did not improve from 0.04905

Epoch 00009: val_loss did not improve from 0.04905

Epoch 00010: val_loss did not improve from 0.04905





Epoch 00001: val_loss improved from inf to 0.05744, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05744

Epoch 00003: val_loss did not improve from 0.05744

Epoch 00004: val_loss did not improve from 0.05744

Epoch 00005: val_loss did not improve from 0.05744

Epoch 00006: val_loss did not improve from 0.05744

Epoch 00007: val_loss did not improve from 0.05744

Epoch 00008: val_loss did not improve from 0.05744

Epoch 00009: val_loss did not improve from 0.05744

Epoch 00010: val_loss did not improve from 0.05744

Epoch 00001: val_loss improved from inf to 0.04646, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.04646 to 0.04440, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.04440

Epoch 00004: val_loss did not improve from 0.04440

Epoch 00005: val_loss improved from 0.04440 to 0.03430, saving model to best-model-conll.hdfs

Epoch 00006: val_loss did not improve from 0.03430

Epo




Epoch 00001: val_loss improved from inf to 0.12814, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.12814

Epoch 00003: val_loss did not improve from 0.12814

Epoch 00004: val_loss did not improve from 0.12814

Epoch 00005: val_loss did not improve from 0.12814

Epoch 00006: val_loss did not improve from 0.12814

Epoch 00007: val_loss did not improve from 0.12814

Epoch 00008: val_loss did not improve from 0.12814

Epoch 00009: val_loss did not improve from 0.12814

Epoch 00010: val_loss did not improve from 0.12814

Epoch 00001: val_loss improved from inf to 0.10030, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.10030 to 0.09945, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.09945

Epoch 00004: val_loss did not improve from 0.09945

Epoch 00005: val_loss did not improve from 0.09945

Epoch 00006: val_loss did not improve from 0.09945

Epoch 00007: val_loss did not improve from 0.0



AlgoCrossValIter - 10
Model: "sequential_40"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.55145, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.55145 to 0.52563, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.52563 to 0.46404, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.46404

Epoch 00005: val_loss




Epoch 00001: val_loss improved from inf to 0.06947, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06947

Epoch 00003: val_loss did not improve from 0.06947

Epoch 00004: val_loss did not improve from 0.06947

Epoch 00005: val_loss did not improve from 0.06947

Epoch 00006: val_loss did not improve from 0.06947

Epoch 00007: val_loss did not improve from 0.06947

Epoch 00008: val_loss did not improve from 0.06947

Epoch 00009: val_loss did not improve from 0.06947

Epoch 00010: val_loss did not improve from 0.06947





Epoch 00001: val_loss improved from inf to 0.07766, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07766

Epoch 00003: val_loss did not improve from 0.07766

Epoch 00004: val_loss did not improve from 0.07766

Epoch 00005: val_loss did not improve from 0.07766

Epoch 00006: val_loss did not improve from 0.07766

Epoch 00007: val_loss did not improve from 0.07766

Epoch 00008: val_loss did not improve from 0.07766

Epoch 00009: val_loss did not improve from 0.07766

Epoch 00010: val_loss did not improve from 0.07766





Epoch 00001: val_loss improved from inf to 0.11629, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.11629

Epoch 00003: val_loss did not improve from 0.11629

Epoch 00004: val_loss did not improve from 0.11629

Epoch 00005: val_loss did not improve from 0.11629

Epoch 00006: val_loss did not improve from 0.11629

Epoch 00007: val_loss did not improve from 0.11629

Epoch 00008: val_loss did not improve from 0.11629

Epoch 00009: val_loss did not improve from 0.11629

Epoch 00010: val_loss did not improve from 0.11629





Epoch 00001: val_loss improved from inf to 0.05708, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05708

Epoch 00003: val_loss did not improve from 0.05708

Epoch 00004: val_loss did not improve from 0.05708

Epoch 00005: val_loss did not improve from 0.05708

Epoch 00006: val_loss did not improve from 0.05708

Epoch 00007: val_loss did not improve from 0.05708

Epoch 00008: val_loss did not improve from 0.05708

Epoch 00009: val_loss did not improve from 0.05708

Epoch 00010: val_loss did not improve from 0.05708





Epoch 00001: val_loss improved from inf to 0.05512, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05512

Epoch 00003: val_loss did not improve from 0.05512

Epoch 00004: val_loss did not improve from 0.05512

Epoch 00005: val_loss did not improve from 0.05512

Epoch 00006: val_loss did not improve from 0.05512

Epoch 00007: val_loss did not improve from 0.05512

Epoch 00008: val_loss did not improve from 0.05512

Epoch 00009: val_loss did not improve from 0.05512

Epoch 00010: val_loss did not improve from 0.05512





Epoch 00001: val_loss improved from inf to 0.03393, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.03393

Epoch 00003: val_loss did not improve from 0.03393

Epoch 00004: val_loss did not improve from 0.03393

Epoch 00005: val_loss did not improve from 0.03393

Epoch 00006: val_loss did not improve from 0.03393

Epoch 00007: val_loss did not improve from 0.03393

Epoch 00008: val_loss did not improve from 0.03393

Epoch 00009: val_loss improved from 0.03393 to 0.03216, saving model to best-model-conll.hdfs

Epoch 00010: val_loss did not improve from 0.03216





Epoch 00001: val_loss improved from inf to 0.04104, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04104

Epoch 00003: val_loss did not improve from 0.04104

Epoch 00004: val_loss did not improve from 0.04104

Epoch 00005: val_loss did not improve from 0.04104

Epoch 00006: val_loss did not improve from 0.04104

Epoch 00007: val_loss did not improve from 0.04104

Epoch 00008: val_loss did not improve from 0.04104

Epoch 00009: val_loss did not improve from 0.04104

Epoch 00010: val_loss did not improve from 0.04104

Epoch 00001: val_loss improved from inf to 0.13144, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.13144

Epoch 00003: val_loss did not improve from 0.13144

Epoch 00004: val_loss did not improve from 0.13144

Epoch 00005: val_loss did not improve from 0.13144

Epoch 00006: val_loss did not improve from 0.13144

Epoch 00007: val_loss did not improve from 0.13144

Epoch 00008: val_loss did not improve

In [311]:
resultCrossVal.to_csv("results.csv")
resultCrossVal

Unnamed: 0,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9
P_test,83.983,86.314,85.726,84.397,81.651,81.777,84.974,84.606,87.226,88.068
P_train,87.141,88.337,87.095,87.909,86.844,86.702,87.013,86.044,89.351,88.108
P_ewo,75.261,78.601,78.183,79.672,75.381,77.582,75.684,74.492,80.787,79.831
R_test,82.775,80.235,81.41,76.419,81.941,83.394,83.219,81.234,79.783,80.909
R_train,85.551,83.57,83.113,84.083,87.043,86.65,85.972,86.951,82.226,83.363
R_ewo,71.018,66.296,65.648,66.759,72.315,71.389,68.982,69.63,65.184,67.407
F1-test,82.038,82.064,80.977,77.433,81.372,81.006,82.372,80.48,80.848,82.035
F1-train,85.272,85.607,84.763,85.8,86.566,86.569,86.224,86.287,85.254,85.413
F1-ewo,71.817,71.536,70.906,72.356,73.322,74.109,71.821,71.406,71.504,72.756


In [312]:
resultCrossVal.mean(axis=1).to_frame()

Unnamed: 0,0
P_test,84.8722
P_train,87.4544
P_ewo,77.5474
R_test,81.1319
R_train,84.8522
R_ewo,68.4628
F1-test,81.0625
F1-train,85.7755
F1-ewo,72.1533


In [313]:
resultCrossVal.std(axis=1).to_frame()

Unnamed: 0,0
P_test,2.098025
P_train,0.963793
P_ewo,2.222803
R_test,2.049399
R_train,1.781201
R_ewo,2.559283
F1-test,1.425172
F1-train,0.617193
F1-ewo,0.985187


In [314]:
trainByTagResult.to_csv("results/train-by-tag.csv")
trainByTagResult

Unnamed: 0,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9
F1-O,98.154,98.181,98.091,98.182,98.222,98.221,98.217,98.207,98.156,98.155
P-O,98.118,97.863,97.825,97.919,98.284,98.235,98.174,98.303,97.682,97.829
R-O,98.217,98.51,98.369,98.452,98.175,98.214,98.273,98.12,98.651,98.494
F1-MISC,78.788,76.051,75.208,74.99,79.866,79.339,77.192,77.565,77.207,77.628889
P-MISC,92.071,95.278,89.167,95.139,93.056,90.556,94.571,92.412,92.511,85.417
R-MISC,70.769,64.304,69.031,63.193,70.769,71.769,66.416,68.436,67.547,59.88
F1-PER,88.301,88.69,88.206,89.271,89.723,89.65,89.573,89.71,88.427,89.566
P-PER,87.691,89.124,88.131,88.78,87.951,88.041,87.579,87.209,89.904,88.466
R-PER,90.658,89.006,88.775,89.878,92.075,91.53,91.912,92.524,87.756,90.896
F1-LOC,75.785,77.576,74.936,75.775,77.387,77.735,76.186,76.953,75.426,78.441111


In [315]:
trainByTagResult.mean(axis=1).to_frame()

Unnamed: 0,0
F1-O,98.1786
P-O,98.0232
R-O,98.3475
F1-MISC,77.383489
P-MISC,92.0178
R-MISC,67.2114
F1-PER,89.1117
P-PER,88.2876
R-PER,90.501
F1-LOC,76.620011


In [316]:
trainByTagResult.std(axis=1).to_frame()

Unnamed: 0,0
F1-O,0.041267
P-O,0.224308
R-O,0.174253
F1-MISC,1.645114
P-MISC,3.022204
R-MISC,3.800447
F1-PER,0.63159
P-PER,0.803233
R-PER,1.596444
F1-LOC,1.15633


In [317]:
testByTagResult.to_csv("results/test-by-tag.csv")
testByTagResult

Unnamed: 0,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9
F1-O,97.725,97.859,97.672,97.305,97.851,97.602,97.792,97.61,97.748,97.871
P-O,97.651,97.401,97.436,96.708,97.736,97.656,97.655,97.431,97.203,97.316
R-O,97.855,98.365,97.993,97.995,97.988,97.611,97.991,97.867,98.376,98.505
F1-MISC,68.33375,66.667143,68.33375,63.81,63.81,68.33375,63.81,63.81,63.81,68.33375
P-MISC,56.667,50.0,56.667,46.667,46.667,56.667,46.667,46.667,46.667,56.667
R-MISC,55.0,45.0,55.0,45.0,45.0,55.0,45.0,45.0,45.0,55.0
F1-PER,87.184,85.395,87.124,83.55,86.384,85.619,88.442,86.024,86.374,86.072
P-PER,87.259,86.89,88.954,87.603,84.708,83.954,87.825,88.065,88.954,88.714
R-PER,88.342,85.644,87.548,82.746,88.659,88.659,90.365,86.437,86.437,85.603
F1-LOC,49.63,54.815556,44.075556,49.58375,55.185556,49.63,51.482222,49.731111,55.185556,56.173333


In [318]:
testByTagResult.mean(axis=1).to_frame()

Unnamed: 0,0
F1-O,97.7035
P-O,97.4193
R-O,98.0546
F1-MISC,65.905214
P-MISC,51.0003
R-MISC,49.0
F1-PER,86.2168
P-PER,87.2926
R-PER,87.044
F1-LOC,51.549264


In [319]:
testByTagResult.std(axis=1).to_frame()

Unnamed: 0,0
F1-O,0.17098
P-O,0.30363
R-O,0.276917
F1-MISC,2.263755
P-MISC,4.981454
R-MISC,5.163978
F1-PER,1.289503
P-PER,1.716162
R-PER,2.145551
F1-LOC,3.784365


In [320]:
ewoByTagResult.to_csv("results/ewo-by-tag.csv")

In [321]:
ewoByTagResult = pd.read_csv("results/ewo-by-tag.csv", index_col=0)
ewoByTagResult

Unnamed: 0,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9
F1-O,96.964,97.085,97.091,97.207,97.133,97.209,97.069,96.931,97.177,97.228
P-O,96.787,96.262,96.244,96.319,96.984,96.79,96.613,96.661,96.164,96.382
R-O,97.17,97.934,97.968,98.119,97.301,97.638,97.542,97.222,98.229,98.1
F1-MISC,54.962,44.672,48.09,48.963333,54.337,54.311,48.283,54.123,49.95,47.904444
P-MISC,66.569,66.67,53.891,60.479,67.621,65.914,66.392,68.176,66.035,60.003
R-MISC,50.0,35.0,40.0,36.25,47.5,47.5,40.0,47.5,42.5,35.0
F1-PER,76.721,77.371,77.546,78.554,78.98,79.702,77.941,77.106,77.695,79.49
P-PER,75.977,79.47,79.47,81.036,76.863,79.103,77.467,76.261,81.903,80.557
R-PER,79.875,76.375,76.375,76.5,82.0,80.75,78.875,78.625,75.0,78.75
F1-LOC,56.972,54.187,49.397,54.305,54.962,57.919,52.717,53.245,50.198,55.371111


In [322]:
ewoByTagResult.mean(axis=1).to_frame()

Unnamed: 0,0
F1-O,97.1094
P-O,96.5206
R-O,97.7223
F1-MISC,50.559578
P-MISC,64.175
R-MISC,42.125
F1-PER,78.1106
P-PER,78.8107
R-PER,78.3125
F1-LOC,53.927311


In [323]:
ewoByTagResult.std(axis=1).to_frame()

Unnamed: 0,0
F1-O,0.1022917
P-O,0.2822395
R-O,0.3992218
F1-MISC,3.595304
P-MISC,4.571222
R-MISC,5.714565
F1-PER,1.021722
P-PER,2.072265
R-PER,2.219899
F1-LOC,2.685989


In [324]:
columns = en_fingerprints.columns

print("Pred", "Real", "Freq", "Word", sep="\t")
for c in columns:
    prediction = model.predict(en_fingerprints[c].values.reshape((1, 210)))
    pred_tag = int2tag[np.argmax(prediction)]
    real_tag = en_corpus[en_corpus.word == c].iloc[0]['ne-tag']
    
    if pred_tag != real_tag:
        print(pred_tag, real_tag, en_fingerprints[c].max(), c, sep="\t")

Pred	Real	Freq	Word
O	LOC	4170.0	Samaria
O	LOC	4170.0	Olivet
O	MISC	4170.0	Sabbath
PER	O	4170.0	upper
PER	O	4170.0	room
PER	O	4170.0	where
PER	O	4170.0	Zealot
LOC	O	4170.0	so
LOC	O	4170.0	language
O	MISC	4170.0	Psalms
PER	O	4170.0	forward
MISC	O	4170.0	going
O	PER	4170.0	Moses
PER	O	4170.0	proclaimed
ORG	O	4170.0	captain
PER	O	4170.0	high-priestly
PER	O	4170.0	family
O	PER	4170.0	Pontius
O	PER	4170.0	Barnabas
O	LOC	4170.0	Cyprus
O	PER	4170.0	Elijah
O	MISC	4170.0	r
PER	O	4170.0	Ju
PER	O	4170.0	h
LOC	O	4170.0	deportation
PER	O	4170.0	us)
