In [88]:
# import
import keras
import sys
import numpy as np
import string
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import SimpleRNN, Dense
from keras.utils import np_utils, plot_model
from keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn import model_selection
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, precision_recall_fscore_support
import h5py as h5py

In [89]:
# if we are doeing binary classification. That means say if a token is a named entity or not
BINARY = False

# number of epochs for training
epochs = 10 

# the english side of the corpus
en_corpus_file = "corpus-en.txt"

# the ewondo side of the corpus
ewo_corpus_file = "corpus-ewo.txt"

# name of the file to same the model 
best_model_file = "best-model-conll.hdfs"

# The maximal number of phrases to use
max_nb_of_phrases =  -1

# the maximal number of duplicates for each word in the corpus
duplication = 1

# wether we are using only the vocabulary, ro redundancy
is_only_vocab = True

# if word should be shuffle or not
shuffle = is_only_vocab

# the number of neurons in the first layer
h1_size = 640

# number of neurons in the second layer
h2_size = 160  

In [90]:
def getTag(aString):
    """
        convert a string to a tag
    """
    tag = "O"
    if BINARY:
        if aString != "O":
            return "NE"
    else:
        tag = aString
    return tag
     

In [91]:
def load_corpus(file, max_nb_of_phrases):
    """
    Load a corpus stored in a file
    Input:
        - file: the name of the file of the corpus
        - max_nb_of_phases: maximal number of phrases to load
    
    Return:
        - a DataFrame representing the corpus
        - the number of phrases in the corpus
    """
    nb_of_phrases = 0
    dataset = {"word": [], "ne-tag": []}
    with open(file) as f:
        prev_line = None
        for cpt, line in enumerate(f):
            if cpt == 0:
                continue
            if nb_of_phrases == max_nb_of_phrases:
                break;

            l = line.strip()
            if len(l) == 0 and len(prev_line) != 0:
                nb_of_phrases += 1
                dataset["word"].append(line)
                dataset["ne-tag"].append(None)
            else:
                l = l.split("\t")
                if l[0] not in string.punctuation:
                    dataset["word"].append(l[0])
                    dataset["ne-tag"].append(ne_type(l[1]))
            prev_line = line.strip()
        
    return pd.DataFrame(dataset), nb_of_phrases+1

In [92]:
def corpus_fingerprint(aDataframe, nb_of_biphrases):
    """
    Create the distributionnal signature of each word in the corpus
    Input:
        -aDataFrame: the corpus DataFrame
        -nb_of_biphrases: number of phrases in the corpus
    Return:
        a DataFrame: corpus fingerprint, the columns are the words in the corpus
    """
    fingerprints = {}
    current_bi_phrase_index = 0
    nb_word_in_corpus = aDataframe[aDataframe.word != "\n"].word.size
    words_in_current_phrase = []
    for index, row in aDataframe.iterrows():
        if current_bi_phrase_index > nb_of_biphrases:
            break
            
        word = row['word']
        
        if word != "\n":
            words_in_current_phrase.append(word)
            if word not in fingerprints:
                fingerprints[word] = np.zeros(nb_of_biphrases, dtype=np.float32)
            fingerprints[word][current_bi_phrase_index] += 1
        else:
            nb_word_in_current_phrase = len(words_in_current_phrase)
#             for w in words_in_current_phrase:
#                 fingerprints[w][current_bi_phrase_index] = nb_word_in_corpus / fingerprints[w][current_bi_phrase_index]                
            current_bi_phrase_index += 1
            words_in_current_phrase = []
    for word in fingerprints:
        for i in range(nb_of_biphrases):
            if fingerprints[word][i] != 0:
                fingerprints[word][i] = nb_word_in_corpus / fingerprints[word][i]
#         fingerprints[word][nb_of_biphrases] = nb_word_in_corpus / aDataframe[aDataframe.word == word].word.size
        
    return pd.DataFrame(fingerprints)

In [93]:
def corpus2trainingdata(aDataframe, fingerprintsDataFrame):
    """
    Convert corpus to training data => numpy array
    
    Input:
        -aDataFrame: Corpus dataframe
        -fingerprintsDataFrame: distributionnal signature of words in the corpus
    Return:
        (X, y): X is the array of words (signature) in the corpus and y is the corresponding labels (NE tags)
    """
    X = np.zeros((aDataframe.shape[0], fingerprintsDataFrame.shape[0]), dtype=np.int8)
    y = np.zeros(aDataframe.shape[0], dtype=np.int8)
    i = 0
    for row in aDataframe.iterrows():
        X[i] = fingerprintsDataFrame[row[1]['word']].values
        y[i] = tag2int[getTag(row[1]['ne-tag'])]
        i += 1
    return X, y

In [94]:
def train_test_split(X, y, test_size = 0.33):
    total = X.shape[0]
    train_length = round(total * (1 - test_size)) 
    return X[:train_length], X[train_length:], y[:train_length], y[train_length:]

In [95]:
# A utility function to convert NE tags
def ne_type(aType):
    aType = aType.lower()
    if 'per' in aType:
        t =  'NE' if BINARY else 'PER' 
    elif 'loc' in aType:
        t =  'NE' if BINARY else 'LOC'
    elif 'org' in aType:
        t =  'NE' if BINARY else 'ORG'
    elif 'hour' in aType:
        t =  'NE' if BINARY else 'MISC'
    elif aType != 'o' and len(aType) > 0 :
        t =  'NE' if BINARY else 'MISC'
    else:
        t = 'O'
    return t

In [96]:
def compute_performance(y_true, y_pred, words=None, BINARY=False):
    """
    Return the precision, recall, f1-score, accuracy and a dataframe comparing model predictions to ground truth
    """
    if BINARY:
        p = precision_score(y_true, y_pred, pos_label=tag2int['NE'])
        r = recall_score(y_true, y_pred, pos_label=tag2int['NE'])
        f1 = f1_score(y_true, y_pred, pos_label=tag2int['NE'])
        acc = accuracy_score(y_true, y_pred)
    else:
        p = precision_score(y_pred, y_true, average='macro')
        r = recall_score(y_pred, y_true, average='macro')
        f1 = f1_score(y_pred, y_true, average='macro')
        acc = accuracy_score(y_pred, y_true)
    if words is None:
        model_output_vs = pd.DataFrame({'y_true': [int2tag[i] for i in y_true], 'y_pred': [int2tag[i] for i in y_pred]})
    else:
        model_output_vs = pd.DataFrame({'word': words, 'y_true': [int2tag[i] for i in y_true], 'y_pred': [int2tag[i] for i in y_pred]})

    return p, r, f1, acc, model_output_vs

In [97]:
def P_R_F1(y_pred, y_true, neg_class):
    same = y_pred[y_true==y_pred]
    tp = same[same != neg_class].size
    nb_of_pos_exple = y_true[y_true != neg_class].size
    nb_of_pos_pred = y_pred[y_pred != neg_class].size
    p = r = f1 = 0
    try:
        p = np.round(tp*100/nb_of_pos_pred, 2)
    except ZeroDivisionError:
        print("number of correct positive predictions is 0")
        
    try:
        r = np.round(tp*100/nb_of_pos_exple, 2)
    except ZeroDivisionError:
        print("number of position exple is 0")
        
    try:
        f1 = np.round(2*r*p/(r+p), 2)
    except ZeroDivisionError:
        print("Recall and precision are 0")

    return p, r, f1

In [98]:
def shuffle_data(X, y):
    indices = [i for i in  range(X.shape[0])]
    np.random.shuffle(indices)
    return X[indices], y[indices]

In [99]:
def create_model(input_dim, output_dim):
    model = Sequential()
    model.add(Dense(h1_size, input_dim=input_dim, activation='sigmoid', name="hidden1"))
    model.add(Dense(h2_size, activation='sigmoid', name="hidden2"))
    if BINARY:
        model.add(Dense(1, activation='sigmoid', name="outputlayer"))
        model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['binary_accuracy'])
    else:
        model.add(Dense(output_dim, activation='softmax', name="outputlayer"))
        model.compile(loss='categorical_crossentropy', optimizer="rmsprop", metrics=['accuracy'])
    model.summary()
    return model

In [100]:
def train_model(model, X_train, y_train, X_val, y_val, epochs=epochs):
    # stop learning if the error is the same between two consecutive epochs
    early_stop = EarlyStopping(patience=20, verbose=2)
    
    # saving best model
    best_model_cp = ModelCheckpoint(best_model_file, save_best_only=True, verbose=1)
    
    model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=epochs, verbose=0, shuffle=shuffle, callbacks=[best_model_cp, early_stop])
    
    #loading and returning the best model
    return keras.models.load_model(best_model_file)

In [101]:
def predict(model, X, y, binary=BINARY):
    if BINARY:
        y_pred = np.round(model.predict(X))
        y_true = y
    else:
        predictions = model.predict(X)
        y_pred = np.array([np.argmax(p) for p in predictions])
        y_true = np.array([np.argmax(t) for t in y ])
    return y_true, y_pred

In [102]:
def model_performance(y_true, y_pred):
    return P_R_F1(y_pred, y_true, tag2int['O']) #precision, recall, f1-score

In [103]:
def model_performace_by_tag(y_true, y_pred, tag):
    p, r, f1 = 0, 0, 0
    
    eq = y_pred[y_pred==y_true]
    correctly_pred = eq[eq==tag].size
    try:
        p = np.round(100 * correctly_pred / y_pred[y_pred==tag].size, 2)
    except ZeroDivisionError:
        pass
    
    try:
        r = np.round(100 * correctly_pred / y_true[y_true==tag].size, 2)
    except ZeroDivisionError:
        pass
    
    try:
        f1 = np.round(2 * r * p / (r + p), 2)
    except ZeroDivisionError:
        pass
    
    return p, r, f1

In [104]:
def algoEval(X_train, y_train, X_val, y_val, X_ewo, y_ewo, epochs=epochs, model=None):
    """
    Train a model on (X, y) and validate on (X_val, y_val) then project on (X_ewo)
    """
    test_precision, train_precision, ewo_precision = [], [], []
    test_recall, train_recall, ewo_recall = [], [], []
    test_fscore, train_fscore, ewo_fscore = [], [], []
    
    test_result_by_tag = {}
    train_result_by_tag = {}
    ewo_result_by_tag = {}
    for t in tagSet:
        f1_key = "F1-"+t
        p_key = "P-"+t
        r_key = "R-"+t
        train_result_by_tag[f1_key], train_result_by_tag[p_key], train_result_by_tag[r_key] = [], [], []
        test_result_by_tag[f1_key], test_result_by_tag[p_key], test_result_by_tag[r_key] = [], [], []
        ewo_result_by_tag[f1_key], ewo_result_by_tag[p_key], ewo_result_by_tag[r_key] = [], [], []

    m = train_model(model, X_train, y_train, X_val, y_val, epochs=epochs)
        
    y_true, y_pred = predict(m, X_train, y_train)
    p_train, r_train, f1_train = model_performance(y_true, y_pred)
        
    y_true_val, y_pred_val = predict(m, X_val, y_val)
    p_val, r_val, f1_val = model_performance(y_true_val, y_pred_val)
        
    y_true_ewo, y_pred_ewo = predict(m, X_ewo, y_ewo) 
    p_ewo, r_ewo, f1_ewo = model_performance(y_true_ewo, y_pred_ewo)
        
    for t in range(len(int2tag)):
        f1_key = "F1-" + int2tag[t]
        p_key = "P-" + int2tag[t]
        r_key = "R-" + int2tag[t]
            
        p, r, f1 = model_performace_by_tag(y_true, y_pred, t)
        train_result_by_tag[p_key].append(p)
        train_result_by_tag[r_key].append(r)
        train_result_by_tag[f1_key].append(f1)
            
        p, r, f1 = model_performace_by_tag(y_true_val, y_pred_val, t)
        test_result_by_tag[p_key].append(p)
        test_result_by_tag[r_key].append(r)
        test_result_by_tag[f1_key].append(f1)
            
        p, r, f1 = model_performace_by_tag(y_true_ewo, y_pred_ewo, t)
        ewo_result_by_tag[p_key].append(p)
        ewo_result_by_tag[r_key].append(r)
        ewo_result_by_tag[f1_key].append(f1)
                
    test_precision.append(p_val)
    train_precision.append(p_train)
    ewo_precision.append(p_ewo)
        
    test_recall.append(r_val)
    train_recall.append(r_train)
    ewo_recall.append(r_ewo)
        
    test_fscore.append(f1_val)
    train_fscore.append(f1_train)
    ewo_fscore.append(f1_ewo)
    return pd.DataFrame({
        'P_test': test_precision, 
        'P_train': train_precision, 
        'P_ewo': ewo_precision, 'R_test': test_recall, 'R_train': train_recall, 
        'R_ewo': ewo_recall, 'F1-test': test_fscore, 'F1-train': train_fscore, 'F1-ewo': ewo_fscore}), pd.DataFrame(train_result_by_tag), pd.DataFrame(test_result_by_tag), pd.DataFrame(ewo_result_by_tag)

In [105]:
def algoCrossVal(X, y, X_ewo, y_ewo, k = 10, repeat=1): 
    """
    Traing a model with k-fold cross validation
    """
    block_size = int(X.shape[0] / k)   
    output = None
    model = None
    train_by_tags, test_by_tags, ewo_by_tags = None, None, None
    for it in range(repeat):
        print("AlgoCrossValIter -", it+1)
        model = create_model(X.shape[1], len(tagSet))
        results = None
        train_by_tagsTmp, test_by_tagsTmp, ewo_by_tagsTmp = None, None, None
        for i in range(k):
            X_val, y_val = X[i*block_size:i*block_size+block_size], y[i*block_size:i*block_size+block_size]
            X_train = np.concatenate((X[0:i*block_size], X[i*block_size+block_size:]))
            y_train = np.concatenate((y[0:i*block_size], y[i*block_size+block_size:]))

            X_train = X_train.reshape(X_train.shape[0], X_train.shape[1])
            X_val = X_val.reshape(X_val.shape[0], X_val.shape[1])

            result, train_by_tag, test_by_tag, ewo_by_tag = algoEval(X_train, y_train, X_val, y_val, X_ewo, y_ewo, model=model)
            if results is None:
                results = result.copy()
                train_by_tagsTmp, test_by_tagsTmp, ewo_by_tagsTmp = train_by_tag.copy(), test_by_tag.copy(), ewo_by_tag.copy()
            else:
                results = pd.concat([results, result], ignore_index=True)
                train_by_tagsTmp = pd.concat([train_by_tagsTmp, train_by_tag], ignore_index=True)
                test_by_tagsTmp = pd.concat([test_by_tagsTmp, test_by_tag], ignore_index=True)
                ewo_by_tagsTmp = pd.concat([ewo_by_tagsTmp, ewo_by_tag], ignore_index=True)
        
        if output is None:
            output = results.mean(axis=0).to_frame()
            train_by_tags = train_by_tagsTmp.mean(axis=0).to_frame()
            test_by_tags = test_by_tagsTmp.mean(axis=0).to_frame()
            ewo_by_tags = ewo_by_tagsTmp.mean(axis=0).to_frame()
        else:
            output = pd.concat([output, results.mean(axis=0).to_frame()], axis=1)
            train_by_tags = pd.concat([train_by_tags, train_by_tagsTmp.mean(axis=0).to_frame()], axis=1)
            test_by_tags = pd.concat([test_by_tags, test_by_tagsTmp.mean(axis=0).to_frame()], axis=1)
            ewo_by_tags = pd.concat([ewo_by_tags, ewo_by_tagsTmp.mean(axis=0).to_frame()], axis=1)

    return output, train_by_tags, test_by_tags, ewo_by_tags, model

In [106]:
en_corpus, en_nb_of_phrases = load_corpus(en_corpus_file, max_nb_of_phrases)

In [107]:
en_corpus.head()
en_corpus.loc[en_corpus['ne-tag'] == 'ORG']

Unnamed: 0,word,ne-tag
1335,Sadducees,ORG


In [108]:
tagSet = en_corpus["ne-tag"].dropna().unique()
if BINARY:
    tagSet = ['NE', 'O']
tag2int = {j: i for i, j in enumerate(tagSet)}
int2tag = {i: j for i, j in enumerate(tagSet)}
print(tag2int)

{'O': 0, 'MISC': 1, 'PER': 2, 'LOC': 3, 'ORG': 4}


In [109]:
en_nb_of_phrases

210

In [110]:
en_corpus.describe()

Unnamed: 0,word,ne-tag
count,4379,4170
unique,904,5
top,the,O
freq,313,3779


In [111]:
en_corpus.head(10)

Unnamed: 0,word,ne-tag
0,The,O
1,Promise,O
2,of,O
3,the,O
4,Holy,MISC
5,Spirit,MISC
6,\n,
7,In,O
8,the,O
9,first,O


In [112]:
for tag in tagSet:
    print("{0} % = {1} %".format(tag, np.round(en_corpus[en_corpus['ne-tag']==tag].shape[0] * 100 / en_corpus[en_corpus['ne-tag']!='\n'].shape[0], 2)))

O % = 86.3 %
MISC % = 2.4 %
PER % = 5.59 %
LOC % = 0.91 %
ORG % = 0.02 %


In [113]:
for tag in tagSet:
    print("{0} % = {1} %".format(tag, np.round(en_corpus[en_corpus['ne-tag']==tag].word.unique().shape[0] * 100 / en_corpus[en_corpus['ne-tag']!='\n'].word.unique().shape[0], 2)))

O % = 89.16 %
MISC % = 1.88 %
PER % = 8.96 %
LOC % = 1.99 %
ORG % = 0.11 %


In [114]:
en_corpus[en_corpus.word == "\n"].shape

(209, 2)

In [115]:
print("Nb of bi-phrases", en_nb_of_phrases)
en_fingerprints = corpus_fingerprint(en_corpus, en_nb_of_phrases)

Nb of bi-phrases 210


In [116]:
en_fingerprints.head(5)

Unnamed: 0,The,Promise,of,the,Holy,Spirit,In,first,book,O,...,considered,dream,She,save,fulfill,Immanuel,us),woke,sleep,knew
0,4170.0,4170.0,4170.0,4170.0,4170.0,4170.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,4170.0,0.0,0.0,4170.0,4170.0,4170.0,4170.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,1390.0,4170.0,4170.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,4170.0,4170.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,4170.0,2085.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [117]:
en_fingerprints['you'].values.shape

(210,)

In [118]:
en_corpus[en_corpus.word != "\n"].shape

(4170, 2)

In [119]:
if is_only_vocab:
    text = list(en_corpus[en_corpus.word != "\n"].word.unique())
else:
    text = list(en_corpus[en_corpus.word != "\n"].word)
en_vocab = pd.DataFrame({'text': text})
en_vocab.describe()

Unnamed: 0,text
count,903
unique,903
top,rather
freq,1


In [120]:
if is_only_vocab:
    X = np.zeros((en_vocab.shape[0] * duplication, en_nb_of_phrases))
    target = np.zeros((en_vocab.shape[0] * duplication))
    p=0
    for i, row in en_vocab.iterrows():
        c = row.text
        for j in range(duplication):
            X[p] = en_fingerprints[c.split(" ")[0]]
            target[p] = tag2int[getTag(en_corpus[en_corpus.word == c.split(" ")[-1:][0]]['ne-tag'].iloc[0])]
            p+=1
    X, target = shuffle_data(X, target)
    print(X.shape, en_fingerprints.shape, target.shape)

(903, 210) (210, 903) (903,)


In [121]:
en_vocab[-20:]

Unnamed: 0,text
883,Eliud
884,Eleazar
885,Matthan
886,husband
887,fourteen
888,unwilling
889,shame
890,resolved
891,divorce
892,quietly


In [122]:
if not is_only_vocab:
    X, target = corpus2trainingdata(en_corpus[en_corpus.word != "\n"], en_fingerprints)

In [123]:
if shuffle:
    X, target = shuffle_data(X, target)

In [124]:
y = target.copy()
y[0:100]
if not BINARY:
    y = np_utils.to_categorical(y, len(tagSet))
y.shape

(903, 5)

In [125]:
from sklearn.decomposition import PCA

def visualize(X, y):
    pca = PCA(n_components=2)
    X_embeded = pca.fit_transform(X)
    plt.figure(figsize=(5, 5))
    plt.scatter(X_embeded[:, 0], X_embeded[:, 1], c=y)
    plt.legend()
    plt.show()

In [126]:
# visualize(X, target)

In [127]:
X_train, X_val, y_train, y_val = model_selection.train_test_split(X, y, test_size=0.33)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1])
X_val = X_val.reshape(X_val.shape[0], X_val.shape[1])
print("X_train.shape =", X_train.shape)
print("y_train.shape =", y_train.shape)
print("X_val.shape =", X_val.shape)
print("y_val.shape =", y_val.shape)

tTarget = np.array([np.argmax(yy) for yy in y_train])
vTarget = np.array([np.argmax(yy) for yy in y_val])

for tag in tagSet:
    print("{0} % in training data = {1} %".format(tag, np.round(tTarget[tTarget==tag2int[tag]].size * 100 / tTarget.shape[0], 2)))
    print("{0} % in validation data = {1} %".format(tag, np.round(vTarget[vTarget==tag2int[tag]].size * 100 / vTarget.shape[0], 2)))

X_train.shape = (605, 210)
y_train.shape = (605, 5)
X_val.shape = (298, 210)
y_val.shape = (298, 5)
O % in training data = 87.93 %
O % in validation data = 88.93 %
MISC % in training data = 1.16 %
MISC % in validation data = 1.34 %
PER % in training data = 8.6 %
PER % in validation data = 8.39 %
LOC % in training data = 2.31 %
LOC % in validation data = 1.01 %
ORG % in training data = 0.0 %
ORG % in validation data = 0.34 %


In [128]:
ewo_corpus, ewo_nb_of_phrases = load_corpus(ewo_corpus_file, max_nb_of_phrases)

In [129]:
ewo_corpus.loc[ewo_corpus['ne-tag'] == 'PER']

Unnamed: 0,word,ne-tag
6,Teofil,PER
15,Yesus,PER
86,Yohannes,PER
104,Yesus,PER
230,Yesus,PER
...,...,...
3676,Maria,PER
3697,Yesus,PER
3740,Emmanuel,PER
3750,Yosef,PER


In [130]:
ewo_nb_of_phrases

210

In [131]:
for tag in tagSet:
    print("{0} % = {1} %".format(tag, np.round(ewo_corpus[ewo_corpus['ne-tag']==tag].shape[0] * 100 / ewo_corpus[ewo_corpus['ne-tag']!='\n'].shape[0], 2)))

O % = 84.15 %
MISC % = 2.54 %
PER % = 6.69 %
LOC % = 1.03 %
ORG % = 0.05 %


In [132]:
for tag in tagSet:
    print("{0} % = {1} %".format(tag, np.round(ewo_corpus[ewo_corpus['ne-tag']==tag].word.unique().shape[0] * 100 / ewo_corpus[ewo_corpus['ne-tag']!='\n'].word.unique().shape[0], 2)))

O % = 89.94 %
MISC % = 1.17 %
PER % = 8.3 %
LOC % = 1.86 %
ORG % = 0.2 %


In [133]:
ewo_corpus.describe()

Unnamed: 0,word,ne-tag
count,3779,3570
unique,1024,5
top,\n,O
freq,209,3180


In [134]:
ewo_corpus.head()

Unnamed: 0,word,ne-tag
0,Mfufub,MISC
1,Nsisim,MISC
2,ayi,O
3,sò,O
4,\n,


In [135]:
ewo_fingerprints = corpus_fingerprint(ewo_corpus, en_nb_of_phrases)

In [136]:
if is_only_vocab:
    text = list(ewo_corpus[ewo_corpus.word != "\n"].word.unique())
else:
    text = list(ewo_corpus[ewo_corpus.word != "\n"].word)
ewo_vocab = pd.DataFrame({"text":text})

In [137]:
if is_only_vocab:
    X_ewo = np.zeros((ewo_vocab.shape[0] * duplication, en_nb_of_phrases))
    ewo_target = np.zeros((ewo_vocab.shape[0] * duplication))
    p=0
    for i, row in ewo_vocab.iterrows():
        c = row.text
        for j in range(duplication):
            X_ewo[p] = ewo_fingerprints[c.split(" ")[0]]
            ewo_target[p] = tag2int[getTag(ewo_corpus[ewo_corpus.word == c.split(" ")[-1:][0]]['ne-tag'].iloc[0])]
            p+=1
    X_ewo, ewo_target = shuffle_data(X_ewo, ewo_target)

In [138]:
ewo_vocab[-10:]

Unnamed: 0,text
1013,sik
1014,Ntud
1015,bëyole
1016,Emmanuel
1017,Avëbë
1018,angavëbë
1019,oyò
1020,angabende
1021,anganòṅ
1022,angayole


In [139]:
if not is_only_vocab:
    X_ewo, ewo_target = corpus2trainingdata(ewo_corpus[ewo_corpus.word != "\n"], ewo_fingerprints)

In [140]:
if shuffle:
    X_ewo, ewo_target = shuffle_data(X_ewo, ewo_target)

In [141]:
y_ewo = ewo_target.copy()
print(y_ewo.shape, len(ewo_vocab))

(1023,) 1023


In [142]:
X_ewo.shape

(1023, 210)

In [143]:
y_ewo = ewo_target.copy()
y_ewo[:20]
if not BINARY:
    y_ewo = np_utils.to_categorical(y_ewo)

In [144]:
X_ewo = X_ewo.reshape((X_ewo.shape[0], en_nb_of_phrases))

In [145]:
# model = create_model(X.shape[1], len(tagSet))
# resultEval, train_by_tag, test_by_tag, ewo_by_tag = algoEval(X_train, y_train, X_val, y_val, X_ewo, y_ewo, model=model)

In [146]:
# resultEval

In [147]:
# train_by_tag

In [148]:
# test_by_tag

In [149]:
# ewo_by_tag

In [150]:
# resultEval.mean()

In [151]:
# resultEval.std()

In [152]:
resultCrossVal, trainByTagResult, testByTagResult, ewoByTagResult, model = algoCrossVal(X, y, X_ewo, y_ewo, repeat=10)

AlgoCrossValIter - 1
Model: "sequential_11"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.35019, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.35019

Epoch 00003: val_loss improved from 0.35019 to 0.33094, saving model to best-model-conll.hdfs

Epoch 00004: val_loss improved from 0.33094 to 0.32961, saving model to best-model-conll.hdfs

Epoch 00005: val_loss 




Epoch 00001: val_loss improved from inf to 0.14567, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.14567

Epoch 00003: val_loss did not improve from 0.14567

Epoch 00004: val_loss did not improve from 0.14567

Epoch 00005: val_loss did not improve from 0.14567

Epoch 00006: val_loss did not improve from 0.14567

Epoch 00007: val_loss did not improve from 0.14567

Epoch 00008: val_loss did not improve from 0.14567

Epoch 00009: val_loss did not improve from 0.14567

Epoch 00010: val_loss did not improve from 0.14567





Epoch 00001: val_loss improved from inf to 0.07386, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07386

Epoch 00003: val_loss did not improve from 0.07386

Epoch 00004: val_loss did not improve from 0.07386

Epoch 00005: val_loss did not improve from 0.07386

Epoch 00006: val_loss did not improve from 0.07386

Epoch 00007: val_loss did not improve from 0.07386

Epoch 00008: val_loss did not improve from 0.07386

Epoch 00009: val_loss did not improve from 0.07386

Epoch 00010: val_loss did not improve from 0.07386





Epoch 00001: val_loss improved from inf to 0.09657, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09657

Epoch 00003: val_loss did not improve from 0.09657

Epoch 00004: val_loss did not improve from 0.09657

Epoch 00005: val_loss did not improve from 0.09657

Epoch 00006: val_loss did not improve from 0.09657

Epoch 00007: val_loss did not improve from 0.09657

Epoch 00008: val_loss did not improve from 0.09657

Epoch 00009: val_loss did not improve from 0.09657

Epoch 00010: val_loss did not improve from 0.09657





Epoch 00001: val_loss improved from inf to 0.06205, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.06205 to 0.04640, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.04640

Epoch 00004: val_loss did not improve from 0.04640

Epoch 00005: val_loss did not improve from 0.04640

Epoch 00006: val_loss did not improve from 0.04640

Epoch 00007: val_loss did not improve from 0.04640

Epoch 00008: val_loss did not improve from 0.04640

Epoch 00009: val_loss did not improve from 0.04640

Epoch 00010: val_loss did not improve from 0.04640





Epoch 00001: val_loss improved from inf to 0.06382, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06382

Epoch 00003: val_loss did not improve from 0.06382

Epoch 00004: val_loss did not improve from 0.06382

Epoch 00005: val_loss did not improve from 0.06382

Epoch 00006: val_loss did not improve from 0.06382

Epoch 00007: val_loss did not improve from 0.06382

Epoch 00008: val_loss did not improve from 0.06382

Epoch 00009: val_loss did not improve from 0.06382

Epoch 00010: val_loss did not improve from 0.06382





Epoch 00001: val_loss improved from inf to 0.08616, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08616

Epoch 00003: val_loss did not improve from 0.08616

Epoch 00004: val_loss did not improve from 0.08616

Epoch 00005: val_loss did not improve from 0.08616

Epoch 00006: val_loss did not improve from 0.08616

Epoch 00007: val_loss did not improve from 0.08616

Epoch 00008: val_loss did not improve from 0.08616

Epoch 00009: val_loss did not improve from 0.08616

Epoch 00010: val_loss did not improve from 0.08616





Epoch 00001: val_loss improved from inf to 0.11613, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.11613 to 0.08164, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.08164

Epoch 00004: val_loss did not improve from 0.08164

Epoch 00005: val_loss improved from 0.08164 to 0.07975, saving model to best-model-conll.hdfs

Epoch 00006: val_loss did not improve from 0.07975

Epoch 00007: val_loss did not improve from 0.07975

Epoch 00008: val_loss did not improve from 0.07975

Epoch 00009: val_loss did not improve from 0.07975

Epoch 00010: val_loss did not improve from 0.07975

Epoch 00001: val_loss improved from inf to 0.02001, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.02001

Epoch 00003: val_loss did not improve from 0.02001

Epoch 00004: val_loss did not improve from 0.02001

Epoch 00005: val_loss did not improve from 0.02001

Epoch 00006: val_loss did not improve from 0.02001

Epo




Epoch 00001: val_loss improved from inf to 0.06389, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06389

Epoch 00003: val_loss did not improve from 0.06389

Epoch 00004: val_loss did not improve from 0.06389

Epoch 00005: val_loss did not improve from 0.06389

Epoch 00006: val_loss did not improve from 0.06389

Epoch 00007: val_loss did not improve from 0.06389

Epoch 00008: val_loss did not improve from 0.06389

Epoch 00009: val_loss did not improve from 0.06389

Epoch 00010: val_loss did not improve from 0.06389




AlgoCrossValIter - 2
Model: "sequential_12"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.54894, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.54894 to 0.48068, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.48068 to 0.33938, saving model to best-model-conll.hdfs

Epoch 00004: val_loss improved from 0.33938 to 0.32715, saving model to be




Epoch 00001: val_loss improved from inf to 0.12857, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.12857 to 0.12272, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.12272

Epoch 00004: val_loss did not improve from 0.12272

Epoch 00005: val_loss did not improve from 0.12272

Epoch 00006: val_loss did not improve from 0.12272

Epoch 00007: val_loss did not improve from 0.12272

Epoch 00008: val_loss did not improve from 0.12272

Epoch 00009: val_loss did not improve from 0.12272

Epoch 00010: val_loss did not improve from 0.12272





Epoch 00001: val_loss improved from inf to 0.07356, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07356

Epoch 00003: val_loss did not improve from 0.07356

Epoch 00004: val_loss did not improve from 0.07356

Epoch 00005: val_loss did not improve from 0.07356

Epoch 00006: val_loss did not improve from 0.07356

Epoch 00007: val_loss did not improve from 0.07356

Epoch 00008: val_loss did not improve from 0.07356

Epoch 00009: val_loss did not improve from 0.07356

Epoch 00010: val_loss did not improve from 0.07356





Epoch 00001: val_loss improved from inf to 0.14355, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.14355 to 0.13377, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.13377

Epoch 00004: val_loss did not improve from 0.13377

Epoch 00005: val_loss did not improve from 0.13377

Epoch 00006: val_loss did not improve from 0.13377

Epoch 00007: val_loss did not improve from 0.13377

Epoch 00008: val_loss did not improve from 0.13377

Epoch 00009: val_loss did not improve from 0.13377

Epoch 00010: val_loss did not improve from 0.13377





Epoch 00001: val_loss improved from inf to 0.04110, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04110

Epoch 00003: val_loss did not improve from 0.04110

Epoch 00004: val_loss did not improve from 0.04110

Epoch 00005: val_loss did not improve from 0.04110

Epoch 00006: val_loss did not improve from 0.04110

Epoch 00007: val_loss did not improve from 0.04110

Epoch 00008: val_loss did not improve from 0.04110

Epoch 00009: val_loss did not improve from 0.04110

Epoch 00010: val_loss did not improve from 0.04110





Epoch 00001: val_loss improved from inf to 0.08537, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.08537 to 0.07536, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.07536

Epoch 00004: val_loss did not improve from 0.07536

Epoch 00005: val_loss did not improve from 0.07536

Epoch 00006: val_loss did not improve from 0.07536

Epoch 00007: val_loss did not improve from 0.07536

Epoch 00008: val_loss did not improve from 0.07536

Epoch 00009: val_loss did not improve from 0.07536

Epoch 00010: val_loss did not improve from 0.07536





Epoch 00001: val_loss improved from inf to 0.10552, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.10552 to 0.10123, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.10123

Epoch 00004: val_loss did not improve from 0.10123

Epoch 00005: val_loss did not improve from 0.10123

Epoch 00006: val_loss did not improve from 0.10123

Epoch 00007: val_loss did not improve from 0.10123

Epoch 00008: val_loss did not improve from 0.10123

Epoch 00009: val_loss did not improve from 0.10123

Epoch 00010: val_loss did not improve from 0.10123





Epoch 00001: val_loss improved from inf to 0.08045, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08045

Epoch 00003: val_loss did not improve from 0.08045

Epoch 00004: val_loss did not improve from 0.08045

Epoch 00005: val_loss did not improve from 0.08045

Epoch 00006: val_loss did not improve from 0.08045

Epoch 00007: val_loss did not improve from 0.08045

Epoch 00008: val_loss did not improve from 0.08045

Epoch 00009: val_loss did not improve from 0.08045

Epoch 00010: val_loss did not improve from 0.08045





Epoch 00001: val_loss improved from inf to 0.01396, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.01396

Epoch 00003: val_loss did not improve from 0.01396

Epoch 00004: val_loss did not improve from 0.01396

Epoch 00005: val_loss did not improve from 0.01396

Epoch 00006: val_loss did not improve from 0.01396

Epoch 00007: val_loss did not improve from 0.01396

Epoch 00008: val_loss did not improve from 0.01396

Epoch 00009: val_loss did not improve from 0.01396

Epoch 00010: val_loss did not improve from 0.01396





Epoch 00001: val_loss improved from inf to 0.06528, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06528

Epoch 00003: val_loss did not improve from 0.06528

Epoch 00004: val_loss did not improve from 0.06528

Epoch 00005: val_loss did not improve from 0.06528

Epoch 00006: val_loss did not improve from 0.06528

Epoch 00007: val_loss did not improve from 0.06528

Epoch 00008: val_loss did not improve from 0.06528

Epoch 00009: val_loss did not improve from 0.06528

Epoch 00010: val_loss did not improve from 0.06528




AlgoCrossValIter - 3
Model: "sequential_13"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.38389, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.38389 to 0.36174, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.36174 to 0.35460, saving model to best-model-conll.hdfs

Epoch 00004: val_loss improved from 0.35460 to 0.33502, saving model to be




Epoch 00001: val_loss improved from inf to 0.14564, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.14564

Epoch 00003: val_loss did not improve from 0.14564

Epoch 00004: val_loss did not improve from 0.14564

Epoch 00005: val_loss did not improve from 0.14564

Epoch 00006: val_loss did not improve from 0.14564

Epoch 00007: val_loss did not improve from 0.14564

Epoch 00008: val_loss did not improve from 0.14564

Epoch 00009: val_loss did not improve from 0.14564

Epoch 00010: val_loss did not improve from 0.14564





Epoch 00001: val_loss improved from inf to 0.08258, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08258

Epoch 00003: val_loss did not improve from 0.08258

Epoch 00004: val_loss did not improve from 0.08258

Epoch 00005: val_loss did not improve from 0.08258

Epoch 00006: val_loss did not improve from 0.08258

Epoch 00007: val_loss did not improve from 0.08258

Epoch 00008: val_loss did not improve from 0.08258

Epoch 00009: val_loss did not improve from 0.08258

Epoch 00010: val_loss did not improve from 0.08258





Epoch 00001: val_loss improved from inf to 0.13448, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.13448

Epoch 00003: val_loss did not improve from 0.13448

Epoch 00004: val_loss did not improve from 0.13448

Epoch 00005: val_loss did not improve from 0.13448

Epoch 00006: val_loss did not improve from 0.13448

Epoch 00007: val_loss did not improve from 0.13448

Epoch 00008: val_loss did not improve from 0.13448

Epoch 00009: val_loss did not improve from 0.13448

Epoch 00010: val_loss did not improve from 0.13448





Epoch 00001: val_loss improved from inf to 0.05431, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05431

Epoch 00003: val_loss did not improve from 0.05431

Epoch 00004: val_loss did not improve from 0.05431

Epoch 00005: val_loss did not improve from 0.05431

Epoch 00006: val_loss did not improve from 0.05431

Epoch 00007: val_loss did not improve from 0.05431

Epoch 00008: val_loss did not improve from 0.05431

Epoch 00009: val_loss did not improve from 0.05431

Epoch 00010: val_loss did not improve from 0.05431





Epoch 00001: val_loss improved from inf to 0.06259, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06259

Epoch 00003: val_loss did not improve from 0.06259

Epoch 00004: val_loss did not improve from 0.06259

Epoch 00005: val_loss did not improve from 0.06259

Epoch 00006: val_loss did not improve from 0.06259

Epoch 00007: val_loss did not improve from 0.06259

Epoch 00008: val_loss did not improve from 0.06259

Epoch 00009: val_loss did not improve from 0.06259

Epoch 00010: val_loss did not improve from 0.06259





Epoch 00001: val_loss improved from inf to 0.08389, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08389

Epoch 00003: val_loss did not improve from 0.08389

Epoch 00004: val_loss did not improve from 0.08389

Epoch 00005: val_loss did not improve from 0.08389

Epoch 00006: val_loss did not improve from 0.08389

Epoch 00007: val_loss did not improve from 0.08389

Epoch 00008: val_loss did not improve from 0.08389

Epoch 00009: val_loss did not improve from 0.08389

Epoch 00010: val_loss did not improve from 0.08389





Epoch 00001: val_loss improved from inf to 0.08337, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08337

Epoch 00003: val_loss did not improve from 0.08337

Epoch 00004: val_loss did not improve from 0.08337

Epoch 00005: val_loss did not improve from 0.08337

Epoch 00006: val_loss did not improve from 0.08337

Epoch 00007: val_loss did not improve from 0.08337

Epoch 00008: val_loss did not improve from 0.08337

Epoch 00009: val_loss did not improve from 0.08337

Epoch 00010: val_loss did not improve from 0.08337

Epoch 00001: val_loss improved from inf to 0.02685, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.02685 to 0.02504, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.02504

Epoch 00004: val_loss did not improve from 0.02504

Epoch 00005: val_loss did not improve from 0.02504

Epoch 00006: val_loss did not improve from 0.02504

Epoch 00007: val_loss did not improve from 0.0




Epoch 00001: val_loss improved from inf to 0.07514, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.07514 to 0.07474, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.07474

Epoch 00004: val_loss did not improve from 0.07474

Epoch 00005: val_loss did not improve from 0.07474

Epoch 00006: val_loss did not improve from 0.07474

Epoch 00007: val_loss did not improve from 0.07474

Epoch 00008: val_loss did not improve from 0.07474

Epoch 00009: val_loss did not improve from 0.07474

Epoch 00010: val_loss did not improve from 0.07474




AlgoCrossValIter - 4
Model: "sequential_14"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.37752, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.37752 to 0.37277, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.37277 to 0.32029, saving model to best-model-conll.hdfs

Epoch 00004: val_loss improved from 0.32029 to 0.30355, saving model to be




Epoch 00001: val_loss improved from inf to 0.14856, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.14856 to 0.13981, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.13981

Epoch 00004: val_loss did not improve from 0.13981

Epoch 00005: val_loss did not improve from 0.13981

Epoch 00006: val_loss did not improve from 0.13981

Epoch 00007: val_loss did not improve from 0.13981

Epoch 00008: val_loss did not improve from 0.13981

Epoch 00009: val_loss did not improve from 0.13981

Epoch 00010: val_loss did not improve from 0.13981





Epoch 00001: val_loss improved from inf to 0.07121, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07121

Epoch 00003: val_loss did not improve from 0.07121

Epoch 00004: val_loss did not improve from 0.07121

Epoch 00005: val_loss did not improve from 0.07121

Epoch 00006: val_loss did not improve from 0.07121

Epoch 00007: val_loss did not improve from 0.07121

Epoch 00008: val_loss did not improve from 0.07121

Epoch 00009: val_loss did not improve from 0.07121

Epoch 00010: val_loss did not improve from 0.07121





Epoch 00001: val_loss improved from inf to 0.15025, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.15025 to 0.13901, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.13901

Epoch 00004: val_loss did not improve from 0.13901

Epoch 00005: val_loss did not improve from 0.13901

Epoch 00006: val_loss did not improve from 0.13901

Epoch 00007: val_loss did not improve from 0.13901

Epoch 00008: val_loss did not improve from 0.13901

Epoch 00009: val_loss did not improve from 0.13901

Epoch 00010: val_loss did not improve from 0.13901





Epoch 00001: val_loss improved from inf to 0.03820, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.03820

Epoch 00003: val_loss did not improve from 0.03820

Epoch 00004: val_loss did not improve from 0.03820

Epoch 00005: val_loss did not improve from 0.03820

Epoch 00006: val_loss did not improve from 0.03820

Epoch 00007: val_loss did not improve from 0.03820

Epoch 00008: val_loss did not improve from 0.03820

Epoch 00009: val_loss did not improve from 0.03820

Epoch 00010: val_loss did not improve from 0.03820





Epoch 00001: val_loss improved from inf to 0.06983, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06983

Epoch 00003: val_loss did not improve from 0.06983

Epoch 00004: val_loss did not improve from 0.06983

Epoch 00005: val_loss did not improve from 0.06983

Epoch 00006: val_loss did not improve from 0.06983

Epoch 00007: val_loss did not improve from 0.06983

Epoch 00008: val_loss did not improve from 0.06983

Epoch 00009: val_loss did not improve from 0.06983

Epoch 00010: val_loss did not improve from 0.06983





Epoch 00001: val_loss improved from inf to 0.08215, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08215

Epoch 00003: val_loss did not improve from 0.08215

Epoch 00004: val_loss did not improve from 0.08215

Epoch 00005: val_loss did not improve from 0.08215

Epoch 00006: val_loss did not improve from 0.08215

Epoch 00007: val_loss did not improve from 0.08215

Epoch 00008: val_loss did not improve from 0.08215

Epoch 00009: val_loss did not improve from 0.08215

Epoch 00010: val_loss did not improve from 0.08215





Epoch 00001: val_loss improved from inf to 0.08549, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08549

Epoch 00003: val_loss did not improve from 0.08549

Epoch 00004: val_loss improved from 0.08549 to 0.08153, saving model to best-model-conll.hdfs

Epoch 00005: val_loss did not improve from 0.08153

Epoch 00006: val_loss did not improve from 0.08153

Epoch 00007: val_loss did not improve from 0.08153

Epoch 00008: val_loss did not improve from 0.08153

Epoch 00009: val_loss did not improve from 0.08153

Epoch 00010: val_loss did not improve from 0.08153





Epoch 00001: val_loss improved from inf to 0.01236, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.01236

Epoch 00003: val_loss did not improve from 0.01236

Epoch 00004: val_loss did not improve from 0.01236

Epoch 00005: val_loss did not improve from 0.01236

Epoch 00006: val_loss did not improve from 0.01236

Epoch 00007: val_loss did not improve from 0.01236

Epoch 00008: val_loss did not improve from 0.01236

Epoch 00009: val_loss did not improve from 0.01236

Epoch 00010: val_loss did not improve from 0.01236

Epoch 00001: val_loss improved from inf to 0.07021, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07021

Epoch 00003: val_loss did not improve from 0.07021

Epoch 00004: val_loss did not improve from 0.07021

Epoch 00005: val_loss did not improve from 0.07021

Epoch 00006: val_loss did not improve from 0.07021

Epoch 00007: val_loss did not improve from 0.07021

Epoch 00008: val_loss did not improve



AlgoCrossValIter - 5
Model: "sequential_15"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.40200, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.40200 to 0.34184, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.34184 to 0.32876, saving model to best-model-conll.hdfs

Epoch 00004: val_loss improved from 0.32876 to 0.30926, saving model to be




Epoch 00001: val_loss improved from inf to 0.14722, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.14722 to 0.14299, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.14299

Epoch 00004: val_loss did not improve from 0.14299

Epoch 00005: val_loss did not improve from 0.14299

Epoch 00006: val_loss did not improve from 0.14299

Epoch 00007: val_loss did not improve from 0.14299

Epoch 00008: val_loss did not improve from 0.14299

Epoch 00009: val_loss did not improve from 0.14299

Epoch 00010: val_loss did not improve from 0.14299





Epoch 00001: val_loss improved from inf to 0.15124, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.15124

Epoch 00003: val_loss did not improve from 0.15124

Epoch 00004: val_loss improved from 0.15124 to 0.11334, saving model to best-model-conll.hdfs

Epoch 00005: val_loss did not improve from 0.11334

Epoch 00006: val_loss did not improve from 0.11334

Epoch 00007: val_loss improved from 0.11334 to 0.09627, saving model to best-model-conll.hdfs

Epoch 00008: val_loss did not improve from 0.09627

Epoch 00009: val_loss did not improve from 0.09627

Epoch 00010: val_loss did not improve from 0.09627





Epoch 00001: val_loss improved from inf to 0.11390, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.11390

Epoch 00003: val_loss did not improve from 0.11390

Epoch 00004: val_loss did not improve from 0.11390

Epoch 00005: val_loss did not improve from 0.11390

Epoch 00006: val_loss did not improve from 0.11390

Epoch 00007: val_loss did not improve from 0.11390

Epoch 00008: val_loss did not improve from 0.11390

Epoch 00009: val_loss did not improve from 0.11390

Epoch 00010: val_loss did not improve from 0.11390





Epoch 00001: val_loss improved from inf to 0.03002, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.03002

Epoch 00003: val_loss did not improve from 0.03002

Epoch 00004: val_loss did not improve from 0.03002

Epoch 00005: val_loss did not improve from 0.03002

Epoch 00006: val_loss did not improve from 0.03002

Epoch 00007: val_loss did not improve from 0.03002

Epoch 00008: val_loss did not improve from 0.03002

Epoch 00009: val_loss did not improve from 0.03002

Epoch 00010: val_loss did not improve from 0.03002





Epoch 00001: val_loss improved from inf to 0.06927, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06927

Epoch 00003: val_loss did not improve from 0.06927

Epoch 00004: val_loss did not improve from 0.06927

Epoch 00005: val_loss did not improve from 0.06927

Epoch 00006: val_loss did not improve from 0.06927

Epoch 00007: val_loss did not improve from 0.06927

Epoch 00008: val_loss did not improve from 0.06927

Epoch 00009: val_loss did not improve from 0.06927

Epoch 00010: val_loss did not improve from 0.06927





Epoch 00001: val_loss improved from inf to 0.08719, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08719

Epoch 00003: val_loss did not improve from 0.08719

Epoch 00004: val_loss did not improve from 0.08719

Epoch 00005: val_loss did not improve from 0.08719

Epoch 00006: val_loss did not improve from 0.08719

Epoch 00007: val_loss did not improve from 0.08719

Epoch 00008: val_loss did not improve from 0.08719

Epoch 00009: val_loss did not improve from 0.08719

Epoch 00010: val_loss did not improve from 0.08719





Epoch 00001: val_loss improved from inf to 0.07496, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07496

Epoch 00003: val_loss did not improve from 0.07496

Epoch 00004: val_loss did not improve from 0.07496

Epoch 00005: val_loss did not improve from 0.07496

Epoch 00006: val_loss did not improve from 0.07496

Epoch 00007: val_loss did not improve from 0.07496

Epoch 00008: val_loss did not improve from 0.07496

Epoch 00009: val_loss did not improve from 0.07496

Epoch 00010: val_loss did not improve from 0.07496





Epoch 00001: val_loss improved from inf to 0.01564, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.01564

Epoch 00003: val_loss did not improve from 0.01564

Epoch 00004: val_loss did not improve from 0.01564

Epoch 00005: val_loss did not improve from 0.01564

Epoch 00006: val_loss did not improve from 0.01564

Epoch 00007: val_loss did not improve from 0.01564

Epoch 00008: val_loss did not improve from 0.01564

Epoch 00009: val_loss did not improve from 0.01564

Epoch 00010: val_loss did not improve from 0.01564





Epoch 00001: val_loss improved from inf to 0.06737, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06737

Epoch 00003: val_loss did not improve from 0.06737

Epoch 00004: val_loss did not improve from 0.06737

Epoch 00005: val_loss did not improve from 0.06737

Epoch 00006: val_loss did not improve from 0.06737

Epoch 00007: val_loss did not improve from 0.06737

Epoch 00008: val_loss did not improve from 0.06737

Epoch 00009: val_loss did not improve from 0.06737

Epoch 00010: val_loss did not improve from 0.06737




AlgoCrossValIter - 6
Model: "sequential_16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.39759, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.39759 to 0.34837, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.34837 to 0.32493, saving model to best-model-conll.hdfs

Epoch 00004: val_loss improved from 0.32493 to 0.32309, saving model to be




Epoch 00001: val_loss improved from inf to 0.14162, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.14162

Epoch 00003: val_loss did not improve from 0.14162

Epoch 00004: val_loss did not improve from 0.14162

Epoch 00005: val_loss did not improve from 0.14162

Epoch 00006: val_loss did not improve from 0.14162

Epoch 00007: val_loss did not improve from 0.14162

Epoch 00008: val_loss did not improve from 0.14162

Epoch 00009: val_loss did not improve from 0.14162

Epoch 00010: val_loss did not improve from 0.14162





Epoch 00001: val_loss improved from inf to 0.08051, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08051

Epoch 00003: val_loss did not improve from 0.08051

Epoch 00004: val_loss did not improve from 0.08051

Epoch 00005: val_loss did not improve from 0.08051

Epoch 00006: val_loss did not improve from 0.08051

Epoch 00007: val_loss did not improve from 0.08051

Epoch 00008: val_loss did not improve from 0.08051

Epoch 00009: val_loss did not improve from 0.08051

Epoch 00010: val_loss did not improve from 0.08051





Epoch 00001: val_loss improved from inf to 0.11549, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.11549

Epoch 00003: val_loss did not improve from 0.11549

Epoch 00004: val_loss did not improve from 0.11549

Epoch 00005: val_loss did not improve from 0.11549

Epoch 00006: val_loss did not improve from 0.11549

Epoch 00007: val_loss did not improve from 0.11549

Epoch 00008: val_loss did not improve from 0.11549

Epoch 00009: val_loss did not improve from 0.11549

Epoch 00010: val_loss did not improve from 0.11549





Epoch 00001: val_loss improved from inf to 0.03795, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.03795

Epoch 00003: val_loss did not improve from 0.03795

Epoch 00004: val_loss did not improve from 0.03795

Epoch 00005: val_loss did not improve from 0.03795

Epoch 00006: val_loss did not improve from 0.03795

Epoch 00007: val_loss did not improve from 0.03795

Epoch 00008: val_loss did not improve from 0.03795

Epoch 00009: val_loss did not improve from 0.03795

Epoch 00010: val_loss did not improve from 0.03795





Epoch 00001: val_loss improved from inf to 0.06565, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06565

Epoch 00003: val_loss did not improve from 0.06565

Epoch 00004: val_loss did not improve from 0.06565

Epoch 00005: val_loss did not improve from 0.06565

Epoch 00006: val_loss did not improve from 0.06565

Epoch 00007: val_loss did not improve from 0.06565

Epoch 00008: val_loss did not improve from 0.06565

Epoch 00009: val_loss did not improve from 0.06565

Epoch 00010: val_loss did not improve from 0.06565





Epoch 00001: val_loss improved from inf to 0.11962, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.11962 to 0.10762, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.10762

Epoch 00004: val_loss did not improve from 0.10762

Epoch 00005: val_loss did not improve from 0.10762

Epoch 00006: val_loss did not improve from 0.10762

Epoch 00007: val_loss did not improve from 0.10762

Epoch 00008: val_loss did not improve from 0.10762

Epoch 00009: val_loss did not improve from 0.10762

Epoch 00010: val_loss did not improve from 0.10762





Epoch 00001: val_loss improved from inf to 0.09355, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.09355 to 0.08640, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.08640

Epoch 00004: val_loss did not improve from 0.08640

Epoch 00005: val_loss did not improve from 0.08640

Epoch 00006: val_loss did not improve from 0.08640

Epoch 00007: val_loss did not improve from 0.08640

Epoch 00008: val_loss did not improve from 0.08640

Epoch 00009: val_loss did not improve from 0.08640

Epoch 00010: val_loss improved from 0.08640 to 0.08252, saving model to best-model-conll.hdfs

Epoch 00001: val_loss improved from inf to 0.02639, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.02639

Epoch 00003: val_loss did not improve from 0.02639

Epoch 00004: val_loss did not improve from 0.02639

Epoch 00005: val_loss did not improve from 0.02639

Epoch 00006: val_loss did not improve from 0.02639

Epo




Epoch 00001: val_loss improved from inf to 0.07161, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07161

Epoch 00003: val_loss did not improve from 0.07161

Epoch 00004: val_loss did not improve from 0.07161

Epoch 00005: val_loss did not improve from 0.07161

Epoch 00006: val_loss did not improve from 0.07161

Epoch 00007: val_loss did not improve from 0.07161

Epoch 00008: val_loss did not improve from 0.07161

Epoch 00009: val_loss did not improve from 0.07161

Epoch 00010: val_loss did not improve from 0.07161




AlgoCrossValIter - 7
Model: "sequential_17"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.39984, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.39984 to 0.36171, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.36171 to 0.34643, saving model to best-model-conll.hdfs

Epoch 00004: val_loss improved from 0.34643 to 0.30702, saving model to be




Epoch 00001: val_loss improved from inf to 0.15199, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.15199 to 0.14207, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.14207 to 0.14057, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.14057

Epoch 00005: val_loss did not improve from 0.14057

Epoch 00006: val_loss did not improve from 0.14057

Epoch 00007: val_loss did not improve from 0.14057

Epoch 00008: val_loss did not improve from 0.14057

Epoch 00009: val_loss did not improve from 0.14057

Epoch 00010: val_loss did not improve from 0.14057





Epoch 00001: val_loss improved from inf to 0.07741, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07741

Epoch 00003: val_loss did not improve from 0.07741

Epoch 00004: val_loss did not improve from 0.07741

Epoch 00005: val_loss did not improve from 0.07741

Epoch 00006: val_loss did not improve from 0.07741

Epoch 00007: val_loss did not improve from 0.07741

Epoch 00008: val_loss did not improve from 0.07741

Epoch 00009: val_loss did not improve from 0.07741

Epoch 00010: val_loss did not improve from 0.07741





Epoch 00001: val_loss improved from inf to 0.12707, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.12707 to 0.11825, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.11825

Epoch 00004: val_loss did not improve from 0.11825

Epoch 00005: val_loss did not improve from 0.11825

Epoch 00006: val_loss did not improve from 0.11825

Epoch 00007: val_loss did not improve from 0.11825

Epoch 00008: val_loss did not improve from 0.11825

Epoch 00009: val_loss did not improve from 0.11825

Epoch 00010: val_loss did not improve from 0.11825





Epoch 00001: val_loss improved from inf to 0.05475, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05475

Epoch 00003: val_loss did not improve from 0.05475

Epoch 00004: val_loss did not improve from 0.05475

Epoch 00005: val_loss did not improve from 0.05475

Epoch 00006: val_loss did not improve from 0.05475

Epoch 00007: val_loss did not improve from 0.05475

Epoch 00008: val_loss did not improve from 0.05475

Epoch 00009: val_loss did not improve from 0.05475

Epoch 00010: val_loss did not improve from 0.05475





Epoch 00001: val_loss improved from inf to 0.08040, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08040

Epoch 00003: val_loss did not improve from 0.08040

Epoch 00004: val_loss did not improve from 0.08040

Epoch 00005: val_loss did not improve from 0.08040

Epoch 00006: val_loss did not improve from 0.08040

Epoch 00007: val_loss did not improve from 0.08040

Epoch 00008: val_loss did not improve from 0.08040

Epoch 00009: val_loss did not improve from 0.08040

Epoch 00010: val_loss did not improve from 0.08040





Epoch 00001: val_loss improved from inf to 0.09534, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09534

Epoch 00003: val_loss did not improve from 0.09534

Epoch 00004: val_loss did not improve from 0.09534

Epoch 00005: val_loss did not improve from 0.09534

Epoch 00006: val_loss did not improve from 0.09534

Epoch 00007: val_loss did not improve from 0.09534

Epoch 00008: val_loss did not improve from 0.09534

Epoch 00009: val_loss did not improve from 0.09534

Epoch 00010: val_loss did not improve from 0.09534





Epoch 00001: val_loss improved from inf to 0.09348, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.09348 to 0.08457, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.08457

Epoch 00004: val_loss improved from 0.08457 to 0.08275, saving model to best-model-conll.hdfs

Epoch 00005: val_loss did not improve from 0.08275

Epoch 00006: val_loss did not improve from 0.08275

Epoch 00007: val_loss did not improve from 0.08275

Epoch 00008: val_loss improved from 0.08275 to 0.08231, saving model to best-model-conll.hdfs

Epoch 00009: val_loss did not improve from 0.08231

Epoch 00010: val_loss improved from 0.08231 to 0.07956, saving model to best-model-conll.hdfs





Epoch 00001: val_loss improved from inf to 0.01710, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.01710

Epoch 00003: val_loss did not improve from 0.01710

Epoch 00004: val_loss did not improve from 0.01710

Epoch 00005: val_loss did not improve from 0.01710

Epoch 00006: val_loss did not improve from 0.01710

Epoch 00007: val_loss did not improve from 0.01710

Epoch 00008: val_loss did not improve from 0.01710

Epoch 00009: val_loss did not improve from 0.01710

Epoch 00010: val_loss did not improve from 0.01710

Epoch 00001: val_loss improved from inf to 0.07463, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07463

Epoch 00003: val_loss did not improve from 0.07463

Epoch 00004: val_loss did not improve from 0.07463

Epoch 00005: val_loss did not improve from 0.07463

Epoch 00006: val_loss did not improve from 0.07463

Epoch 00007: val_loss did not improve from 0.07463

Epoch 00008: val_loss did not improve



AlgoCrossValIter - 8
Model: "sequential_18"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.38983, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.38983 to 0.37201, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.37201 to 0.33785, saving model to best-model-conll.hdfs

Epoch 00004: val_loss improved from 0.33785 to 0.32692, saving model to be




Epoch 00001: val_loss improved from inf to 0.16984, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.16984 to 0.13951, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.13951

Epoch 00004: val_loss did not improve from 0.13951

Epoch 00005: val_loss did not improve from 0.13951

Epoch 00006: val_loss did not improve from 0.13951

Epoch 00007: val_loss did not improve from 0.13951

Epoch 00008: val_loss did not improve from 0.13951

Epoch 00009: val_loss did not improve from 0.13951

Epoch 00010: val_loss did not improve from 0.13951





Epoch 00001: val_loss improved from inf to 0.07210, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07210

Epoch 00003: val_loss did not improve from 0.07210

Epoch 00004: val_loss did not improve from 0.07210

Epoch 00005: val_loss did not improve from 0.07210

Epoch 00006: val_loss did not improve from 0.07210

Epoch 00007: val_loss did not improve from 0.07210

Epoch 00008: val_loss did not improve from 0.07210

Epoch 00009: val_loss did not improve from 0.07210

Epoch 00010: val_loss did not improve from 0.07210





Epoch 00001: val_loss improved from inf to 0.18206, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.18206 to 0.13081, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.13081

Epoch 00004: val_loss did not improve from 0.13081

Epoch 00005: val_loss did not improve from 0.13081

Epoch 00006: val_loss did not improve from 0.13081

Epoch 00007: val_loss did not improve from 0.13081

Epoch 00008: val_loss did not improve from 0.13081

Epoch 00009: val_loss did not improve from 0.13081

Epoch 00010: val_loss did not improve from 0.13081





Epoch 00001: val_loss improved from inf to 0.03538, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.03538

Epoch 00003: val_loss did not improve from 0.03538

Epoch 00004: val_loss did not improve from 0.03538

Epoch 00005: val_loss did not improve from 0.03538

Epoch 00006: val_loss did not improve from 0.03538

Epoch 00007: val_loss did not improve from 0.03538

Epoch 00008: val_loss did not improve from 0.03538

Epoch 00009: val_loss did not improve from 0.03538

Epoch 00010: val_loss did not improve from 0.03538





Epoch 00001: val_loss improved from inf to 0.12044, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.12044 to 0.11297, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.11297 to 0.09947, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.09947

Epoch 00005: val_loss did not improve from 0.09947

Epoch 00006: val_loss did not improve from 0.09947

Epoch 00007: val_loss did not improve from 0.09947

Epoch 00008: val_loss did not improve from 0.09947

Epoch 00009: val_loss did not improve from 0.09947

Epoch 00010: val_loss did not improve from 0.09947





Epoch 00001: val_loss improved from inf to 0.08488, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08488

Epoch 00003: val_loss did not improve from 0.08488

Epoch 00004: val_loss did not improve from 0.08488

Epoch 00005: val_loss did not improve from 0.08488

Epoch 00006: val_loss did not improve from 0.08488

Epoch 00007: val_loss did not improve from 0.08488

Epoch 00008: val_loss did not improve from 0.08488

Epoch 00009: val_loss did not improve from 0.08488

Epoch 00010: val_loss did not improve from 0.08488





Epoch 00001: val_loss improved from inf to 0.07787, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07787

Epoch 00003: val_loss did not improve from 0.07787

Epoch 00004: val_loss did not improve from 0.07787

Epoch 00005: val_loss did not improve from 0.07787

Epoch 00006: val_loss did not improve from 0.07787

Epoch 00007: val_loss did not improve from 0.07787

Epoch 00008: val_loss did not improve from 0.07787

Epoch 00009: val_loss did not improve from 0.07787

Epoch 00010: val_loss did not improve from 0.07787





Epoch 00001: val_loss improved from inf to 0.02250, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.02250

Epoch 00003: val_loss did not improve from 0.02250

Epoch 00004: val_loss did not improve from 0.02250

Epoch 00005: val_loss did not improve from 0.02250

Epoch 00006: val_loss did not improve from 0.02250

Epoch 00007: val_loss did not improve from 0.02250

Epoch 00008: val_loss did not improve from 0.02250

Epoch 00009: val_loss did not improve from 0.02250

Epoch 00010: val_loss did not improve from 0.02250





Epoch 00001: val_loss improved from inf to 0.07204, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07204

Epoch 00003: val_loss did not improve from 0.07204

Epoch 00004: val_loss did not improve from 0.07204

Epoch 00005: val_loss did not improve from 0.07204

Epoch 00006: val_loss did not improve from 0.07204

Epoch 00007: val_loss did not improve from 0.07204

Epoch 00008: val_loss did not improve from 0.07204

Epoch 00009: val_loss did not improve from 0.07204

Epoch 00010: val_loss did not improve from 0.07204




AlgoCrossValIter - 9
Model: "sequential_19"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.38963, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.38963 to 0.38518, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.38518 to 0.33733, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.33733

Epoch 00005: val_loss 




Epoch 00001: val_loss improved from inf to 0.12934, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.12934

Epoch 00003: val_loss did not improve from 0.12934

Epoch 00004: val_loss did not improve from 0.12934

Epoch 00005: val_loss did not improve from 0.12934

Epoch 00006: val_loss did not improve from 0.12934

Epoch 00007: val_loss did not improve from 0.12934

Epoch 00008: val_loss did not improve from 0.12934

Epoch 00009: val_loss did not improve from 0.12934

Epoch 00010: val_loss did not improve from 0.12934





Epoch 00001: val_loss improved from inf to 0.22687, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.22687 to 0.09604, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.09604

Epoch 00004: val_loss improved from 0.09604 to 0.08349, saving model to best-model-conll.hdfs

Epoch 00005: val_loss did not improve from 0.08349

Epoch 00006: val_loss did not improve from 0.08349

Epoch 00007: val_loss did not improve from 0.08349

Epoch 00008: val_loss did not improve from 0.08349

Epoch 00009: val_loss did not improve from 0.08349

Epoch 00010: val_loss did not improve from 0.08349





Epoch 00001: val_loss improved from inf to 0.11343, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.11343

Epoch 00003: val_loss did not improve from 0.11343

Epoch 00004: val_loss did not improve from 0.11343

Epoch 00005: val_loss did not improve from 0.11343

Epoch 00006: val_loss did not improve from 0.11343

Epoch 00007: val_loss did not improve from 0.11343

Epoch 00008: val_loss did not improve from 0.11343

Epoch 00009: val_loss did not improve from 0.11343

Epoch 00010: val_loss did not improve from 0.11343





Epoch 00001: val_loss improved from inf to 0.03945, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.03945

Epoch 00003: val_loss did not improve from 0.03945

Epoch 00004: val_loss did not improve from 0.03945

Epoch 00005: val_loss did not improve from 0.03945

Epoch 00006: val_loss did not improve from 0.03945

Epoch 00007: val_loss did not improve from 0.03945

Epoch 00008: val_loss did not improve from 0.03945

Epoch 00009: val_loss did not improve from 0.03945

Epoch 00010: val_loss did not improve from 0.03945





Epoch 00001: val_loss improved from inf to 0.06253, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06253

Epoch 00003: val_loss did not improve from 0.06253

Epoch 00004: val_loss did not improve from 0.06253

Epoch 00005: val_loss did not improve from 0.06253

Epoch 00006: val_loss did not improve from 0.06253

Epoch 00007: val_loss did not improve from 0.06253

Epoch 00008: val_loss did not improve from 0.06253

Epoch 00009: val_loss did not improve from 0.06253

Epoch 00010: val_loss did not improve from 0.06253





Epoch 00001: val_loss improved from inf to 0.09340, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09340

Epoch 00003: val_loss did not improve from 0.09340

Epoch 00004: val_loss did not improve from 0.09340

Epoch 00005: val_loss did not improve from 0.09340

Epoch 00006: val_loss did not improve from 0.09340

Epoch 00007: val_loss did not improve from 0.09340

Epoch 00008: val_loss did not improve from 0.09340

Epoch 00009: val_loss did not improve from 0.09340

Epoch 00010: val_loss did not improve from 0.09340





Epoch 00001: val_loss improved from inf to 0.08354, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08354

Epoch 00003: val_loss did not improve from 0.08354

Epoch 00004: val_loss did not improve from 0.08354

Epoch 00005: val_loss did not improve from 0.08354

Epoch 00006: val_loss did not improve from 0.08354

Epoch 00007: val_loss did not improve from 0.08354

Epoch 00008: val_loss did not improve from 0.08354

Epoch 00009: val_loss did not improve from 0.08354

Epoch 00010: val_loss did not improve from 0.08354





Epoch 00001: val_loss improved from inf to 0.02173, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.02173

Epoch 00003: val_loss did not improve from 0.02173

Epoch 00004: val_loss did not improve from 0.02173

Epoch 00005: val_loss did not improve from 0.02173

Epoch 00006: val_loss did not improve from 0.02173

Epoch 00007: val_loss did not improve from 0.02173

Epoch 00008: val_loss did not improve from 0.02173

Epoch 00009: val_loss did not improve from 0.02173

Epoch 00010: val_loss did not improve from 0.02173





Epoch 00001: val_loss improved from inf to 0.06863, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06863

Epoch 00003: val_loss did not improve from 0.06863

Epoch 00004: val_loss did not improve from 0.06863

Epoch 00005: val_loss did not improve from 0.06863

Epoch 00006: val_loss did not improve from 0.06863

Epoch 00007: val_loss did not improve from 0.06863

Epoch 00008: val_loss did not improve from 0.06863

Epoch 00009: val_loss did not improve from 0.06863

Epoch 00010: val_loss did not improve from 0.06863




AlgoCrossValIter - 10
Model: "sequential_20"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.36151, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.36151

Epoch 00003: val_loss improved from 0.36151 to 0.34637, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.34637

Epoch 00005: val_loss did not improve from 0.34637

Epoch 00006:




Epoch 00001: val_loss improved from inf to 0.14850, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.14850

Epoch 00003: val_loss did not improve from 0.14850

Epoch 00004: val_loss did not improve from 0.14850

Epoch 00005: val_loss did not improve from 0.14850

Epoch 00006: val_loss did not improve from 0.14850

Epoch 00007: val_loss did not improve from 0.14850

Epoch 00008: val_loss did not improve from 0.14850

Epoch 00009: val_loss did not improve from 0.14850

Epoch 00010: val_loss did not improve from 0.14850

Epoch 00001: val_loss improved from inf to 0.06864, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06864

Epoch 00003: val_loss did not improve from 0.06864

Epoch 00004: val_loss did not improve from 0.06864

Epoch 00005: val_loss did not improve from 0.06864

Epoch 00006: val_loss did not improve from 0.06864

Epoch 00007: val_loss did not improve from 0.06864

Epoch 00008: val_loss did not improve




Epoch 00001: val_loss improved from inf to 0.15268, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.15268 to 0.13160, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.13160

Epoch 00004: val_loss did not improve from 0.13160

Epoch 00005: val_loss did not improve from 0.13160

Epoch 00006: val_loss did not improve from 0.13160

Epoch 00007: val_loss did not improve from 0.13160

Epoch 00008: val_loss did not improve from 0.13160

Epoch 00009: val_loss did not improve from 0.13160

Epoch 00010: val_loss did not improve from 0.13160





Epoch 00001: val_loss improved from inf to 0.03987, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.03987

Epoch 00003: val_loss did not improve from 0.03987

Epoch 00004: val_loss did not improve from 0.03987

Epoch 00005: val_loss did not improve from 0.03987

Epoch 00006: val_loss did not improve from 0.03987

Epoch 00007: val_loss did not improve from 0.03987

Epoch 00008: val_loss did not improve from 0.03987

Epoch 00009: val_loss did not improve from 0.03987

Epoch 00010: val_loss did not improve from 0.03987





Epoch 00001: val_loss improved from inf to 0.06923, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06923

Epoch 00003: val_loss did not improve from 0.06923

Epoch 00004: val_loss did not improve from 0.06923

Epoch 00005: val_loss did not improve from 0.06923

Epoch 00006: val_loss did not improve from 0.06923

Epoch 00007: val_loss did not improve from 0.06923

Epoch 00008: val_loss did not improve from 0.06923

Epoch 00009: val_loss did not improve from 0.06923

Epoch 00010: val_loss did not improve from 0.06923





Epoch 00001: val_loss improved from inf to 0.10383, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10383

Epoch 00003: val_loss did not improve from 0.10383

Epoch 00004: val_loss did not improve from 0.10383

Epoch 00005: val_loss did not improve from 0.10383

Epoch 00006: val_loss did not improve from 0.10383

Epoch 00007: val_loss did not improve from 0.10383

Epoch 00008: val_loss did not improve from 0.10383

Epoch 00009: val_loss did not improve from 0.10383

Epoch 00010: val_loss did not improve from 0.10383





Epoch 00001: val_loss improved from inf to 0.08524, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08524

Epoch 00003: val_loss did not improve from 0.08524

Epoch 00004: val_loss did not improve from 0.08524

Epoch 00005: val_loss did not improve from 0.08524

Epoch 00006: val_loss improved from 0.08524 to 0.08207, saving model to best-model-conll.hdfs

Epoch 00007: val_loss improved from 0.08207 to 0.08164, saving model to best-model-conll.hdfs

Epoch 00008: val_loss did not improve from 0.08164

Epoch 00009: val_loss did not improve from 0.08164

Epoch 00010: val_loss did not improve from 0.08164

Epoch 00001: val_loss improved from inf to 0.01604, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.01604

Epoch 00003: val_loss did not improve from 0.01604

Epoch 00004: val_loss did not improve from 0.01604

Epoch 00005: val_loss did not improve from 0.01604

Epoch 00006: val_loss did not improve from 0.01604

Epo




Epoch 00001: val_loss improved from inf to 0.06587, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06587

Epoch 00003: val_loss did not improve from 0.06587

Epoch 00004: val_loss did not improve from 0.06587

Epoch 00005: val_loss did not improve from 0.06587

Epoch 00006: val_loss did not improve from 0.06587

Epoch 00007: val_loss did not improve from 0.06587

Epoch 00008: val_loss did not improve from 0.06587

Epoch 00009: val_loss did not improve from 0.06587

Epoch 00010: val_loss did not improve from 0.06587




In [153]:
resultCrossVal.to_csv("results.csv")
resultCrossVal

Unnamed: 0,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9
P_test,84.146,84.151,83.606,85.977,83.616,82.658,83.75,84.793,82.44,82.684
P_train,87.602,87.708,88.99,88.033,89.177,87.471,86.763,87.872,87.647,85.515
P_ewo,76.09,78.66,78.531,77.011,77.751,76.014,77.989,77.7,78.184,76.3
R_test,74.109,76.141,74.6,76.685,73.072,78.222,78.993,77.251,76.203,74.486
R_train,82.488,82.424,82.014,83.639,81.219,82.201,84.951,83.401,83.472,84.43
R_ewo,66.019,64.723,66.296,66.758,64.537,66.482,67.592,66.297,67.683,69.723
F1-test,77.967,78.384,78.019,80.504,76.293,79.536,80.234,80.196,77.639,76.151
F1-train,84.128,84.391,84.881,85.518,84.359,84.047,85.558,84.936,84.925,84.215
F1-ewo,69.959,70.478,71.267,70.972,69.849,69.85,71.788,71.009,71.783,71.977


In [154]:
resultCrossVal.mean(axis=1).to_frame()

Unnamed: 0,0
P_test,83.7821
P_train,87.6778
P_ewo,77.423
R_test,75.9762
R_train,83.0239
R_ewo,66.611
F1-test,78.4923
F1-train,84.6958
F1-ewo,70.8932


In [155]:
resultCrossVal.std(axis=1).to_frame()

Unnamed: 0,0
P_test,1.076945
P_train,1.038069
P_ewo,1.001924
R_test,1.896964
R_train,1.155373
R_ewo,1.49995
F1-test,1.585727
F1-train,0.552245
F1-ewo,0.826648


In [156]:
trainByTagResult.to_csv("results/train-by-tag.csv")
trainByTagResult

Unnamed: 0,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9
F1-O,98.057,98.076,98.115,98.166,98.097,98.011,98.139,98.113,98.115,98.028
P-O,97.727,97.706,97.657,97.878,97.561,97.695,98.051,97.826,97.865,97.99
R-O,98.412,98.468,98.593,98.467,98.662,98.354,98.244,98.425,98.383,98.09
F1-MISC,76.782222,78.163333,72.602,75.612,74.142,72.224,74.007,74.356,74.705,77.193333
P-MISC,85.682,84.583,96.071,90.177,97.5,91.801,91.0,93.321,93.245,81.836
R-MISC,60.182,61.364,60.864,66.864,61.864,63.773,66.864,64.682,64.864,61.182
F1-PER,87.944,88.487,88.688,89.018,88.186,87.289,89.08,88.594,88.196,88.465
P-PER,87.691,88.377,89.911,89.177,89.069,88.35,88.895,88.307,87.424,87.895
R-PER,89.013,89.036,88.317,89.375,87.983,87.424,89.6,89.538,89.839,89.628
F1-LOC,80.415556,78.404444,72.691,76.371,78.307778,74.903,76.843,80.241111,75.661,79.484444


In [157]:
trainByTagResult.mean(axis=1).to_frame()

Unnamed: 0,0
F1-O,98.0917
P-O,97.7956
R-O,98.4098
F1-MISC,74.978689
P-MISC,90.5216
R-MISC,63.2503
F1-PER,88.3947
P-PER,88.5096
R-PER,88.9753
F1-LOC,77.332233


In [158]:
trainByTagResult.std(axis=1).to_frame()

Unnamed: 0,0
F1-O,0.048701
P-O,0.153714
R-O,0.1625
F1-MISC,1.944915
P-MISC,5.064696
R-MISC,2.492464
F1-PER,0.527687
P-PER,0.758747
R-PER,0.807485
F1-LOC,2.496672


In [159]:
testByTagResult.to_csv("results/test-by-tag.csv")
testByTagResult

Unnamed: 0,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9
F1-O,97.635,97.757,97.641,97.824,97.646,97.754,97.814,97.89,97.63,97.637
P-O,97.074,97.341,97.081,97.303,96.996,97.569,97.696,97.445,97.341,97.359
R-O,98.223,98.215,98.235,98.362,98.349,97.971,97.969,98.354,97.963,97.966
F1-MISC,100.0,80.0,75.0,78.571429,80.0,80.0,77.778333,100.0,80.0,93.334
P-MISC,40.0,40.0,30.0,60.0,40.0,40.0,45.0,30.0,40.0,45.0
R-MISC,40.0,40.0,30.0,53.333,40.0,40.0,50.0,30.0,40.0,50.0
F1-PER,85.162,84.131,83.353,83.67,83.948,86.065,85.808,86.414,83.939,81.494
P-PER,86.206,85.897,85.746,87.079,86.079,84.317,86.015,86.317,84.231,86.317
R-PER,84.932,84.004,82.353,81.977,83.865,88.543,86.46,87.293,85.432,80.293
F1-LOC,43.672857,54.0475,65.815714,55.08875,43.672857,46.937143,55.8025,56.875,51.4275,53.21375


In [160]:
testByTagResult.mean(axis=1).to_frame()

Unnamed: 0,0
F1-O,97.7228
P-O,97.3205
R-O,98.1607
F1-MISC,84.468376
P-MISC,41.0
R-MISC,41.3333
F1-PER,84.3984
P-PER,85.8204
R-PER,84.5152
F1-LOC,52.655357


In [161]:
testByTagResult.std(axis=1).to_frame()

Unnamed: 0,0
F1-O,0.097142
P-O,0.222187
R-O,0.174936
F1-MISC,9.487467
P-MISC,8.43274
R-MISC,7.88805
F1-PER,1.48902
P-PER,0.889977
R-PER,2.546555
F1-LOC,6.705324


In [162]:
ewoByTagResult.to_csv("results/ewo-by-tag.csv")

In [163]:
ewoByTagResult = pd.read_csv("results/ewo-by-tag.csv", index_col=0)
ewoByTagResult

Unnamed: 0,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9
F1-O,96.908,97.103,97.046,97.001,97.036,96.875,97.154,97.086,97.129,97.079
P-O,96.24,96.149,96.245,96.336,96.138,96.315,96.504,96.299,96.466,96.687
R-O,97.606,98.089,97.878,97.693,97.967,97.474,97.837,97.901,97.824,97.497
F1-MISC,42.3675,41.07,49.64625,43.177,47.5625,52.75375,45.768889,48.455,42.516667,50.42
P-MISC,50.002,53.493,54.645,59.396,55.002,51.51,54.642,51.669,54.558,57.7
R-MISC,27.5,30.0,32.5,36.25,30.0,37.5,37.5,32.5,31.25,40.0
F1-PER,76.372,77.473,78.226,77.021,76.372,75.455,77.659,76.998,77.95,78.662
P-PER,76.617,80.266,80.886,78.959,78.359,76.908,80.522,78.165,78.461,79.765
R-PER,77.125,75.375,76.875,76.25,75.25,76.0,75.75,76.625,79.0,78.5
F1-LOC,57.048889,53.812222,53.695556,55.279,54.335556,53.625,59.146,59.925556,53.978,59.692222


In [164]:
ewoByTagResult.mean(axis=1).to_frame()

Unnamed: 0,0
F1-O,97.0417
P-O,96.3379
R-O,97.7766
F1-MISC,46.373756
P-MISC,54.2617
R-MISC,33.5
F1-PER,77.2188
P-PER,78.8908
R-PER,76.675
F1-LOC,56.0538


In [165]:
ewoByTagResult.std(axis=1).to_frame()

Unnamed: 0,0
F1-O,0.091114
P-O,0.170545
R-O,0.203124
F1-MISC,3.98849
P-MISC,2.83118
R-MISC,4.073969
F1-PER,0.973158
P-PER,1.468093
R-PER,1.25582
F1-LOC,2.648421


In [166]:
columns = en_fingerprints.columns

print("Pred", "Real", "Freq", "Word", sep="\t")
for c in columns:
    prediction = model.predict(en_fingerprints[c].values.reshape((1, 210)))
    pred_tag = int2tag[np.argmax(prediction)]
    real_tag = en_corpus[en_corpus.word == c].iloc[0]['ne-tag']
    
    if pred_tag != real_tag:
        print(pred_tag, real_tag, en_fingerprints[c].max(), c, sep="\t")

Pred	Real	Freq	Word
O	LOC	4170.0	Samaria
O	LOC	4170.0	Olivet
O	MISC	4170.0	Sabbath
PER	O	4170.0	upper
PER	O	4170.0	room
PER	O	4170.0	where
PER	O	4170.0	Zealot
LOC	O	4170.0	so
LOC	O	4170.0	language
O	MISC	4170.0	Psalms
PER	O	4170.0	forward
O	MISC	4170.0	ninth
O	PER	4170.0	Moses
PER	O	4170.0	proclaimed
ORG	O	4170.0	captain
PER	O	4170.0	high-priestly
PER	O	4170.0	family
O	PER	4170.0	Pontius
O	PER	4170.0	Barnabas
O	LOC	4170.0	Cyprus
O	PER	4170.0	Elijah
O	MISC	4170.0	r
PER	O	4170.0	Ju
PER	O	4170.0	h
LOC	O	4170.0	deportation
PER	O	4170.0	us)
