In [1]:
# import
import keras
import sys
import numpy as np
import string
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import SimpleRNN, Dense
from keras.utils import np_utils, plot_model
from keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn import model_selection
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, precision_recall_fscore_support
import h5py as h5py

Using TensorFlow backend.


In [2]:
# if we are doeing binary classification. That means say if a token is a named entity or not
BINARY = False

# number of epochs for training
epochs = 10 

# the english side of the corpus
en_corpus_file = "corpus-en.txt"

# the ewondo side of the corpus
ewo_corpus_file = "corpus-ewo.txt"

# name of the file to same the model 
best_model_file = "best-model-conll.hdfs"

# The maximal number of phrases to use
max_nb_of_phrases =  -1

# the maximal number of duplicates for each word in the corpus
duplication = 1

# wether we are using only the vocabulary, ro redundancy
is_only_vocab = True

# if word should be shuffle or not
shuffle = is_only_vocab

# normalization strategy
# "exp": normalization by exponentiation expo(nbWV)/exp(nbOcc(w))
# "log": normalization by logarithm log(nbWV)/log(nbOcc(w))
# "max": normalization by max [nbWV/nbOcc(w)]/ [nbWV/nbOcc(w*)] = nbOcc(w*)/nbOcc(w) where w* is the most frequent word
# None: for no normalization
normalization_strategy = "max"

# if we are using the Zennaki et al. signature
is_zennaki = False

# the number of neurons in the first layer
h1_size = 640

# number of neurons in the second layer
h2_size = 160  

In [3]:
def getTag(aString):
    """
        convert a string to a tag
    """
    tag = "O"
    if BINARY:
        if aString != "O":
            return "NE"
    else:
        tag = aString
    return tag
     

In [4]:
def load_corpus(file, max_nb_of_phrases):
    """
    Load a corpus stored in a file
    Input:
        - file: the name of the file of the corpus
        - max_nb_of_phases: maximal number of phrases to load
    
    Return:
        - a DataFrame representing the corpus
        - the number of phrases in the corpus
    """
    nb_of_phrases = 0
    dataset = {"word": [], "ne-tag": []}
    with open(file) as f:
        prev_line = None
        for cpt, line in enumerate(f):
            if cpt == 0:
                continue
            if nb_of_phrases == max_nb_of_phrases:
                break;

            l = line.strip()
            if len(l) == 0 and len(prev_line) != 0:
                nb_of_phrases += 1
                dataset["word"].append(line)
                dataset["ne-tag"].append(None)
            else:
                l = l.split("\t")
                if l[0] not in string.punctuation:
                    dataset["word"].append(l[0])
                    dataset["ne-tag"].append(ne_type(l[1]))
            prev_line = line.strip()
        
    return pd.DataFrame(dataset), nb_of_phrases+1

In [94]:
def corpus_fingerprint(aDataframe, nb_of_biphrases):
    """
    Create the distributionnal signature of each word in the corpus
    Input:
        -aDataFrame: the corpus DataFrame
        -nb_of_biphrases: number of phrases in the corpus
    Return:
        a DataFrame: corpus fingerprint, the columns are the words in the corpus
    """
    print("Normalization strategy:", normalization_strategy)
    fingerprints = {}
    current_bi_phrase_index = 0
    nb_word_in_corpus = aDataframe[aDataframe.word != "\n"].word.size
    words_in_current_phrase = []
    for index, row in aDataframe.iterrows():
        if current_bi_phrase_index > nb_of_biphrases:
            break
            
        word = row['word']
        
        if word != "\n":
            words_in_current_phrase.append(word)
            if word not in fingerprints:
                fingerprints[word] = np.zeros(nb_of_biphrases, dtype=np.float32)
            fingerprints[word][current_bi_phrase_index] += 1
        else:
            nb_word_in_current_phrase = len(words_in_current_phrase)
#             for w in words_in_current_phrase:
#                 fingerprints[w][current_bi_phrase_index] = nb_word_in_corpus / fingerprints[w][current_bi_phrase_index]                
            current_bi_phrase_index += 1
            words_in_current_phrase = []
        
    for word in fingerprints:
        for i in range(nb_of_biphrases):
            if fingerprints[word][i] != 0:
                if normalization_strategy == "exp":
                    fingerprints[word][i] = np.exp(nb_word_in_corpus) / np.exp(fingerprints[word][i])
                elif normalization_strategy == "log":
                    fingerprints[word][i] = np.log(nb_word_in_corpus) / (np.log(fingerprints[word][i]) or 1)
                else:
                    fingerprints[word][i] = nb_word_in_corpus / fingerprints[word][i]
#         fingerprints[word][nb_of_biphrases] = nb_word_in_corpus / aDataframe[aDataframe.word == word].word.size
    ret = pd.DataFrame(fingerprints)
    
    if is_zennaki:
        ret[ret > 0] = 1
    elif normalization_strategy == "max":
        _max = ret.max().max()
        ret[ret > 0] = _max / ret[ret > 0]
        
    return ret

In [6]:
def corpus2trainingdata(aDataframe, fingerprintsDataFrame):
    """
    Convert corpus to training data => numpy array
    
    Input:
        -aDataFrame: Corpus dataframe
        -fingerprintsDataFrame: distributionnal signature of words in the corpus
    Return:
        (X, y): X is the array of words (signature) in the corpus and y is the corresponding labels (NE tags)
    """
    X = np.zeros((aDataframe.shape[0], fingerprintsDataFrame.shape[0]), dtype=np.int8)
    y = np.zeros(aDataframe.shape[0], dtype=np.int8)
    i = 0
    for row in aDataframe.iterrows():
        X[i] = fingerprintsDataFrame[row[1]['word']].values
        y[i] = tag2int[getTag(row[1]['ne-tag'])]
        i += 1
    return X, y

In [7]:
# A utility function to convert NE tags
def ne_type(aType):
    aType = aType.lower()
    if 'per' in aType:
        t =  'NE' if BINARY else 'PER' 
    elif 'loc' in aType:
        t =  'NE' if BINARY else 'LOC'
    elif 'org' in aType:
        t =  'NE' if BINARY else 'ORG'
    elif 'hour' in aType:
        t =  'NE' if BINARY else 'MISC'
    elif aType != 'o' and len(aType) > 0 :
        t =  'NE' if BINARY else 'MISC'
    else:
        t = 'O'
    return t

In [8]:
def P_R_F1(y_pred, y_true, neg_class):
    same = y_pred[y_true==y_pred]
    tp = same[same != neg_class].size
    nb_of_pos_exple = y_true[y_true != neg_class].size
    nb_of_pos_pred = y_pred[y_pred != neg_class].size
    p = r = f1 = 0
    try:
        p = np.round(tp*100/nb_of_pos_pred, 2)
    except ZeroDivisionError:
        print("number of correct positive predictions is 0")
        
    try:
        r = np.round(tp*100/nb_of_pos_exple, 2)
    except ZeroDivisionError:
        print("number of position exple is 0")
        
    try:
        f1 = np.round(2*r*p/(r+p), 2)
    except ZeroDivisionError:
        print("Recall and precision are 0")

    return p, r, f1

In [9]:
def shuffle_data(X, y):
    indices = [i for i in  range(X.shape[0])]
    np.random.shuffle(indices)
    return X[indices], y[indices]

In [10]:
def create_model(input_dim, output_dim):
    model = Sequential()
    model.add(Dense(h1_size, input_dim=input_dim, activation='sigmoid', name="hidden1"))
    model.add(Dense(h2_size, activation='sigmoid', name="hidden2"))
    if BINARY:
        model.add(Dense(1, activation='sigmoid', name="outputlayer"))
        model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['binary_accuracy'])
    else:
        model.add(Dense(output_dim, activation='softmax', name="outputlayer"))
        model.compile(loss='categorical_crossentropy', optimizer="rmsprop", metrics=['accuracy'])
    model.summary()
    return model

In [11]:
def train_model(model, X_train, y_train, X_val, y_val, epochs=epochs):
    # stop learning if the error is the same between two consecutive epochs
    early_stop = EarlyStopping(patience=20, verbose=2)
    
    # saving best model
    best_model_cp = ModelCheckpoint(best_model_file, save_best_only=True, verbose=1)
    
    model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=epochs, verbose=0, shuffle=shuffle, callbacks=[best_model_cp, early_stop])
    
    #loading and returning the best model
    return keras.models.load_model(best_model_file)

In [12]:
def predict(model, X, y, binary=BINARY):
    if BINARY:
        y_pred = np.round(model.predict(X))
        y_true = y
    else:
        predictions = model.predict(X)
        y_pred = np.array([np.argmax(p) for p in predictions])
        y_true = np.array([np.argmax(t) for t in y ])
    return y_true, y_pred

In [13]:
def model_performance(y_true, y_pred):
    return P_R_F1(y_pred, y_true, tag2int['O']) #precision, recall, f1-score

In [14]:
def model_performace_by_tag(y_true, y_pred, tag):
    p, r, f1 = 0, 0, 0
    
    eq = y_pred[y_pred==y_true]
    correctly_pred = eq[eq==tag].size
    try:
        p = np.round(100 * correctly_pred / y_pred[y_pred==tag].size, 2)
    except ZeroDivisionError:
        pass
    
    try:
        r = np.round(100 * correctly_pred / y_true[y_true==tag].size, 2)
    except ZeroDivisionError:
        pass
    
    try:
        f1 = np.round(2 * r * p / (r + p), 2)
    except ZeroDivisionError:
        pass
    
    return p, r, f1

In [15]:
def algoEval(X_train, y_train, X_val, y_val, X_ewo, y_ewo, epochs=epochs, model=None):
    """
    Train a model on (X, y) and validate on (X_val, y_val) then project on (X_ewo)
    """
    test_precision, train_precision, ewo_precision = [], [], []
    test_recall, train_recall, ewo_recall = [], [], []
    test_fscore, train_fscore, ewo_fscore = [], [], []
    
    test_result_by_tag = {}
    train_result_by_tag = {}
    ewo_result_by_tag = {}
    for t in tagSet:
        f1_key = "F1-"+t
        p_key = "P-"+t
        r_key = "R-"+t
        train_result_by_tag[f1_key], train_result_by_tag[p_key], train_result_by_tag[r_key] = [], [], []
        test_result_by_tag[f1_key], test_result_by_tag[p_key], test_result_by_tag[r_key] = [], [], []
        ewo_result_by_tag[f1_key], ewo_result_by_tag[p_key], ewo_result_by_tag[r_key] = [], [], []

    m = train_model(model, X_train, y_train, X_val, y_val, epochs=epochs)
        
    y_true, y_pred = predict(m, X_train, y_train)
    p_train, r_train, f1_train = model_performance(y_true, y_pred)
        
    y_true_val, y_pred_val = predict(m, X_val, y_val)
    p_val, r_val, f1_val = model_performance(y_true_val, y_pred_val)
        
    y_true_ewo, y_pred_ewo = predict(m, X_ewo, y_ewo) 
    p_ewo, r_ewo, f1_ewo = model_performance(y_true_ewo, y_pred_ewo)
        
    for t in range(len(int2tag)):
        f1_key = "F1-" + int2tag[t]
        p_key = "P-" + int2tag[t]
        r_key = "R-" + int2tag[t]
            
        p, r, f1 = model_performace_by_tag(y_true, y_pred, t)
        train_result_by_tag[p_key].append(p)
        train_result_by_tag[r_key].append(r)
        train_result_by_tag[f1_key].append(f1)
            
        p, r, f1 = model_performace_by_tag(y_true_val, y_pred_val, t)
        test_result_by_tag[p_key].append(p)
        test_result_by_tag[r_key].append(r)
        test_result_by_tag[f1_key].append(f1)
            
        p, r, f1 = model_performace_by_tag(y_true_ewo, y_pred_ewo, t)
        ewo_result_by_tag[p_key].append(p)
        ewo_result_by_tag[r_key].append(r)
        ewo_result_by_tag[f1_key].append(f1)
                
    test_precision.append(p_val)
    train_precision.append(p_train)
    ewo_precision.append(p_ewo)
        
    test_recall.append(r_val)
    train_recall.append(r_train)
    ewo_recall.append(r_ewo)
        
    test_fscore.append(f1_val)
    train_fscore.append(f1_train)
    ewo_fscore.append(f1_ewo)
    return pd.DataFrame({
        'P_val': test_precision, 
        'P_train': train_precision, 
        'P_ewo': ewo_precision, 'R_val': test_recall, 'R_train': train_recall, 
        'R_ewo': ewo_recall, 'F1-val': test_fscore, 'F1-train': train_fscore, 'F1-ewo': ewo_fscore}), pd.DataFrame(train_result_by_tag), pd.DataFrame(test_result_by_tag), pd.DataFrame(ewo_result_by_tag)

In [16]:
def algoCrossVal(X, y, X_ewo, y_ewo, k = 10, repeat=1): 
    """
    Traing a model with k-fold cross validation
    We train the model `repeat` times to check it's stability
    """
    block_size = int(X.shape[0] / k)   
    output = None
    model = None
    train_by_tags, test_by_tags, ewo_by_tags = None, None, None
    for it in range(repeat):
        print("AlgoCrossValIter -", it+1)
        model = create_model(X.shape[1], len(tagSet))
        results = None
        train_by_tagsTmp, test_by_tagsTmp, ewo_by_tagsTmp = None, None, None
        for i in range(k):
            X_val, y_val = X[i*block_size:i*block_size+block_size], y[i*block_size:i*block_size+block_size]
            X_train = np.concatenate((X[0:i*block_size], X[i*block_size+block_size:]))
            y_train = np.concatenate((y[0:i*block_size], y[i*block_size+block_size:]))

            X_train = X_train.reshape(X_train.shape[0], X_train.shape[1])
            X_val = X_val.reshape(X_val.shape[0], X_val.shape[1])

            result, train_by_tag, test_by_tag, ewo_by_tag = algoEval(X_train, y_train, X_val, y_val, X_ewo, y_ewo, model=model)
            if results is None:
                results = result.copy()
                train_by_tagsTmp, test_by_tagsTmp, ewo_by_tagsTmp = train_by_tag.copy(), test_by_tag.copy(), ewo_by_tag.copy()
            else:
                results = pd.concat([results, result], ignore_index=True)
                train_by_tagsTmp = pd.concat([train_by_tagsTmp, train_by_tag], ignore_index=True)
                test_by_tagsTmp = pd.concat([test_by_tagsTmp, test_by_tag], ignore_index=True)
                ewo_by_tagsTmp = pd.concat([ewo_by_tagsTmp, ewo_by_tag], ignore_index=True)
        
        if output is None:
            output = results.mean(axis=0).to_frame()
            train_by_tags = train_by_tagsTmp.mean(axis=0).to_frame()
            test_by_tags = test_by_tagsTmp.mean(axis=0).to_frame()
            ewo_by_tags = ewo_by_tagsTmp.mean(axis=0).to_frame()
        else:
            output = pd.concat([output, results.mean(axis=0).to_frame()], axis=1)
            train_by_tags = pd.concat([train_by_tags, train_by_tagsTmp.mean(axis=0).to_frame()], axis=1)
            test_by_tags = pd.concat([test_by_tags, test_by_tagsTmp.mean(axis=0).to_frame()], axis=1)
            ewo_by_tags = pd.concat([ewo_by_tags, ewo_by_tagsTmp.mean(axis=0).to_frame()], axis=1)

    return output, train_by_tags, test_by_tags, ewo_by_tags, model

In [17]:
en_corpus, en_nb_of_phrases = load_corpus(en_corpus_file, max_nb_of_phrases)

In [18]:
nb_word_in_corpus = en_corpus[en_corpus.word != "\n"].word.size
print("Nb word in corpus", nb_word_in_corpus)

Nb word in corpus 4170


In [19]:
en_corpus.head()
en_corpus.loc[en_corpus['ne-tag'] == 'ORG']

Unnamed: 0,word,ne-tag
1335,Sadducees,ORG


In [20]:
tagSet = en_corpus["ne-tag"].dropna().unique()
if BINARY:
    tagSet = ['NE', 'O']
tag2int = {j: i for i, j in enumerate(tagSet)}
int2tag = {i: j for i, j in enumerate(tagSet)}
print(tag2int)

{'O': 0, 'MISC': 1, 'PER': 2, 'LOC': 3, 'ORG': 4}


In [21]:
en_nb_of_phrases

210

In [22]:
en_corpus.describe()

Unnamed: 0,word,ne-tag
count,4379,4170
unique,904,5
top,the,O
freq,313,3779


In [23]:
en_corpus.head(10)

Unnamed: 0,word,ne-tag
0,The,O
1,Promise,O
2,of,O
3,the,O
4,Holy,MISC
5,Spirit,MISC
6,\n,
7,In,O
8,the,O
9,first,O


In [24]:
for tag in tagSet:
    print("{0} % = {1} %".format(tag, np.round(en_corpus[en_corpus['ne-tag']==tag].shape[0] * 100 / en_corpus[en_corpus['ne-tag']!='\n'].shape[0], 2)))

O % = 86.3 %
MISC % = 2.4 %
PER % = 5.59 %
LOC % = 0.91 %
ORG % = 0.02 %


In [25]:
for tag in tagSet:
    print("{0} % = {1} %".format(tag, np.round(en_corpus[en_corpus['ne-tag']==tag].word.unique().shape[0] * 100 / en_corpus[en_corpus['ne-tag']!='\n'].word.unique().shape[0], 2)))

O % = 89.16 %
MISC % = 1.88 %
PER % = 8.96 %
LOC % = 1.99 %
ORG % = 0.11 %


In [26]:
en_corpus[en_corpus.word == "\n"].shape

(209, 2)

In [27]:
print("Nb of bi-phrases", en_nb_of_phrases)

Nb of bi-phrases 210


In [97]:
normalization_strategy = "log"
en_fingerprints = corpus_fingerprint(en_corpus, en_nb_of_phrases)

Normalization strategy: log


In [99]:
en_fingerprints.head(10)

Unnamed: 0,The,Promise,of,the,Holy,Spirit,In,first,book,O,...,considered,dream,She,save,fulfill,Immanuel,us),woke,sleep,knew
0,8.335671,8.335671,8.335671,8.335671,8.335671,8.335671,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,8.335671,0.0,0.0,8.335671,8.335671,8.335671,8.335671,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,7.587455,8.335671,8.335671,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,8.335671,8.335671,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,8.335671,12.025831,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,8.335671,8.335671,8.335671,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,8.335671,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,8.335671,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,0.0,0.0,8.335671,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,8.335671,7.587455,8.335671,8.335671,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [100]:
(4170 / nb_word_in_corpus)

1.0

In [101]:
en_corpus.shape

(4379, 2)

In [102]:
en_fingerprints['you'].values.shape

(210,)

In [103]:
en_corpus[en_corpus.word != "\n"].shape

(4170, 2)

In [104]:
if is_only_vocab:
    text = list(en_corpus[en_corpus.word != "\n"].word.unique())
else:
    text = list(en_corpus[en_corpus.word != "\n"].word)
en_vocab = pd.DataFrame({'text': text})
en_vocab.describe()

Unnamed: 0,text
count,903
unique,903
top,ministry
freq,1


In [105]:
if is_only_vocab:
    X = np.zeros((en_vocab.shape[0] * duplication, en_nb_of_phrases))
    target = np.zeros((en_vocab.shape[0] * duplication))
    p=0
    for i, row in en_vocab.iterrows():
        c = row.text
        for j in range(duplication):
            X[p] = en_fingerprints[c.split(" ")[0]]
            target[p] = tag2int[getTag(en_corpus[en_corpus.word == c.split(" ")[-1:][0]]['ne-tag'].iloc[0])]
            p+=1
    X, target = shuffle_data(X, target)
    print(X.shape, en_fingerprints.shape, target.shape)

(903, 210) (210, 903) (903,)


In [106]:
en_vocab[-20:]

Unnamed: 0,text
883,Eliud
884,Eleazar
885,Matthan
886,husband
887,fourteen
888,unwilling
889,shame
890,resolved
891,divorce
892,quietly


In [107]:
if not is_only_vocab:
    X, target = corpus2trainingdata(en_corpus[en_corpus.word != "\n"], en_fingerprints)

In [108]:
if shuffle:
    X, target = shuffle_data(X, target)

In [109]:
y = target.copy()
y[0:100]
if not BINARY:
    y = np_utils.to_categorical(y, len(tagSet))
y.shape

(903, 5)

In [110]:
from sklearn.decomposition import PCA

def visualize(X, y):
    pca = PCA(n_components=2)
    X_embeded = pca.fit_transform(X)
    plt.figure(figsize=(5, 5))
    plt.scatter(X_embeded[:, 0], X_embeded[:, 1], c=y)
    plt.legend()
    plt.show()

In [111]:
# visualize(X, target)

In [112]:
X_train, X_val, y_train, y_val = model_selection.train_test_split(X, y, test_size=0.33)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1])
X_val = X_val.reshape(X_val.shape[0], X_val.shape[1])
print("X_train.shape =", X_train.shape)
print("y_train.shape =", y_train.shape)
print("X_val.shape =", X_val.shape)
print("y_val.shape =", y_val.shape)

tTarget = np.array([np.argmax(yy) for yy in y_train])
vTarget = np.array([np.argmax(yy) for yy in y_val])

for tag in tagSet:
    print("{0} % in training data = {1} %".format(tag, np.round(tTarget[tTarget==tag2int[tag]].size * 100 / tTarget.shape[0], 2)))
    print("{0} % in validation data = {1} %".format(tag, np.round(vTarget[vTarget==tag2int[tag]].size * 100 / vTarget.shape[0], 2)))

X_train.shape = (605, 210)
y_train.shape = (605, 5)
X_val.shape = (298, 210)
y_val.shape = (298, 5)
O % in training data = 87.44 %
O % in validation data = 89.93 %
MISC % in training data = 0.99 %
MISC % in validation data = 1.68 %
PER % in training data = 9.42 %
PER % in validation data = 6.71 %
LOC % in training data = 1.98 %
LOC % in validation data = 1.68 %
ORG % in training data = 0.17 %
ORG % in validation data = 0.0 %


In [113]:
ewo_corpus, ewo_nb_of_phrases = load_corpus(ewo_corpus_file, max_nb_of_phrases)

In [114]:
ewo_corpus.loc[ewo_corpus['ne-tag'] == 'PER']

Unnamed: 0,word,ne-tag
6,Teofil,PER
15,Yesus,PER
86,Yohannes,PER
104,Yesus,PER
230,Yesus,PER
...,...,...
3676,Maria,PER
3697,Yesus,PER
3740,Emmanuel,PER
3750,Yosef,PER


In [115]:
ewo_nb_of_phrases

210

In [116]:
for tag in tagSet:
    print("{0} % = {1} %".format(tag, np.round(ewo_corpus[ewo_corpus['ne-tag']==tag].shape[0] * 100 / ewo_corpus[ewo_corpus['ne-tag']!='\n'].shape[0], 2)))

O % = 84.15 %
MISC % = 2.54 %
PER % = 6.69 %
LOC % = 1.03 %
ORG % = 0.05 %


In [117]:
for tag in tagSet:
    print("{0} % = {1} %".format(tag, np.round(ewo_corpus[ewo_corpus['ne-tag']==tag].word.unique().shape[0] * 100 / ewo_corpus[ewo_corpus['ne-tag']!='\n'].word.unique().shape[0], 2)))

O % = 89.94 %
MISC % = 1.17 %
PER % = 8.3 %
LOC % = 1.86 %
ORG % = 0.2 %


In [118]:
ewo_corpus.describe()

Unnamed: 0,word,ne-tag
count,3779,3570
unique,1024,5
top,\n,O
freq,209,3180


In [119]:
ewo_corpus.head()

Unnamed: 0,word,ne-tag
0,Mfufub,MISC
1,Nsisim,MISC
2,ayi,O
3,sò,O
4,\n,


In [120]:
ewo_fingerprints = corpus_fingerprint(ewo_corpus, en_nb_of_phrases)

Normalization strategy: log


In [127]:
if is_only_vocab:
    text = list(ewo_corpus[ewo_corpus.word != "\n"].word.unique())
else:
    text = list(ewo_corpus[ewo_corpus.word != "\n"].word)
ewo_vocab = pd.DataFrame({"text":text})

In [128]:
if is_only_vocab:
    X_ewo = np.zeros((ewo_vocab.shape[0] * duplication, en_nb_of_phrases))
    ewo_target = np.zeros((ewo_vocab.shape[0] * duplication))
    p=0
    for i, row in ewo_vocab.iterrows():
        c = row.text
        for j in range(duplication):
            X_ewo[p] = ewo_fingerprints[c.split(" ")[0]]
            ewo_target[p] = tag2int[getTag(ewo_corpus[ewo_corpus.word == c.split(" ")[-1:][0]]['ne-tag'].iloc[0])]
            p+=1
    X_ewo, ewo_target = shuffle_data(X_ewo, ewo_target)

In [129]:
ewo_vocab[-10:]

Unnamed: 0,text
1013,sik
1014,Ntud
1015,bëyole
1016,Emmanuel
1017,Avëbë
1018,angavëbë
1019,oyò
1020,angabende
1021,anganòṅ
1022,angayole


In [130]:
if not is_only_vocab:
    X_ewo, ewo_target = corpus2trainingdata(ewo_corpus[ewo_corpus.word != "\n"], ewo_fingerprints)

In [131]:
if shuffle:
    X_ewo, ewo_target = shuffle_data(X_ewo, ewo_target)

In [132]:
y_ewo = ewo_target.copy()
print(y_ewo.shape, len(ewo_vocab))

(1023,) 1023


In [133]:
X_ewo.shape

(1023, 210)

In [134]:
y_ewo = ewo_target.copy()
y_ewo[:20]
if not BINARY:
    y_ewo = np_utils.to_categorical(y_ewo)

In [135]:
X_ewo = X_ewo.reshape((X_ewo.shape[0], en_nb_of_phrases))

In [136]:
# model = create_model(X.shape[1], len(tagSet))
# resultEval, train_by_tag, test_by_tag, ewo_by_tag = algoEval(X_train, y_train, X_val, y_val, X_ewo, y_ewo, model=model)

In [137]:
# resultEval

In [138]:
# train_by_tag

In [139]:
# test_by_tag

In [140]:
# ewo_by_tag

In [141]:
# resultEval.mean()

In [142]:
# resultEval.std()

In [143]:
resultCrossVal, trainByTagResult, testByTagResult, ewoByTagResult, model = algoCrossVal(X, y, X_ewo, y_ewo, repeat=10)

AlgoCrossValIter - 1
Model: "sequential_11"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.41321, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.41321 to 0.34123, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.34123

Epoch 00004: val_loss did not improve from 0.34123

Epoch 00005: val_loss improved from 0.34123 to 0.28535, saving mo




Epoch 00001: val_loss improved from inf to 0.20404, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.20404

Epoch 00003: val_loss did not improve from 0.20404

Epoch 00004: val_loss did not improve from 0.20404

Epoch 00005: val_loss did not improve from 0.20404

Epoch 00006: val_loss did not improve from 0.20404

Epoch 00007: val_loss did not improve from 0.20404

Epoch 00008: val_loss did not improve from 0.20404

Epoch 00009: val_loss did not improve from 0.20404

Epoch 00010: val_loss did not improve from 0.20404





Epoch 00001: val_loss improved from inf to 0.08818, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08818

Epoch 00003: val_loss did not improve from 0.08818

Epoch 00004: val_loss did not improve from 0.08818

Epoch 00005: val_loss did not improve from 0.08818

Epoch 00006: val_loss did not improve from 0.08818

Epoch 00007: val_loss did not improve from 0.08818

Epoch 00008: val_loss did not improve from 0.08818

Epoch 00009: val_loss did not improve from 0.08818

Epoch 00010: val_loss did not improve from 0.08818





Epoch 00001: val_loss improved from inf to 0.13397, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.13397 to 0.07494, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.07494

Epoch 00004: val_loss did not improve from 0.07494

Epoch 00005: val_loss did not improve from 0.07494

Epoch 00006: val_loss did not improve from 0.07494

Epoch 00007: val_loss did not improve from 0.07494

Epoch 00008: val_loss did not improve from 0.07494

Epoch 00009: val_loss did not improve from 0.07494

Epoch 00010: val_loss did not improve from 0.07494





Epoch 00001: val_loss improved from inf to 0.09600, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09600

Epoch 00003: val_loss did not improve from 0.09600

Epoch 00004: val_loss did not improve from 0.09600

Epoch 00005: val_loss did not improve from 0.09600

Epoch 00006: val_loss did not improve from 0.09600

Epoch 00007: val_loss did not improve from 0.09600

Epoch 00008: val_loss did not improve from 0.09600

Epoch 00009: val_loss did not improve from 0.09600

Epoch 00010: val_loss did not improve from 0.09600





Epoch 00001: val_loss improved from inf to 0.07115, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07115

Epoch 00003: val_loss did not improve from 0.07115

Epoch 00004: val_loss did not improve from 0.07115

Epoch 00005: val_loss did not improve from 0.07115

Epoch 00006: val_loss did not improve from 0.07115

Epoch 00007: val_loss did not improve from 0.07115

Epoch 00008: val_loss did not improve from 0.07115

Epoch 00009: val_loss did not improve from 0.07115

Epoch 00010: val_loss did not improve from 0.07115





Epoch 00001: val_loss improved from inf to 0.13695, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.13695 to 0.11536, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.11536

Epoch 00004: val_loss did not improve from 0.11536

Epoch 00005: val_loss did not improve from 0.11536

Epoch 00006: val_loss did not improve from 0.11536

Epoch 00007: val_loss did not improve from 0.11536

Epoch 00008: val_loss did not improve from 0.11536

Epoch 00009: val_loss did not improve from 0.11536

Epoch 00010: val_loss did not improve from 0.11536





Epoch 00001: val_loss improved from inf to 0.16497, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.16497

Epoch 00003: val_loss did not improve from 0.16497

Epoch 00004: val_loss did not improve from 0.16497

Epoch 00005: val_loss did not improve from 0.16497

Epoch 00006: val_loss did not improve from 0.16497

Epoch 00007: val_loss did not improve from 0.16497

Epoch 00008: val_loss did not improve from 0.16497

Epoch 00009: val_loss did not improve from 0.16497

Epoch 00010: val_loss did not improve from 0.16497





Epoch 00001: val_loss improved from inf to 0.07071, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07071

Epoch 00003: val_loss did not improve from 0.07071

Epoch 00004: val_loss did not improve from 0.07071

Epoch 00005: val_loss did not improve from 0.07071

Epoch 00006: val_loss did not improve from 0.07071

Epoch 00007: val_loss did not improve from 0.07071

Epoch 00008: val_loss did not improve from 0.07071

Epoch 00009: val_loss did not improve from 0.07071

Epoch 00010: val_loss did not improve from 0.07071





Epoch 00001: val_loss improved from inf to 0.07668, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.07668 to 0.06382, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.06382

Epoch 00004: val_loss did not improve from 0.06382

Epoch 00005: val_loss did not improve from 0.06382

Epoch 00006: val_loss did not improve from 0.06382

Epoch 00007: val_loss did not improve from 0.06382

Epoch 00008: val_loss did not improve from 0.06382

Epoch 00009: val_loss did not improve from 0.06382

Epoch 00010: val_loss did not improve from 0.06382




AlgoCrossValIter - 2
Model: "sequential_12"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.47209, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.47209

Epoch 00003: val_loss improved from 0.47209 to 0.39099, saving model to best-model-conll.hdfs

Epoch 00004: val_loss improved from 0.39099 to 0.29533, saving model to best-model-conll.hdfs

Epoch 00005: val_loss 




Epoch 00001: val_loss improved from inf to 0.22521, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.22521

Epoch 00003: val_loss did not improve from 0.22521

Epoch 00004: val_loss did not improve from 0.22521

Epoch 00005: val_loss did not improve from 0.22521

Epoch 00006: val_loss did not improve from 0.22521

Epoch 00007: val_loss did not improve from 0.22521

Epoch 00008: val_loss did not improve from 0.22521

Epoch 00009: val_loss did not improve from 0.22521

Epoch 00010: val_loss did not improve from 0.22521





Epoch 00001: val_loss improved from inf to 0.17604, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.17604 to 0.13500, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.13500

Epoch 00004: val_loss did not improve from 0.13500

Epoch 00005: val_loss did not improve from 0.13500

Epoch 00006: val_loss did not improve from 0.13500

Epoch 00007: val_loss did not improve from 0.13500

Epoch 00008: val_loss did not improve from 0.13500

Epoch 00009: val_loss did not improve from 0.13500

Epoch 00010: val_loss did not improve from 0.13500





Epoch 00001: val_loss improved from inf to 0.06045, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06045

Epoch 00003: val_loss did not improve from 0.06045

Epoch 00004: val_loss did not improve from 0.06045

Epoch 00005: val_loss did not improve from 0.06045

Epoch 00006: val_loss did not improve from 0.06045

Epoch 00007: val_loss did not improve from 0.06045

Epoch 00008: val_loss did not improve from 0.06045

Epoch 00009: val_loss did not improve from 0.06045

Epoch 00010: val_loss did not improve from 0.06045





Epoch 00001: val_loss improved from inf to 0.09502, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09502

Epoch 00003: val_loss did not improve from 0.09502

Epoch 00004: val_loss did not improve from 0.09502

Epoch 00005: val_loss did not improve from 0.09502

Epoch 00006: val_loss did not improve from 0.09502

Epoch 00007: val_loss did not improve from 0.09502

Epoch 00008: val_loss did not improve from 0.09502

Epoch 00009: val_loss did not improve from 0.09502

Epoch 00010: val_loss did not improve from 0.09502





Epoch 00001: val_loss improved from inf to 0.08017, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08017

Epoch 00003: val_loss did not improve from 0.08017

Epoch 00004: val_loss did not improve from 0.08017

Epoch 00005: val_loss did not improve from 0.08017

Epoch 00006: val_loss did not improve from 0.08017

Epoch 00007: val_loss did not improve from 0.08017

Epoch 00008: val_loss did not improve from 0.08017

Epoch 00009: val_loss did not improve from 0.08017

Epoch 00010: val_loss did not improve from 0.08017





Epoch 00001: val_loss improved from inf to 0.09157, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09157

Epoch 00003: val_loss did not improve from 0.09157

Epoch 00004: val_loss did not improve from 0.09157

Epoch 00005: val_loss did not improve from 0.09157

Epoch 00006: val_loss did not improve from 0.09157

Epoch 00007: val_loss did not improve from 0.09157

Epoch 00008: val_loss did not improve from 0.09157

Epoch 00009: val_loss did not improve from 0.09157

Epoch 00010: val_loss did not improve from 0.09157





Epoch 00001: val_loss improved from inf to 0.14642, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.14642

Epoch 00003: val_loss did not improve from 0.14642

Epoch 00004: val_loss did not improve from 0.14642

Epoch 00005: val_loss did not improve from 0.14642

Epoch 00006: val_loss did not improve from 0.14642

Epoch 00007: val_loss did not improve from 0.14642

Epoch 00008: val_loss did not improve from 0.14642

Epoch 00009: val_loss did not improve from 0.14642

Epoch 00010: val_loss did not improve from 0.14642





Epoch 00001: val_loss improved from inf to 0.06582, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06582

Epoch 00003: val_loss did not improve from 0.06582

Epoch 00004: val_loss did not improve from 0.06582

Epoch 00005: val_loss did not improve from 0.06582

Epoch 00006: val_loss did not improve from 0.06582

Epoch 00007: val_loss did not improve from 0.06582

Epoch 00008: val_loss did not improve from 0.06582

Epoch 00009: val_loss did not improve from 0.06582

Epoch 00010: val_loss did not improve from 0.06582





Epoch 00001: val_loss improved from inf to 0.07165, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07165

Epoch 00003: val_loss did not improve from 0.07165

Epoch 00004: val_loss did not improve from 0.07165

Epoch 00005: val_loss did not improve from 0.07165

Epoch 00006: val_loss did not improve from 0.07165

Epoch 00007: val_loss did not improve from 0.07165

Epoch 00008: val_loss did not improve from 0.07165

Epoch 00009: val_loss did not improve from 0.07165

Epoch 00010: val_loss did not improve from 0.07165




AlgoCrossValIter - 3
Model: "sequential_13"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.38551, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.38551 to 0.36870, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.36870 to 0.32060, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.32060

Epoch 00005: val_loss 




Epoch 00001: val_loss improved from inf to 0.26010, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.26010 to 0.24873, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.24873 to 0.23308, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.23308

Epoch 00005: val_loss did not improve from 0.23308

Epoch 00006: val_loss did not improve from 0.23308

Epoch 00007: val_loss did not improve from 0.23308

Epoch 00008: val_loss did not improve from 0.23308

Epoch 00009: val_loss did not improve from 0.23308

Epoch 00010: val_loss did not improve from 0.23308





Epoch 00001: val_loss improved from inf to 0.10570, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10570

Epoch 00003: val_loss did not improve from 0.10570

Epoch 00004: val_loss did not improve from 0.10570

Epoch 00005: val_loss did not improve from 0.10570

Epoch 00006: val_loss did not improve from 0.10570

Epoch 00007: val_loss did not improve from 0.10570

Epoch 00008: val_loss did not improve from 0.10570

Epoch 00009: val_loss did not improve from 0.10570

Epoch 00010: val_loss did not improve from 0.10570





Epoch 00001: val_loss improved from inf to 0.05612, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05612

Epoch 00003: val_loss did not improve from 0.05612

Epoch 00004: val_loss did not improve from 0.05612

Epoch 00005: val_loss did not improve from 0.05612

Epoch 00006: val_loss did not improve from 0.05612

Epoch 00007: val_loss did not improve from 0.05612

Epoch 00008: val_loss did not improve from 0.05612

Epoch 00009: val_loss did not improve from 0.05612

Epoch 00010: val_loss did not improve from 0.05612





Epoch 00001: val_loss improved from inf to 0.09531, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09531

Epoch 00003: val_loss did not improve from 0.09531

Epoch 00004: val_loss did not improve from 0.09531

Epoch 00005: val_loss did not improve from 0.09531

Epoch 00006: val_loss did not improve from 0.09531

Epoch 00007: val_loss did not improve from 0.09531

Epoch 00008: val_loss did not improve from 0.09531

Epoch 00009: val_loss did not improve from 0.09531

Epoch 00010: val_loss did not improve from 0.09531





Epoch 00001: val_loss improved from inf to 0.07856, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.07856 to 0.07473, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.07473

Epoch 00004: val_loss did not improve from 0.07473

Epoch 00005: val_loss did not improve from 0.07473

Epoch 00006: val_loss did not improve from 0.07473

Epoch 00007: val_loss did not improve from 0.07473

Epoch 00008: val_loss did not improve from 0.07473

Epoch 00009: val_loss did not improve from 0.07473

Epoch 00010: val_loss did not improve from 0.07473





Epoch 00001: val_loss improved from inf to 0.11086, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.11086 to 0.09979, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.09979

Epoch 00004: val_loss did not improve from 0.09979

Epoch 00005: val_loss did not improve from 0.09979

Epoch 00006: val_loss did not improve from 0.09979

Epoch 00007: val_loss did not improve from 0.09979

Epoch 00008: val_loss did not improve from 0.09979

Epoch 00009: val_loss did not improve from 0.09979

Epoch 00010: val_loss did not improve from 0.09979





Epoch 00001: val_loss improved from inf to 0.15155, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.15155

Epoch 00003: val_loss did not improve from 0.15155

Epoch 00004: val_loss did not improve from 0.15155

Epoch 00005: val_loss did not improve from 0.15155

Epoch 00006: val_loss did not improve from 0.15155

Epoch 00007: val_loss did not improve from 0.15155

Epoch 00008: val_loss did not improve from 0.15155

Epoch 00009: val_loss did not improve from 0.15155

Epoch 00010: val_loss did not improve from 0.15155





Epoch 00001: val_loss improved from inf to 0.04896, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04896

Epoch 00003: val_loss did not improve from 0.04896

Epoch 00004: val_loss did not improve from 0.04896

Epoch 00005: val_loss did not improve from 0.04896

Epoch 00006: val_loss did not improve from 0.04896

Epoch 00007: val_loss did not improve from 0.04896

Epoch 00008: val_loss did not improve from 0.04896

Epoch 00009: val_loss did not improve from 0.04896

Epoch 00010: val_loss did not improve from 0.04896

Epoch 00001: val_loss improved from inf to 0.07173, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07173

Epoch 00003: val_loss did not improve from 0.07173

Epoch 00004: val_loss did not improve from 0.07173

Epoch 00005: val_loss did not improve from 0.07173

Epoch 00006: val_loss did not improve from 0.07173

Epoch 00007: val_loss did not improve from 0.07173

Epoch 00008: val_loss did not improve



AlgoCrossValIter - 4
Model: "sequential_14"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.61598, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.61598 to 0.33874, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.33874 to 0.32076, saving model to best-model-conll.hdfs

Epoch 00004: val_loss improved from 0.32076 to 0.30277, saving model to be




Epoch 00001: val_loss improved from inf to 0.28702, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.28702 to 0.20000, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.20000

Epoch 00004: val_loss did not improve from 0.20000

Epoch 00005: val_loss did not improve from 0.20000

Epoch 00006: val_loss did not improve from 0.20000

Epoch 00007: val_loss did not improve from 0.20000

Epoch 00008: val_loss did not improve from 0.20000

Epoch 00009: val_loss did not improve from 0.20000

Epoch 00010: val_loss did not improve from 0.20000





Epoch 00001: val_loss improved from inf to 0.12744, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.12744 to 0.11704, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.11704

Epoch 00004: val_loss did not improve from 0.11704

Epoch 00005: val_loss did not improve from 0.11704

Epoch 00006: val_loss did not improve from 0.11704

Epoch 00007: val_loss did not improve from 0.11704

Epoch 00008: val_loss did not improve from 0.11704

Epoch 00009: val_loss did not improve from 0.11704

Epoch 00010: val_loss did not improve from 0.11704





Epoch 00001: val_loss improved from inf to 0.09361, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.09361 to 0.08503, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.08503

Epoch 00004: val_loss did not improve from 0.08503

Epoch 00005: val_loss did not improve from 0.08503

Epoch 00006: val_loss did not improve from 0.08503

Epoch 00007: val_loss did not improve from 0.08503

Epoch 00008: val_loss did not improve from 0.08503

Epoch 00009: val_loss did not improve from 0.08503

Epoch 00010: val_loss did not improve from 0.08503





Epoch 00001: val_loss improved from inf to 0.11000, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.11000

Epoch 00003: val_loss did not improve from 0.11000

Epoch 00004: val_loss did not improve from 0.11000

Epoch 00005: val_loss did not improve from 0.11000

Epoch 00006: val_loss did not improve from 0.11000

Epoch 00007: val_loss did not improve from 0.11000

Epoch 00008: val_loss did not improve from 0.11000

Epoch 00009: val_loss did not improve from 0.11000

Epoch 00010: val_loss did not improve from 0.11000





Epoch 00001: val_loss improved from inf to 0.07940, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.07940 to 0.06798, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.06798

Epoch 00004: val_loss did not improve from 0.06798

Epoch 00005: val_loss did not improve from 0.06798

Epoch 00006: val_loss did not improve from 0.06798

Epoch 00007: val_loss did not improve from 0.06798

Epoch 00008: val_loss did not improve from 0.06798

Epoch 00009: val_loss did not improve from 0.06798

Epoch 00010: val_loss did not improve from 0.06798





Epoch 00001: val_loss improved from inf to 0.08834, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08834

Epoch 00003: val_loss did not improve from 0.08834

Epoch 00004: val_loss did not improve from 0.08834

Epoch 00005: val_loss did not improve from 0.08834

Epoch 00006: val_loss did not improve from 0.08834

Epoch 00007: val_loss did not improve from 0.08834

Epoch 00008: val_loss did not improve from 0.08834

Epoch 00009: val_loss did not improve from 0.08834

Epoch 00010: val_loss did not improve from 0.08834





Epoch 00001: val_loss improved from inf to 0.18051, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.18051 to 0.16944, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.16944

Epoch 00004: val_loss did not improve from 0.16944

Epoch 00005: val_loss did not improve from 0.16944

Epoch 00006: val_loss did not improve from 0.16944

Epoch 00007: val_loss did not improve from 0.16944

Epoch 00008: val_loss did not improve from 0.16944

Epoch 00009: val_loss did not improve from 0.16944

Epoch 00010: val_loss did not improve from 0.16944





Epoch 00001: val_loss improved from inf to 0.05576, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05576

Epoch 00003: val_loss did not improve from 0.05576

Epoch 00004: val_loss did not improve from 0.05576

Epoch 00005: val_loss did not improve from 0.05576

Epoch 00006: val_loss did not improve from 0.05576

Epoch 00007: val_loss did not improve from 0.05576

Epoch 00008: val_loss did not improve from 0.05576

Epoch 00009: val_loss did not improve from 0.05576

Epoch 00010: val_loss did not improve from 0.05576





Epoch 00001: val_loss improved from inf to 0.06642, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.06642 to 0.06494, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.06494

Epoch 00004: val_loss did not improve from 0.06494

Epoch 00005: val_loss did not improve from 0.06494

Epoch 00006: val_loss did not improve from 0.06494

Epoch 00007: val_loss did not improve from 0.06494

Epoch 00008: val_loss did not improve from 0.06494

Epoch 00009: val_loss did not improve from 0.06494

Epoch 00010: val_loss did not improve from 0.06494




AlgoCrossValIter - 5
Model: "sequential_15"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.36156, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.36156

Epoch 00003: val_loss did not improve from 0.36156

Epoch 00004: val_loss improved from 0.36156 to 0.30612, saving model to best-model-conll.hdfs

Epoch 00005: val_loss did not improve from 0.30612

Epoch 00006: 




Epoch 00001: val_loss improved from inf to 0.20578, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.20578

Epoch 00003: val_loss did not improve from 0.20578

Epoch 00004: val_loss did not improve from 0.20578

Epoch 00005: val_loss did not improve from 0.20578

Epoch 00006: val_loss did not improve from 0.20578

Epoch 00007: val_loss did not improve from 0.20578

Epoch 00008: val_loss did not improve from 0.20578

Epoch 00009: val_loss did not improve from 0.20578

Epoch 00010: val_loss did not improve from 0.20578





Epoch 00001: val_loss improved from inf to 0.10873, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10873

Epoch 00003: val_loss did not improve from 0.10873

Epoch 00004: val_loss did not improve from 0.10873

Epoch 00005: val_loss did not improve from 0.10873

Epoch 00006: val_loss did not improve from 0.10873

Epoch 00007: val_loss did not improve from 0.10873

Epoch 00008: val_loss did not improve from 0.10873

Epoch 00009: val_loss did not improve from 0.10873

Epoch 00010: val_loss did not improve from 0.10873





Epoch 00001: val_loss improved from inf to 0.06947, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.06947 to 0.06928, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.06928

Epoch 00004: val_loss did not improve from 0.06928

Epoch 00005: val_loss did not improve from 0.06928

Epoch 00006: val_loss did not improve from 0.06928

Epoch 00007: val_loss did not improve from 0.06928

Epoch 00008: val_loss did not improve from 0.06928

Epoch 00009: val_loss did not improve from 0.06928

Epoch 00010: val_loss did not improve from 0.06928





Epoch 00001: val_loss improved from inf to 0.08368, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08368

Epoch 00003: val_loss did not improve from 0.08368

Epoch 00004: val_loss did not improve from 0.08368

Epoch 00005: val_loss did not improve from 0.08368

Epoch 00006: val_loss did not improve from 0.08368

Epoch 00007: val_loss did not improve from 0.08368

Epoch 00008: val_loss did not improve from 0.08368

Epoch 00009: val_loss did not improve from 0.08368

Epoch 00010: val_loss did not improve from 0.08368





Epoch 00001: val_loss improved from inf to 0.07381, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07381

Epoch 00003: val_loss did not improve from 0.07381

Epoch 00004: val_loss did not improve from 0.07381

Epoch 00005: val_loss did not improve from 0.07381

Epoch 00006: val_loss did not improve from 0.07381

Epoch 00007: val_loss did not improve from 0.07381

Epoch 00008: val_loss did not improve from 0.07381

Epoch 00009: val_loss did not improve from 0.07381

Epoch 00010: val_loss did not improve from 0.07381





Epoch 00001: val_loss improved from inf to 0.08655, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08655

Epoch 00003: val_loss did not improve from 0.08655

Epoch 00004: val_loss did not improve from 0.08655

Epoch 00005: val_loss did not improve from 0.08655

Epoch 00006: val_loss did not improve from 0.08655

Epoch 00007: val_loss did not improve from 0.08655

Epoch 00008: val_loss did not improve from 0.08655

Epoch 00009: val_loss did not improve from 0.08655

Epoch 00010: val_loss did not improve from 0.08655





Epoch 00001: val_loss improved from inf to 0.17204, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.17204

Epoch 00003: val_loss did not improve from 0.17204

Epoch 00004: val_loss did not improve from 0.17204

Epoch 00005: val_loss did not improve from 0.17204

Epoch 00006: val_loss did not improve from 0.17204

Epoch 00007: val_loss did not improve from 0.17204

Epoch 00008: val_loss did not improve from 0.17204

Epoch 00009: val_loss did not improve from 0.17204

Epoch 00010: val_loss did not improve from 0.17204





Epoch 00001: val_loss improved from inf to 0.07271, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07271

Epoch 00003: val_loss did not improve from 0.07271

Epoch 00004: val_loss did not improve from 0.07271

Epoch 00005: val_loss did not improve from 0.07271

Epoch 00006: val_loss did not improve from 0.07271

Epoch 00007: val_loss did not improve from 0.07271

Epoch 00008: val_loss did not improve from 0.07271

Epoch 00009: val_loss did not improve from 0.07271

Epoch 00010: val_loss did not improve from 0.07271

Epoch 00001: val_loss improved from inf to 0.09329, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09329

Epoch 00003: val_loss did not improve from 0.09329

Epoch 00004: val_loss improved from 0.09329 to 0.08511, saving model to best-model-conll.hdfs

Epoch 00005: val_loss did not improve from 0.08511

Epoch 00006: val_loss did not improve from 0.08511

Epoch 00007: val_loss did not improve from 0.0



AlgoCrossValIter - 6
Model: "sequential_16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.38056, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.38056 to 0.37696, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.37696 to 0.33556, saving model to best-model-conll.hdfs

Epoch 00004: val_loss improved from 0.33556 to 0.30292, saving model to be




Epoch 00001: val_loss improved from inf to 0.21508, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.21508

Epoch 00003: val_loss did not improve from 0.21508

Epoch 00004: val_loss did not improve from 0.21508

Epoch 00005: val_loss did not improve from 0.21508

Epoch 00006: val_loss did not improve from 0.21508

Epoch 00007: val_loss did not improve from 0.21508

Epoch 00008: val_loss did not improve from 0.21508

Epoch 00009: val_loss did not improve from 0.21508

Epoch 00010: val_loss did not improve from 0.21508





Epoch 00001: val_loss improved from inf to 0.17846, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.17846 to 0.14360, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.14360

Epoch 00004: val_loss did not improve from 0.14360

Epoch 00005: val_loss did not improve from 0.14360

Epoch 00006: val_loss did not improve from 0.14360

Epoch 00007: val_loss did not improve from 0.14360

Epoch 00008: val_loss did not improve from 0.14360

Epoch 00009: val_loss did not improve from 0.14360

Epoch 00010: val_loss did not improve from 0.14360





Epoch 00001: val_loss improved from inf to 0.07101, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07101

Epoch 00003: val_loss did not improve from 0.07101

Epoch 00004: val_loss did not improve from 0.07101

Epoch 00005: val_loss did not improve from 0.07101

Epoch 00006: val_loss did not improve from 0.07101

Epoch 00007: val_loss did not improve from 0.07101

Epoch 00008: val_loss did not improve from 0.07101

Epoch 00009: val_loss did not improve from 0.07101

Epoch 00010: val_loss did not improve from 0.07101





Epoch 00001: val_loss improved from inf to 0.10431, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10431

Epoch 00003: val_loss did not improve from 0.10431

Epoch 00004: val_loss did not improve from 0.10431

Epoch 00005: val_loss did not improve from 0.10431

Epoch 00006: val_loss did not improve from 0.10431

Epoch 00007: val_loss did not improve from 0.10431

Epoch 00008: val_loss did not improve from 0.10431

Epoch 00009: val_loss did not improve from 0.10431

Epoch 00010: val_loss did not improve from 0.10431





Epoch 00001: val_loss improved from inf to 0.08306, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08306

Epoch 00003: val_loss did not improve from 0.08306

Epoch 00004: val_loss did not improve from 0.08306

Epoch 00005: val_loss did not improve from 0.08306

Epoch 00006: val_loss did not improve from 0.08306

Epoch 00007: val_loss did not improve from 0.08306

Epoch 00008: val_loss did not improve from 0.08306

Epoch 00009: val_loss did not improve from 0.08306

Epoch 00010: val_loss did not improve from 0.08306





Epoch 00001: val_loss improved from inf to 0.07926, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07926

Epoch 00003: val_loss did not improve from 0.07926

Epoch 00004: val_loss did not improve from 0.07926

Epoch 00005: val_loss did not improve from 0.07926

Epoch 00006: val_loss did not improve from 0.07926

Epoch 00007: val_loss did not improve from 0.07926

Epoch 00008: val_loss did not improve from 0.07926

Epoch 00009: val_loss did not improve from 0.07926

Epoch 00010: val_loss did not improve from 0.07926





Epoch 00001: val_loss improved from inf to 0.17449, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.17449 to 0.16318, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.16318

Epoch 00004: val_loss did not improve from 0.16318

Epoch 00005: val_loss did not improve from 0.16318

Epoch 00006: val_loss did not improve from 0.16318

Epoch 00007: val_loss did not improve from 0.16318

Epoch 00008: val_loss did not improve from 0.16318

Epoch 00009: val_loss did not improve from 0.16318

Epoch 00010: val_loss did not improve from 0.16318





Epoch 00001: val_loss improved from inf to 0.07107, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.07107 to 0.06981, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.06981

Epoch 00004: val_loss did not improve from 0.06981

Epoch 00005: val_loss did not improve from 0.06981

Epoch 00006: val_loss did not improve from 0.06981

Epoch 00007: val_loss did not improve from 0.06981

Epoch 00008: val_loss did not improve from 0.06981

Epoch 00009: val_loss did not improve from 0.06981

Epoch 00010: val_loss did not improve from 0.06981





Epoch 00001: val_loss improved from inf to 0.07306, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.07306 to 0.06869, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.06869

Epoch 00004: val_loss did not improve from 0.06869

Epoch 00005: val_loss did not improve from 0.06869

Epoch 00006: val_loss did not improve from 0.06869

Epoch 00007: val_loss did not improve from 0.06869

Epoch 00008: val_loss did not improve from 0.06869

Epoch 00009: val_loss did not improve from 0.06869

Epoch 00010: val_loss did not improve from 0.06869




AlgoCrossValIter - 7
Model: "sequential_17"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 1.41117, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 1.41117 to 0.37467, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.37467 to 0.32828, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.32828

Epoch 00005: val_loss 




Epoch 00001: val_loss improved from inf to 0.19324, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.19324

Epoch 00003: val_loss did not improve from 0.19324

Epoch 00004: val_loss did not improve from 0.19324

Epoch 00005: val_loss did not improve from 0.19324

Epoch 00006: val_loss did not improve from 0.19324

Epoch 00007: val_loss did not improve from 0.19324

Epoch 00008: val_loss did not improve from 0.19324

Epoch 00009: val_loss did not improve from 0.19324

Epoch 00010: val_loss did not improve from 0.19324





Epoch 00001: val_loss improved from inf to 0.11258, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.11258

Epoch 00003: val_loss did not improve from 0.11258

Epoch 00004: val_loss did not improve from 0.11258

Epoch 00005: val_loss did not improve from 0.11258

Epoch 00006: val_loss did not improve from 0.11258

Epoch 00007: val_loss did not improve from 0.11258

Epoch 00008: val_loss did not improve from 0.11258

Epoch 00009: val_loss did not improve from 0.11258

Epoch 00010: val_loss did not improve from 0.11258





Epoch 00001: val_loss improved from inf to 0.06962, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06962

Epoch 00003: val_loss did not improve from 0.06962

Epoch 00004: val_loss did not improve from 0.06962

Epoch 00005: val_loss did not improve from 0.06962

Epoch 00006: val_loss did not improve from 0.06962

Epoch 00007: val_loss did not improve from 0.06962

Epoch 00008: val_loss did not improve from 0.06962

Epoch 00009: val_loss did not improve from 0.06962

Epoch 00010: val_loss did not improve from 0.06962





Epoch 00001: val_loss improved from inf to 0.10388, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10388

Epoch 00003: val_loss did not improve from 0.10388

Epoch 00004: val_loss did not improve from 0.10388

Epoch 00005: val_loss did not improve from 0.10388

Epoch 00006: val_loss did not improve from 0.10388

Epoch 00007: val_loss did not improve from 0.10388

Epoch 00008: val_loss did not improve from 0.10388

Epoch 00009: val_loss did not improve from 0.10388

Epoch 00010: val_loss did not improve from 0.10388





Epoch 00001: val_loss improved from inf to 0.05942, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05942

Epoch 00003: val_loss did not improve from 0.05942

Epoch 00004: val_loss did not improve from 0.05942

Epoch 00005: val_loss did not improve from 0.05942

Epoch 00006: val_loss did not improve from 0.05942

Epoch 00007: val_loss did not improve from 0.05942

Epoch 00008: val_loss did not improve from 0.05942

Epoch 00009: val_loss did not improve from 0.05942

Epoch 00010: val_loss did not improve from 0.05942





Epoch 00001: val_loss improved from inf to 0.13231, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.13231 to 0.09070, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.09070

Epoch 00004: val_loss did not improve from 0.09070

Epoch 00005: val_loss did not improve from 0.09070

Epoch 00006: val_loss did not improve from 0.09070

Epoch 00007: val_loss did not improve from 0.09070

Epoch 00008: val_loss did not improve from 0.09070

Epoch 00009: val_loss did not improve from 0.09070

Epoch 00010: val_loss did not improve from 0.09070





Epoch 00001: val_loss improved from inf to 0.14507, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.14507

Epoch 00003: val_loss did not improve from 0.14507

Epoch 00004: val_loss did not improve from 0.14507

Epoch 00005: val_loss did not improve from 0.14507

Epoch 00006: val_loss did not improve from 0.14507

Epoch 00007: val_loss did not improve from 0.14507

Epoch 00008: val_loss did not improve from 0.14507

Epoch 00009: val_loss did not improve from 0.14507

Epoch 00010: val_loss did not improve from 0.14507





Epoch 00001: val_loss improved from inf to 0.06237, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06237

Epoch 00003: val_loss did not improve from 0.06237

Epoch 00004: val_loss did not improve from 0.06237

Epoch 00005: val_loss did not improve from 0.06237

Epoch 00006: val_loss did not improve from 0.06237

Epoch 00007: val_loss did not improve from 0.06237

Epoch 00008: val_loss did not improve from 0.06237

Epoch 00009: val_loss did not improve from 0.06237

Epoch 00010: val_loss did not improve from 0.06237





Epoch 00001: val_loss improved from inf to 0.07543, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07543

Epoch 00003: val_loss improved from 0.07543 to 0.07407, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.07407

Epoch 00005: val_loss did not improve from 0.07407

Epoch 00006: val_loss did not improve from 0.07407

Epoch 00007: val_loss did not improve from 0.07407

Epoch 00008: val_loss did not improve from 0.07407

Epoch 00009: val_loss did not improve from 0.07407

Epoch 00010: val_loss did not improve from 0.07407




AlgoCrossValIter - 8
Model: "sequential_18"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.60679, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.60679 to 0.36455, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.36455 to 0.32556, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.32556

Epoch 00005: val_loss 




Epoch 00001: val_loss improved from inf to 0.24176, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.24176

Epoch 00003: val_loss did not improve from 0.24176

Epoch 00004: val_loss did not improve from 0.24176

Epoch 00005: val_loss did not improve from 0.24176

Epoch 00006: val_loss did not improve from 0.24176

Epoch 00007: val_loss did not improve from 0.24176

Epoch 00008: val_loss did not improve from 0.24176

Epoch 00009: val_loss did not improve from 0.24176

Epoch 00010: val_loss did not improve from 0.24176





Epoch 00001: val_loss improved from inf to 0.12369, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.12369

Epoch 00003: val_loss did not improve from 0.12369

Epoch 00004: val_loss did not improve from 0.12369

Epoch 00005: val_loss did not improve from 0.12369

Epoch 00006: val_loss did not improve from 0.12369

Epoch 00007: val_loss did not improve from 0.12369

Epoch 00008: val_loss did not improve from 0.12369

Epoch 00009: val_loss did not improve from 0.12369

Epoch 00010: val_loss did not improve from 0.12369





Epoch 00001: val_loss improved from inf to 0.11409, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.11409 to 0.06952, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.06952

Epoch 00004: val_loss did not improve from 0.06952

Epoch 00005: val_loss did not improve from 0.06952

Epoch 00006: val_loss did not improve from 0.06952

Epoch 00007: val_loss did not improve from 0.06952

Epoch 00008: val_loss did not improve from 0.06952

Epoch 00009: val_loss did not improve from 0.06952

Epoch 00010: val_loss did not improve from 0.06952





Epoch 00001: val_loss improved from inf to 0.10714, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10714

Epoch 00003: val_loss did not improve from 0.10714

Epoch 00004: val_loss did not improve from 0.10714

Epoch 00005: val_loss did not improve from 0.10714

Epoch 00006: val_loss did not improve from 0.10714

Epoch 00007: val_loss did not improve from 0.10714

Epoch 00008: val_loss did not improve from 0.10714

Epoch 00009: val_loss did not improve from 0.10714

Epoch 00010: val_loss did not improve from 0.10714





Epoch 00001: val_loss improved from inf to 0.05934, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05934

Epoch 00003: val_loss did not improve from 0.05934

Epoch 00004: val_loss did not improve from 0.05934

Epoch 00005: val_loss did not improve from 0.05934

Epoch 00006: val_loss did not improve from 0.05934

Epoch 00007: val_loss did not improve from 0.05934

Epoch 00008: val_loss did not improve from 0.05934

Epoch 00009: val_loss did not improve from 0.05934

Epoch 00010: val_loss did not improve from 0.05934





Epoch 00001: val_loss improved from inf to 0.12340, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.12340 to 0.12318, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.12318

Epoch 00004: val_loss did not improve from 0.12318

Epoch 00005: val_loss did not improve from 0.12318

Epoch 00006: val_loss did not improve from 0.12318

Epoch 00007: val_loss did not improve from 0.12318

Epoch 00008: val_loss did not improve from 0.12318

Epoch 00009: val_loss did not improve from 0.12318

Epoch 00010: val_loss did not improve from 0.12318





Epoch 00001: val_loss improved from inf to 0.14077, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.14077

Epoch 00003: val_loss did not improve from 0.14077

Epoch 00004: val_loss did not improve from 0.14077

Epoch 00005: val_loss did not improve from 0.14077

Epoch 00006: val_loss did not improve from 0.14077

Epoch 00007: val_loss did not improve from 0.14077

Epoch 00008: val_loss did not improve from 0.14077

Epoch 00009: val_loss did not improve from 0.14077

Epoch 00010: val_loss did not improve from 0.14077





Epoch 00001: val_loss improved from inf to 0.08184, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08184

Epoch 00003: val_loss did not improve from 0.08184

Epoch 00004: val_loss did not improve from 0.08184

Epoch 00005: val_loss did not improve from 0.08184

Epoch 00006: val_loss did not improve from 0.08184

Epoch 00007: val_loss did not improve from 0.08184

Epoch 00008: val_loss did not improve from 0.08184

Epoch 00009: val_loss did not improve from 0.08184

Epoch 00010: val_loss did not improve from 0.08184

Epoch 00001: val_loss improved from inf to 0.06578, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06578

Epoch 00003: val_loss did not improve from 0.06578

Epoch 00004: val_loss did not improve from 0.06578

Epoch 00005: val_loss did not improve from 0.06578

Epoch 00006: val_loss did not improve from 0.06578

Epoch 00007: val_loss did not improve from 0.06578

Epoch 00008: val_loss did not improve




Epoch 00001: val_loss improved from inf to 0.19346, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.19346

Epoch 00003: val_loss did not improve from 0.19346

Epoch 00004: val_loss did not improve from 0.19346

Epoch 00005: val_loss did not improve from 0.19346

Epoch 00006: val_loss did not improve from 0.19346

Epoch 00007: val_loss did not improve from 0.19346

Epoch 00008: val_loss did not improve from 0.19346

Epoch 00009: val_loss did not improve from 0.19346

Epoch 00010: val_loss did not improve from 0.19346





Epoch 00001: val_loss improved from inf to 0.12383, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.12383

Epoch 00003: val_loss did not improve from 0.12383

Epoch 00004: val_loss did not improve from 0.12383

Epoch 00005: val_loss did not improve from 0.12383

Epoch 00006: val_loss did not improve from 0.12383

Epoch 00007: val_loss did not improve from 0.12383

Epoch 00008: val_loss did not improve from 0.12383

Epoch 00009: val_loss did not improve from 0.12383

Epoch 00010: val_loss did not improve from 0.12383





Epoch 00001: val_loss improved from inf to 0.06997, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.06997 to 0.06293, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.06293

Epoch 00004: val_loss did not improve from 0.06293

Epoch 00005: val_loss did not improve from 0.06293

Epoch 00006: val_loss did not improve from 0.06293

Epoch 00007: val_loss did not improve from 0.06293

Epoch 00008: val_loss did not improve from 0.06293

Epoch 00009: val_loss did not improve from 0.06293

Epoch 00010: val_loss did not improve from 0.06293





Epoch 00001: val_loss improved from inf to 0.09271, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09271

Epoch 00003: val_loss did not improve from 0.09271

Epoch 00004: val_loss did not improve from 0.09271

Epoch 00005: val_loss did not improve from 0.09271

Epoch 00006: val_loss did not improve from 0.09271

Epoch 00007: val_loss did not improve from 0.09271

Epoch 00008: val_loss did not improve from 0.09271

Epoch 00009: val_loss did not improve from 0.09271

Epoch 00010: val_loss did not improve from 0.09271





Epoch 00001: val_loss improved from inf to 0.09272, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.09272 to 0.07723, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.07723

Epoch 00004: val_loss did not improve from 0.07723

Epoch 00005: val_loss did not improve from 0.07723

Epoch 00006: val_loss did not improve from 0.07723

Epoch 00007: val_loss did not improve from 0.07723

Epoch 00008: val_loss did not improve from 0.07723

Epoch 00009: val_loss did not improve from 0.07723

Epoch 00010: val_loss did not improve from 0.07723





Epoch 00001: val_loss improved from inf to 0.10762, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.10762 to 0.09998, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.09998

Epoch 00004: val_loss did not improve from 0.09998

Epoch 00005: val_loss did not improve from 0.09998

Epoch 00006: val_loss did not improve from 0.09998

Epoch 00007: val_loss did not improve from 0.09998

Epoch 00008: val_loss did not improve from 0.09998

Epoch 00009: val_loss did not improve from 0.09998

Epoch 00010: val_loss did not improve from 0.09998





Epoch 00001: val_loss improved from inf to 0.18598, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.18598 to 0.17222, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.17222

Epoch 00004: val_loss did not improve from 0.17222

Epoch 00005: val_loss did not improve from 0.17222

Epoch 00006: val_loss did not improve from 0.17222

Epoch 00007: val_loss did not improve from 0.17222

Epoch 00008: val_loss did not improve from 0.17222

Epoch 00009: val_loss did not improve from 0.17222

Epoch 00010: val_loss did not improve from 0.17222





Epoch 00001: val_loss improved from inf to 0.08400, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08400

Epoch 00003: val_loss improved from 0.08400 to 0.08305, saving model to best-model-conll.hdfs

Epoch 00004: val_loss improved from 0.08305 to 0.08300, saving model to best-model-conll.hdfs

Epoch 00005: val_loss did not improve from 0.08300

Epoch 00006: val_loss did not improve from 0.08300

Epoch 00007: val_loss did not improve from 0.08300

Epoch 00008: val_loss did not improve from 0.08300

Epoch 00009: val_loss did not improve from 0.08300

Epoch 00010: val_loss did not improve from 0.08300

Epoch 00001: val_loss improved from inf to 0.07399, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07399

Epoch 00003: val_loss improved from 0.07399 to 0.06713, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.06713

Epoch 00005: val_loss did not improve from 0.06713

Epoch 00006:



AlgoCrossValIter - 10
Model: "sequential_20"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.35741, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.35741 to 0.34913, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.34913

Epoch 00004: val_loss improved from 0.34913 to 0.31444, saving model to best-model-conll.hdfs

Epoch 00005: val_loss




Epoch 00001: val_loss improved from inf to 0.28890, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.28890

Epoch 00003: val_loss improved from 0.28890 to 0.27424, saving model to best-model-conll.hdfs

Epoch 00004: val_loss improved from 0.27424 to 0.25651, saving model to best-model-conll.hdfs

Epoch 00005: val_loss did not improve from 0.25651

Epoch 00006: val_loss did not improve from 0.25651

Epoch 00007: val_loss did not improve from 0.25651

Epoch 00008: val_loss did not improve from 0.25651

Epoch 00009: val_loss did not improve from 0.25651

Epoch 00010: val_loss did not improve from 0.25651





Epoch 00001: val_loss improved from inf to 0.08949, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08949

Epoch 00003: val_loss did not improve from 0.08949

Epoch 00004: val_loss did not improve from 0.08949

Epoch 00005: val_loss did not improve from 0.08949

Epoch 00006: val_loss did not improve from 0.08949

Epoch 00007: val_loss did not improve from 0.08949

Epoch 00008: val_loss did not improve from 0.08949

Epoch 00009: val_loss did not improve from 0.08949

Epoch 00010: val_loss did not improve from 0.08949





Epoch 00001: val_loss improved from inf to 0.08443, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08443

Epoch 00003: val_loss improved from 0.08443 to 0.07853, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.07853

Epoch 00005: val_loss did not improve from 0.07853

Epoch 00006: val_loss did not improve from 0.07853

Epoch 00007: val_loss did not improve from 0.07853

Epoch 00008: val_loss did not improve from 0.07853

Epoch 00009: val_loss did not improve from 0.07853

Epoch 00010: val_loss did not improve from 0.07853





Epoch 00001: val_loss improved from inf to 0.09504, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09504

Epoch 00003: val_loss did not improve from 0.09504

Epoch 00004: val_loss did not improve from 0.09504

Epoch 00005: val_loss did not improve from 0.09504

Epoch 00006: val_loss did not improve from 0.09504

Epoch 00007: val_loss did not improve from 0.09504

Epoch 00008: val_loss did not improve from 0.09504

Epoch 00009: val_loss did not improve from 0.09504

Epoch 00010: val_loss did not improve from 0.09504





Epoch 00001: val_loss improved from inf to 0.07339, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07339

Epoch 00003: val_loss did not improve from 0.07339

Epoch 00004: val_loss did not improve from 0.07339

Epoch 00005: val_loss did not improve from 0.07339

Epoch 00006: val_loss did not improve from 0.07339

Epoch 00007: val_loss did not improve from 0.07339

Epoch 00008: val_loss did not improve from 0.07339

Epoch 00009: val_loss did not improve from 0.07339

Epoch 00010: val_loss did not improve from 0.07339





Epoch 00001: val_loss improved from inf to 0.09146, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09146

Epoch 00003: val_loss did not improve from 0.09146

Epoch 00004: val_loss did not improve from 0.09146

Epoch 00005: val_loss did not improve from 0.09146

Epoch 00006: val_loss did not improve from 0.09146

Epoch 00007: val_loss did not improve from 0.09146

Epoch 00008: val_loss did not improve from 0.09146

Epoch 00009: val_loss did not improve from 0.09146

Epoch 00010: val_loss did not improve from 0.09146





Epoch 00001: val_loss improved from inf to 0.14900, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.14900

Epoch 00003: val_loss did not improve from 0.14900

Epoch 00004: val_loss did not improve from 0.14900

Epoch 00005: val_loss did not improve from 0.14900

Epoch 00006: val_loss did not improve from 0.14900

Epoch 00007: val_loss did not improve from 0.14900

Epoch 00008: val_loss did not improve from 0.14900

Epoch 00009: val_loss did not improve from 0.14900

Epoch 00010: val_loss did not improve from 0.14900





Epoch 00001: val_loss improved from inf to 0.05728, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05728

Epoch 00003: val_loss did not improve from 0.05728

Epoch 00004: val_loss did not improve from 0.05728

Epoch 00005: val_loss did not improve from 0.05728

Epoch 00006: val_loss did not improve from 0.05728

Epoch 00007: val_loss did not improve from 0.05728

Epoch 00008: val_loss did not improve from 0.05728

Epoch 00009: val_loss did not improve from 0.05728

Epoch 00010: val_loss did not improve from 0.05728





Epoch 00001: val_loss improved from inf to 0.06123, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06123

Epoch 00003: val_loss did not improve from 0.06123

Epoch 00004: val_loss did not improve from 0.06123

Epoch 00005: val_loss did not improve from 0.06123

Epoch 00006: val_loss did not improve from 0.06123

Epoch 00007: val_loss did not improve from 0.06123

Epoch 00008: val_loss did not improve from 0.06123

Epoch 00009: val_loss did not improve from 0.06123

Epoch 00010: val_loss did not improve from 0.06123




In [144]:
resultCrossVal.to_csv("results.csv")
resultCrossVal

Unnamed: 0,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9
P_val,86.397,84.344,85.749,79.617,82.579,82.932,85.369,82.07,85.48,85.785
P_train,87.604,84.879,85.929,85.796,86.321,86.701,88.782,84.828,86.483,87.707
P_ewo,74.503,73.489,74.294,73.415,74.278,75.325,77.132,73.578,75.166,75.009
R_val,69.718,68.357,72.334,71.084,69.425,69.607,69.607,69.627,68.126,69.834
R_train,77.168,78.573,77.822,79.185,77.915,75.903,74.128,78.008,77.289,77.647
R_ewo,63.426,65.0,64.351,66.481,64.723,62.777,60.369,65.462,63.796,64.352
F1-val,75.887,72.351,77.149,73.432,74.114,73.708,75.716,72.952,75.01,73.668
F1-train,81.864,80.821,81.131,82.107,81.563,80.428,80.53,80.342,81.206,81.532
F1-ewo,68.271,68.257,68.39,69.493,68.726,67.93,67.514,68.119,68.414,68.323


In [145]:
resultCrossVal.mean(axis=1).to_frame()

Unnamed: 0,0
P_val,84.0322
P_train,86.503
P_ewo,74.6189
R_val,69.7719
R_train,77.3638
R_ewo,64.0737
F1-val,74.3987
F1-train,81.1524
F1-ewo,68.3437


In [146]:
resultCrossVal.std(axis=1).to_frame()

Unnamed: 0,0
P_val,2.166893
P_train,1.256128
P_ewo,1.121035
R_val,1.211031
R_train,1.431245
R_ewo,1.668642
F1-val,1.498174
F1-train,0.615753
F1-ewo,0.516118


In [147]:
trainByTagResult.to_csv("results/train-by-tag.csv")
trainByTagResult

Unnamed: 0,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9
F1-O,97.784,97.652,97.668,97.761,97.715,97.652,97.68,97.537,97.716,97.764
P-O,97.034,97.256,97.129,97.28,97.125,96.907,96.644,97.147,97.066,97.091
R-O,98.552,98.079,98.23,98.258,98.327,98.425,98.746,97.964,98.385,98.468
F1-MISC,70.004,69.663,70.516,69.06,70.723,71.077,72.847,68.213,68.467,69.684
P-MISC,100.0,98.889,94.771,96.667,100.0,98.889,100.0,89.0,93.889,98.333
R-MISC,54.588,55.406,59.113,56.719,55.497,56.636,57.747,60.608,58.136,55.406
F1-PER,86.392,85.522,85.124,87.035,85.124,84.958,84.284,84.485,85.224,86.069
P-PER,86.308,86.61,85.663,85.306,86.508,86.804,88.521,87.124,86.738,86.423
R-PER,86.771,85.082,85.58,89.071,84.637,83.975,81.216,83.62,84.832,87.142
F1-LOC,64.818,70.852222,65.612,68.758889,69.888,63.169,66.15,70.544,66.264,63.986


In [148]:
trainByTagResult.mean(axis=1).to_frame()

Unnamed: 0,0
F1-O,97.6929
P-O,97.0679
R-O,98.3434
F1-MISC,70.0254
P-MISC,97.0438
R-MISC,56.9856
F1-PER,85.4217
P-PER,86.6005
R-PER,85.1926
F1-LOC,67.004211


In [149]:
trainByTagResult.std(axis=1).to_frame()

Unnamed: 0,0
F1-O,0.072611
P-O,0.182843
R-O,0.227007
F1-MISC,1.361713
P-MISC,3.569189
R-MISC,1.906094
F1-PER,0.854816
P-PER,0.863007
R-PER,2.152608
F1-LOC,2.799727


In [150]:
testByTagResult.to_csv("results/test-by-tag.csv")
testByTagResult

Unnamed: 0,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9
F1-O,97.399,97.019,97.455,97.124,97.076,97.078,97.392,97.076,97.343,97.272
P-O,96.247,96.143,96.598,96.705,96.226,96.253,96.343,96.366,96.12,96.373
R-O,98.612,97.981,98.362,97.603,97.984,97.982,98.487,97.861,98.617,98.23
F1-MISC,51.6675,44.762857,51.6675,43.401429,51.6675,51.6675,51.6675,53.334444,47.62,44.762857
P-MISC,50.0,40.0,50.0,34.0,50.0,50.0,50.0,55.0,40.0,40.0
R-MISC,36.667,26.667,36.667,31.667,36.667,36.667,36.667,46.667,30.0,26.667
F1-PER,81.612,79.837,83.201,81.204,79.374,78.775,82.283,79.535,81.31,79.53
P-PER,85.562,86.966,87.241,81.745,84.475,84.188,86.435,84.253,87.268,86.149
R-PER,80.349,77.81,81.46,82.849,77.016,76.699,80.488,80.21,77.949,79.46
F1-LOC,50.396667,63.81,50.926667,47.223333,63.334,53.705,60.002,63.095,63.334,55.556667


In [151]:
testByTagResult.mean(axis=1).to_frame()

Unnamed: 0,0
F1-O,97.2234
P-O,96.3374
R-O,98.1719
F1-MISC,49.221909
P-MISC,45.9
R-MISC,34.5003
F1-PER,80.6661
P-PER,85.4282
R-PER,79.429
F1-LOC,57.138333


In [152]:
testByTagResult.std(axis=1).to_frame()

Unnamed: 0,0
F1-O,0.165268
P-O,0.187948
R-O,0.342981
F1-MISC,3.697265
P-MISC,6.773314
R-MISC,5.986123
F1-PER,1.458388
P-PER,1.763229
R-PER,2.013488
F1-LOC,6.338574


In [153]:
ewoByTagResult.to_csv("results/ewo-by-tag.csv")

In [154]:
ewoByTagResult = pd.read_csv("results/ewo-by-tag.csv", index_col=0)
ewoByTagResult

Unnamed: 0,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9
F1-O,96.797,96.757,96.789,96.857,96.798,96.804,96.843,96.676,96.836,96.79
P-O,95.988,96.178,96.107,96.353,96.124,95.928,95.648,96.212,96.052,96.082
R-O,97.628,97.366,97.497,97.377,97.495,97.714,98.076,97.181,97.65,97.53
F1-MISC,47.251,48.918,46.958,47.051,45.523,48.554,48.251,48.519,50.282,46.077
P-MISC,75.167,74.81,67.28,71.196,71.001,71.477,72.667,66.545,71.893,70.786
R-MISC,35.0,37.5,40.0,38.75,33.75,37.5,36.25,45.0,42.5,35.0
F1-PER,75.182,74.137,74.814,76.227,74.913,74.594,74.051,74.752,74.736,75.101
P-PER,74.564,74.836,75.115,73.983,75.055,76.807,78.08,76.621,76.063,75.296
R-PER,76.375,74.375,75.75,79.0,75.875,73.5,71.125,75.25,74.75,77.0
F1-LOC,37.41,52.352222,42.197,45.458889,45.355,41.667,41.041,46.899,42.335,39.466


In [155]:
ewoByTagResult.mean(axis=1).to_frame()

Unnamed: 0,0
F1-O,96.7947
P-O,96.0672
R-O,97.5514
F1-MISC,47.7384
P-MISC,71.2822
R-MISC,38.125
F1-PER,74.8507
P-PER,75.642
R-PER,75.3
F1-LOC,43.418111


In [156]:
ewoByTagResult.std(axis=1).to_frame()

Unnamed: 0,0
F1-O,0.051195
P-O,0.189022
R-O,0.241711
F1-MISC,1.429297
P-MISC,2.761987
R-MISC,3.547789
F1-PER,0.606403
P-PER,1.235708
R-PER,2.110819
F1-LOC,4.255051


In [157]:
columns = en_fingerprints.columns

print("Pred", "Real", "Freq", "Word", sep="\t")
for c in columns:
    prediction = model.predict(en_fingerprints[c].values.reshape((1, 210)))
    pred_tag = int2tag[np.argmax(prediction)]
    real_tag = en_corpus[en_corpus.word == c].iloc[0]['ne-tag']
    
    if pred_tag != real_tag:
        print(pred_tag, real_tag, en_fingerprints[c].max(), c, sep="\t")

Pred	Real	Freq	Word
LOC	O	8.335671	restore
O	LOC	8.335671	Samaria
O	LOC	8.335671	Olivet
O	MISC	8.335671	Sabbath
PER	O	8.335671	upper
PER	O	8.335671	room
PER	O	8.335671	where
PER	O	8.335671	Zealot
LOC	O	8.335671	so
LOC	O	8.335671	language
O	MISC	8.335671	Psalms
PER	O	8.335671	forward
O	MISC	8.335671	ninth
O	PER	8.335671	Moses
O	PER	8.335671	Samuel
O	ORG	8.335671	Sadducees
PER	O	8.335671	high-priestly
PER	O	8.335671	family
PER	O	12.025831	father
O	PER	8.335671	Herod
O	PER	8.335671	Pontius
O	PER	8.335671	Barnabas
O	LOC	8.335671	Cyprus
O	PER	8.335671	Elijah
O	MISC	8.335671	r
PER	O	8.335671	Ju
PER	O	8.335671	h
LOC	O	12.025831	deportation
O	PER	8.335671	Immanuel
