In [1]:
# import
import keras
import sys
import numpy as np
import string
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import SimpleRNN, Dense
from keras.utils import np_utils, plot_model
from keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn import model_selection
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, precision_recall_fscore_support
import h5py as h5py

Using TensorFlow backend.


In [2]:
# if we are doeing binary classification. That means say if a token is a named entity or not
BINARY = False

# number of epochs for training
epochs = 10 

# the english side of the corpus
en_corpus_file = "corpus-en.txt"

# the ewondo side of the corpus
ewo_corpus_file = "corpus-ewo.txt"

# name of the file to same the model 
best_model_file = "best-model-conll.hdfs"

# The maximal number of phrases to use
max_nb_of_phrases =  -1

# the maximal number of duplicates for each word in the corpus
duplication = 1

# wether we are using only the vocabulary, ro redundancy
is_only_vocab = True

# if word should be shuffle or not
shuffle = is_only_vocab

# the number of neurons in the first layer
h1_size = 640

# number of neurons in the second layer
h2_size = 160  

In [3]:
def getTag(aString):
    """
        convert a string to a tag
    """
    tag = "O"
    if BINARY:
        if aString != "O":
            return "NE"
    else:
        tag = aString
    return tag
     

In [4]:
def load_corpus(file, max_nb_of_phrases):
    """
    Load a corpus stored in a file
    Input:
        - file: the name of the file of the corpus
        - max_nb_of_phases: maximal number of phrases to load
    
    Return:
        - a DataFrame representing the corpus
        - the number of phrases in the corpus
    """
    nb_of_phrases = 0
    dataset = {"word": [], "ne-tag": []}
    with open(file) as f:
        prev_line = None
        for cpt, line in enumerate(f):
            if cpt == 0:
                continue
            if nb_of_phrases == max_nb_of_phrases:
                break;

            l = line.strip()
            if len(l) == 0 and len(prev_line) != 0:
                nb_of_phrases += 1
                dataset["word"].append(line)
                dataset["ne-tag"].append(None)
            else:
                l = l.split("\t")
                if l[0] not in string.punctuation:
                    dataset["word"].append(l[0])
                    dataset["ne-tag"].append(ne_type(l[1]))
            prev_line = line.strip()
        
    return pd.DataFrame(dataset), nb_of_phrases+1

In [5]:
def corpus_fingerprint(aDataframe, nb_of_biphrases):
    """
    Create the distributionnal signature of each word in the corpus
    Input:
        -aDataFrame: the corpus DataFrame
        -nb_of_biphrases: number of phrases in the corpus
    Return:
        a DataFrame: corpus fingerprint, the columns are the words in the corpus
    """
    fingerprints = {}
    current_bi_phrase_index = 0
    nb_word_in_corpus = aDataframe[aDataframe.word != "\n"].word.size
    words_in_current_phrase = []
    for index, row in aDataframe.iterrows():
        if current_bi_phrase_index > nb_of_biphrases:
            break
            
        word = row['word']
        
        if word != "\n":
            words_in_current_phrase.append(word)
            if word not in fingerprints:
                fingerprints[word] = np.zeros(nb_of_biphrases, dtype=np.float32)
            fingerprints[word][current_bi_phrase_index] += 1
        else:
            nb_word_in_current_phrase = len(words_in_current_phrase)
#             for w in words_in_current_phrase:
#                 fingerprints[w][current_bi_phrase_index] = nb_word_in_corpus / fingerprints[w][current_bi_phrase_index]                
            current_bi_phrase_index += 1
            words_in_current_phrase = []
    for word in fingerprints:
        for i in range(nb_of_biphrases):
            if fingerprints[word][i] != 0:
                fingerprints[word][i] = nb_word_in_corpus / fingerprints[word][i]
#         fingerprints[word][nb_of_biphrases] = nb_word_in_corpus / aDataframe[aDataframe.word == word].word.size
        
    return pd.DataFrame(fingerprints)

In [6]:
def corpus2trainingdata(aDataframe, fingerprintsDataFrame):
    """
    Convert corpus to training data => numpy array
    
    Input:
        -aDataFrame: Corpus dataframe
        -fingerprintsDataFrame: distributionnal signature of words in the corpus
    Return:
        (X, y): X is the array of words (signature) in the corpus and y is the corresponding labels (NE tags)
    """
    X = np.zeros((aDataframe.shape[0], fingerprintsDataFrame.shape[0]), dtype=np.int8)
    y = np.zeros(aDataframe.shape[0], dtype=np.int8)
    i = 0
    for row in aDataframe.iterrows():
        X[i] = fingerprintsDataFrame[row[1]['word']].values
        y[i] = tag2int[getTag(row[1]['ne-tag'])]
        i += 1
    return X, y

In [7]:
def train_test_split(X, y, test_size = 0.33):
    total = X.shape[0]
    train_length = round(total * (1 - test_size)) 
    return X[:train_length], X[train_length:], y[:train_length], y[train_length:]

In [8]:
# A utility function to convert NE tags
def ne_type(aType):
    aType = aType.lower()
    if 'per' in aType:
        t =  'NE' if BINARY else 'PER' 
    elif 'loc' in aType:
        t =  'NE' if BINARY else 'LOC'
    elif 'org' in aType:
        t =  'NE' if BINARY else 'ORG'
    elif 'hour' in aType:
        t =  'NE' if BINARY else 'MISC'
    elif aType != 'o' and len(aType) > 0 :
        t =  'NE' if BINARY else 'MISC'
    else:
        t = 'O'
    return t

In [10]:
def P_R_F1(y_pred, y_true, neg_class):
    same = y_pred[y_true==y_pred]
    tp = same[same != neg_class].size
    nb_of_pos_exple = y_true[y_true != neg_class].size
    nb_of_pos_pred = y_pred[y_pred != neg_class].size
    p = r = f1 = 0
    try:
        p = np.round(tp*100/nb_of_pos_pred, 2)
    except ZeroDivisionError:
        print("number of correct positive predictions is 0")
        
    try:
        r = np.round(tp*100/nb_of_pos_exple, 2)
    except ZeroDivisionError:
        print("number of position exple is 0")
        
    try:
        f1 = np.round(2*r*p/(r+p), 2)
    except ZeroDivisionError:
        print("Recall and precision are 0")

    return p, r, f1

In [11]:
def shuffle_data(X, y):
    indices = [i for i in  range(X.shape[0])]
    np.random.shuffle(indices)
    return X[indices], y[indices]

In [12]:
def create_model(input_dim, output_dim):
    model = Sequential()
    model.add(Dense(h1_size, input_dim=input_dim, activation='sigmoid', name="hidden1"))
    model.add(Dense(h2_size, activation='sigmoid', name="hidden2"))
    if BINARY:
        model.add(Dense(1, activation='sigmoid', name="outputlayer"))
        model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['binary_accuracy'])
    else:
        model.add(Dense(output_dim, activation='softmax', name="outputlayer"))
        model.compile(loss='categorical_crossentropy', optimizer="rmsprop", metrics=['accuracy'])
    model.summary()
    return model

In [13]:
def train_model(model, X_train, y_train, X_val, y_val, epochs=epochs):
    # stop learning if the error is the same between two consecutive epochs
    early_stop = EarlyStopping(patience=20, verbose=2)
    
    # saving best model
    best_model_cp = ModelCheckpoint(best_model_file, save_best_only=True, verbose=1)
    
    model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=epochs, verbose=0, shuffle=shuffle, callbacks=[best_model_cp, early_stop])
    
    #loading and returning the best model
    return keras.models.load_model(best_model_file)

In [14]:
def predict(model, X, y, binary=BINARY):
    if BINARY:
        y_pred = np.round(model.predict(X))
        y_true = y
    else:
        predictions = model.predict(X)
        y_pred = np.array([np.argmax(p) for p in predictions])
        y_true = np.array([np.argmax(t) for t in y ])
    return y_true, y_pred

In [15]:
def model_performance(y_true, y_pred):
    return P_R_F1(y_pred, y_true, tag2int['O']) #precision, recall, f1-score

In [16]:
def model_performace_by_tag(y_true, y_pred, tag):
    p, r, f1 = 0, 0, 0
    
    eq = y_pred[y_pred==y_true]
    correctly_pred = eq[eq==tag].size
    try:
        p = np.round(100 * correctly_pred / y_pred[y_pred==tag].size, 2)
    except ZeroDivisionError:
        pass
    
    try:
        r = np.round(100 * correctly_pred / y_true[y_true==tag].size, 2)
    except ZeroDivisionError:
        pass
    
    try:
        f1 = np.round(2 * r * p / (r + p), 2)
    except ZeroDivisionError:
        pass
    
    return p, r, f1

In [17]:
def algoEval(X_train, y_train, X_val, y_val, X_ewo, y_ewo, epochs=epochs, model=None):
    """
    Train a model on (X, y) and validate on (X_val, y_val) then project on (X_ewo)
    """
    test_precision, train_precision, ewo_precision = [], [], []
    test_recall, train_recall, ewo_recall = [], [], []
    test_fscore, train_fscore, ewo_fscore = [], [], []
    
    test_result_by_tag = {}
    train_result_by_tag = {}
    ewo_result_by_tag = {}
    for t in tagSet:
        f1_key = "F1-"+t
        p_key = "P-"+t
        r_key = "R-"+t
        train_result_by_tag[f1_key], train_result_by_tag[p_key], train_result_by_tag[r_key] = [], [], []
        test_result_by_tag[f1_key], test_result_by_tag[p_key], test_result_by_tag[r_key] = [], [], []
        ewo_result_by_tag[f1_key], ewo_result_by_tag[p_key], ewo_result_by_tag[r_key] = [], [], []

    m = train_model(model, X_train, y_train, X_val, y_val, epochs=epochs)
        
    y_true, y_pred = predict(m, X_train, y_train)
    p_train, r_train, f1_train = model_performance(y_true, y_pred)
        
    y_true_val, y_pred_val = predict(m, X_val, y_val)
    p_val, r_val, f1_val = model_performance(y_true_val, y_pred_val)
        
    y_true_ewo, y_pred_ewo = predict(m, X_ewo, y_ewo) 
    p_ewo, r_ewo, f1_ewo = model_performance(y_true_ewo, y_pred_ewo)
        
    for t in range(len(int2tag)):
        f1_key = "F1-" + int2tag[t]
        p_key = "P-" + int2tag[t]
        r_key = "R-" + int2tag[t]
            
        p, r, f1 = model_performace_by_tag(y_true, y_pred, t)
        train_result_by_tag[p_key].append(p)
        train_result_by_tag[r_key].append(r)
        train_result_by_tag[f1_key].append(f1)
            
        p, r, f1 = model_performace_by_tag(y_true_val, y_pred_val, t)
        test_result_by_tag[p_key].append(p)
        test_result_by_tag[r_key].append(r)
        test_result_by_tag[f1_key].append(f1)
            
        p, r, f1 = model_performace_by_tag(y_true_ewo, y_pred_ewo, t)
        ewo_result_by_tag[p_key].append(p)
        ewo_result_by_tag[r_key].append(r)
        ewo_result_by_tag[f1_key].append(f1)
                
    test_precision.append(p_val)
    train_precision.append(p_train)
    ewo_precision.append(p_ewo)
        
    test_recall.append(r_val)
    train_recall.append(r_train)
    ewo_recall.append(r_ewo)
        
    test_fscore.append(f1_val)
    train_fscore.append(f1_train)
    ewo_fscore.append(f1_ewo)
    return pd.DataFrame({
        'P_test': test_precision, 
        'P_train': train_precision, 
        'P_ewo': ewo_precision, 'R_test': test_recall, 'R_train': train_recall, 
        'R_ewo': ewo_recall, 'F1-test': test_fscore, 'F1-train': train_fscore, 'F1-ewo': ewo_fscore}), pd.DataFrame(train_result_by_tag), pd.DataFrame(test_result_by_tag), pd.DataFrame(ewo_result_by_tag)

In [18]:
def algoCrossVal(X, y, X_ewo, y_ewo, k = 10, repeat=1): 
    """
    Traing a model with k-fold cross validation
    We train the model `repeat` times to check it's stability
    """
    block_size = int(X.shape[0] / k)   
    output = None
    model = None
    train_by_tags, test_by_tags, ewo_by_tags = None, None, None
    for it in range(repeat):
        print("AlgoCrossValIter -", it+1)
        model = create_model(X.shape[1], len(tagSet))
        results = None
        train_by_tagsTmp, test_by_tagsTmp, ewo_by_tagsTmp = None, None, None
        for i in range(k):
            X_val, y_val = X[i*block_size:i*block_size+block_size], y[i*block_size:i*block_size+block_size]
            X_train = np.concatenate((X[0:i*block_size], X[i*block_size+block_size:]))
            y_train = np.concatenate((y[0:i*block_size], y[i*block_size+block_size:]))

            X_train = X_train.reshape(X_train.shape[0], X_train.shape[1])
            X_val = X_val.reshape(X_val.shape[0], X_val.shape[1])

            result, train_by_tag, test_by_tag, ewo_by_tag = algoEval(X_train, y_train, X_val, y_val, X_ewo, y_ewo, model=model)
            if results is None:
                results = result.copy()
                train_by_tagsTmp, test_by_tagsTmp, ewo_by_tagsTmp = train_by_tag.copy(), test_by_tag.copy(), ewo_by_tag.copy()
            else:
                results = pd.concat([results, result], ignore_index=True)
                train_by_tagsTmp = pd.concat([train_by_tagsTmp, train_by_tag], ignore_index=True)
                test_by_tagsTmp = pd.concat([test_by_tagsTmp, test_by_tag], ignore_index=True)
                ewo_by_tagsTmp = pd.concat([ewo_by_tagsTmp, ewo_by_tag], ignore_index=True)
        
        if output is None:
            output = results.mean(axis=0).to_frame()
            train_by_tags = train_by_tagsTmp.mean(axis=0).to_frame()
            test_by_tags = test_by_tagsTmp.mean(axis=0).to_frame()
            ewo_by_tags = ewo_by_tagsTmp.mean(axis=0).to_frame()
        else:
            output = pd.concat([output, results.mean(axis=0).to_frame()], axis=1)
            train_by_tags = pd.concat([train_by_tags, train_by_tagsTmp.mean(axis=0).to_frame()], axis=1)
            test_by_tags = pd.concat([test_by_tags, test_by_tagsTmp.mean(axis=0).to_frame()], axis=1)
            ewo_by_tags = pd.concat([ewo_by_tags, ewo_by_tagsTmp.mean(axis=0).to_frame()], axis=1)

    return output, train_by_tags, test_by_tags, ewo_by_tags, model

In [19]:
en_corpus, en_nb_of_phrases = load_corpus(en_corpus_file, max_nb_of_phrases)

In [20]:
en_corpus.head()
en_corpus.loc[en_corpus['ne-tag'] == 'ORG']

Unnamed: 0,word,ne-tag
1335,Sadducees,ORG


In [21]:
tagSet = en_corpus["ne-tag"].dropna().unique()
if BINARY:
    tagSet = ['NE', 'O']
tag2int = {j: i for i, j in enumerate(tagSet)}
int2tag = {i: j for i, j in enumerate(tagSet)}
print(tag2int)

{'O': 0, 'MISC': 1, 'PER': 2, 'LOC': 3, 'ORG': 4}


In [22]:
en_nb_of_phrases

210

In [23]:
en_corpus.describe()

Unnamed: 0,word,ne-tag
count,4379,4170
unique,904,5
top,the,O
freq,313,3779


In [24]:
en_corpus.head(10)

Unnamed: 0,word,ne-tag
0,The,O
1,Promise,O
2,of,O
3,the,O
4,Holy,MISC
5,Spirit,MISC
6,\n,
7,In,O
8,the,O
9,first,O


In [25]:
for tag in tagSet:
    print("{0} % = {1} %".format(tag, np.round(en_corpus[en_corpus['ne-tag']==tag].shape[0] * 100 / en_corpus[en_corpus['ne-tag']!='\n'].shape[0], 2)))

O % = 86.3 %
MISC % = 2.4 %
PER % = 5.59 %
LOC % = 0.91 %
ORG % = 0.02 %


In [26]:
for tag in tagSet:
    print("{0} % = {1} %".format(tag, np.round(en_corpus[en_corpus['ne-tag']==tag].word.unique().shape[0] * 100 / en_corpus[en_corpus['ne-tag']!='\n'].word.unique().shape[0], 2)))

O % = 89.16 %
MISC % = 1.88 %
PER % = 8.96 %
LOC % = 1.99 %
ORG % = 0.11 %


In [27]:
en_corpus[en_corpus.word == "\n"].shape

(209, 2)

In [28]:
print("Nb of bi-phrases", en_nb_of_phrases)
en_fingerprints = corpus_fingerprint(en_corpus, en_nb_of_phrases)

Nb of bi-phrases 210


In [29]:
en_fingerprints.head(5)

Unnamed: 0,The,Promise,of,the,Holy,Spirit,In,first,book,O,...,considered,dream,She,save,fulfill,Immanuel,us),woke,sleep,knew
0,4170.0,4170.0,4170.0,4170.0,4170.0,4170.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,4170.0,0.0,0.0,4170.0,4170.0,4170.0,4170.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,1390.0,4170.0,4170.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,4170.0,4170.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,4170.0,2085.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [30]:
en_fingerprints['you'].values.shape

(210,)

In [31]:
en_corpus[en_corpus.word != "\n"].shape

(4170, 2)

In [32]:
if is_only_vocab:
    text = list(en_corpus[en_corpus.word != "\n"].word.unique())
else:
    text = list(en_corpus[en_corpus.word != "\n"].word)
en_vocab = pd.DataFrame({'text': text})
en_vocab.describe()

Unnamed: 0,text
count,903
unique,903
top,beforehand
freq,1


In [33]:
if is_only_vocab:
    X = np.zeros((en_vocab.shape[0] * duplication, en_nb_of_phrases))
    target = np.zeros((en_vocab.shape[0] * duplication))
    p=0
    for i, row in en_vocab.iterrows():
        c = row.text
        for j in range(duplication):
            X[p] = en_fingerprints[c.split(" ")[0]]
            target[p] = tag2int[getTag(en_corpus[en_corpus.word == c.split(" ")[-1:][0]]['ne-tag'].iloc[0])]
            p+=1
    X, target = shuffle_data(X, target)
    print(X.shape, en_fingerprints.shape, target.shape)

(903, 210) (210, 903) (903,)


In [34]:
en_vocab[-20:]

Unnamed: 0,text
883,Eliud
884,Eleazar
885,Matthan
886,husband
887,fourteen
888,unwilling
889,shame
890,resolved
891,divorce
892,quietly


In [35]:
if not is_only_vocab:
    X, target = corpus2trainingdata(en_corpus[en_corpus.word != "\n"], en_fingerprints)

In [36]:
if shuffle:
    X, target = shuffle_data(X, target)

In [37]:
y = target.copy()
y[0:100]
if not BINARY:
    y = np_utils.to_categorical(y, len(tagSet))
y.shape

(903, 5)

In [38]:
from sklearn.decomposition import PCA

def visualize(X, y):
    pca = PCA(n_components=2)
    X_embeded = pca.fit_transform(X)
    plt.figure(figsize=(5, 5))
    plt.scatter(X_embeded[:, 0], X_embeded[:, 1], c=y)
    plt.legend()
    plt.show()

In [39]:
# visualize(X, target)

In [40]:
X_train, X_val, y_train, y_val = model_selection.train_test_split(X, y, test_size=0.33)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1])
X_val = X_val.reshape(X_val.shape[0], X_val.shape[1])
print("X_train.shape =", X_train.shape)
print("y_train.shape =", y_train.shape)
print("X_val.shape =", X_val.shape)
print("y_val.shape =", y_val.shape)

tTarget = np.array([np.argmax(yy) for yy in y_train])
vTarget = np.array([np.argmax(yy) for yy in y_val])

for tag in tagSet:
    print("{0} % in training data = {1} %".format(tag, np.round(tTarget[tTarget==tag2int[tag]].size * 100 / tTarget.shape[0], 2)))
    print("{0} % in validation data = {1} %".format(tag, np.round(vTarget[vTarget==tag2int[tag]].size * 100 / vTarget.shape[0], 2)))

X_train.shape = (605, 210)
y_train.shape = (605, 5)
X_val.shape = (298, 210)
y_val.shape = (298, 5)
O % in training data = 87.77 %
O % in validation data = 89.26 %
MISC % in training data = 1.16 %
MISC % in validation data = 1.34 %
PER % in training data = 9.09 %
PER % in validation data = 7.38 %
LOC % in training data = 1.98 %
LOC % in validation data = 1.68 %
ORG % in training data = 0.0 %
ORG % in validation data = 0.34 %


In [41]:
ewo_corpus, ewo_nb_of_phrases = load_corpus(ewo_corpus_file, max_nb_of_phrases)

In [42]:
ewo_corpus.loc[ewo_corpus['ne-tag'] == 'PER']

Unnamed: 0,word,ne-tag
6,Teofil,PER
15,Yesus,PER
86,Yohannes,PER
104,Yesus,PER
230,Yesus,PER
...,...,...
3676,Maria,PER
3697,Yesus,PER
3740,Emmanuel,PER
3750,Yosef,PER


In [43]:
ewo_nb_of_phrases

210

In [44]:
for tag in tagSet:
    print("{0} % = {1} %".format(tag, np.round(ewo_corpus[ewo_corpus['ne-tag']==tag].shape[0] * 100 / ewo_corpus[ewo_corpus['ne-tag']!='\n'].shape[0], 2)))

O % = 84.15 %
MISC % = 2.54 %
PER % = 6.69 %
LOC % = 1.03 %
ORG % = 0.05 %


In [45]:
for tag in tagSet:
    print("{0} % = {1} %".format(tag, np.round(ewo_corpus[ewo_corpus['ne-tag']==tag].word.unique().shape[0] * 100 / ewo_corpus[ewo_corpus['ne-tag']!='\n'].word.unique().shape[0], 2)))

O % = 89.94 %
MISC % = 1.17 %
PER % = 8.3 %
LOC % = 1.86 %
ORG % = 0.2 %


In [46]:
ewo_corpus.describe()

Unnamed: 0,word,ne-tag
count,3779,3570
unique,1024,5
top,\n,O
freq,209,3180


In [47]:
ewo_corpus.head()

Unnamed: 0,word,ne-tag
0,Mfufub,MISC
1,Nsisim,MISC
2,ayi,O
3,sò,O
4,\n,


In [48]:
ewo_fingerprints = corpus_fingerprint(ewo_corpus, en_nb_of_phrases)

In [49]:
if is_only_vocab:
    text = list(ewo_corpus[ewo_corpus.word != "\n"].word.unique())
else:
    text = list(ewo_corpus[ewo_corpus.word != "\n"].word)
ewo_vocab = pd.DataFrame({"text":text})

In [50]:
if is_only_vocab:
    X_ewo = np.zeros((ewo_vocab.shape[0] * duplication, en_nb_of_phrases))
    ewo_target = np.zeros((ewo_vocab.shape[0] * duplication))
    p=0
    for i, row in ewo_vocab.iterrows():
        c = row.text
        for j in range(duplication):
            X_ewo[p] = ewo_fingerprints[c.split(" ")[0]]
            ewo_target[p] = tag2int[getTag(ewo_corpus[ewo_corpus.word == c.split(" ")[-1:][0]]['ne-tag'].iloc[0])]
            p+=1
    X_ewo, ewo_target = shuffle_data(X_ewo, ewo_target)

In [51]:
ewo_vocab[-10:]

Unnamed: 0,text
1013,sik
1014,Ntud
1015,bëyole
1016,Emmanuel
1017,Avëbë
1018,angavëbë
1019,oyò
1020,angabende
1021,anganòṅ
1022,angayole


In [52]:
if not is_only_vocab:
    X_ewo, ewo_target = corpus2trainingdata(ewo_corpus[ewo_corpus.word != "\n"], ewo_fingerprints)

In [53]:
if shuffle:
    X_ewo, ewo_target = shuffle_data(X_ewo, ewo_target)

In [54]:
y_ewo = ewo_target.copy()
print(y_ewo.shape, len(ewo_vocab))

(1023,) 1023


In [55]:
X_ewo.shape

(1023, 210)

In [56]:
y_ewo = ewo_target.copy()
y_ewo[:20]
if not BINARY:
    y_ewo = np_utils.to_categorical(y_ewo)

In [57]:
X_ewo = X_ewo.reshape((X_ewo.shape[0], en_nb_of_phrases))

In [58]:
# model = create_model(X.shape[1], len(tagSet))
# resultEval, train_by_tag, test_by_tag, ewo_by_tag = algoEval(X_train, y_train, X_val, y_val, X_ewo, y_ewo, model=model)

In [59]:
# resultEval

In [60]:
# train_by_tag

In [61]:
# test_by_tag

In [62]:
# ewo_by_tag

In [63]:
# resultEval.mean()

In [64]:
# resultEval.std()

In [65]:
resultCrossVal, trainByTagResult, testByTagResult, ewoByTagResult, model = algoCrossVal(X, y, X_ewo, y_ewo, repeat=10)

AlgoCrossValIter - 1
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.40357, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.40357 to 0.36072, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.36072 to 0.32171, saving model to best-model-conll.hdfs

Epoch 00004: val_loss improved from 0.32171 to 0.29767, saving model to bes




Epoch 00001: val_loss improved from inf to 0.13868, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.13868

Epoch 00003: val_loss did not improve from 0.13868

Epoch 00004: val_loss did not improve from 0.13868

Epoch 00005: val_loss did not improve from 0.13868

Epoch 00006: val_loss did not improve from 0.13868

Epoch 00007: val_loss did not improve from 0.13868

Epoch 00008: val_loss did not improve from 0.13868

Epoch 00009: val_loss did not improve from 0.13868

Epoch 00010: val_loss did not improve from 0.13868





Epoch 00001: val_loss improved from inf to 0.07114, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07114

Epoch 00003: val_loss did not improve from 0.07114

Epoch 00004: val_loss did not improve from 0.07114

Epoch 00005: val_loss did not improve from 0.07114

Epoch 00006: val_loss did not improve from 0.07114

Epoch 00007: val_loss did not improve from 0.07114

Epoch 00008: val_loss did not improve from 0.07114

Epoch 00009: val_loss did not improve from 0.07114

Epoch 00010: val_loss did not improve from 0.07114





Epoch 00001: val_loss improved from inf to 0.09692, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.09692 to 0.09077, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.09077

Epoch 00004: val_loss did not improve from 0.09077

Epoch 00005: val_loss did not improve from 0.09077

Epoch 00006: val_loss did not improve from 0.09077

Epoch 00007: val_loss did not improve from 0.09077

Epoch 00008: val_loss did not improve from 0.09077

Epoch 00009: val_loss did not improve from 0.09077

Epoch 00010: val_loss did not improve from 0.09077





Epoch 00001: val_loss improved from inf to 0.05832, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05832

Epoch 00003: val_loss did not improve from 0.05832

Epoch 00004: val_loss did not improve from 0.05832

Epoch 00005: val_loss did not improve from 0.05832

Epoch 00006: val_loss did not improve from 0.05832

Epoch 00007: val_loss did not improve from 0.05832

Epoch 00008: val_loss did not improve from 0.05832

Epoch 00009: val_loss did not improve from 0.05832

Epoch 00010: val_loss did not improve from 0.05832





Epoch 00001: val_loss improved from inf to 0.02907, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.02907

Epoch 00003: val_loss did not improve from 0.02907

Epoch 00004: val_loss did not improve from 0.02907

Epoch 00005: val_loss did not improve from 0.02907

Epoch 00006: val_loss did not improve from 0.02907

Epoch 00007: val_loss did not improve from 0.02907

Epoch 00008: val_loss did not improve from 0.02907

Epoch 00009: val_loss did not improve from 0.02907

Epoch 00010: val_loss did not improve from 0.02907





Epoch 00001: val_loss improved from inf to 0.09674, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.09674 to 0.08404, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.08404

Epoch 00004: val_loss did not improve from 0.08404

Epoch 00005: val_loss did not improve from 0.08404

Epoch 00006: val_loss did not improve from 0.08404

Epoch 00007: val_loss did not improve from 0.08404

Epoch 00008: val_loss did not improve from 0.08404

Epoch 00009: val_loss did not improve from 0.08404

Epoch 00010: val_loss did not improve from 0.08404

Epoch 00001: val_loss improved from inf to 0.08972, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.08972 to 0.07954, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.07954

Epoch 00004: val_loss did not improve from 0.07954

Epoch 00005: val_loss did not improve from 0.07954

Epoch 00006: val_loss did not improve from 0.07954

Epo




Epoch 00001: val_loss improved from inf to 0.05033, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05033

Epoch 00003: val_loss did not improve from 0.05033

Epoch 00004: val_loss did not improve from 0.05033

Epoch 00005: val_loss did not improve from 0.05033

Epoch 00006: val_loss did not improve from 0.05033

Epoch 00007: val_loss did not improve from 0.05033

Epoch 00008: val_loss did not improve from 0.05033

Epoch 00009: val_loss did not improve from 0.05033

Epoch 00010: val_loss did not improve from 0.05033





Epoch 00001: val_loss improved from inf to 0.08612, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08612

Epoch 00003: val_loss did not improve from 0.08612

Epoch 00004: val_loss did not improve from 0.08612

Epoch 00005: val_loss did not improve from 0.08612

Epoch 00006: val_loss did not improve from 0.08612

Epoch 00007: val_loss did not improve from 0.08612

Epoch 00008: val_loss did not improve from 0.08612

Epoch 00009: val_loss did not improve from 0.08612

Epoch 00010: val_loss did not improve from 0.08612




AlgoCrossValIter - 2
Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.45720, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.45720 to 0.41514, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.41514

Epoch 00004: val_loss improved from 0.41514 to 0.40879, saving model to best-model-conll.hdfs

Epoch 00005: val_loss i




Epoch 00001: val_loss improved from inf to 0.16141, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.16141

Epoch 00003: val_loss did not improve from 0.16141

Epoch 00004: val_loss did not improve from 0.16141

Epoch 00005: val_loss did not improve from 0.16141

Epoch 00006: val_loss did not improve from 0.16141

Epoch 00007: val_loss did not improve from 0.16141

Epoch 00008: val_loss did not improve from 0.16141

Epoch 00009: val_loss did not improve from 0.16141

Epoch 00010: val_loss did not improve from 0.16141





Epoch 00001: val_loss improved from inf to 0.08393, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08393

Epoch 00003: val_loss did not improve from 0.08393

Epoch 00004: val_loss did not improve from 0.08393

Epoch 00005: val_loss did not improve from 0.08393

Epoch 00006: val_loss did not improve from 0.08393

Epoch 00007: val_loss did not improve from 0.08393

Epoch 00008: val_loss did not improve from 0.08393

Epoch 00009: val_loss did not improve from 0.08393

Epoch 00010: val_loss did not improve from 0.08393





Epoch 00001: val_loss improved from inf to 0.09233, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.09233 to 0.09213, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.09213

Epoch 00004: val_loss did not improve from 0.09213

Epoch 00005: val_loss did not improve from 0.09213

Epoch 00006: val_loss did not improve from 0.09213

Epoch 00007: val_loss did not improve from 0.09213

Epoch 00008: val_loss did not improve from 0.09213

Epoch 00009: val_loss did not improve from 0.09213

Epoch 00010: val_loss did not improve from 0.09213





Epoch 00001: val_loss improved from inf to 0.11717, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.11717 to 0.06716, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.06716

Epoch 00004: val_loss did not improve from 0.06716

Epoch 00005: val_loss did not improve from 0.06716

Epoch 00006: val_loss did not improve from 0.06716

Epoch 00007: val_loss did not improve from 0.06716

Epoch 00008: val_loss did not improve from 0.06716

Epoch 00009: val_loss did not improve from 0.06716

Epoch 00010: val_loss did not improve from 0.06716





Epoch 00001: val_loss improved from inf to 0.04540, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04540

Epoch 00003: val_loss did not improve from 0.04540

Epoch 00004: val_loss did not improve from 0.04540

Epoch 00005: val_loss did not improve from 0.04540

Epoch 00006: val_loss did not improve from 0.04540

Epoch 00007: val_loss did not improve from 0.04540

Epoch 00008: val_loss did not improve from 0.04540

Epoch 00009: val_loss did not improve from 0.04540

Epoch 00010: val_loss did not improve from 0.04540





Epoch 00001: val_loss improved from inf to 0.07398, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07398

Epoch 00003: val_loss did not improve from 0.07398

Epoch 00004: val_loss did not improve from 0.07398

Epoch 00005: val_loss did not improve from 0.07398

Epoch 00006: val_loss did not improve from 0.07398

Epoch 00007: val_loss did not improve from 0.07398

Epoch 00008: val_loss did not improve from 0.07398

Epoch 00009: val_loss did not improve from 0.07398

Epoch 00010: val_loss did not improve from 0.07398





Epoch 00001: val_loss improved from inf to 0.08193, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08193

Epoch 00003: val_loss did not improve from 0.08193

Epoch 00004: val_loss did not improve from 0.08193

Epoch 00005: val_loss did not improve from 0.08193

Epoch 00006: val_loss did not improve from 0.08193

Epoch 00007: val_loss did not improve from 0.08193

Epoch 00008: val_loss did not improve from 0.08193

Epoch 00009: val_loss did not improve from 0.08193

Epoch 00010: val_loss did not improve from 0.08193





Epoch 00001: val_loss improved from inf to 0.06995, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.06995 to 0.05922, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.05922

Epoch 00004: val_loss did not improve from 0.05922

Epoch 00005: val_loss did not improve from 0.05922

Epoch 00006: val_loss did not improve from 0.05922

Epoch 00007: val_loss did not improve from 0.05922

Epoch 00008: val_loss did not improve from 0.05922

Epoch 00009: val_loss did not improve from 0.05922

Epoch 00010: val_loss did not improve from 0.05922





Epoch 00001: val_loss improved from inf to 0.09654, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.09654 to 0.08872, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.08872

Epoch 00004: val_loss did not improve from 0.08872

Epoch 00005: val_loss did not improve from 0.08872

Epoch 00006: val_loss did not improve from 0.08872

Epoch 00007: val_loss did not improve from 0.08872

Epoch 00008: val_loss did not improve from 0.08872

Epoch 00009: val_loss did not improve from 0.08872

Epoch 00010: val_loss did not improve from 0.08872




AlgoCrossValIter - 3
Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.41875, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.41875

Epoch 00003: val_loss improved from 0.41875 to 0.39373, saving model to best-model-conll.hdfs

Epoch 00004: val_loss improved from 0.39373 to 0.29935, saving model to best-model-conll.hdfs

Epoch 00005: val_loss i




Epoch 00001: val_loss improved from inf to 0.11554, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.11554

Epoch 00003: val_loss did not improve from 0.11554

Epoch 00004: val_loss did not improve from 0.11554

Epoch 00005: val_loss did not improve from 0.11554

Epoch 00006: val_loss did not improve from 0.11554

Epoch 00007: val_loss did not improve from 0.11554

Epoch 00008: val_loss did not improve from 0.11554

Epoch 00009: val_loss did not improve from 0.11554

Epoch 00010: val_loss did not improve from 0.11554





Epoch 00001: val_loss improved from inf to 0.12108, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.12108 to 0.09094, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.09094

Epoch 00004: val_loss did not improve from 0.09094

Epoch 00005: val_loss did not improve from 0.09094

Epoch 00006: val_loss did not improve from 0.09094

Epoch 00007: val_loss improved from 0.09094 to 0.08793, saving model to best-model-conll.hdfs

Epoch 00008: val_loss did not improve from 0.08793

Epoch 00009: val_loss did not improve from 0.08793

Epoch 00010: val_loss did not improve from 0.08793





Epoch 00001: val_loss improved from inf to 0.06743, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06743

Epoch 00003: val_loss did not improve from 0.06743

Epoch 00004: val_loss did not improve from 0.06743

Epoch 00005: val_loss did not improve from 0.06743

Epoch 00006: val_loss did not improve from 0.06743

Epoch 00007: val_loss did not improve from 0.06743

Epoch 00008: val_loss did not improve from 0.06743

Epoch 00009: val_loss did not improve from 0.06743

Epoch 00010: val_loss did not improve from 0.06743

Epoch 00001: val_loss improved from inf to 0.09427, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.09427 to 0.07870, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.07870

Epoch 00004: val_loss improved from 0.07870 to 0.07846, saving model to best-model-conll.hdfs

Epoch 00005: val_loss did not improve from 0.07846

Epoch 00006: val_loss did not improve from 0.07846

Epo




Epoch 00001: val_loss improved from inf to 0.08680, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08680

Epoch 00003: val_loss did not improve from 0.08680

Epoch 00004: val_loss did not improve from 0.08680

Epoch 00005: val_loss did not improve from 0.08680

Epoch 00006: val_loss did not improve from 0.08680

Epoch 00007: val_loss did not improve from 0.08680

Epoch 00008: val_loss did not improve from 0.08680

Epoch 00009: val_loss did not improve from 0.08680

Epoch 00010: val_loss did not improve from 0.08680

Epoch 00001: val_loss improved from inf to 0.07789, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07789

Epoch 00003: val_loss did not improve from 0.07789

Epoch 00004: val_loss did not improve from 0.07789

Epoch 00005: val_loss did not improve from 0.07789

Epoch 00006: val_loss did not improve from 0.07789

Epoch 00007: val_loss did not improve from 0.07789

Epoch 00008: val_loss did not improve




Epoch 00001: val_loss improved from inf to 0.09986, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.09986 to 0.09878, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.09878

Epoch 00004: val_loss did not improve from 0.09878

Epoch 00005: val_loss did not improve from 0.09878

Epoch 00006: val_loss did not improve from 0.09878

Epoch 00007: val_loss did not improve from 0.09878

Epoch 00008: val_loss did not improve from 0.09878

Epoch 00009: val_loss did not improve from 0.09878

Epoch 00010: val_loss did not improve from 0.09878




AlgoCrossValIter - 4
Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.44326, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.44326 to 0.42889, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.42889 to 0.32257, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.32257

Epoch 00005: val_loss d




Epoch 00001: val_loss improved from inf to 0.16912, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.16912

Epoch 00003: val_loss did not improve from 0.16912

Epoch 00004: val_loss did not improve from 0.16912

Epoch 00005: val_loss did not improve from 0.16912

Epoch 00006: val_loss did not improve from 0.16912

Epoch 00007: val_loss did not improve from 0.16912

Epoch 00008: val_loss did not improve from 0.16912

Epoch 00009: val_loss did not improve from 0.16912

Epoch 00010: val_loss did not improve from 0.16912





Epoch 00001: val_loss improved from inf to 0.07733, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07733

Epoch 00003: val_loss did not improve from 0.07733

Epoch 00004: val_loss did not improve from 0.07733

Epoch 00005: val_loss did not improve from 0.07733

Epoch 00006: val_loss did not improve from 0.07733

Epoch 00007: val_loss did not improve from 0.07733

Epoch 00008: val_loss did not improve from 0.07733

Epoch 00009: val_loss did not improve from 0.07733

Epoch 00010: val_loss did not improve from 0.07733





Epoch 00001: val_loss improved from inf to 0.12771, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.12771 to 0.08931, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.08931

Epoch 00004: val_loss did not improve from 0.08931

Epoch 00005: val_loss did not improve from 0.08931

Epoch 00006: val_loss did not improve from 0.08931

Epoch 00007: val_loss did not improve from 0.08931

Epoch 00008: val_loss did not improve from 0.08931

Epoch 00009: val_loss did not improve from 0.08931

Epoch 00010: val_loss did not improve from 0.08931





Epoch 00001: val_loss improved from inf to 0.07902, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07902

Epoch 00003: val_loss did not improve from 0.07902

Epoch 00004: val_loss did not improve from 0.07902

Epoch 00005: val_loss did not improve from 0.07902

Epoch 00006: val_loss did not improve from 0.07902

Epoch 00007: val_loss did not improve from 0.07902

Epoch 00008: val_loss did not improve from 0.07902

Epoch 00009: val_loss did not improve from 0.07902

Epoch 00010: val_loss did not improve from 0.07902





Epoch 00001: val_loss improved from inf to 0.04177, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04177

Epoch 00003: val_loss did not improve from 0.04177

Epoch 00004: val_loss did not improve from 0.04177

Epoch 00005: val_loss did not improve from 0.04177

Epoch 00006: val_loss did not improve from 0.04177

Epoch 00007: val_loss did not improve from 0.04177

Epoch 00008: val_loss did not improve from 0.04177

Epoch 00009: val_loss did not improve from 0.04177

Epoch 00010: val_loss did not improve from 0.04177





Epoch 00001: val_loss improved from inf to 0.14691, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.14691 to 0.07908, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.07908

Epoch 00004: val_loss did not improve from 0.07908

Epoch 00005: val_loss did not improve from 0.07908

Epoch 00006: val_loss did not improve from 0.07908

Epoch 00007: val_loss did not improve from 0.07908

Epoch 00008: val_loss did not improve from 0.07908

Epoch 00009: val_loss did not improve from 0.07908

Epoch 00010: val_loss did not improve from 0.07908

Epoch 00001: val_loss improved from inf to 0.07793, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07793

Epoch 00003: val_loss did not improve from 0.07793

Epoch 00004: val_loss did not improve from 0.07793

Epoch 00005: val_loss did not improve from 0.07793

Epoch 00006: val_loss did not improve from 0.07793

Epoch 00007: val_loss did not improve from 0.0




Epoch 00001: val_loss improved from inf to 0.05263, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05263

Epoch 00003: val_loss did not improve from 0.05263

Epoch 00004: val_loss did not improve from 0.05263

Epoch 00005: val_loss did not improve from 0.05263

Epoch 00006: val_loss did not improve from 0.05263

Epoch 00007: val_loss did not improve from 0.05263

Epoch 00008: val_loss did not improve from 0.05263

Epoch 00009: val_loss did not improve from 0.05263

Epoch 00010: val_loss did not improve from 0.05263





Epoch 00001: val_loss improved from inf to 0.07881, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07881

Epoch 00003: val_loss did not improve from 0.07881

Epoch 00004: val_loss did not improve from 0.07881

Epoch 00005: val_loss did not improve from 0.07881

Epoch 00006: val_loss did not improve from 0.07881

Epoch 00007: val_loss did not improve from 0.07881

Epoch 00008: val_loss did not improve from 0.07881

Epoch 00009: val_loss did not improve from 0.07881

Epoch 00010: val_loss did not improve from 0.07881




AlgoCrossValIter - 5
Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.46890, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.46890 to 0.36376, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.36376 to 0.35967, saving model to best-model-conll.hdfs

Epoch 00004: val_loss improved from 0.35967 to 0.31660, saving model to bes




Epoch 00001: val_loss improved from inf to 0.13511, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.13511

Epoch 00003: val_loss did not improve from 0.13511

Epoch 00004: val_loss did not improve from 0.13511

Epoch 00005: val_loss did not improve from 0.13511

Epoch 00006: val_loss did not improve from 0.13511

Epoch 00007: val_loss did not improve from 0.13511

Epoch 00008: val_loss did not improve from 0.13511

Epoch 00009: val_loss did not improve from 0.13511

Epoch 00010: val_loss did not improve from 0.13511





Epoch 00001: val_loss improved from inf to 0.07937, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07937

Epoch 00003: val_loss did not improve from 0.07937

Epoch 00004: val_loss did not improve from 0.07937

Epoch 00005: val_loss did not improve from 0.07937

Epoch 00006: val_loss did not improve from 0.07937

Epoch 00007: val_loss did not improve from 0.07937

Epoch 00008: val_loss did not improve from 0.07937

Epoch 00009: val_loss did not improve from 0.07937

Epoch 00010: val_loss did not improve from 0.07937





Epoch 00001: val_loss improved from inf to 0.07378, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07378

Epoch 00003: val_loss did not improve from 0.07378

Epoch 00004: val_loss did not improve from 0.07378

Epoch 00005: val_loss did not improve from 0.07378

Epoch 00006: val_loss did not improve from 0.07378

Epoch 00007: val_loss did not improve from 0.07378

Epoch 00008: val_loss did not improve from 0.07378

Epoch 00009: val_loss did not improve from 0.07378

Epoch 00010: val_loss did not improve from 0.07378

Epoch 00001: val_loss improved from inf to 0.07831, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07831

Epoch 00003: val_loss did not improve from 0.07831

Epoch 00004: val_loss did not improve from 0.07831

Epoch 00005: val_loss did not improve from 0.07831

Epoch 00006: val_loss did not improve from 0.07831

Epoch 00007: val_loss did not improve from 0.07831

Epoch 00008: val_loss did not improve




Epoch 00001: val_loss improved from inf to 0.05761, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05761

Epoch 00003: val_loss did not improve from 0.05761

Epoch 00004: val_loss did not improve from 0.05761

Epoch 00005: val_loss improved from 0.05761 to 0.04639, saving model to best-model-conll.hdfs

Epoch 00006: val_loss did not improve from 0.04639

Epoch 00007: val_loss did not improve from 0.04639

Epoch 00008: val_loss did not improve from 0.04639

Epoch 00009: val_loss did not improve from 0.04639

Epoch 00010: val_loss did not improve from 0.04639





Epoch 00001: val_loss improved from inf to 0.07444, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07444

Epoch 00003: val_loss did not improve from 0.07444

Epoch 00004: val_loss did not improve from 0.07444

Epoch 00005: val_loss did not improve from 0.07444

Epoch 00006: val_loss did not improve from 0.07444

Epoch 00007: val_loss did not improve from 0.07444

Epoch 00008: val_loss did not improve from 0.07444

Epoch 00009: val_loss did not improve from 0.07444

Epoch 00010: val_loss did not improve from 0.07444

Epoch 00001: val_loss improved from inf to 0.08616, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08616

Epoch 00003: val_loss did not improve from 0.08616

Epoch 00004: val_loss did not improve from 0.08616

Epoch 00005: val_loss did not improve from 0.08616

Epoch 00006: val_loss did not improve from 0.08616

Epoch 00007: val_loss did not improve from 0.08616

Epoch 00008: val_loss did not improve




Epoch 00001: val_loss improved from inf to 0.08966, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.08966 to 0.08788, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.08788

Epoch 00004: val_loss did not improve from 0.08788

Epoch 00005: val_loss did not improve from 0.08788

Epoch 00006: val_loss did not improve from 0.08788

Epoch 00007: val_loss did not improve from 0.08788

Epoch 00008: val_loss did not improve from 0.08788

Epoch 00009: val_loss did not improve from 0.08788

Epoch 00010: val_loss did not improve from 0.08788




AlgoCrossValIter - 6
Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.43140, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.43140 to 0.37373, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.37373 to 0.33747, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.33747

Epoch 00005: val_loss i




Epoch 00001: val_loss improved from inf to 0.14801, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.14801

Epoch 00003: val_loss did not improve from 0.14801

Epoch 00004: val_loss did not improve from 0.14801

Epoch 00005: val_loss did not improve from 0.14801

Epoch 00006: val_loss did not improve from 0.14801

Epoch 00007: val_loss did not improve from 0.14801

Epoch 00008: val_loss did not improve from 0.14801

Epoch 00009: val_loss did not improve from 0.14801

Epoch 00010: val_loss did not improve from 0.14801





Epoch 00001: val_loss improved from inf to 0.07436, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07436

Epoch 00003: val_loss did not improve from 0.07436

Epoch 00004: val_loss did not improve from 0.07436

Epoch 00005: val_loss did not improve from 0.07436

Epoch 00006: val_loss did not improve from 0.07436

Epoch 00007: val_loss did not improve from 0.07436

Epoch 00008: val_loss did not improve from 0.07436

Epoch 00009: val_loss did not improve from 0.07436

Epoch 00010: val_loss did not improve from 0.07436





Epoch 00001: val_loss improved from inf to 0.09683, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09683

Epoch 00003: val_loss did not improve from 0.09683

Epoch 00004: val_loss did not improve from 0.09683

Epoch 00005: val_loss did not improve from 0.09683

Epoch 00006: val_loss did not improve from 0.09683

Epoch 00007: val_loss did not improve from 0.09683

Epoch 00008: val_loss did not improve from 0.09683

Epoch 00009: val_loss did not improve from 0.09683

Epoch 00010: val_loss did not improve from 0.09683





Epoch 00001: val_loss improved from inf to 0.09181, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09181

Epoch 00003: val_loss did not improve from 0.09181

Epoch 00004: val_loss did not improve from 0.09181

Epoch 00005: val_loss did not improve from 0.09181

Epoch 00006: val_loss did not improve from 0.09181

Epoch 00007: val_loss did not improve from 0.09181

Epoch 00008: val_loss did not improve from 0.09181

Epoch 00009: val_loss did not improve from 0.09181

Epoch 00010: val_loss did not improve from 0.09181





Epoch 00001: val_loss improved from inf to 0.03634, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.03634

Epoch 00003: val_loss did not improve from 0.03634

Epoch 00004: val_loss did not improve from 0.03634

Epoch 00005: val_loss did not improve from 0.03634

Epoch 00006: val_loss did not improve from 0.03634

Epoch 00007: val_loss did not improve from 0.03634

Epoch 00008: val_loss did not improve from 0.03634

Epoch 00009: val_loss did not improve from 0.03634

Epoch 00010: val_loss did not improve from 0.03634





Epoch 00001: val_loss improved from inf to 0.06394, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06394

Epoch 00003: val_loss did not improve from 0.06394

Epoch 00004: val_loss did not improve from 0.06394

Epoch 00005: val_loss did not improve from 0.06394

Epoch 00006: val_loss did not improve from 0.06394

Epoch 00007: val_loss did not improve from 0.06394

Epoch 00008: val_loss did not improve from 0.06394

Epoch 00009: val_loss did not improve from 0.06394

Epoch 00010: val_loss did not improve from 0.06394

Epoch 00001: val_loss improved from inf to 0.09363, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.09363 to 0.08938, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.08938

Epoch 00004: val_loss did not improve from 0.08938

Epoch 00005: val_loss did not improve from 0.08938

Epoch 00006: val_loss did not improve from 0.08938

Epoch 00007: val_loss did not improve from 0.0




Epoch 00001: val_loss improved from inf to 0.08235, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.08235 to 0.06381, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.06381

Epoch 00004: val_loss did not improve from 0.06381

Epoch 00005: val_loss did not improve from 0.06381

Epoch 00006: val_loss did not improve from 0.06381

Epoch 00007: val_loss did not improve from 0.06381

Epoch 00008: val_loss did not improve from 0.06381

Epoch 00009: val_loss did not improve from 0.06381

Epoch 00010: val_loss did not improve from 0.06381





Epoch 00001: val_loss improved from inf to 0.07893, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07893

Epoch 00003: val_loss did not improve from 0.07893

Epoch 00004: val_loss did not improve from 0.07893

Epoch 00005: val_loss did not improve from 0.07893

Epoch 00006: val_loss did not improve from 0.07893

Epoch 00007: val_loss did not improve from 0.07893

Epoch 00008: val_loss did not improve from 0.07893

Epoch 00009: val_loss did not improve from 0.07893

Epoch 00010: val_loss did not improve from 0.07893




AlgoCrossValIter - 7
Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.43250, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.43250 to 0.36677, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.36677

Epoch 00004: val_loss improved from 0.36677 to 0.33225, saving model to best-model-conll.hdfs

Epoch 00005: val_loss d




Epoch 00001: val_loss improved from inf to 0.22529, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.22529 to 0.17868, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.17868

Epoch 00004: val_loss did not improve from 0.17868

Epoch 00005: val_loss did not improve from 0.17868

Epoch 00006: val_loss did not improve from 0.17868

Epoch 00007: val_loss did not improve from 0.17868

Epoch 00008: val_loss did not improve from 0.17868

Epoch 00009: val_loss did not improve from 0.17868

Epoch 00010: val_loss did not improve from 0.17868





Epoch 00001: val_loss improved from inf to 0.12847, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.12847 to 0.10355, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.10355 to 0.09869, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.09869

Epoch 00005: val_loss improved from 0.09869 to 0.09708, saving model to best-model-conll.hdfs

Epoch 00006: val_loss did not improve from 0.09708

Epoch 00007: val_loss did not improve from 0.09708

Epoch 00008: val_loss did not improve from 0.09708

Epoch 00009: val_loss did not improve from 0.09708

Epoch 00010: val_loss did not improve from 0.09708





Epoch 00001: val_loss improved from inf to 0.14337, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.14337 to 0.07917, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.07917

Epoch 00004: val_loss did not improve from 0.07917

Epoch 00005: val_loss did not improve from 0.07917

Epoch 00006: val_loss did not improve from 0.07917

Epoch 00007: val_loss did not improve from 0.07917

Epoch 00008: val_loss did not improve from 0.07917

Epoch 00009: val_loss did not improve from 0.07917

Epoch 00010: val_loss did not improve from 0.07917





Epoch 00001: val_loss improved from inf to 0.08216, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.08216 to 0.06437, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.06437

Epoch 00004: val_loss did not improve from 0.06437

Epoch 00005: val_loss did not improve from 0.06437

Epoch 00006: val_loss did not improve from 0.06437

Epoch 00007: val_loss did not improve from 0.06437

Epoch 00008: val_loss did not improve from 0.06437

Epoch 00009: val_loss did not improve from 0.06437

Epoch 00010: val_loss did not improve from 0.06437





Epoch 00001: val_loss improved from inf to 0.03683, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.03683

Epoch 00003: val_loss did not improve from 0.03683

Epoch 00004: val_loss did not improve from 0.03683

Epoch 00005: val_loss did not improve from 0.03683

Epoch 00006: val_loss did not improve from 0.03683

Epoch 00007: val_loss did not improve from 0.03683

Epoch 00008: val_loss did not improve from 0.03683

Epoch 00009: val_loss did not improve from 0.03683

Epoch 00010: val_loss did not improve from 0.03683





Epoch 00001: val_loss improved from inf to 0.14461, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.14461 to 0.08794, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.08794

Epoch 00004: val_loss did not improve from 0.08794

Epoch 00005: val_loss did not improve from 0.08794

Epoch 00006: val_loss did not improve from 0.08794

Epoch 00007: val_loss did not improve from 0.08794

Epoch 00008: val_loss did not improve from 0.08794

Epoch 00009: val_loss did not improve from 0.08794

Epoch 00010: val_loss did not improve from 0.08794

Epoch 00001: val_loss improved from inf to 0.07722, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07722

Epoch 00003: val_loss did not improve from 0.07722

Epoch 00004: val_loss did not improve from 0.07722

Epoch 00005: val_loss did not improve from 0.07722

Epoch 00006: val_loss did not improve from 0.07722

Epoch 00007: val_loss did not improve from 0.0




Epoch 00001: val_loss improved from inf to 0.08436, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08436

Epoch 00003: val_loss did not improve from 0.08436

Epoch 00004: val_loss did not improve from 0.08436

Epoch 00005: val_loss did not improve from 0.08436

Epoch 00006: val_loss did not improve from 0.08436

Epoch 00007: val_loss did not improve from 0.08436

Epoch 00008: val_loss did not improve from 0.08436

Epoch 00009: val_loss did not improve from 0.08436

Epoch 00010: val_loss did not improve from 0.08436




AlgoCrossValIter - 8
Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.44038, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.44038 to 0.39434, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.39434 to 0.34648, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.34648

Epoch 00005: val_loss i




Epoch 00001: val_loss improved from inf to 0.12536, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.12536

Epoch 00003: val_loss did not improve from 0.12536

Epoch 00004: val_loss did not improve from 0.12536

Epoch 00005: val_loss did not improve from 0.12536

Epoch 00006: val_loss did not improve from 0.12536

Epoch 00007: val_loss did not improve from 0.12536

Epoch 00008: val_loss did not improve from 0.12536

Epoch 00009: val_loss did not improve from 0.12536

Epoch 00010: val_loss did not improve from 0.12536





Epoch 00001: val_loss improved from inf to 0.12654, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.12654 to 0.08729, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.08729 to 0.07896, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.07896

Epoch 00005: val_loss did not improve from 0.07896

Epoch 00006: val_loss did not improve from 0.07896

Epoch 00007: val_loss did not improve from 0.07896

Epoch 00008: val_loss did not improve from 0.07896

Epoch 00009: val_loss did not improve from 0.07896

Epoch 00010: val_loss did not improve from 0.07896





Epoch 00001: val_loss improved from inf to 0.07974, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.07974 to 0.07859, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.07859

Epoch 00004: val_loss did not improve from 0.07859

Epoch 00005: val_loss did not improve from 0.07859

Epoch 00006: val_loss did not improve from 0.07859

Epoch 00007: val_loss did not improve from 0.07859

Epoch 00008: val_loss did not improve from 0.07859

Epoch 00009: val_loss did not improve from 0.07859

Epoch 00010: val_loss did not improve from 0.07859





Epoch 00001: val_loss improved from inf to 0.12501, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.12501 to 0.06778, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.06778

Epoch 00004: val_loss did not improve from 0.06778

Epoch 00005: val_loss did not improve from 0.06778

Epoch 00006: val_loss did not improve from 0.06778

Epoch 00007: val_loss did not improve from 0.06778

Epoch 00008: val_loss did not improve from 0.06778

Epoch 00009: val_loss did not improve from 0.06778

Epoch 00010: val_loss did not improve from 0.06778





Epoch 00001: val_loss improved from inf to 0.02992, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.02992

Epoch 00003: val_loss did not improve from 0.02992

Epoch 00004: val_loss did not improve from 0.02992

Epoch 00005: val_loss did not improve from 0.02992

Epoch 00006: val_loss did not improve from 0.02992

Epoch 00007: val_loss did not improve from 0.02992

Epoch 00008: val_loss did not improve from 0.02992

Epoch 00009: val_loss did not improve from 0.02992

Epoch 00010: val_loss did not improve from 0.02992





Epoch 00001: val_loss improved from inf to 0.09367, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.09367 to 0.08390, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.08390

Epoch 00004: val_loss did not improve from 0.08390

Epoch 00005: val_loss did not improve from 0.08390

Epoch 00006: val_loss did not improve from 0.08390

Epoch 00007: val_loss did not improve from 0.08390

Epoch 00008: val_loss did not improve from 0.08390

Epoch 00009: val_loss did not improve from 0.08390

Epoch 00010: val_loss did not improve from 0.08390





Epoch 00001: val_loss improved from inf to 0.08492, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08492

Epoch 00003: val_loss did not improve from 0.08492

Epoch 00004: val_loss did not improve from 0.08492

Epoch 00005: val_loss did not improve from 0.08492

Epoch 00006: val_loss did not improve from 0.08492

Epoch 00007: val_loss did not improve from 0.08492

Epoch 00008: val_loss did not improve from 0.08492

Epoch 00009: val_loss did not improve from 0.08492

Epoch 00010: val_loss did not improve from 0.08492

Epoch 00001: val_loss improved from inf to 0.06004, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06004

Epoch 00003: val_loss did not improve from 0.06004

Epoch 00004: val_loss did not improve from 0.06004

Epoch 00005: val_loss did not improve from 0.06004

Epoch 00006: val_loss did not improve from 0.06004

Epoch 00007: val_loss did not improve from 0.06004

Epoch 00008: val_loss did not improve




Epoch 00001: val_loss improved from inf to 0.08514, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08514

Epoch 00003: val_loss did not improve from 0.08514

Epoch 00004: val_loss did not improve from 0.08514

Epoch 00005: val_loss did not improve from 0.08514

Epoch 00006: val_loss did not improve from 0.08514

Epoch 00007: val_loss did not improve from 0.08514

Epoch 00008: val_loss did not improve from 0.08514

Epoch 00009: val_loss did not improve from 0.08514

Epoch 00010: val_loss did not improve from 0.08514




AlgoCrossValIter - 9
Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.43451, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.43451 to 0.39154, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.39154 to 0.29928, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.29928

Epoch 00005: val_loss d




Epoch 00001: val_loss improved from inf to 0.15639, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.15639

Epoch 00003: val_loss did not improve from 0.15639

Epoch 00004: val_loss did not improve from 0.15639

Epoch 00005: val_loss did not improve from 0.15639

Epoch 00006: val_loss did not improve from 0.15639

Epoch 00007: val_loss did not improve from 0.15639

Epoch 00008: val_loss did not improve from 0.15639

Epoch 00009: val_loss did not improve from 0.15639

Epoch 00010: val_loss did not improve from 0.15639





Epoch 00001: val_loss improved from inf to 0.07509, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07509

Epoch 00003: val_loss did not improve from 0.07509

Epoch 00004: val_loss did not improve from 0.07509

Epoch 00005: val_loss did not improve from 0.07509

Epoch 00006: val_loss did not improve from 0.07509

Epoch 00007: val_loss did not improve from 0.07509

Epoch 00008: val_loss did not improve from 0.07509

Epoch 00009: val_loss did not improve from 0.07509

Epoch 00010: val_loss did not improve from 0.07509





Epoch 00001: val_loss improved from inf to 0.07837, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07837

Epoch 00003: val_loss did not improve from 0.07837

Epoch 00004: val_loss did not improve from 0.07837

Epoch 00005: val_loss did not improve from 0.07837

Epoch 00006: val_loss did not improve from 0.07837

Epoch 00007: val_loss did not improve from 0.07837

Epoch 00008: val_loss did not improve from 0.07837

Epoch 00009: val_loss did not improve from 0.07837

Epoch 00010: val_loss did not improve from 0.07837





Epoch 00001: val_loss improved from inf to 0.10644, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.10644 to 0.05850, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.05850

Epoch 00004: val_loss did not improve from 0.05850

Epoch 00005: val_loss did not improve from 0.05850

Epoch 00006: val_loss did not improve from 0.05850

Epoch 00007: val_loss did not improve from 0.05850

Epoch 00008: val_loss did not improve from 0.05850

Epoch 00009: val_loss did not improve from 0.05850

Epoch 00010: val_loss did not improve from 0.05850





Epoch 00001: val_loss improved from inf to 0.03811, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.03811

Epoch 00003: val_loss did not improve from 0.03811

Epoch 00004: val_loss did not improve from 0.03811

Epoch 00005: val_loss did not improve from 0.03811

Epoch 00006: val_loss did not improve from 0.03811

Epoch 00007: val_loss did not improve from 0.03811

Epoch 00008: val_loss did not improve from 0.03811

Epoch 00009: val_loss did not improve from 0.03811

Epoch 00010: val_loss did not improve from 0.03811





Epoch 00001: val_loss improved from inf to 0.08699, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08699

Epoch 00003: val_loss did not improve from 0.08699

Epoch 00004: val_loss did not improve from 0.08699

Epoch 00005: val_loss did not improve from 0.08699

Epoch 00006: val_loss did not improve from 0.08699

Epoch 00007: val_loss did not improve from 0.08699

Epoch 00008: val_loss did not improve from 0.08699

Epoch 00009: val_loss did not improve from 0.08699

Epoch 00010: val_loss did not improve from 0.08699





Epoch 00001: val_loss improved from inf to 0.08102, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08102

Epoch 00003: val_loss did not improve from 0.08102

Epoch 00004: val_loss did not improve from 0.08102

Epoch 00005: val_loss did not improve from 0.08102

Epoch 00006: val_loss did not improve from 0.08102

Epoch 00007: val_loss did not improve from 0.08102

Epoch 00008: val_loss did not improve from 0.08102

Epoch 00009: val_loss did not improve from 0.08102

Epoch 00010: val_loss did not improve from 0.08102

Epoch 00001: val_loss improved from inf to 0.07085, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.07085 to 0.06012, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.06012

Epoch 00004: val_loss did not improve from 0.06012

Epoch 00005: val_loss did not improve from 0.06012

Epoch 00006: val_loss did not improve from 0.06012

Epoch 00007: val_loss did not improve from 0.0




Epoch 00001: val_loss improved from inf to 0.08381, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08381

Epoch 00003: val_loss did not improve from 0.08381

Epoch 00004: val_loss did not improve from 0.08381

Epoch 00005: val_loss did not improve from 0.08381

Epoch 00006: val_loss did not improve from 0.08381

Epoch 00007: val_loss did not improve from 0.08381

Epoch 00008: val_loss did not improve from 0.08381

Epoch 00009: val_loss did not improve from 0.08381

Epoch 00010: val_loss did not improve from 0.08381




AlgoCrossValIter - 10
Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.52630, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.52630 to 0.40790, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.40790 to 0.36896, saving model to best-model-conll.hdfs

Epoch 00004: val_loss improved from 0.36896 to 0.34985, saving model to b




Epoch 00001: val_loss improved from inf to 0.13837, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.13837

Epoch 00003: val_loss did not improve from 0.13837

Epoch 00004: val_loss did not improve from 0.13837

Epoch 00005: val_loss did not improve from 0.13837

Epoch 00006: val_loss did not improve from 0.13837

Epoch 00007: val_loss did not improve from 0.13837

Epoch 00008: val_loss did not improve from 0.13837

Epoch 00009: val_loss did not improve from 0.13837

Epoch 00010: val_loss did not improve from 0.13837





Epoch 00001: val_loss improved from inf to 0.10037, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10037

Epoch 00003: val_loss improved from 0.10037 to 0.08301, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.08301

Epoch 00005: val_loss did not improve from 0.08301

Epoch 00006: val_loss did not improve from 0.08301

Epoch 00007: val_loss did not improve from 0.08301

Epoch 00008: val_loss did not improve from 0.08301

Epoch 00009: val_loss did not improve from 0.08301

Epoch 00010: val_loss did not improve from 0.08301





Epoch 00001: val_loss improved from inf to 0.10425, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10425

Epoch 00003: val_loss improved from 0.10425 to 0.09288, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.09288

Epoch 00005: val_loss did not improve from 0.09288

Epoch 00006: val_loss did not improve from 0.09288

Epoch 00007: val_loss did not improve from 0.09288

Epoch 00008: val_loss did not improve from 0.09288

Epoch 00009: val_loss did not improve from 0.09288

Epoch 00010: val_loss did not improve from 0.09288

Epoch 00001: val_loss improved from inf to 0.07366, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07366

Epoch 00003: val_loss did not improve from 0.07366

Epoch 00004: val_loss did not improve from 0.07366

Epoch 00005: val_loss did not improve from 0.07366

Epoch 00006: val_loss did not improve from 0.07366

Epoch 00007: val_loss did not improve from 0.0




Epoch 00001: val_loss improved from inf to 0.10611, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.10611 to 0.04759, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.04759 to 0.04225, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.04225

Epoch 00005: val_loss did not improve from 0.04225

Epoch 00006: val_loss improved from 0.04225 to 0.03642, saving model to best-model-conll.hdfs

Epoch 00007: val_loss did not improve from 0.03642

Epoch 00008: val_loss did not improve from 0.03642

Epoch 00009: val_loss did not improve from 0.03642

Epoch 00010: val_loss did not improve from 0.03642





Epoch 00001: val_loss improved from inf to 0.08256, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08256

Epoch 00003: val_loss did not improve from 0.08256

Epoch 00004: val_loss did not improve from 0.08256

Epoch 00005: val_loss did not improve from 0.08256

Epoch 00006: val_loss did not improve from 0.08256

Epoch 00007: val_loss did not improve from 0.08256

Epoch 00008: val_loss did not improve from 0.08256

Epoch 00009: val_loss did not improve from 0.08256

Epoch 00010: val_loss did not improve from 0.08256

Epoch 00001: val_loss improved from inf to 0.08078, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08078

Epoch 00003: val_loss did not improve from 0.08078

Epoch 00004: val_loss did not improve from 0.08078

Epoch 00005: val_loss did not improve from 0.08078

Epoch 00006: val_loss did not improve from 0.08078

Epoch 00007: val_loss did not improve from 0.08078

Epoch 00008: val_loss did not improve




Epoch 00001: val_loss improved from inf to 0.08002, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.08002 to 0.06687, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.06687

Epoch 00004: val_loss did not improve from 0.06687

Epoch 00005: val_loss did not improve from 0.06687

Epoch 00006: val_loss did not improve from 0.06687

Epoch 00007: val_loss did not improve from 0.06687

Epoch 00008: val_loss did not improve from 0.06687

Epoch 00009: val_loss did not improve from 0.06687

Epoch 00010: val_loss did not improve from 0.06687





Epoch 00001: val_loss improved from inf to 0.08159, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08159

Epoch 00003: val_loss did not improve from 0.08159

Epoch 00004: val_loss did not improve from 0.08159

Epoch 00005: val_loss did not improve from 0.08159

Epoch 00006: val_loss did not improve from 0.08159

Epoch 00007: val_loss did not improve from 0.08159

Epoch 00008: val_loss did not improve from 0.08159

Epoch 00009: val_loss did not improve from 0.08159

Epoch 00010: val_loss did not improve from 0.08159




In [66]:
resultCrossVal.to_csv("results.csv")
resultCrossVal

Unnamed: 0,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9
P_test,87.468,85.864,86.099,85.261,82.481,87.193,87.162,84.92,84.812,83.19
P_train,89.051,87.796,88.162,86.77,85.625,88.212,88.029,88.544,86.805,86.083
P_ewo,78.938,78.155,79.673,76.452,73.532,79.324,79.343,78.099,77.792,75.034
R_test,78.488,77.96,77.961,81.293,82.009,83.738,77.127,81.988,77.96,80.46
R_train,77.086,83.385,82.176,85.221,85.736,82.702,81.866,85.515,82.475,85.715
R_ewo,62.408,66.76,65.186,68.518,70.65,67.502,65.834,68.426,65.74,69.907
F1-test,82.323,81.515,80.863,82.876,81.427,84.265,81.46,82.557,80.161,81.335
F1-train,81.476,85.263,84.546,85.703,85.225,84.408,84.385,86.962,84.046,85.5
F1-ewo,68.678,71.603,71.099,71.741,71.275,71.955,71.606,72.886,70.52,71.641


In [67]:
resultCrossVal.mean(axis=1).to_frame()

Unnamed: 0,0
P_test,85.445
P_train,87.5077
P_ewo,77.6342
R_test,79.8984
R_train,83.1877
R_ewo,67.0931
F1-test,81.8782
F1-train,84.7514
F1-ewo,71.3004


In [68]:
resultCrossVal.std(axis=1).to_frame()

Unnamed: 0,0
P_test,1.67597
P_train,1.122845
P_ewo,2.03117
R_test,2.279427
R_train,2.650276
R_ewo,2.441723
F1-test,1.16122
F1-train,1.428244
F1-ewo,1.103778


In [69]:
trainByTagResult.to_csv("results/train-by-tag.csv")
trainByTagResult

Unnamed: 0,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9
F1-O,97.852,98.143,98.075,98.163,98.084,98.087,98.074,98.303,98.066,98.152
P-O,97.073,97.86,97.675,98.079,98.152,97.767,97.648,98.086,97.784,98.174
R-O,98.677,98.439,98.495,98.259,98.036,98.437,98.522,98.522,98.369,98.147
F1-MISC,72.341,76.006,74.693,77.008,74.756,75.942,74.03,75.984,77.362222,76.678
P-MISC,97.273,93.66,91.528,87.584,91.25,93.472,95.5,97.5,84.632,86.675
R-MISC,60.443,66.493,65.511,71.715,66.26,65.059,63.352,63.17,61.039,70.716
F1-PER,84.902,88.763,88.188,88.838,88.844,87.39,87.854,90.21,88.149,88.743
P-PER,88.766,89.239,89.297,87.969,86.696,89.09,88.577,88.622,88.438,88.623
R-PER,83.377,88.694,87.82,90.094,91.578,87.547,87.604,91.943,88.225,89.702
F1-LOC,70.089,75.932,78.053333,78.185,79.707778,76.629,80.151111,79.042,79.094444,77.638


In [70]:
trainByTagResult.mean(axis=1).to_frame()

Unnamed: 0,0
F1-O,98.0999
P-O,97.8298
R-O,98.3903
F1-MISC,75.480022
P-MISC,91.9074
R-MISC,65.3758
F1-PER,88.1881
P-PER,88.5317
R-PER,88.6584
F1-LOC,77.452167


In [71]:
trainByTagResult.std(axis=1).to_frame()

Unnamed: 0,0
F1-O,0.112604
P-O,0.331116
R-O,0.192878
F1-MISC,1.533197
P-MISC,4.448103
R-MISC,3.696146
F1-PER,1.379519
P-PER,0.75765
R-PER,2.440284
F1-LOC,2.900851


In [72]:
testByTagResult.to_csv("results/test-by-tag.csv")
testByTagResult

Unnamed: 0,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9
F1-O,97.881,97.75,97.693,97.871,97.811,98.062,97.821,97.809,97.561,97.612
P-O,97.159,97.14,97.04,97.506,97.773,97.772,97.04,97.512,97.04,97.371
R-O,98.63,98.378,98.392,98.255,97.878,98.39,98.632,98.14,98.126,97.872
F1-MISC,57.037778,57.037778,57.037778,53.334444,57.037778,59.26,53.334444,57.037778,57.037778,66.6675
P-MISC,60.0,60.0,60.0,55.0,55.0,60.0,55.0,60.0,60.0,60.0
R-MISC,46.667,46.667,46.667,46.667,51.667,50.0,46.667,46.667,46.667,50.0
F1-PER,87.217,86.92,85.274,88.682,86.901,87.495,86.92,86.609,84.683,85.859
P-PER,88.07,88.682,85.118,87.432,83.206,87.729,88.682,85.507,86.182,85.229
R-PER,87.305,87.127,87.444,91.234,92.484,89.984,87.127,89.984,86.055,88.555
F1-LOC,78.75,71.90625,81.25125,72.50125,75.715714,80.741111,82.858571,77.407778,74.585,70.4175


In [73]:
testByTagResult.mean(axis=1).to_frame()

Unnamed: 0,0
F1-O,97.7871
P-O,97.3353
R-O,98.2693
F1-MISC,57.482306
P-MISC,58.5
R-MISC,47.8336
F1-PER,86.656
P-PER,86.5837
R-PER,88.7299
F1-LOC,76.613442


In [74]:
testByTagResult.std(axis=1).to_frame()

Unnamed: 0,0
F1-O,0.143619
P-O,0.2934
R-O,0.269055
F1-MISC,3.698695
P-MISC,2.415229
R-MISC,1.93242
F1-PER,1.143232
P-PER,1.818398
R-PER,2.09543
F1-LOC,4.283332


In [75]:
ewoByTagResult.to_csv("results/ewo-by-tag.csv")

In [76]:
ewoByTagResult = pd.read_csv("results/ewo-by-tag.csv", index_col=0)
ewoByTagResult

Unnamed: 0,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9
F1-O,96.908,97.081,97.084,97.079,96.886,97.15,97.145,97.236,97.025,97.018
P-O,95.83,96.331,96.128,96.565,96.796,96.418,96.21,96.542,96.247,96.737
R-O,98.035,97.858,98.076,97.616,97.006,97.911,98.11,97.944,97.836,97.321
F1-MISC,41.524444,41.228,42.044444,49.647,42.498,51.926,43.474444,42.833,39.46,48.981
P-MISC,60.002,63.336,54.669,61.892,60.557,69.407,58.892,67.979,56.669,60.669
R-MISC,28.75,33.75,30.0,46.25,36.25,43.75,31.25,32.5,28.75,43.75
F1-PER,75.488,78.339,78.125,77.22,77.325,76.959,78.122,78.364,77.054,77.609
P-PER,79.992,80.234,82.362,78.016,74.847,79.787,80.269,77.993,80.024,78.286
R-PER,73.375,77.0,75.25,77.125,81.125,76.0,76.5,78.875,75.0,78.5
F1-LOC,46.267,52.133,56.088889,57.388,59.341111,58.811,57.526667,58.181,59.501111,55.863


In [77]:
ewoByTagResult.mean(axis=1).to_frame()

Unnamed: 0,0
F1-O,97.0612
P-O,96.3804
R-O,97.7713
F1-MISC,44.361633
P-MISC,61.4072
R-MISC,35.5
F1-PER,77.4605
P-PER,79.181
R-PER,76.875
F1-LOC,56.110078


In [78]:
ewoByTagResult.std(axis=1).to_frame()

Unnamed: 0,0
F1-O,0.1075141
P-O,0.2939029
R-O,0.3575711
F1-MISC,4.221044
P-MISC,4.577276
R-MISC,6.697844
F1-PER,0.8738784
P-PER,2.013843
R-PER,2.213437
F1-LOC,4.08149


In [79]:
columns = en_fingerprints.columns

print("Pred", "Real", "Freq", "Word", sep="\t")
for c in columns:
    prediction = model.predict(en_fingerprints[c].values.reshape((1, 210)))
    pred_tag = int2tag[np.argmax(prediction)]
    real_tag = en_corpus[en_corpus.word == c].iloc[0]['ne-tag']
    
    if pred_tag != real_tag:
        print(pred_tag, real_tag, en_fingerprints[c].max(), c, sep="\t")

Pred	Real	Freq	Word
O	LOC	4170.0	Samaria
O	LOC	4170.0	Olivet
O	MISC	4170.0	Sabbath
PER	O	4170.0	upper
PER	O	4170.0	room
PER	O	4170.0	where
PER	O	4170.0	Zealot
LOC	O	4170.0	so
LOC	O	4170.0	language
O	MISC	4170.0	Psalms
PER	O	4170.0	forward
O	MISC	4170.0	ninth
O	PER	4170.0	Moses
O	PER	4170.0	Samuel
ORG	O	4170.0	captain
PER	O	4170.0	high-priestly
PER	O	4170.0	family
O	PER	4170.0	Pontius
O	PER	4170.0	Barnabas
O	LOC	4170.0	Cyprus
O	PER	4170.0	Elijah
O	MISC	4170.0	r
PER	O	4170.0	Ju
PER	O	4170.0	h
LOC	O	4170.0	deportation
O	PER	4170.0	Immanuel
