In [1]:
# import
import keras
import sys
import numpy as np
import string
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import SimpleRNN, Dense
from keras.utils import np_utils, plot_model
from keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn import model_selection
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, precision_recall_fscore_support
import h5py as h5py

Using TensorFlow backend.


In [2]:
# if we are doeing binary classification. That means say if a token is a named entity or not
BINARY = False

# number of epochs for training
epochs = 10 

# the english side of the corpus
en_corpus_file = "corpus-en.txt"

# the ewondo side of the corpus
ewo_corpus_file = "corpus-ewo.txt"

# name of the file to same the model 
best_model_file = "best-model-conll.hdfs"

# The maximal number of phrases to use
max_nb_of_phrases =  -1

# the maximal number of duplicates for each word in the corpus
duplication = 1

# wether we are using only the vocabulary, ro redundancy
is_only_vocab = True

# if word should be shuffle or not
shuffle = is_only_vocab

# if we are using the Zennaki et al. signature
is_zennaki = False

# the number of neurons in the first layer
h1_size = 640

# number of neurons in the second layer
h2_size = 160  

In [3]:
def getTag(aString):
    """
        convert a string to a tag
    """
    tag = "O"
    if BINARY:
        if aString != "O":
            return "NE"
    else:
        tag = aString
    return tag
     

In [4]:
def load_corpus(file, max_nb_of_phrases):
    """
    Load a corpus stored in a file
    Input:
        - file: the name of the file of the corpus
        - max_nb_of_phases: maximal number of phrases to load
    
    Return:
        - a DataFrame representing the corpus
        - the number of phrases in the corpus
    """
    nb_of_phrases = 0
    dataset = {"word": [], "ne-tag": []}
    with open(file) as f:
        prev_line = None
        for cpt, line in enumerate(f):
            if cpt == 0:
                continue
            if nb_of_phrases == max_nb_of_phrases:
                break;

            l = line.strip()
            if len(l) == 0 and len(prev_line) != 0:
                nb_of_phrases += 1
                dataset["word"].append(line)
                dataset["ne-tag"].append(None)
            else:
                l = l.split("\t")
                if l[0] not in string.punctuation:
                    dataset["word"].append(l[0])
                    dataset["ne-tag"].append(ne_type(l[1]))
            prev_line = line.strip()
        
    return pd.DataFrame(dataset), nb_of_phrases+1

In [5]:
def corpus_fingerprint(aDataframe, nb_of_biphrases):
    """
    Create the distributionnal signature of each word in the corpus
    Input:
        -aDataFrame: the corpus DataFrame
        -nb_of_biphrases: number of phrases in the corpus
    Return:
        a DataFrame: corpus fingerprint, the columns are the words in the corpus
    """
    fingerprints = {}
    current_bi_phrase_index = 0
    nb_word_in_corpus = aDataframe[aDataframe.word != "\n"].word.size
    words_in_current_phrase = []
    for index, row in aDataframe.iterrows():
        if current_bi_phrase_index > nb_of_biphrases:
            break
            
        word = row['word']
        
        if word != "\n":
            words_in_current_phrase.append(word)
            if word not in fingerprints:
                fingerprints[word] = np.zeros(nb_of_biphrases, dtype=np.float32)
            fingerprints[word][current_bi_phrase_index] += 1
        else:
            nb_word_in_current_phrase = len(words_in_current_phrase)
#             for w in words_in_current_phrase:
#                 fingerprints[w][current_bi_phrase_index] = nb_word_in_corpus / fingerprints[w][current_bi_phrase_index]                
            current_bi_phrase_index += 1
            words_in_current_phrase = []
    for word in fingerprints:
        for i in range(nb_of_biphrases):
            if fingerprints[word][i] != 0:
                fingerprints[word][i] = nb_word_in_corpus / fingerprints[word][i]
#         fingerprints[word][nb_of_biphrases] = nb_word_in_corpus / aDataframe[aDataframe.word == word].word.size
    ret = pd.DataFrame(fingerprints)
    if is_zennaki:
        ret[ret > 0] = 1
        
    return ret

In [6]:
def corpus2trainingdata(aDataframe, fingerprintsDataFrame):
    """
    Convert corpus to training data => numpy array
    
    Input:
        -aDataFrame: Corpus dataframe
        -fingerprintsDataFrame: distributionnal signature of words in the corpus
    Return:
        (X, y): X is the array of words (signature) in the corpus and y is the corresponding labels (NE tags)
    """
    X = np.zeros((aDataframe.shape[0], fingerprintsDataFrame.shape[0]), dtype=np.int8)
    y = np.zeros(aDataframe.shape[0], dtype=np.int8)
    i = 0
    for row in aDataframe.iterrows():
        X[i] = fingerprintsDataFrame[row[1]['word']].values
        y[i] = tag2int[getTag(row[1]['ne-tag'])]
        i += 1
    return X, y

In [7]:
# def train_test_split(X, y, test_size = 0.33):
#     total = X.shape[0]
#     train_length = round(total * (1 - test_size)) 
#     return X[:train_length], X[train_length:], y[:train_length], y[train_length:]

In [8]:
# A utility function to convert NE tags
def ne_type(aType):
    aType = aType.lower()
    if 'per' in aType:
        t =  'NE' if BINARY else 'PER' 
    elif 'loc' in aType:
        t =  'NE' if BINARY else 'LOC'
    elif 'org' in aType:
        t =  'NE' if BINARY else 'ORG'
    elif 'hour' in aType:
        t =  'NE' if BINARY else 'MISC'
    elif aType != 'o' and len(aType) > 0 :
        t =  'NE' if BINARY else 'MISC'
    else:
        t = 'O'
    return t

In [9]:
def P_R_F1(y_pred, y_true, neg_class):
    same = y_pred[y_true==y_pred]
    tp = same[same != neg_class].size
    nb_of_pos_exple = y_true[y_true != neg_class].size
    nb_of_pos_pred = y_pred[y_pred != neg_class].size
    p = r = f1 = 0
    try:
        p = np.round(tp*100/nb_of_pos_pred, 2)
    except ZeroDivisionError:
        print("number of correct positive predictions is 0")
        
    try:
        r = np.round(tp*100/nb_of_pos_exple, 2)
    except ZeroDivisionError:
        print("number of position exple is 0")
        
    try:
        f1 = np.round(2*r*p/(r+p), 2)
    except ZeroDivisionError:
        print("Recall and precision are 0")

    return p, r, f1

In [10]:
def shuffle_data(X, y):
    indices = [i for i in  range(X.shape[0])]
    np.random.shuffle(indices)
    return X[indices], y[indices]

In [11]:
def create_model(input_dim, output_dim):
    model = Sequential()
    model.add(Dense(h1_size, input_dim=input_dim, activation='sigmoid', name="hidden1"))
    model.add(Dense(h2_size, activation='sigmoid', name="hidden2"))
    if BINARY:
        model.add(Dense(1, activation='sigmoid', name="outputlayer"))
        model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['binary_accuracy'])
    else:
        model.add(Dense(output_dim, activation='softmax', name="outputlayer"))
        model.compile(loss='categorical_crossentropy', optimizer="rmsprop", metrics=['accuracy'])
    model.summary()
    return model

In [12]:
def train_model(model, X_train, y_train, X_val, y_val, epochs=epochs):
    # stop learning if the error is the same between two consecutive epochs
    early_stop = EarlyStopping(patience=20, verbose=2)
    
    # saving best model
    best_model_cp = ModelCheckpoint(best_model_file, save_best_only=True, verbose=1)
    
    model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=epochs, verbose=0, shuffle=shuffle, callbacks=[best_model_cp, early_stop])
    
    #loading and returning the best model
    return keras.models.load_model(best_model_file)

In [13]:
def predict(model, X, y, binary=BINARY):
    if BINARY:
        y_pred = np.round(model.predict(X))
        y_true = y
    else:
        predictions = model.predict(X)
        y_pred = np.array([np.argmax(p) for p in predictions])
        y_true = np.array([np.argmax(t) for t in y ])
    return y_true, y_pred

In [14]:
def model_performance(y_true, y_pred):
    return P_R_F1(y_pred, y_true, tag2int['O']) #precision, recall, f1-score

In [15]:
def model_performace_by_tag(y_true, y_pred, tag):
    p, r, f1 = 0, 0, 0
    
    eq = y_pred[y_pred==y_true]
    correctly_pred = eq[eq==tag].size
    try:
        p = np.round(100 * correctly_pred / y_pred[y_pred==tag].size, 2)
    except ZeroDivisionError:
        pass
    
    try:
        r = np.round(100 * correctly_pred / y_true[y_true==tag].size, 2)
    except ZeroDivisionError:
        pass
    
    try:
        f1 = np.round(2 * r * p / (r + p), 2)
    except ZeroDivisionError:
        pass
    
    return p, r, f1

In [16]:
def algoEval(X_train, y_train, X_val, y_val, X_ewo, y_ewo, epochs=epochs, model=None):
    """
    Train a model on (X, y) and validate on (X_val, y_val) then project on (X_ewo)
    """
    test_precision, train_precision, ewo_precision = [], [], []
    test_recall, train_recall, ewo_recall = [], [], []
    test_fscore, train_fscore, ewo_fscore = [], [], []
    
    test_result_by_tag = {}
    train_result_by_tag = {}
    ewo_result_by_tag = {}
    for t in tagSet:
        f1_key = "F1-"+t
        p_key = "P-"+t
        r_key = "R-"+t
        train_result_by_tag[f1_key], train_result_by_tag[p_key], train_result_by_tag[r_key] = [], [], []
        test_result_by_tag[f1_key], test_result_by_tag[p_key], test_result_by_tag[r_key] = [], [], []
        ewo_result_by_tag[f1_key], ewo_result_by_tag[p_key], ewo_result_by_tag[r_key] = [], [], []

    m = train_model(model, X_train, y_train, X_val, y_val, epochs=epochs)
        
    y_true, y_pred = predict(m, X_train, y_train)
    p_train, r_train, f1_train = model_performance(y_true, y_pred)
        
    y_true_val, y_pred_val = predict(m, X_val, y_val)
    p_val, r_val, f1_val = model_performance(y_true_val, y_pred_val)
        
    y_true_ewo, y_pred_ewo = predict(m, X_ewo, y_ewo) 
    p_ewo, r_ewo, f1_ewo = model_performance(y_true_ewo, y_pred_ewo)
        
    for t in range(len(int2tag)):
        f1_key = "F1-" + int2tag[t]
        p_key = "P-" + int2tag[t]
        r_key = "R-" + int2tag[t]
            
        p, r, f1 = model_performace_by_tag(y_true, y_pred, t)
        train_result_by_tag[p_key].append(p)
        train_result_by_tag[r_key].append(r)
        train_result_by_tag[f1_key].append(f1)
            
        p, r, f1 = model_performace_by_tag(y_true_val, y_pred_val, t)
        test_result_by_tag[p_key].append(p)
        test_result_by_tag[r_key].append(r)
        test_result_by_tag[f1_key].append(f1)
            
        p, r, f1 = model_performace_by_tag(y_true_ewo, y_pred_ewo, t)
        ewo_result_by_tag[p_key].append(p)
        ewo_result_by_tag[r_key].append(r)
        ewo_result_by_tag[f1_key].append(f1)
                
    test_precision.append(p_val)
    train_precision.append(p_train)
    ewo_precision.append(p_ewo)
        
    test_recall.append(r_val)
    train_recall.append(r_train)
    ewo_recall.append(r_ewo)
        
    test_fscore.append(f1_val)
    train_fscore.append(f1_train)
    ewo_fscore.append(f1_ewo)
    return pd.DataFrame({
        'P_val': test_precision, 
        'P_train': train_precision, 
        'P_ewo': ewo_precision, 'R_val': test_recall, 'R_train': train_recall, 
        'R_ewo': ewo_recall, 'F1-val': test_fscore, 'F1-train': train_fscore, 'F1-ewo': ewo_fscore}), pd.DataFrame(train_result_by_tag), pd.DataFrame(test_result_by_tag), pd.DataFrame(ewo_result_by_tag)

In [17]:
def algoCrossVal(X, y, X_ewo, y_ewo, k = 10, repeat=1): 
    """
    Traing a model with k-fold cross validation
    We train the model `repeat` times to check it's stability
    """
    block_size = int(X.shape[0] / k)   
    output = None
    model = None
    train_by_tags, test_by_tags, ewo_by_tags = None, None, None
    for it in range(repeat):
        print("AlgoCrossValIter -", it+1)
        model = create_model(X.shape[1], len(tagSet))
        results = None
        train_by_tagsTmp, test_by_tagsTmp, ewo_by_tagsTmp = None, None, None
        for i in range(k):
            X_val, y_val = X[i*block_size:i*block_size+block_size], y[i*block_size:i*block_size+block_size]
            X_train = np.concatenate((X[0:i*block_size], X[i*block_size+block_size:]))
            y_train = np.concatenate((y[0:i*block_size], y[i*block_size+block_size:]))

            X_train = X_train.reshape(X_train.shape[0], X_train.shape[1])
            X_val = X_val.reshape(X_val.shape[0], X_val.shape[1])

            result, train_by_tag, test_by_tag, ewo_by_tag = algoEval(X_train, y_train, X_val, y_val, X_ewo, y_ewo, model=model)
            if results is None:
                results = result.copy()
                train_by_tagsTmp, test_by_tagsTmp, ewo_by_tagsTmp = train_by_tag.copy(), test_by_tag.copy(), ewo_by_tag.copy()
            else:
                results = pd.concat([results, result], ignore_index=True)
                train_by_tagsTmp = pd.concat([train_by_tagsTmp, train_by_tag], ignore_index=True)
                test_by_tagsTmp = pd.concat([test_by_tagsTmp, test_by_tag], ignore_index=True)
                ewo_by_tagsTmp = pd.concat([ewo_by_tagsTmp, ewo_by_tag], ignore_index=True)
        
        if output is None:
            output = results.mean(axis=0).to_frame()
            train_by_tags = train_by_tagsTmp.mean(axis=0).to_frame()
            test_by_tags = test_by_tagsTmp.mean(axis=0).to_frame()
            ewo_by_tags = ewo_by_tagsTmp.mean(axis=0).to_frame()
        else:
            output = pd.concat([output, results.mean(axis=0).to_frame()], axis=1)
            train_by_tags = pd.concat([train_by_tags, train_by_tagsTmp.mean(axis=0).to_frame()], axis=1)
            test_by_tags = pd.concat([test_by_tags, test_by_tagsTmp.mean(axis=0).to_frame()], axis=1)
            ewo_by_tags = pd.concat([ewo_by_tags, ewo_by_tagsTmp.mean(axis=0).to_frame()], axis=1)

    return output, train_by_tags, test_by_tags, ewo_by_tags, model

In [18]:
en_corpus, en_nb_of_phrases = load_corpus(en_corpus_file, max_nb_of_phrases)

In [19]:
en_corpus.head()
en_corpus.loc[en_corpus['ne-tag'] == 'ORG']

Unnamed: 0,word,ne-tag
1335,Sadducees,ORG


In [20]:
tagSet = en_corpus["ne-tag"].dropna().unique()
if BINARY:
    tagSet = ['NE', 'O']
tag2int = {j: i for i, j in enumerate(tagSet)}
int2tag = {i: j for i, j in enumerate(tagSet)}
print(tag2int)

{'O': 0, 'MISC': 1, 'PER': 2, 'LOC': 3, 'ORG': 4}


In [21]:
en_nb_of_phrases

210

In [22]:
en_corpus.describe()

Unnamed: 0,word,ne-tag
count,4379,4170
unique,904,5
top,the,O
freq,313,3779


In [23]:
en_corpus.head(10)

Unnamed: 0,word,ne-tag
0,The,O
1,Promise,O
2,of,O
3,the,O
4,Holy,MISC
5,Spirit,MISC
6,\n,
7,In,O
8,the,O
9,first,O


In [24]:
for tag in tagSet:
    print("{0} % = {1} %".format(tag, np.round(en_corpus[en_corpus['ne-tag']==tag].shape[0] * 100 / en_corpus[en_corpus['ne-tag']!='\n'].shape[0], 2)))

O % = 86.3 %
MISC % = 2.4 %
PER % = 5.59 %
LOC % = 0.91 %
ORG % = 0.02 %


In [25]:
for tag in tagSet:
    print("{0} % = {1} %".format(tag, np.round(en_corpus[en_corpus['ne-tag']==tag].word.unique().shape[0] * 100 / en_corpus[en_corpus['ne-tag']!='\n'].word.unique().shape[0], 2)))

O % = 89.16 %
MISC % = 1.88 %
PER % = 8.96 %
LOC % = 1.99 %
ORG % = 0.11 %


In [26]:
en_corpus[en_corpus.word == "\n"].shape

(209, 2)

In [27]:
print("Nb of bi-phrases", en_nb_of_phrases)
en_fingerprints = corpus_fingerprint(en_corpus, en_nb_of_phrases)

Nb of bi-phrases 210


In [28]:
en_fingerprints.head(5)

Unnamed: 0,The,Promise,of,the,Holy,Spirit,In,first,book,O,...,considered,dream,She,save,fulfill,Immanuel,us),woke,sleep,knew
0,4170.0,4170.0,4170.0,4170.0,4170.0,4170.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,4170.0,0.0,0.0,4170.0,4170.0,4170.0,4170.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,1390.0,4170.0,4170.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,4170.0,4170.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,4170.0,2085.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [29]:
en_fingerprints['you'].values.shape

(210,)

In [30]:
en_corpus[en_corpus.word != "\n"].shape

(4170, 2)

In [31]:
if is_only_vocab:
    text = list(en_corpus[en_corpus.word != "\n"].word.unique())
else:
    text = list(en_corpus[en_corpus.word != "\n"].word)
en_vocab = pd.DataFrame({'text': text})
en_vocab.describe()

Unnamed: 0,text
count,903
unique,903
top,(the
freq,1


In [32]:
if is_only_vocab:
    X = np.zeros((en_vocab.shape[0] * duplication, en_nb_of_phrases))
    target = np.zeros((en_vocab.shape[0] * duplication))
    p=0
    for i, row in en_vocab.iterrows():
        c = row.text
        for j in range(duplication):
            X[p] = en_fingerprints[c.split(" ")[0]]
            target[p] = tag2int[getTag(en_corpus[en_corpus.word == c.split(" ")[-1:][0]]['ne-tag'].iloc[0])]
            p+=1
    X, target = shuffle_data(X, target)
    print(X.shape, en_fingerprints.shape, target.shape)

(903, 210) (210, 903) (903,)


In [33]:
en_vocab[-20:]

Unnamed: 0,text
883,Eliud
884,Eleazar
885,Matthan
886,husband
887,fourteen
888,unwilling
889,shame
890,resolved
891,divorce
892,quietly


In [34]:
if not is_only_vocab:
    X, target = corpus2trainingdata(en_corpus[en_corpus.word != "\n"], en_fingerprints)

In [35]:
if shuffle:
    X, target = shuffle_data(X, target)

In [36]:
y = target.copy()
y[0:100]
if not BINARY:
    y = np_utils.to_categorical(y, len(tagSet))
y.shape

(903, 5)

In [37]:
from sklearn.decomposition import PCA

def visualize(X, y):
    pca = PCA(n_components=2)
    X_embeded = pca.fit_transform(X)
    plt.figure(figsize=(5, 5))
    plt.scatter(X_embeded[:, 0], X_embeded[:, 1], c=y)
    plt.legend()
    plt.show()

In [38]:
# visualize(X, target)

In [39]:
X_train, X_val, y_train, y_val = model_selection.train_test_split(X, y, test_size=0.33)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1])
X_val = X_val.reshape(X_val.shape[0], X_val.shape[1])
print("X_train.shape =", X_train.shape)
print("y_train.shape =", y_train.shape)
print("X_val.shape =", X_val.shape)
print("y_val.shape =", y_val.shape)

tTarget = np.array([np.argmax(yy) for yy in y_train])
vTarget = np.array([np.argmax(yy) for yy in y_val])

for tag in tagSet:
    print("{0} % in training data = {1} %".format(tag, np.round(tTarget[tTarget==tag2int[tag]].size * 100 / tTarget.shape[0], 2)))
    print("{0} % in validation data = {1} %".format(tag, np.round(vTarget[vTarget==tag2int[tag]].size * 100 / vTarget.shape[0], 2)))

X_train.shape = (605, 210)
y_train.shape = (605, 5)
X_val.shape = (298, 210)
y_val.shape = (298, 5)
O % in training data = 87.93 %
O % in validation data = 88.93 %
MISC % in training data = 1.16 %
MISC % in validation data = 1.34 %
PER % in training data = 8.26 %
PER % in validation data = 9.06 %
LOC % in training data = 2.48 %
LOC % in validation data = 0.67 %
ORG % in training data = 0.17 %
ORG % in validation data = 0.0 %


In [40]:
ewo_corpus, ewo_nb_of_phrases = load_corpus(ewo_corpus_file, max_nb_of_phrases)

In [41]:
ewo_corpus.loc[ewo_corpus['ne-tag'] == 'PER']

Unnamed: 0,word,ne-tag
6,Teofil,PER
15,Yesus,PER
86,Yohannes,PER
104,Yesus,PER
230,Yesus,PER
...,...,...
3676,Maria,PER
3697,Yesus,PER
3740,Emmanuel,PER
3750,Yosef,PER


In [42]:
ewo_nb_of_phrases

210

In [43]:
for tag in tagSet:
    print("{0} % = {1} %".format(tag, np.round(ewo_corpus[ewo_corpus['ne-tag']==tag].shape[0] * 100 / ewo_corpus[ewo_corpus['ne-tag']!='\n'].shape[0], 2)))

O % = 84.15 %
MISC % = 2.54 %
PER % = 6.69 %
LOC % = 1.03 %
ORG % = 0.05 %


In [44]:
for tag in tagSet:
    print("{0} % = {1} %".format(tag, np.round(ewo_corpus[ewo_corpus['ne-tag']==tag].word.unique().shape[0] * 100 / ewo_corpus[ewo_corpus['ne-tag']!='\n'].word.unique().shape[0], 2)))

O % = 89.94 %
MISC % = 1.17 %
PER % = 8.3 %
LOC % = 1.86 %
ORG % = 0.2 %


In [45]:
ewo_corpus.describe()

Unnamed: 0,word,ne-tag
count,3779,3570
unique,1024,5
top,\n,O
freq,209,3180


In [46]:
ewo_corpus.head()

Unnamed: 0,word,ne-tag
0,Mfufub,MISC
1,Nsisim,MISC
2,ayi,O
3,sò,O
4,\n,


In [47]:
ewo_fingerprints = corpus_fingerprint(ewo_corpus, en_nb_of_phrases)

In [48]:
if is_only_vocab:
    text = list(ewo_corpus[ewo_corpus.word != "\n"].word.unique())
else:
    text = list(ewo_corpus[ewo_corpus.word != "\n"].word)
ewo_vocab = pd.DataFrame({"text":text})

In [49]:
if is_only_vocab:
    X_ewo = np.zeros((ewo_vocab.shape[0] * duplication, en_nb_of_phrases))
    ewo_target = np.zeros((ewo_vocab.shape[0] * duplication))
    p=0
    for i, row in ewo_vocab.iterrows():
        c = row.text
        for j in range(duplication):
            X_ewo[p] = ewo_fingerprints[c.split(" ")[0]]
            ewo_target[p] = tag2int[getTag(ewo_corpus[ewo_corpus.word == c.split(" ")[-1:][0]]['ne-tag'].iloc[0])]
            p+=1
    X_ewo, ewo_target = shuffle_data(X_ewo, ewo_target)

In [50]:
ewo_vocab[-10:]

Unnamed: 0,text
1013,sik
1014,Ntud
1015,bëyole
1016,Emmanuel
1017,Avëbë
1018,angavëbë
1019,oyò
1020,angabende
1021,anganòṅ
1022,angayole


In [51]:
if not is_only_vocab:
    X_ewo, ewo_target = corpus2trainingdata(ewo_corpus[ewo_corpus.word != "\n"], ewo_fingerprints)

In [52]:
if shuffle:
    X_ewo, ewo_target = shuffle_data(X_ewo, ewo_target)

In [53]:
y_ewo = ewo_target.copy()
print(y_ewo.shape, len(ewo_vocab))

(1023,) 1023


In [54]:
X_ewo.shape

(1023, 210)

In [55]:
y_ewo = ewo_target.copy()
y_ewo[:20]
if not BINARY:
    y_ewo = np_utils.to_categorical(y_ewo)

In [56]:
X_ewo = X_ewo.reshape((X_ewo.shape[0], en_nb_of_phrases))

In [57]:
# model = create_model(X.shape[1], len(tagSet))
# resultEval, train_by_tag, test_by_tag, ewo_by_tag = algoEval(X_train, y_train, X_val, y_val, X_ewo, y_ewo, model=model)

In [58]:
# resultEval

In [59]:
# train_by_tag

In [60]:
# test_by_tag

In [61]:
# ewo_by_tag

In [62]:
# resultEval.mean()

In [63]:
# resultEval.std()

In [64]:
resultCrossVal, trainByTagResult, testByTagResult, ewoByTagResult, model = algoCrossVal(X, y, X_ewo, y_ewo, repeat=10)

AlgoCrossValIter - 1
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.29419, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.29419

Epoch 00003: val_loss did not improve from 0.29419

Epoch 00004: val_loss improved from 0.29419 to 0.23311, saving model to best-model-conll.hdfs

Epoch 00005: val_loss did not improve from 0.23311

Epoch 00006: v




Epoch 00001: val_loss improved from inf to 0.08584, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.08584 to 0.08438, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.08438

Epoch 00004: val_loss did not improve from 0.08438

Epoch 00005: val_loss did not improve from 0.08438

Epoch 00006: val_loss did not improve from 0.08438

Epoch 00007: val_loss did not improve from 0.08438

Epoch 00008: val_loss did not improve from 0.08438

Epoch 00009: val_loss did not improve from 0.08438

Epoch 00010: val_loss did not improve from 0.08438





Epoch 00001: val_loss improved from inf to 0.11388, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.11388 to 0.10864, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.10864

Epoch 00004: val_loss did not improve from 0.10864

Epoch 00005: val_loss did not improve from 0.10864

Epoch 00006: val_loss did not improve from 0.10864

Epoch 00007: val_loss did not improve from 0.10864

Epoch 00008: val_loss did not improve from 0.10864

Epoch 00009: val_loss did not improve from 0.10864

Epoch 00010: val_loss did not improve from 0.10864





Epoch 00001: val_loss improved from inf to 0.10010, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.10010 to 0.06423, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.06423

Epoch 00004: val_loss did not improve from 0.06423

Epoch 00005: val_loss did not improve from 0.06423

Epoch 00006: val_loss did not improve from 0.06423

Epoch 00007: val_loss did not improve from 0.06423

Epoch 00008: val_loss did not improve from 0.06423

Epoch 00009: val_loss did not improve from 0.06423

Epoch 00010: val_loss did not improve from 0.06423





Epoch 00001: val_loss improved from inf to 0.18781, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.18781 to 0.18374, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.18374

Epoch 00004: val_loss improved from 0.18374 to 0.15634, saving model to best-model-conll.hdfs

Epoch 00005: val_loss did not improve from 0.15634

Epoch 00006: val_loss did not improve from 0.15634

Epoch 00007: val_loss did not improve from 0.15634

Epoch 00008: val_loss did not improve from 0.15634

Epoch 00009: val_loss did not improve from 0.15634

Epoch 00010: val_loss did not improve from 0.15634

Epoch 00001: val_loss improved from inf to 0.05820, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05820

Epoch 00003: val_loss did not improve from 0.05820

Epoch 00004: val_loss did not improve from 0.05820

Epoch 00005: val_loss did not improve from 0.05820

Epoch 00006: val_loss did not improve from 0.05820

Epo




Epoch 00001: val_loss improved from inf to 0.14936, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.14936

Epoch 00003: val_loss did not improve from 0.14936

Epoch 00004: val_loss did not improve from 0.14936

Epoch 00005: val_loss did not improve from 0.14936

Epoch 00006: val_loss did not improve from 0.14936

Epoch 00007: val_loss did not improve from 0.14936

Epoch 00008: val_loss did not improve from 0.14936

Epoch 00009: val_loss did not improve from 0.14936

Epoch 00010: val_loss did not improve from 0.14936





Epoch 00001: val_loss improved from inf to 0.02456, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.02456

Epoch 00003: val_loss did not improve from 0.02456

Epoch 00004: val_loss did not improve from 0.02456

Epoch 00005: val_loss did not improve from 0.02456

Epoch 00006: val_loss did not improve from 0.02456

Epoch 00007: val_loss did not improve from 0.02456

Epoch 00008: val_loss did not improve from 0.02456

Epoch 00009: val_loss did not improve from 0.02456

Epoch 00010: val_loss did not improve from 0.02456





Epoch 00001: val_loss improved from inf to 0.03976, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.03976

Epoch 00003: val_loss did not improve from 0.03976

Epoch 00004: val_loss did not improve from 0.03976

Epoch 00005: val_loss did not improve from 0.03976

Epoch 00006: val_loss did not improve from 0.03976

Epoch 00007: val_loss did not improve from 0.03976

Epoch 00008: val_loss did not improve from 0.03976

Epoch 00009: val_loss did not improve from 0.03976

Epoch 00010: val_loss did not improve from 0.03976

Epoch 00001: val_loss improved from inf to 0.06814, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06814

Epoch 00003: val_loss did not improve from 0.06814

Epoch 00004: val_loss did not improve from 0.06814

Epoch 00005: val_loss did not improve from 0.06814

Epoch 00006: val_loss did not improve from 0.06814

Epoch 00007: val_loss did not improve from 0.06814

Epoch 00008: val_loss did not improve




Epoch 00001: val_loss improved from inf to 0.09024, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09024

Epoch 00003: val_loss did not improve from 0.09024

Epoch 00004: val_loss did not improve from 0.09024

Epoch 00005: val_loss did not improve from 0.09024

Epoch 00006: val_loss did not improve from 0.09024

Epoch 00007: val_loss did not improve from 0.09024

Epoch 00008: val_loss did not improve from 0.09024

Epoch 00009: val_loss did not improve from 0.09024

Epoch 00010: val_loss did not improve from 0.09024





Epoch 00001: val_loss improved from inf to 0.09060, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09060

Epoch 00003: val_loss did not improve from 0.09060

Epoch 00004: val_loss did not improve from 0.09060

Epoch 00005: val_loss did not improve from 0.09060

Epoch 00006: val_loss did not improve from 0.09060

Epoch 00007: val_loss did not improve from 0.09060

Epoch 00008: val_loss did not improve from 0.09060

Epoch 00009: val_loss did not improve from 0.09060

Epoch 00010: val_loss did not improve from 0.09060





Epoch 00001: val_loss improved from inf to 0.05555, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.05555 to 0.04293, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.04293

Epoch 00004: val_loss did not improve from 0.04293

Epoch 00005: val_loss did not improve from 0.04293

Epoch 00006: val_loss did not improve from 0.04293

Epoch 00007: val_loss did not improve from 0.04293

Epoch 00008: val_loss did not improve from 0.04293

Epoch 00009: val_loss did not improve from 0.04293

Epoch 00010: val_loss did not improve from 0.04293





Epoch 00001: val_loss improved from inf to 0.12329, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.12329

Epoch 00003: val_loss did not improve from 0.12329

Epoch 00004: val_loss did not improve from 0.12329

Epoch 00005: val_loss did not improve from 0.12329

Epoch 00006: val_loss did not improve from 0.12329

Epoch 00007: val_loss did not improve from 0.12329

Epoch 00008: val_loss did not improve from 0.12329

Epoch 00009: val_loss did not improve from 0.12329

Epoch 00010: val_loss did not improve from 0.12329





Epoch 00001: val_loss improved from inf to 0.05213, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05213

Epoch 00003: val_loss did not improve from 0.05213

Epoch 00004: val_loss did not improve from 0.05213

Epoch 00005: val_loss did not improve from 0.05213

Epoch 00006: val_loss did not improve from 0.05213

Epoch 00007: val_loss did not improve from 0.05213

Epoch 00008: val_loss did not improve from 0.05213

Epoch 00009: val_loss did not improve from 0.05213

Epoch 00010: val_loss did not improve from 0.05213





Epoch 00001: val_loss improved from inf to 0.16377, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.16377

Epoch 00003: val_loss did not improve from 0.16377

Epoch 00004: val_loss did not improve from 0.16377

Epoch 00005: val_loss did not improve from 0.16377

Epoch 00006: val_loss did not improve from 0.16377

Epoch 00007: val_loss did not improve from 0.16377

Epoch 00008: val_loss did not improve from 0.16377

Epoch 00009: val_loss did not improve from 0.16377

Epoch 00010: val_loss did not improve from 0.16377





Epoch 00001: val_loss improved from inf to 0.02684, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.02684

Epoch 00003: val_loss did not improve from 0.02684

Epoch 00004: val_loss did not improve from 0.02684

Epoch 00005: val_loss did not improve from 0.02684

Epoch 00006: val_loss did not improve from 0.02684

Epoch 00007: val_loss did not improve from 0.02684

Epoch 00008: val_loss did not improve from 0.02684

Epoch 00009: val_loss did not improve from 0.02684

Epoch 00010: val_loss did not improve from 0.02684





Epoch 00001: val_loss improved from inf to 0.04152, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04152

Epoch 00003: val_loss did not improve from 0.04152

Epoch 00004: val_loss did not improve from 0.04152

Epoch 00005: val_loss did not improve from 0.04152

Epoch 00006: val_loss did not improve from 0.04152

Epoch 00007: val_loss did not improve from 0.04152

Epoch 00008: val_loss did not improve from 0.04152

Epoch 00009: val_loss did not improve from 0.04152

Epoch 00010: val_loss did not improve from 0.04152





Epoch 00001: val_loss improved from inf to 0.06816, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06816

Epoch 00003: val_loss did not improve from 0.06816

Epoch 00004: val_loss did not improve from 0.06816

Epoch 00005: val_loss did not improve from 0.06816

Epoch 00006: val_loss did not improve from 0.06816

Epoch 00007: val_loss did not improve from 0.06816

Epoch 00008: val_loss did not improve from 0.06816

Epoch 00009: val_loss did not improve from 0.06816

Epoch 00010: val_loss did not improve from 0.06816
AlgoCrossValIter - 3
Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
ou




Epoch 00001: val_loss improved from inf to 0.08093, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08093

Epoch 00003: val_loss did not improve from 0.08093

Epoch 00004: val_loss did not improve from 0.08093

Epoch 00005: val_loss did not improve from 0.08093

Epoch 00006: val_loss did not improve from 0.08093

Epoch 00007: val_loss did not improve from 0.08093

Epoch 00008: val_loss did not improve from 0.08093

Epoch 00009: val_loss did not improve from 0.08093

Epoch 00010: val_loss did not improve from 0.08093





Epoch 00001: val_loss improved from inf to 0.12521, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.12521

Epoch 00003: val_loss did not improve from 0.12521

Epoch 00004: val_loss did not improve from 0.12521

Epoch 00005: val_loss did not improve from 0.12521

Epoch 00006: val_loss did not improve from 0.12521

Epoch 00007: val_loss did not improve from 0.12521

Epoch 00008: val_loss did not improve from 0.12521

Epoch 00009: val_loss did not improve from 0.12521

Epoch 00010: val_loss did not improve from 0.12521

Epoch 00001: val_loss improved from inf to 0.09288, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09288

Epoch 00003: val_loss improved from 0.09288 to 0.07771, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.07771

Epoch 00005: val_loss did not improve from 0.07771

Epoch 00006: val_loss did not improve from 0.07771

Epoch 00007: val_loss did not improve from 0.0




Epoch 00001: val_loss improved from inf to 0.07783, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.07783 to 0.07259, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.07259

Epoch 00004: val_loss did not improve from 0.07259

Epoch 00005: val_loss did not improve from 0.07259

Epoch 00006: val_loss did not improve from 0.07259

Epoch 00007: val_loss did not improve from 0.07259

Epoch 00008: val_loss did not improve from 0.07259

Epoch 00009: val_loss did not improve from 0.07259

Epoch 00010: val_loss did not improve from 0.07259





Epoch 00001: val_loss improved from inf to 0.14646, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.14646

Epoch 00003: val_loss did not improve from 0.14646

Epoch 00004: val_loss did not improve from 0.14646

Epoch 00005: val_loss did not improve from 0.14646

Epoch 00006: val_loss did not improve from 0.14646

Epoch 00007: val_loss did not improve from 0.14646

Epoch 00008: val_loss did not improve from 0.14646

Epoch 00009: val_loss did not improve from 0.14646

Epoch 00010: val_loss did not improve from 0.14646





Epoch 00001: val_loss improved from inf to 0.01989, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.01989

Epoch 00003: val_loss did not improve from 0.01989

Epoch 00004: val_loss did not improve from 0.01989

Epoch 00005: val_loss did not improve from 0.01989

Epoch 00006: val_loss did not improve from 0.01989

Epoch 00007: val_loss did not improve from 0.01989

Epoch 00008: val_loss did not improve from 0.01989

Epoch 00009: val_loss did not improve from 0.01989

Epoch 00010: val_loss did not improve from 0.01989

Epoch 00001: val_loss improved from inf to 0.04750, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04750

Epoch 00003: val_loss did not improve from 0.04750

Epoch 00004: val_loss did not improve from 0.04750

Epoch 00005: val_loss improved from 0.04750 to 0.04445, saving model to best-model-conll.hdfs

Epoch 00006: val_loss did not improve from 0.04445

Epoch 00007: val_loss did not improve from 0.0




Epoch 00001: val_loss improved from inf to 0.06485, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06485

Epoch 00003: val_loss did not improve from 0.06485

Epoch 00004: val_loss did not improve from 0.06485

Epoch 00005: val_loss did not improve from 0.06485

Epoch 00006: val_loss did not improve from 0.06485

Epoch 00007: val_loss did not improve from 0.06485

Epoch 00008: val_loss did not improve from 0.06485

Epoch 00009: val_loss did not improve from 0.06485

Epoch 00010: val_loss did not improve from 0.06485




AlgoCrossValIter - 4
Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.30519, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.30519 to 0.26850, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.26850 to 0.25258, saving model to best-model-conll.hdfs

Epoch 00004: val_loss improved from 0.25258 to 0.24820, saving model to bes




Epoch 00001: val_loss improved from inf to 0.11403, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.11403 to 0.08352, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.08352

Epoch 00004: val_loss did not improve from 0.08352

Epoch 00005: val_loss did not improve from 0.08352

Epoch 00006: val_loss did not improve from 0.08352

Epoch 00007: val_loss did not improve from 0.08352

Epoch 00008: val_loss did not improve from 0.08352

Epoch 00009: val_loss did not improve from 0.08352

Epoch 00010: val_loss did not improve from 0.08352





Epoch 00001: val_loss improved from inf to 0.13694, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.13694 to 0.11520, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.11520

Epoch 00004: val_loss did not improve from 0.11520

Epoch 00005: val_loss did not improve from 0.11520

Epoch 00006: val_loss did not improve from 0.11520

Epoch 00007: val_loss did not improve from 0.11520

Epoch 00008: val_loss did not improve from 0.11520

Epoch 00009: val_loss did not improve from 0.11520

Epoch 00010: val_loss did not improve from 0.11520





Epoch 00001: val_loss improved from inf to 0.05523, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.05523 to 0.05457, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.05457

Epoch 00004: val_loss did not improve from 0.05457

Epoch 00005: val_loss did not improve from 0.05457

Epoch 00006: val_loss did not improve from 0.05457

Epoch 00007: val_loss did not improve from 0.05457

Epoch 00008: val_loss did not improve from 0.05457

Epoch 00009: val_loss did not improve from 0.05457

Epoch 00010: val_loss did not improve from 0.05457





Epoch 00001: val_loss improved from inf to 0.11583, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.11583

Epoch 00003: val_loss did not improve from 0.11583

Epoch 00004: val_loss did not improve from 0.11583

Epoch 00005: val_loss did not improve from 0.11583

Epoch 00006: val_loss did not improve from 0.11583

Epoch 00007: val_loss did not improve from 0.11583

Epoch 00008: val_loss did not improve from 0.11583

Epoch 00009: val_loss did not improve from 0.11583

Epoch 00010: val_loss did not improve from 0.11583





Epoch 00001: val_loss improved from inf to 0.05589, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05589

Epoch 00003: val_loss did not improve from 0.05589

Epoch 00004: val_loss did not improve from 0.05589

Epoch 00005: val_loss did not improve from 0.05589

Epoch 00006: val_loss did not improve from 0.05589

Epoch 00007: val_loss did not improve from 0.05589

Epoch 00008: val_loss did not improve from 0.05589

Epoch 00009: val_loss did not improve from 0.05589

Epoch 00010: val_loss did not improve from 0.05589





Epoch 00001: val_loss improved from inf to 0.14171, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.14171

Epoch 00003: val_loss did not improve from 0.14171

Epoch 00004: val_loss did not improve from 0.14171

Epoch 00005: val_loss did not improve from 0.14171

Epoch 00006: val_loss did not improve from 0.14171

Epoch 00007: val_loss did not improve from 0.14171

Epoch 00008: val_loss did not improve from 0.14171

Epoch 00009: val_loss did not improve from 0.14171

Epoch 00010: val_loss did not improve from 0.14171





Epoch 00001: val_loss improved from inf to 0.02181, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.02181

Epoch 00003: val_loss did not improve from 0.02181

Epoch 00004: val_loss did not improve from 0.02181

Epoch 00005: val_loss did not improve from 0.02181

Epoch 00006: val_loss did not improve from 0.02181

Epoch 00007: val_loss did not improve from 0.02181

Epoch 00008: val_loss did not improve from 0.02181

Epoch 00009: val_loss did not improve from 0.02181

Epoch 00010: val_loss did not improve from 0.02181





Epoch 00001: val_loss improved from inf to 0.05861, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.05861 to 0.03394, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.03394

Epoch 00004: val_loss did not improve from 0.03394

Epoch 00005: val_loss did not improve from 0.03394

Epoch 00006: val_loss did not improve from 0.03394

Epoch 00007: val_loss did not improve from 0.03394

Epoch 00008: val_loss did not improve from 0.03394

Epoch 00009: val_loss did not improve from 0.03394

Epoch 00010: val_loss did not improve from 0.03394





Epoch 00001: val_loss improved from inf to 0.06559, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06559

Epoch 00003: val_loss did not improve from 0.06559

Epoch 00004: val_loss did not improve from 0.06559

Epoch 00005: val_loss did not improve from 0.06559

Epoch 00006: val_loss did not improve from 0.06559

Epoch 00007: val_loss did not improve from 0.06559

Epoch 00008: val_loss did not improve from 0.06559

Epoch 00009: val_loss did not improve from 0.06559

Epoch 00010: val_loss did not improve from 0.06559





Epoch 00001: val_loss improved from inf to 0.09761, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09761

Epoch 00003: val_loss did not improve from 0.09761

Epoch 00004: val_loss did not improve from 0.09761

Epoch 00005: val_loss did not improve from 0.09761

Epoch 00006: val_loss did not improve from 0.09761

Epoch 00007: val_loss did not improve from 0.09761

Epoch 00008: val_loss did not improve from 0.09761

Epoch 00009: val_loss did not improve from 0.09761

Epoch 00010: val_loss did not improve from 0.09761
AlgoCrossValIter - 5
Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
ou




Epoch 00001: val_loss improved from inf to 0.08195, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08195

Epoch 00003: val_loss did not improve from 0.08195

Epoch 00004: val_loss did not improve from 0.08195

Epoch 00005: val_loss did not improve from 0.08195

Epoch 00006: val_loss did not improve from 0.08195

Epoch 00007: val_loss did not improve from 0.08195

Epoch 00008: val_loss did not improve from 0.08195

Epoch 00009: val_loss did not improve from 0.08195

Epoch 00010: val_loss did not improve from 0.08195





Epoch 00001: val_loss improved from inf to 0.08471, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08471

Epoch 00003: val_loss did not improve from 0.08471

Epoch 00004: val_loss did not improve from 0.08471

Epoch 00005: val_loss did not improve from 0.08471

Epoch 00006: val_loss did not improve from 0.08471

Epoch 00007: val_loss did not improve from 0.08471

Epoch 00008: val_loss did not improve from 0.08471

Epoch 00009: val_loss did not improve from 0.08471

Epoch 00010: val_loss did not improve from 0.08471





Epoch 00001: val_loss improved from inf to 0.06860, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06860

Epoch 00003: val_loss did not improve from 0.06860

Epoch 00004: val_loss did not improve from 0.06860

Epoch 00005: val_loss did not improve from 0.06860

Epoch 00006: val_loss did not improve from 0.06860

Epoch 00007: val_loss did not improve from 0.06860

Epoch 00008: val_loss did not improve from 0.06860

Epoch 00009: val_loss did not improve from 0.06860

Epoch 00010: val_loss did not improve from 0.06860

Epoch 00001: val_loss improved from inf to 0.16444, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.16444 to 0.14140, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.14140

Epoch 00004: val_loss did not improve from 0.14140

Epoch 00005: val_loss did not improve from 0.14140

Epoch 00006: val_loss did not improve from 0.14140

Epoch 00007: val_loss did not improve from 0.1




Epoch 00001: val_loss improved from inf to 0.05643, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05643

Epoch 00003: val_loss did not improve from 0.05643

Epoch 00004: val_loss did not improve from 0.05643

Epoch 00005: val_loss did not improve from 0.05643

Epoch 00006: val_loss did not improve from 0.05643

Epoch 00007: val_loss did not improve from 0.05643

Epoch 00008: val_loss did not improve from 0.05643

Epoch 00009: val_loss did not improve from 0.05643

Epoch 00010: val_loss did not improve from 0.05643





Epoch 00001: val_loss improved from inf to 0.15460, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.15460

Epoch 00003: val_loss did not improve from 0.15460

Epoch 00004: val_loss did not improve from 0.15460

Epoch 00005: val_loss did not improve from 0.15460

Epoch 00006: val_loss did not improve from 0.15460

Epoch 00007: val_loss did not improve from 0.15460

Epoch 00008: val_loss did not improve from 0.15460

Epoch 00009: val_loss did not improve from 0.15460

Epoch 00010: val_loss did not improve from 0.15460





Epoch 00001: val_loss improved from inf to 0.02117, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.02117

Epoch 00003: val_loss did not improve from 0.02117

Epoch 00004: val_loss did not improve from 0.02117

Epoch 00005: val_loss did not improve from 0.02117

Epoch 00006: val_loss did not improve from 0.02117

Epoch 00007: val_loss did not improve from 0.02117

Epoch 00008: val_loss did not improve from 0.02117

Epoch 00009: val_loss did not improve from 0.02117

Epoch 00010: val_loss did not improve from 0.02117

Epoch 00001: val_loss improved from inf to 0.04101, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.04101 to 0.03415, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.03415

Epoch 00004: val_loss did not improve from 0.03415

Epoch 00005: val_loss did not improve from 0.03415

Epoch 00006: val_loss did not improve from 0.03415

Epoch 00007: val_loss did not improve from 0.0




Epoch 00001: val_loss improved from inf to 0.06650, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06650

Epoch 00003: val_loss did not improve from 0.06650

Epoch 00004: val_loss did not improve from 0.06650

Epoch 00005: val_loss did not improve from 0.06650

Epoch 00006: val_loss did not improve from 0.06650

Epoch 00007: val_loss did not improve from 0.06650

Epoch 00008: val_loss did not improve from 0.06650

Epoch 00009: val_loss did not improve from 0.06650

Epoch 00010: val_loss did not improve from 0.06650
AlgoCrossValIter - 6
Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
ou




Epoch 00001: val_loss improved from inf to 0.08738, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08738

Epoch 00003: val_loss did not improve from 0.08738

Epoch 00004: val_loss did not improve from 0.08738

Epoch 00005: val_loss did not improve from 0.08738

Epoch 00006: val_loss did not improve from 0.08738

Epoch 00007: val_loss did not improve from 0.08738

Epoch 00008: val_loss did not improve from 0.08738

Epoch 00009: val_loss did not improve from 0.08738

Epoch 00010: val_loss did not improve from 0.08738





Epoch 00001: val_loss improved from inf to 0.12287, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.12287

Epoch 00003: val_loss did not improve from 0.12287

Epoch 00004: val_loss did not improve from 0.12287

Epoch 00005: val_loss did not improve from 0.12287

Epoch 00006: val_loss did not improve from 0.12287

Epoch 00007: val_loss did not improve from 0.12287

Epoch 00008: val_loss did not improve from 0.12287

Epoch 00009: val_loss did not improve from 0.12287

Epoch 00010: val_loss did not improve from 0.12287

Epoch 00001: val_loss improved from inf to 0.06153, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06153

Epoch 00003: val_loss did not improve from 0.06153

Epoch 00004: val_loss did not improve from 0.06153

Epoch 00005: val_loss did not improve from 0.06153

Epoch 00006: val_loss did not improve from 0.06153

Epoch 00007: val_loss did not improve from 0.06153

Epoch 00008: val_loss did not improve




Epoch 00001: val_loss improved from inf to 0.05013, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05013

Epoch 00003: val_loss did not improve from 0.05013

Epoch 00004: val_loss did not improve from 0.05013

Epoch 00005: val_loss did not improve from 0.05013

Epoch 00006: val_loss did not improve from 0.05013

Epoch 00007: val_loss did not improve from 0.05013

Epoch 00008: val_loss did not improve from 0.05013

Epoch 00009: val_loss did not improve from 0.05013

Epoch 00010: val_loss did not improve from 0.05013





Epoch 00001: val_loss improved from inf to 0.13882, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.13882

Epoch 00003: val_loss did not improve from 0.13882

Epoch 00004: val_loss did not improve from 0.13882

Epoch 00005: val_loss did not improve from 0.13882

Epoch 00006: val_loss did not improve from 0.13882

Epoch 00007: val_loss did not improve from 0.13882

Epoch 00008: val_loss did not improve from 0.13882

Epoch 00009: val_loss did not improve from 0.13882

Epoch 00010: val_loss did not improve from 0.13882





Epoch 00001: val_loss improved from inf to 0.02605, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.02605

Epoch 00003: val_loss did not improve from 0.02605

Epoch 00004: val_loss did not improve from 0.02605

Epoch 00005: val_loss did not improve from 0.02605

Epoch 00006: val_loss did not improve from 0.02605

Epoch 00007: val_loss did not improve from 0.02605

Epoch 00008: val_loss did not improve from 0.02605

Epoch 00009: val_loss did not improve from 0.02605

Epoch 00010: val_loss did not improve from 0.02605





Epoch 00001: val_loss improved from inf to 0.04456, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04456

Epoch 00003: val_loss did not improve from 0.04456

Epoch 00004: val_loss did not improve from 0.04456

Epoch 00005: val_loss did not improve from 0.04456

Epoch 00006: val_loss did not improve from 0.04456

Epoch 00007: val_loss did not improve from 0.04456

Epoch 00008: val_loss did not improve from 0.04456

Epoch 00009: val_loss improved from 0.04456 to 0.04427, saving model to best-model-conll.hdfs

Epoch 00010: val_loss did not improve from 0.04427





Epoch 00001: val_loss improved from inf to 0.06752, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06752

Epoch 00003: val_loss did not improve from 0.06752

Epoch 00004: val_loss did not improve from 0.06752

Epoch 00005: val_loss did not improve from 0.06752

Epoch 00006: val_loss did not improve from 0.06752

Epoch 00007: val_loss did not improve from 0.06752

Epoch 00008: val_loss did not improve from 0.06752

Epoch 00009: val_loss did not improve from 0.06752

Epoch 00010: val_loss did not improve from 0.06752
AlgoCrossValIter - 7
Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
ou




Epoch 00001: val_loss improved from inf to 0.08683, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08683

Epoch 00003: val_loss did not improve from 0.08683

Epoch 00004: val_loss did not improve from 0.08683

Epoch 00005: val_loss did not improve from 0.08683

Epoch 00006: val_loss did not improve from 0.08683

Epoch 00007: val_loss did not improve from 0.08683

Epoch 00008: val_loss did not improve from 0.08683

Epoch 00009: val_loss did not improve from 0.08683

Epoch 00010: val_loss did not improve from 0.08683





Epoch 00001: val_loss improved from inf to 0.15137, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.15137 to 0.11146, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.11146

Epoch 00004: val_loss did not improve from 0.11146

Epoch 00005: val_loss did not improve from 0.11146

Epoch 00006: val_loss did not improve from 0.11146

Epoch 00007: val_loss did not improve from 0.11146

Epoch 00008: val_loss did not improve from 0.11146

Epoch 00009: val_loss did not improve from 0.11146

Epoch 00010: val_loss did not improve from 0.11146

Epoch 00001: val_loss improved from inf to 0.10641, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.10641 to 0.07489, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.07489 to 0.07430, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.07430

Epoch 00005: val_loss did not improve from 0.07430

Epoch 00006:




Epoch 00001: val_loss improved from inf to 0.06029, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06029

Epoch 00003: val_loss did not improve from 0.06029

Epoch 00004: val_loss did not improve from 0.06029

Epoch 00005: val_loss did not improve from 0.06029

Epoch 00006: val_loss did not improve from 0.06029

Epoch 00007: val_loss did not improve from 0.06029

Epoch 00008: val_loss did not improve from 0.06029

Epoch 00009: val_loss did not improve from 0.06029

Epoch 00010: val_loss did not improve from 0.06029





Epoch 00001: val_loss improved from inf to 0.15110, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.15110

Epoch 00003: val_loss did not improve from 0.15110

Epoch 00004: val_loss did not improve from 0.15110

Epoch 00005: val_loss did not improve from 0.15110

Epoch 00006: val_loss did not improve from 0.15110

Epoch 00007: val_loss did not improve from 0.15110

Epoch 00008: val_loss did not improve from 0.15110

Epoch 00009: val_loss did not improve from 0.15110

Epoch 00010: val_loss did not improve from 0.15110





Epoch 00001: val_loss improved from inf to 0.03737, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.03737 to 0.03187, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.03187

Epoch 00004: val_loss did not improve from 0.03187

Epoch 00005: val_loss did not improve from 0.03187

Epoch 00006: val_loss did not improve from 0.03187

Epoch 00007: val_loss did not improve from 0.03187

Epoch 00008: val_loss did not improve from 0.03187

Epoch 00009: val_loss did not improve from 0.03187

Epoch 00010: val_loss did not improve from 0.03187

Epoch 00001: val_loss improved from inf to 0.04569, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04569

Epoch 00003: val_loss improved from 0.04569 to 0.04336, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.04336

Epoch 00005: val_loss did not improve from 0.04336

Epoch 00006: val_loss improved from 0.04336 to 0.04224,




Epoch 00001: val_loss improved from inf to 0.07363, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07363

Epoch 00003: val_loss did not improve from 0.07363

Epoch 00004: val_loss did not improve from 0.07363

Epoch 00005: val_loss did not improve from 0.07363

Epoch 00006: val_loss did not improve from 0.07363

Epoch 00007: val_loss did not improve from 0.07363

Epoch 00008: val_loss did not improve from 0.07363

Epoch 00009: val_loss did not improve from 0.07363

Epoch 00010: val_loss did not improve from 0.07363




AlgoCrossValIter - 8
Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.27183, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.27183

Epoch 00003: val_loss did not improve from 0.27183

Epoch 00004: val_loss improved from 0.27183 to 0.24617, saving model to best-model-conll.hdfs

Epoch 00005: val_loss did not improve from 0.24617

Epoch 00006: v




Epoch 00001: val_loss improved from inf to 0.07984, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07984

Epoch 00003: val_loss did not improve from 0.07984

Epoch 00004: val_loss did not improve from 0.07984

Epoch 00005: val_loss did not improve from 0.07984

Epoch 00006: val_loss did not improve from 0.07984

Epoch 00007: val_loss did not improve from 0.07984

Epoch 00008: val_loss did not improve from 0.07984

Epoch 00009: val_loss did not improve from 0.07984

Epoch 00010: val_loss did not improve from 0.07984





Epoch 00001: val_loss improved from inf to 0.12785, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.12785 to 0.10458, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.10458 to 0.09434, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.09434

Epoch 00005: val_loss did not improve from 0.09434

Epoch 00006: val_loss did not improve from 0.09434

Epoch 00007: val_loss did not improve from 0.09434

Epoch 00008: val_loss did not improve from 0.09434

Epoch 00009: val_loss did not improve from 0.09434

Epoch 00010: val_loss did not improve from 0.09434

Epoch 00001: val_loss improved from inf to 0.07530, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07530

Epoch 00003: val_loss did not improve from 0.07530

Epoch 00004: val_loss did not improve from 0.07530

Epoch 00005: val_loss did not improve from 0.07530

Epoch 00006: val_loss did not improve from 0.07530

Epo




Epoch 00001: val_loss improved from inf to 0.11512, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.11512

Epoch 00003: val_loss did not improve from 0.11512

Epoch 00004: val_loss did not improve from 0.11512

Epoch 00005: val_loss did not improve from 0.11512

Epoch 00006: val_loss did not improve from 0.11512

Epoch 00007: val_loss did not improve from 0.11512

Epoch 00008: val_loss did not improve from 0.11512

Epoch 00009: val_loss did not improve from 0.11512

Epoch 00010: val_loss did not improve from 0.11512

Epoch 00001: val_loss improved from inf to 0.05039, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05039

Epoch 00003: val_loss did not improve from 0.05039

Epoch 00004: val_loss did not improve from 0.05039

Epoch 00005: val_loss did not improve from 0.05039

Epoch 00006: val_loss did not improve from 0.05039

Epoch 00007: val_loss did not improve from 0.05039

Epoch 00008: val_loss did not improve




Epoch 00001: val_loss improved from inf to 0.14149, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.14149

Epoch 00003: val_loss did not improve from 0.14149

Epoch 00004: val_loss did not improve from 0.14149

Epoch 00005: val_loss did not improve from 0.14149

Epoch 00006: val_loss did not improve from 0.14149

Epoch 00007: val_loss did not improve from 0.14149

Epoch 00008: val_loss did not improve from 0.14149

Epoch 00009: val_loss did not improve from 0.14149

Epoch 00010: val_loss did not improve from 0.14149





Epoch 00001: val_loss improved from inf to 0.03200, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.03200 to 0.03094, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.03094 to 0.02664, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.02664

Epoch 00005: val_loss did not improve from 0.02664

Epoch 00006: val_loss did not improve from 0.02664

Epoch 00007: val_loss did not improve from 0.02664

Epoch 00008: val_loss did not improve from 0.02664

Epoch 00009: val_loss did not improve from 0.02664

Epoch 00010: val_loss did not improve from 0.02664

Epoch 00001: val_loss improved from inf to 0.04221, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.04221 to 0.04051, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.04051

Epoch 00004: val_loss did not improve from 0.04051

Epoch 00005: val_loss did not improve from 0.04051

Epoch 00006:



AlgoCrossValIter - 9
Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.28154, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.28154

Epoch 00003: val_loss did not improve from 0.28154

Epoch 00004: val_loss did not improve from 0.28154

Epoch 00005: val_loss improved from 0.28154 to 0.26731, saving model to best-model-conll.hdfs

Epoch 00006: v




Epoch 00001: val_loss improved from inf to 0.09206, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.09206 to 0.08798, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.08798

Epoch 00004: val_loss did not improve from 0.08798

Epoch 00005: val_loss did not improve from 0.08798

Epoch 00006: val_loss did not improve from 0.08798

Epoch 00007: val_loss did not improve from 0.08798

Epoch 00008: val_loss did not improve from 0.08798

Epoch 00009: val_loss did not improve from 0.08798

Epoch 00010: val_loss did not improve from 0.08798





Epoch 00001: val_loss improved from inf to 0.21540, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.21540 to 0.13632, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.13632 to 0.11826, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.11826

Epoch 00005: val_loss did not improve from 0.11826

Epoch 00006: val_loss did not improve from 0.11826

Epoch 00007: val_loss did not improve from 0.11826

Epoch 00008: val_loss did not improve from 0.11826

Epoch 00009: val_loss did not improve from 0.11826

Epoch 00010: val_loss did not improve from 0.11826





Epoch 00001: val_loss improved from inf to 0.05625, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05625

Epoch 00003: val_loss improved from 0.05625 to 0.05362, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.05362

Epoch 00005: val_loss did not improve from 0.05362

Epoch 00006: val_loss did not improve from 0.05362

Epoch 00007: val_loss did not improve from 0.05362

Epoch 00008: val_loss did not improve from 0.05362

Epoch 00009: val_loss did not improve from 0.05362

Epoch 00010: val_loss did not improve from 0.05362

Epoch 00001: val_loss improved from inf to 0.11795, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.11795

Epoch 00003: val_loss did not improve from 0.11795

Epoch 00004: val_loss did not improve from 0.11795

Epoch 00005: val_loss did not improve from 0.11795

Epoch 00006: val_loss did not improve from 0.11795

Epoch 00007: val_loss did not improve from 0.1




Epoch 00001: val_loss improved from inf to 0.16257, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.16257

Epoch 00003: val_loss did not improve from 0.16257

Epoch 00004: val_loss did not improve from 0.16257

Epoch 00005: val_loss did not improve from 0.16257

Epoch 00006: val_loss did not improve from 0.16257

Epoch 00007: val_loss did not improve from 0.16257

Epoch 00008: val_loss did not improve from 0.16257

Epoch 00009: val_loss did not improve from 0.16257

Epoch 00010: val_loss did not improve from 0.16257





Epoch 00001: val_loss improved from inf to 0.02151, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.02151

Epoch 00003: val_loss did not improve from 0.02151

Epoch 00004: val_loss did not improve from 0.02151

Epoch 00005: val_loss did not improve from 0.02151

Epoch 00006: val_loss did not improve from 0.02151

Epoch 00007: val_loss did not improve from 0.02151

Epoch 00008: val_loss did not improve from 0.02151

Epoch 00009: val_loss did not improve from 0.02151

Epoch 00010: val_loss did not improve from 0.02151

Epoch 00001: val_loss improved from inf to 0.04207, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04207

Epoch 00003: val_loss improved from 0.04207 to 0.03951, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.03951

Epoch 00005: val_loss did not improve from 0.03951

Epoch 00006: val_loss did not improve from 0.03951

Epoch 00007: val_loss did not improve from 0.0




Epoch 00001: val_loss improved from inf to 0.07055, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07055

Epoch 00003: val_loss did not improve from 0.07055

Epoch 00004: val_loss did not improve from 0.07055

Epoch 00005: val_loss did not improve from 0.07055

Epoch 00006: val_loss did not improve from 0.07055

Epoch 00007: val_loss did not improve from 0.07055

Epoch 00008: val_loss did not improve from 0.07055

Epoch 00009: val_loss did not improve from 0.07055

Epoch 00010: val_loss did not improve from 0.07055




AlgoCrossValIter - 10
Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.28832, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.28832 to 0.25328, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.25328

Epoch 00004: val_loss improved from 0.25328 to 0.23485, saving model to best-model-conll.hdfs

Epoch 00005: val_loss




Epoch 00001: val_loss improved from inf to 0.08560, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08560

Epoch 00003: val_loss did not improve from 0.08560

Epoch 00004: val_loss did not improve from 0.08560

Epoch 00005: val_loss did not improve from 0.08560

Epoch 00006: val_loss did not improve from 0.08560

Epoch 00007: val_loss did not improve from 0.08560

Epoch 00008: val_loss did not improve from 0.08560

Epoch 00009: val_loss did not improve from 0.08560

Epoch 00010: val_loss did not improve from 0.08560





Epoch 00001: val_loss improved from inf to 0.08252, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08252

Epoch 00003: val_loss did not improve from 0.08252

Epoch 00004: val_loss did not improve from 0.08252

Epoch 00005: val_loss did not improve from 0.08252

Epoch 00006: val_loss did not improve from 0.08252

Epoch 00007: val_loss did not improve from 0.08252

Epoch 00008: val_loss did not improve from 0.08252

Epoch 00009: val_loss did not improve from 0.08252

Epoch 00010: val_loss did not improve from 0.08252

Epoch 00001: val_loss improved from inf to 0.09320, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.09320 to 0.05967, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.05967

Epoch 00004: val_loss did not improve from 0.05967

Epoch 00005: val_loss did not improve from 0.05967

Epoch 00006: val_loss did not improve from 0.05967

Epoch 00007: val_loss did not improve from 0.0




Epoch 00001: val_loss improved from inf to 0.13099, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.13099 to 0.12896, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.12896

Epoch 00004: val_loss did not improve from 0.12896

Epoch 00005: val_loss did not improve from 0.12896

Epoch 00006: val_loss did not improve from 0.12896

Epoch 00007: val_loss did not improve from 0.12896

Epoch 00008: val_loss did not improve from 0.12896

Epoch 00009: val_loss did not improve from 0.12896

Epoch 00010: val_loss did not improve from 0.12896

Epoch 00001: val_loss improved from inf to 0.11199, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.11199 to 0.06618, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.06618

Epoch 00004: val_loss did not improve from 0.06618

Epoch 00005: val_loss did not improve from 0.06618

Epoch 00006: val_loss did not improve from 0.06618

Epo




Epoch 00001: val_loss improved from inf to 0.18500, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.18500 to 0.18020, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.18020

Epoch 00004: val_loss did not improve from 0.18020

Epoch 00005: val_loss did not improve from 0.18020

Epoch 00006: val_loss did not improve from 0.18020

Epoch 00007: val_loss did not improve from 0.18020

Epoch 00008: val_loss did not improve from 0.18020

Epoch 00009: val_loss did not improve from 0.18020

Epoch 00010: val_loss did not improve from 0.18020





Epoch 00001: val_loss improved from inf to 0.01547, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.01547

Epoch 00003: val_loss did not improve from 0.01547

Epoch 00004: val_loss did not improve from 0.01547

Epoch 00005: val_loss did not improve from 0.01547

Epoch 00006: val_loss did not improve from 0.01547

Epoch 00007: val_loss did not improve from 0.01547

Epoch 00008: val_loss did not improve from 0.01547

Epoch 00009: val_loss did not improve from 0.01547

Epoch 00010: val_loss did not improve from 0.01547

Epoch 00001: val_loss improved from inf to 0.04193, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04193

Epoch 00003: val_loss did not improve from 0.04193

Epoch 00004: val_loss improved from 0.04193 to 0.03771, saving model to best-model-conll.hdfs

Epoch 00005: val_loss did not improve from 0.03771

Epoch 00006: val_loss did not improve from 0.03771

Epoch 00007: val_loss did not improve from 0.0



In [65]:
resultCrossVal.to_csv("results.csv")
resultCrossVal

Unnamed: 0,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9
P_val,82.069,85.724,79.706,82.784,84.504,84.769,78.601,85.746,81.353,81.553
P_train,87.964,87.661,86.345,88.557,87.611,87.581,86.966,87.588,87.307,87.454
P_ewo,80.936,77.373,76.763,78.735,78.116,79.0,76.332,77.473,76.939,78.176
R_val,82.089,78.792,81.879,80.776,81.982,77.234,79.816,80.175,82.032,83.941
R_train,82.289,82.166,85.766,81.329,82.101,79.348,85.231,82.098,85.89,83.967
R_ewo,66.109,64.351,70.555,66.481,67.316,65.0,68.981,66.943,69.352,69.353
F1-val,81.29,78.416,80.168,80.998,82.181,79.52,78.823,79.293,81.113,82.181
F1-train,84.596,83.967,85.724,84.5,84.147,82.033,85.731,83.989,86.449,85.297
F1-ewo,72.475,69.507,72.823,71.702,71.659,69.814,71.889,70.949,72.672,73.072


In [66]:
resultCrossVal.mean(axis=1).to_frame()

Unnamed: 0,0
P_val,82.6809
P_train,87.5034
P_ewo,77.9843
R_val,80.8716
R_train,83.0185
R_ewo,67.4441
F1-val,80.3983
F1-train,84.6433
F1-ewo,71.6562


In [67]:
resultCrossVal.std(axis=1).to_frame()

Unnamed: 0,0
P_val,2.477797
P_train,0.581523
P_ewo,1.34624
R_val,1.934
R_train,2.130835
R_ewo,2.050534
F1-val,1.353137
F1-train,1.247536
F1-ewo,1.230961


In [68]:
trainByTagResult.to_csv("results/train-by-tag.csv")
trainByTagResult

Unnamed: 0,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9
F1-O,98.088,98.05,98.132,98.076,98.041,97.866,98.156,98.03,98.231,98.125
P-O,97.671,97.663,98.131,97.546,97.651,97.324,98.052,97.645,98.128,97.881
R-O,98.524,98.467,98.149,98.623,98.456,98.456,98.274,98.44,98.342,98.384
F1-MISC,73.673,78.417778,76.274,73.819,75.54,78.245556,75.33,78.56,77.624,77.568
P-MISC,94.25,87.5,91.261,96.528,93.889,87.639,90.102,83.523,88.301,91.301
R-MISC,63.539,59.562,67.127,62.198,65.448,59.721,68.627,63.062,71.647,69.175
F1-PER,88.399,87.719,88.952,88.459,88.1,85.786,89.18,88.041,89.762,88.593
P-PER,88.692,87.812,86.538,88.225,88.463,87.735,87.418,88.541,88.016,88.581
R-PER,88.399,88.491,92.004,89.015,88.269,85.448,91.514,88.111,91.723,89.032
F1-LOC,80.464444,81.544444,77.421,77.877778,77.55,78.494444,75.88,79.536667,77.488,81.992222


In [69]:
trainByTagResult.mean(axis=1).to_frame()

Unnamed: 0,0
F1-O,98.0795
P-O,97.7692
R-O,98.4115
F1-MISC,76.505133
P-MISC,90.4294
R-MISC,65.0106
F1-PER,88.2991
P-PER,88.0021
R-PER,89.2006
F1-LOC,78.8249


In [70]:
trainByTagResult.std(axis=1).to_frame()

Unnamed: 0,0
F1-O,0.096667
P-O,0.268835
R-O,0.132766
F1-MISC,1.852729
P-MISC,3.857364
R-MISC,4.091635
F1-PER,1.065766
P-PER,0.662994
R-PER,2.025379
F1-LOC,1.991227


In [71]:
testByTagResult.to_csv("results/test-by-tag.csv")
testByTagResult

Unnamed: 0,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9
F1-O,97.93,97.938,97.854,97.935,97.922,98.003,97.798,97.865,97.797,98.047
P-O,97.754,97.544,98.005,97.636,97.624,97.552,97.873,97.661,97.743,98.112
R-O,98.125,98.376,97.72,98.251,98.251,98.485,97.734,98.102,97.861,97.99
F1-MISC,61.111111,61.111111,63.0,61.111111,61.111111,60.0,60.0,60.0,60.0,60.0
P-MISC,60.0,60.0,66.667,60.0,60.0,65.0,65.0,65.0,65.0,65.0
R-MISC,53.333,53.333,63.333,53.333,53.333,58.333,58.333,58.333,58.333,58.333
F1-PER,82.186,80.446,79.54,81.432,83.521,81.731,82.208,79.406,83.122,83.469
P-PER,80.912,84.932,77.614,80.023,83.685,84.644,78.667,85.038,81.094,81.023
R-PER,86.4,85.083,84.845,85.844,86.94,83.178,88.94,82.988,87.829,88.94
F1-LOC,68.667,64.667,67.334,69.334,65.334,63.704444,55.926667,68.667,62.667,65.334


In [72]:
testByTagResult.mean(axis=1).to_frame()

Unnamed: 0,0
F1-O,97.9089
P-O,97.7504
R-O,98.0895
F1-MISC,60.744444
P-MISC,63.1667
R-MISC,56.833
F1-PER,81.7061
P-PER,81.7632
R-PER,86.0987
F1-LOC,65.163511


In [73]:
testByTagResult.std(axis=1).to_frame()

Unnamed: 0,0
F1-O,0.081536
P-O,0.191397
R-O,0.261854
F1-MISC,0.965879
P-MISC,2.772263
R-MISC,3.374743
F1-PER,1.510508
P-PER,2.673092
R-PER,2.12952
F1-LOC,3.953033


In [74]:
ewoByTagResult.to_csv("results/ewo-by-tag.csv")

In [75]:
ewoByTagResult = pd.read_csv("results/ewo-by-tag.csv", index_col=0)
ewoByTagResult

Unnamed: 0,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9
F1-O,97.248,96.926,97.12,97.149,97.149,97.026,97.082,97.073,97.162,97.253
P-O,96.233,96.03,96.798,96.316,96.425,96.2,96.627,96.376,96.645,96.657
R-O,98.296,97.855,97.475,98.01,97.901,97.911,97.562,97.803,97.694,97.869
F1-MISC,42.665556,40.977778,48.041,43.286667,47.904444,49.494444,46.21,52.784444,54.275,50.192
P-MISC,57.336,60.003,64.924,60.003,60.003,63.097,62.079,61.153,64.924,65.003
R-MISC,30.0,27.5,41.25,30.0,35.0,36.25,42.5,41.25,48.75,42.5
F1-PER,79.054,75.679,78.402,78.603,77.903,76.243,78.035,76.543,77.992,78.32
P-PER,82.103,77.698,76.964,78.37,79.027,79.832,77.669,78.277,77.322,78.748
R-PER,76.5,74.875,81.125,79.625,77.75,75.25,79.25,75.875,79.125,78.625
F1-LOC,60.317778,57.435556,55.901,50.177778,57.75,53.504444,52.064,60.253333,55.072,64.368889


In [76]:
ewoByTagResult.mean(axis=1).to_frame()

Unnamed: 0,0
F1-O,97.1188
P-O,96.4307
R-O,97.8376
F1-MISC,47.583133
P-MISC,61.8525
R-MISC,37.5
F1-PER,77.6774
P-PER,78.601
R-PER,77.8
F1-LOC,56.684478


In [77]:
ewoByTagResult.std(axis=1).to_frame()

Unnamed: 0,0
F1-O,0.098674
P-O,0.244865
R-O,0.230892
F1-MISC,4.355907
P-MISC,2.614999
R-MISC,6.871843
F1-PER,1.120786
P-PER,1.495936
R-PER,2.091816
F1-LOC,4.26484


In [78]:
columns = en_fingerprints.columns

print("Pred", "Real", "Freq", "Word", sep="\t")
for c in columns:
    prediction = model.predict(en_fingerprints[c].values.reshape((1, 210)))
    pred_tag = int2tag[np.argmax(prediction)]
    real_tag = en_corpus[en_corpus.word == c].iloc[0]['ne-tag']
    
    if pred_tag != real_tag:
        print(pred_tag, real_tag, en_fingerprints[c].max(), c, sep="\t")

Pred	Real	Freq	Word
O	LOC	4170.0	Samaria
O	LOC	4170.0	Olivet
O	MISC	4170.0	Sabbath
PER	O	4170.0	upper
PER	O	4170.0	room
PER	O	4170.0	where
PER	O	4170.0	Zealot
LOC	O	4170.0	so
LOC	O	4170.0	language
O	MISC	4170.0	Psalms
PER	O	4170.0	forward
MISC	O	4170.0	going
O	PER	4170.0	Moses
PER	O	4170.0	proclaimed
ORG	O	4170.0	captain
PER	O	4170.0	high-priestly
PER	O	4170.0	family
O	PER	4170.0	Pontius
O	PER	4170.0	Barnabas
O	LOC	4170.0	Cyprus
O	PER	4170.0	Elijah
O	MISC	4170.0	r
PER	O	4170.0	Ju
PER	O	4170.0	h
LOC	O	4170.0	deportation
PER	O	4170.0	us)
