In [292]:
# import
import keras
import sys
import numpy as np
import string
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import SimpleRNN, Dense
from keras.utils import np_utils, plot_model
from keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn import model_selection
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, precision_recall_fscore_support
import h5py as h5py

In [378]:
# if we are doeing binary classification. That means say if a token is a named entity or not
BINARY = False

# number of epochs for training
epochs = 10 

# the english side of the corpus
en_corpus_file = "corpus-en.txt"

# the ewondo side of the corpus
ewo_corpus_file = "corpus-ewo.txt"

# name of the file to same the model 
best_model_file = "best-model-conll.hdfs"

# The maximal number of phrases to use
max_nb_of_phrases =  -1

# the maximal number of duplicates for each word in the corpus
duplication = 1

# wether we are using only the vocabulary, ro redundancy
is_only_vocab = True

# if word should be shuffle or not
shuffle = is_only_vocab

# normalization strategy
# log, max, mean_log, 
# None: for no normalization
normalization_strategy = None

# if we are using the Zennaki et al. signature
is_zennaki = False

# the number of neurons in the first layer
h1_size = 640

# number of neurons in the second layer
h2_size = 160  

In [379]:
def getTag(aString):
    """
        convert a string to a tag
    """
    tag = "O"
    if BINARY:
        if aString != "O":
            return "NE"
    else:
        tag = aString
    return tag
     

In [380]:
def load_corpus(file, max_nb_of_phrases):
    """
    Load a corpus stored in a file
    Input:
        - file: the name of the file of the corpus
        - max_nb_of_phases: maximal number of phrases to load
    
    Return:
        - a DataFrame representing the corpus
        - the number of phrases in the corpus
    """
    nb_of_phrases = 0
    dataset = {"word": [], "ne-tag": []}
    with open(file) as f:
        prev_line = None
        for cpt, line in enumerate(f):
            if cpt == 0:
                continue
            if nb_of_phrases == max_nb_of_phrases:
                break;

            l = line.strip()
            if len(l) == 0 and len(prev_line) != 0:
                nb_of_phrases += 1
                dataset["word"].append(line)
                dataset["ne-tag"].append(None)
            else:
                l = l.split("\t")
                if l[0] not in string.punctuation:
                    dataset["word"].append(l[0])
                    dataset["ne-tag"].append(ne_type(l[1]))
            prev_line = line.strip()
        
    return pd.DataFrame(dataset), nb_of_phrases+1

In [381]:
def log_normalization(fingerprints):
    fingerprints[fingerprints > 0] = 1 + np.log(fingerprints[fingerprints > 0])
    return fingerprints

In [382]:
def max_normalization(fingerprints):
    maxis = fingerprints.max(axis = 1)
    fingerprints[fingerprints > 0] = fingerprints[fingerprints > 0].apply(lambda row: 0.5 + 0.5 * row / maxis[row.index])
    return fingerprints

In [383]:
def mean_log_normalization(fingerprints):
    means = fingerprints.mean(axis=1)
    fingerprints[fingerprints > 0] = fingerprints[fingerprints > 0].apply(lambda row: (1 + np.log(row)) / 1 + np.log(means[row.index]))
    return fingerprints

In [384]:
def normalize(fingerprints):
    if normalization_strategy == "log":
        return log_normalization(fingerprints)
    elif normalization_strategy == "max":
        return max_normalization(fingerprints)
    elif normalization_strategy == "mean_log":
        return mean_log_normalization(fingerprints)
    elif normalization_strategy == "log_inv":
        fp = log_normalization(fingerprints)
        fp = 1 / (1 + fp)
        return fp
    elif normalization_strategy == "max_inv":
        fp = max_normalization(fingerprints)
        fp = 1 / (1 + fp)
        return fp
    elif normalization_strategy == "mean_log_inv":
        fp = mean_log_normalization(fingerprints)
        fp = 1 / (1 + fp)
        return fp
    elif normalization_strategy == "tf":
        fp = fingerprints
        fp[fp > 0] = 1 / fp[fp > 0]
        return fp
    else:
        return fingerprints

In [385]:
def corpus_fingerprint(aDataframe, nb_of_biphrases):
    """
    Create the distributionnal signature of each word in the corpus
    Input:
        -aDataFrame: the corpus DataFrame
        -nb_of_biphrases: number of phrases in the corpus
    Return:
        a DataFrame: corpus fingerprint, the columns are the words in the corpus
    """
    print("Normalization strategy:", normalization_strategy)
    total_per_phrase = [0] # nb of words per phrase
    fingerprints = {}
    current_bi_phrase_index = 0
    nb_word_in_corpus = aDataframe[aDataframe.word != "\n"].word.size
    for index, row in aDataframe.iterrows():
        if current_bi_phrase_index > nb_of_biphrases:
            break
            
        word = row['word']
        
        if word != "\n":
            if word not in fingerprints:
                fingerprints[word] = np.zeros(nb_of_biphrases, dtype=np.float32)
            total_per_phrase[current_bi_phrase_index] += 1
            fingerprints[word][current_bi_phrase_index] = +1
        else:
            current_bi_phrase_index += 1
            total_per_phrase.append(0)
        
    if not is_zennaki:
        for word in fingerprints:
            for i in range(nb_of_biphrases):
                if fingerprints[word][i] != 0:
                    fingerprints[word][i] = total_per_phrase[i] / fingerprints[word][i]
    ret = pd.DataFrame(fingerprints)
        
    return normalize(ret)

In [386]:
def corpus2trainingdata(aDataframe, fingerprintsDataFrame):
    """
    Convert corpus to training data => numpy array
    
    Input:
        -aDataFrame: Corpus dataframe
        -fingerprintsDataFrame: distributionnal signature of words in the corpus
    Return:
        (X, y): X is the array of words (signature) in the corpus and y is the corresponding labels (NE tags)
    """
    X = np.zeros((aDataframe.shape[0], fingerprintsDataFrame.shape[0]), dtype=np.int8)
    y = np.zeros(aDataframe.shape[0], dtype=np.int8)
    i = 0
    for row in aDataframe.iterrows():
        X[i] = fingerprintsDataFrame[row[1]['word']].values
        y[i] = tag2int[getTag(row[1]['ne-tag'])]
        i += 1
    return X, y

In [387]:
# A utility function to convert NE tags
def ne_type(aType):
    aType = aType.lower()
    if 'per' in aType:
        t =  'NE' if BINARY else 'PER' 
    elif 'loc' in aType:
        t =  'NE' if BINARY else 'LOC'
    elif 'org' in aType:
        t =  'NE' if BINARY else 'ORG'
    elif 'hour' in aType:
        t =  'NE' if BINARY else 'MISC'
    elif aType != 'o' and len(aType) > 0 :
        t =  'NE' if BINARY else 'MISC'
    else:
        t = 'O'
    return t

In [388]:
def P_R_F1(y_pred, y_true, neg_class):
    same = y_pred[y_true==y_pred]
    tp = same[same != neg_class].size
    nb_of_pos_exple = y_true[y_true != neg_class].size
    nb_of_pos_pred = y_pred[y_pred != neg_class].size
    p = r = f1 = 0
    try:
        p = np.round(tp*100/nb_of_pos_pred, 2)
    except ZeroDivisionError:
        print("number of correct positive predictions is 0")
        
    try:
        r = np.round(tp*100/nb_of_pos_exple, 2)
    except ZeroDivisionError:
        print("number of position exple is 0")
        
    try:
        f1 = np.round(2*r*p/(r+p), 2)
    except ZeroDivisionError:
        print("Recall and precision are 0")

    return p, r, f1

In [389]:
def shuffle_data(X, y):
    indices = [i for i in  range(X.shape[0])]
    np.random.shuffle(indices)
    return X[indices], y[indices]

In [390]:
def create_model(input_dim, output_dim):
    model = Sequential()
    model.add(Dense(h1_size, input_dim=input_dim, activation='sigmoid', name="hidden1"))
    model.add(Dense(h2_size, activation='sigmoid', name="hidden2"))
    if BINARY:
        model.add(Dense(1, activation='sigmoid', name="outputlayer"))
        model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['binary_accuracy'])
    else:
        model.add(Dense(output_dim, activation='softmax', name="outputlayer"))
        model.compile(loss='categorical_crossentropy', optimizer="rmsprop", metrics=['accuracy'])
    model.summary()
    return model

In [391]:
def train_model(model, X_train, y_train, X_val, y_val, epochs=epochs):
    # stop learning if the error is the same between two consecutive epochs
    early_stop = EarlyStopping(patience=20, verbose=2)
    
    # saving best model
    best_model_cp = ModelCheckpoint(best_model_file, save_best_only=True, verbose=1)
    
    model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=epochs, verbose=0, shuffle=shuffle, callbacks=[best_model_cp, early_stop])
    
    #loading and returning the best model
    return keras.models.load_model(best_model_file)

In [392]:
def predict(model, X, y, binary=BINARY):
    if BINARY:
        y_pred = np.round(model.predict(X))
        y_true = y
    else:
        predictions = model.predict(X)
        y_pred = np.array([np.argmax(p) for p in predictions])
        y_true = np.array([np.argmax(t) for t in y ])
    return y_true, y_pred

In [393]:
def model_performance(y_true, y_pred):
    return P_R_F1(y_pred, y_true, tag2int['O']) #precision, recall, f1-score

In [394]:
def model_performace_by_tag(y_true, y_pred, tag):
    p, r, f1 = 0, 0, 0
    
    eq = y_pred[y_pred==y_true]
    correctly_pred = eq[eq==tag].size
    try:
        p = np.round(100 * correctly_pred / y_pred[y_pred==tag].size, 2)
    except ZeroDivisionError:
        pass
    
    try:
        r = np.round(100 * correctly_pred / y_true[y_true==tag].size, 2)
    except ZeroDivisionError:
        pass
    
    try:
        f1 = np.round(2 * r * p / (r + p), 2)
    except ZeroDivisionError:
        pass
    
    return p, r, f1

In [395]:
def algoEval(X_train, y_train, X_val, y_val, X_ewo, y_ewo, epochs=epochs, model=None):
    """
    Train a model on (X, y) and validate on (X_val, y_val) then project on (X_ewo)
    """
    test_precision, train_precision, ewo_precision = [], [], []
    test_recall, train_recall, ewo_recall = [], [], []
    test_fscore, train_fscore, ewo_fscore = [], [], []
    
    test_result_by_tag = {}
    train_result_by_tag = {}
    ewo_result_by_tag = {}
    for t in tagSet:
        f1_key = "F1-"+t
        p_key = "P-"+t
        r_key = "R-"+t
        train_result_by_tag[f1_key], train_result_by_tag[p_key], train_result_by_tag[r_key] = [], [], []
        test_result_by_tag[f1_key], test_result_by_tag[p_key], test_result_by_tag[r_key] = [], [], []
        ewo_result_by_tag[f1_key], ewo_result_by_tag[p_key], ewo_result_by_tag[r_key] = [], [], []

    m = train_model(model, X_train, y_train, X_val, y_val, epochs=epochs)
        
    y_true, y_pred = predict(m, X_train, y_train)
    p_train, r_train, f1_train = model_performance(y_true, y_pred)
        
    y_true_val, y_pred_val = predict(m, X_val, y_val)
    p_val, r_val, f1_val = model_performance(y_true_val, y_pred_val)
        
    y_true_ewo, y_pred_ewo = predict(m, X_ewo, y_ewo) 
    p_ewo, r_ewo, f1_ewo = model_performance(y_true_ewo, y_pred_ewo)
        
    for t in range(len(int2tag)):
        f1_key = "F1-" + int2tag[t]
        p_key = "P-" + int2tag[t]
        r_key = "R-" + int2tag[t]
            
        p, r, f1 = model_performace_by_tag(y_true, y_pred, t)
        train_result_by_tag[p_key].append(p)
        train_result_by_tag[r_key].append(r)
        train_result_by_tag[f1_key].append(f1)
            
        p, r, f1 = model_performace_by_tag(y_true_val, y_pred_val, t)
        test_result_by_tag[p_key].append(p)
        test_result_by_tag[r_key].append(r)
        test_result_by_tag[f1_key].append(f1)
            
        p, r, f1 = model_performace_by_tag(y_true_ewo, y_pred_ewo, t)
        ewo_result_by_tag[p_key].append(p)
        ewo_result_by_tag[r_key].append(r)
        ewo_result_by_tag[f1_key].append(f1)
                
    test_precision.append(p_val)
    train_precision.append(p_train)
    ewo_precision.append(p_ewo)
        
    test_recall.append(r_val)
    train_recall.append(r_train)
    ewo_recall.append(r_ewo)
        
    test_fscore.append(f1_val)
    train_fscore.append(f1_train)
    ewo_fscore.append(f1_ewo)
    return pd.DataFrame({
        'P_val': test_precision, 
        'P_train': train_precision, 
        'P_ewo': ewo_precision, 'R_val': test_recall, 'R_train': train_recall, 
        'R_ewo': ewo_recall, 'F1-val': test_fscore, 'F1-train': train_fscore, 'F1-ewo': ewo_fscore}), pd.DataFrame(train_result_by_tag), pd.DataFrame(test_result_by_tag), pd.DataFrame(ewo_result_by_tag)

In [396]:
def algoCrossVal(X, y, X_ewo, y_ewo, k = 10, repeat=1): 
    """
    Traing a model with k-fold cross validation
    We train the model `repeat` times to check it's stability
    """
    block_size = int(X.shape[0] / k)   
    output = None
    model = None
    train_by_tags, test_by_tags, ewo_by_tags = None, None, None
    for it in range(repeat):
        print("AlgoCrossValIter -", it+1)
        model = create_model(X.shape[1], len(tagSet))
        results = None
        train_by_tagsTmp, test_by_tagsTmp, ewo_by_tagsTmp = None, None, None
        for i in range(k):
            X_val, y_val = X[i*block_size:i*block_size+block_size], y[i*block_size:i*block_size+block_size]
            X_train = np.concatenate((X[0:i*block_size], X[i*block_size+block_size:]))
            y_train = np.concatenate((y[0:i*block_size], y[i*block_size+block_size:]))

            X_train = X_train.reshape(X_train.shape[0], X_train.shape[1])
            X_val = X_val.reshape(X_val.shape[0], X_val.shape[1])

            result, train_by_tag, test_by_tag, ewo_by_tag = algoEval(X_train, y_train, X_val, y_val, X_ewo, y_ewo, model=model)
            if results is None:
                results = result.copy()
                train_by_tagsTmp, test_by_tagsTmp, ewo_by_tagsTmp = train_by_tag.copy(), test_by_tag.copy(), ewo_by_tag.copy()
            else:
                results = pd.concat([results, result], ignore_index=True)
                train_by_tagsTmp = pd.concat([train_by_tagsTmp, train_by_tag], ignore_index=True)
                test_by_tagsTmp = pd.concat([test_by_tagsTmp, test_by_tag], ignore_index=True)
                ewo_by_tagsTmp = pd.concat([ewo_by_tagsTmp, ewo_by_tag], ignore_index=True)
        
        if output is None:
            output = results.mean(axis=0).to_frame()
            train_by_tags = train_by_tagsTmp.mean(axis=0).to_frame()
            test_by_tags = test_by_tagsTmp.mean(axis=0).to_frame()
            ewo_by_tags = ewo_by_tagsTmp.mean(axis=0).to_frame()
        else:
            output = pd.concat([output, results.mean(axis=0).to_frame()], axis=1)
            train_by_tags = pd.concat([train_by_tags, train_by_tagsTmp.mean(axis=0).to_frame()], axis=1)
            test_by_tags = pd.concat([test_by_tags, test_by_tagsTmp.mean(axis=0).to_frame()], axis=1)
            ewo_by_tags = pd.concat([ewo_by_tags, ewo_by_tagsTmp.mean(axis=0).to_frame()], axis=1)

    return output, train_by_tags, test_by_tags, ewo_by_tags, model

In [397]:
en_corpus, en_nb_of_phrases = load_corpus(en_corpus_file, max_nb_of_phrases)

In [398]:
nb_word_in_corpus = en_corpus[en_corpus.word != "\n"].word.size
print("Nb word in corpus", nb_word_in_corpus)

Nb word in corpus 4170


In [399]:
en_corpus.head()
en_corpus.loc[en_corpus['ne-tag'] == 'ORG']

Unnamed: 0,word,ne-tag
1335,Sadducees,ORG


In [400]:
tagSet = en_corpus["ne-tag"].dropna().unique()
if BINARY:
    tagSet = ['NE', 'O']
tag2int = {j: i for i, j in enumerate(tagSet)}
int2tag = {i: j for i, j in enumerate(tagSet)}
print(tag2int)

{'O': 0, 'MISC': 1, 'PER': 2, 'LOC': 3, 'ORG': 4}


In [401]:
en_nb_of_phrases

210

In [402]:
en_corpus.describe()

Unnamed: 0,word,ne-tag
count,4379,4170
unique,904,5
top,the,O
freq,313,3779


In [403]:
en_corpus.head(10)

Unnamed: 0,word,ne-tag
0,The,O
1,Promise,O
2,of,O
3,the,O
4,Holy,MISC
5,Spirit,MISC
6,\n,
7,In,O
8,the,O
9,first,O


In [404]:
for tag in tagSet:
    print("{0} % = {1} %".format(tag, np.round(en_corpus[en_corpus['ne-tag']==tag].shape[0] * 100 / en_corpus[en_corpus['ne-tag']!='\n'].shape[0], 2)))

O % = 86.3 %
MISC % = 2.4 %
PER % = 5.59 %
LOC % = 0.91 %
ORG % = 0.02 %


In [405]:
for tag in tagSet:
    print("{0} % = {1} %".format(tag, np.round(en_corpus[en_corpus['ne-tag']==tag].word.unique().shape[0] * 100 / en_corpus[en_corpus['ne-tag']!='\n'].word.unique().shape[0], 2)))

O % = 89.16 %
MISC % = 1.88 %
PER % = 8.96 %
LOC % = 1.99 %
ORG % = 0.11 %


In [406]:
en_corpus[en_corpus.word == "\n"].shape

(209, 2)

In [407]:
print("Nb of bi-phrases", en_nb_of_phrases)

Nb of bi-phrases 210


In [408]:
en_fingerprints = corpus_fingerprint(en_corpus, en_nb_of_phrases)

Normalization strategy: None


In [409]:
en_fingerprints.head(10)

Unnamed: 0,The,Promise,of,the,Holy,Spirit,In,first,book,O,...,considered,dream,She,save,fulfill,Immanuel,us),woke,sleep,knew
0,6.0,6.0,6.0,6.0,6.0,6.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,18.0,0.0,0.0,18.0,18.0,18.0,18.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,24.0,24.0,24.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,25.0,25.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,29.0,29.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,19.0,19.0,19.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,20.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,0.0,0.0,23.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,36.0,36.0,36.0,36.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [410]:
(4170 / nb_word_in_corpus)

1.0

In [411]:
en_corpus.shape

(4379, 2)

In [412]:
en_fingerprints['you'].values.shape

(210,)

In [413]:
en_corpus[en_corpus.word != "\n"].shape

(4170, 2)

In [414]:
if is_only_vocab:
    text = list(en_corpus[en_corpus.word != "\n"].word.unique())
else:
    text = list(en_corpus[en_corpus.word != "\n"].word)
en_vocab = pd.DataFrame({'text': text})
en_vocab.describe()

Unnamed: 0,text
count,903
unique,903
top,Father
freq,1


In [415]:
if is_only_vocab:
    X = np.zeros((en_vocab.shape[0] * duplication, en_nb_of_phrases))
    target = np.zeros((en_vocab.shape[0] * duplication))
    p=0
    for i, row in en_vocab.iterrows():
        c = row.text
        for j in range(duplication):
            X[p] = en_fingerprints[c.split(" ")[0]]
            target[p] = tag2int[getTag(en_corpus[en_corpus.word == c.split(" ")[-1:][0]]['ne-tag'].iloc[0])]
            p+=1
    X, target = shuffle_data(X, target)
    print(X.shape, en_fingerprints.shape, target.shape)

(903, 210) (210, 903) (903,)


In [416]:
en_vocab[-20:]

Unnamed: 0,text
883,Eliud
884,Eleazar
885,Matthan
886,husband
887,fourteen
888,unwilling
889,shame
890,resolved
891,divorce
892,quietly


In [417]:
if not is_only_vocab:
    X, target = corpus2trainingdata(en_corpus[en_corpus.word != "\n"], en_fingerprints)

In [418]:
if shuffle:
    X, target = shuffle_data(X, target)

In [419]:
y = target.copy()
y[0:100]
if not BINARY:
    y = np_utils.to_categorical(y, len(tagSet))
y.shape

(903, 5)

In [420]:
from sklearn.decomposition import PCA

def visualize(X, y):
    pca = PCA(n_components=2)
    X_embeded = pca.fit_transform(X)
    plt.figure(figsize=(5, 5))
    plt.scatter(X_embeded[:, 0], X_embeded[:, 1], c=y)
    plt.legend()
    plt.show()

In [421]:
# visualize(X, target)

In [422]:
X_train, X_val, y_train, y_val = model_selection.train_test_split(X, y, test_size=0.33)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1])
X_val = X_val.reshape(X_val.shape[0], X_val.shape[1])
print("X_train.shape =", X_train.shape)
print("y_train.shape =", y_train.shape)
print("X_val.shape =", X_val.shape)
print("y_val.shape =", y_val.shape)

tTarget = np.array([np.argmax(yy) for yy in y_train])
vTarget = np.array([np.argmax(yy) for yy in y_val])

for tag in tagSet:
    print("{0} % in training data = {1} %".format(tag, np.round(tTarget[tTarget==tag2int[tag]].size * 100 / tTarget.shape[0], 2)))
    print("{0} % in validation data = {1} %".format(tag, np.round(vTarget[vTarget==tag2int[tag]].size * 100 / vTarget.shape[0], 2)))

X_train.shape = (605, 210)
y_train.shape = (605, 5)
X_val.shape = (298, 210)
y_val.shape = (298, 5)
O % in training data = 87.44 %
O % in validation data = 89.93 %
MISC % in training data = 1.49 %
MISC % in validation data = 0.67 %
PER % in training data = 8.93 %
PER % in validation data = 7.72 %
LOC % in training data = 1.98 %
LOC % in validation data = 1.68 %
ORG % in training data = 0.17 %
ORG % in validation data = 0.0 %


In [423]:
ewo_corpus, ewo_nb_of_phrases = load_corpus(ewo_corpus_file, max_nb_of_phrases)

In [424]:
ewo_corpus.loc[ewo_corpus['ne-tag'] == 'PER']

Unnamed: 0,word,ne-tag
6,Teofil,PER
15,Yesus,PER
86,Yohannes,PER
104,Yesus,PER
230,Yesus,PER
...,...,...
3676,Maria,PER
3697,Yesus,PER
3740,Emmanuel,PER
3750,Yosef,PER


In [425]:
ewo_nb_of_phrases

210

In [426]:
for tag in tagSet:
    print("{0} % = {1} %".format(tag, np.round(ewo_corpus[ewo_corpus['ne-tag']==tag].shape[0] * 100 / ewo_corpus[ewo_corpus['ne-tag']!='\n'].shape[0], 2)))

O % = 84.15 %
MISC % = 2.54 %
PER % = 6.69 %
LOC % = 1.03 %
ORG % = 0.05 %


In [427]:
for tag in tagSet:
    print("{0} % = {1} %".format(tag, np.round(ewo_corpus[ewo_corpus['ne-tag']==tag].word.unique().shape[0] * 100 / ewo_corpus[ewo_corpus['ne-tag']!='\n'].word.unique().shape[0], 2)))

O % = 89.94 %
MISC % = 1.17 %
PER % = 8.3 %
LOC % = 1.86 %
ORG % = 0.2 %


In [428]:
ewo_corpus.describe()

Unnamed: 0,word,ne-tag
count,3779,3570
unique,1024,5
top,\n,O
freq,209,3180


In [429]:
ewo_corpus.head()

Unnamed: 0,word,ne-tag
0,Mfufub,MISC
1,Nsisim,MISC
2,ayi,O
3,sò,O
4,\n,


In [430]:
ewo_fingerprints = corpus_fingerprint(ewo_corpus, en_nb_of_phrases)

Normalization strategy: None


In [431]:
if is_only_vocab:
    text = list(ewo_corpus[ewo_corpus.word != "\n"].word.unique())
else:
    text = list(ewo_corpus[ewo_corpus.word != "\n"].word)
ewo_vocab = pd.DataFrame({"text":text})

In [432]:
if is_only_vocab:
    X_ewo = np.zeros((ewo_vocab.shape[0] * duplication, en_nb_of_phrases))
    ewo_target = np.zeros((ewo_vocab.shape[0] * duplication))
    p=0
    for i, row in ewo_vocab.iterrows():
        c = row.text
        for j in range(duplication):
            X_ewo[p] = ewo_fingerprints[c.split(" ")[0]]
            ewo_target[p] = tag2int[getTag(ewo_corpus[ewo_corpus.word == c.split(" ")[-1:][0]]['ne-tag'].iloc[0])]
            p+=1
    X_ewo, ewo_target = shuffle_data(X_ewo, ewo_target)

In [433]:
ewo_vocab[-10:]

Unnamed: 0,text
1013,sik
1014,Ntud
1015,bëyole
1016,Emmanuel
1017,Avëbë
1018,angavëbë
1019,oyò
1020,angabende
1021,anganòṅ
1022,angayole


In [434]:
if not is_only_vocab:
    X_ewo, ewo_target = corpus2trainingdata(ewo_corpus[ewo_corpus.word != "\n"], ewo_fingerprints)

In [435]:
if shuffle:
    X_ewo, ewo_target = shuffle_data(X_ewo, ewo_target)

In [436]:
y_ewo = ewo_target.copy()
print(y_ewo.shape, len(ewo_vocab))

(1023,) 1023


In [437]:
X_ewo.shape

(1023, 210)

In [438]:
y_ewo = ewo_target.copy()
y_ewo[:20]
if not BINARY:
    y_ewo = np_utils.to_categorical(y_ewo)

In [439]:
X_ewo = X_ewo.reshape((X_ewo.shape[0], en_nb_of_phrases))

In [440]:
# model = create_model(X.shape[1], len(tagSet))
# resultEval, train_by_tag, test_by_tag, ewo_by_tag = algoEval(X_train, y_train, X_val, y_val, X_ewo, y_ewo, model=model)

In [441]:
# resultEval

In [442]:
# train_by_tag

In [443]:
# test_by_tag

In [444]:
# ewo_by_tag

In [445]:
# resultEval.mean()

In [446]:
# resultEval.std()

In [447]:
resultCrossVal, trainByTagResult, testByTagResult, ewoByTagResult, model = algoCrossVal(X, y, X_ewo, y_ewo, repeat=10)

AlgoCrossValIter - 1
Model: "sequential_41"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.53456, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.53456 to 0.35105, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.35105 to 0.28141, saving model to best-model-conll.hdfs

Epoch 00004: val_loss improved from 0.28141 to 0.25543, saving model to be




Epoch 00001: val_loss improved from inf to 0.08007, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08007

Epoch 00003: val_loss did not improve from 0.08007

Epoch 00004: val_loss did not improve from 0.08007

Epoch 00005: val_loss did not improve from 0.08007

Epoch 00006: val_loss did not improve from 0.08007

Epoch 00007: val_loss did not improve from 0.08007

Epoch 00008: val_loss did not improve from 0.08007

Epoch 00009: val_loss did not improve from 0.08007

Epoch 00010: val_loss did not improve from 0.08007





Epoch 00001: val_loss improved from inf to 0.09510, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09510

Epoch 00003: val_loss did not improve from 0.09510

Epoch 00004: val_loss did not improve from 0.09510

Epoch 00005: val_loss did not improve from 0.09510

Epoch 00006: val_loss did not improve from 0.09510

Epoch 00007: val_loss did not improve from 0.09510

Epoch 00008: val_loss did not improve from 0.09510

Epoch 00009: val_loss did not improve from 0.09510

Epoch 00010: val_loss did not improve from 0.09510





Epoch 00001: val_loss improved from inf to 0.05716, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05716

Epoch 00003: val_loss did not improve from 0.05716

Epoch 00004: val_loss did not improve from 0.05716

Epoch 00005: val_loss did not improve from 0.05716

Epoch 00006: val_loss did not improve from 0.05716

Epoch 00007: val_loss did not improve from 0.05716

Epoch 00008: val_loss did not improve from 0.05716

Epoch 00009: val_loss did not improve from 0.05716

Epoch 00010: val_loss did not improve from 0.05716





Epoch 00001: val_loss improved from inf to 0.07435, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07435

Epoch 00003: val_loss did not improve from 0.07435

Epoch 00004: val_loss did not improve from 0.07435

Epoch 00005: val_loss did not improve from 0.07435

Epoch 00006: val_loss did not improve from 0.07435

Epoch 00007: val_loss did not improve from 0.07435

Epoch 00008: val_loss did not improve from 0.07435

Epoch 00009: val_loss did not improve from 0.07435

Epoch 00010: val_loss did not improve from 0.07435





Epoch 00001: val_loss improved from inf to 0.05611, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05611

Epoch 00003: val_loss did not improve from 0.05611

Epoch 00004: val_loss did not improve from 0.05611

Epoch 00005: val_loss did not improve from 0.05611

Epoch 00006: val_loss did not improve from 0.05611

Epoch 00007: val_loss did not improve from 0.05611

Epoch 00008: val_loss did not improve from 0.05611

Epoch 00009: val_loss did not improve from 0.05611

Epoch 00010: val_loss did not improve from 0.05611





Epoch 00001: val_loss improved from inf to 0.11742, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.11742

Epoch 00003: val_loss improved from 0.11742 to 0.09290, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.09290

Epoch 00005: val_loss did not improve from 0.09290

Epoch 00006: val_loss did not improve from 0.09290

Epoch 00007: val_loss did not improve from 0.09290

Epoch 00008: val_loss did not improve from 0.09290

Epoch 00009: val_loss did not improve from 0.09290

Epoch 00010: val_loss did not improve from 0.09290





Epoch 00001: val_loss improved from inf to 0.04109, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04109

Epoch 00003: val_loss did not improve from 0.04109

Epoch 00004: val_loss did not improve from 0.04109

Epoch 00005: val_loss did not improve from 0.04109

Epoch 00006: val_loss did not improve from 0.04109

Epoch 00007: val_loss did not improve from 0.04109

Epoch 00008: val_loss did not improve from 0.04109

Epoch 00009: val_loss did not improve from 0.04109

Epoch 00010: val_loss did not improve from 0.04109





Epoch 00001: val_loss improved from inf to 0.09396, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09396

Epoch 00003: val_loss did not improve from 0.09396

Epoch 00004: val_loss did not improve from 0.09396

Epoch 00005: val_loss did not improve from 0.09396

Epoch 00006: val_loss did not improve from 0.09396

Epoch 00007: val_loss did not improve from 0.09396

Epoch 00008: val_loss did not improve from 0.09396

Epoch 00009: val_loss did not improve from 0.09396

Epoch 00010: val_loss did not improve from 0.09396





Epoch 00001: val_loss improved from inf to 0.10292, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10292

Epoch 00003: val_loss did not improve from 0.10292

Epoch 00004: val_loss did not improve from 0.10292

Epoch 00005: val_loss did not improve from 0.10292

Epoch 00006: val_loss did not improve from 0.10292

Epoch 00007: val_loss did not improve from 0.10292

Epoch 00008: val_loss did not improve from 0.10292

Epoch 00009: val_loss did not improve from 0.10292

Epoch 00010: val_loss did not improve from 0.10292




AlgoCrossValIter - 2
Model: "sequential_42"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.37335, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.37335 to 0.28949, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.28949 to 0.25998, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.25998

Epoch 00005: val_loss 




Epoch 00001: val_loss improved from inf to 0.19766, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.19766 to 0.15789, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.15789

Epoch 00004: val_loss did not improve from 0.15789

Epoch 00005: val_loss did not improve from 0.15789

Epoch 00006: val_loss did not improve from 0.15789

Epoch 00007: val_loss did not improve from 0.15789

Epoch 00008: val_loss did not improve from 0.15789

Epoch 00009: val_loss did not improve from 0.15789

Epoch 00010: val_loss did not improve from 0.15789





Epoch 00001: val_loss improved from inf to 0.09476, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09476

Epoch 00003: val_loss did not improve from 0.09476

Epoch 00004: val_loss did not improve from 0.09476

Epoch 00005: val_loss did not improve from 0.09476

Epoch 00006: val_loss did not improve from 0.09476

Epoch 00007: val_loss did not improve from 0.09476

Epoch 00008: val_loss did not improve from 0.09476

Epoch 00009: val_loss did not improve from 0.09476

Epoch 00010: val_loss did not improve from 0.09476





Epoch 00001: val_loss improved from inf to 0.07790, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.07790 to 0.05915, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.05915

Epoch 00004: val_loss did not improve from 0.05915

Epoch 00005: val_loss did not improve from 0.05915

Epoch 00006: val_loss did not improve from 0.05915

Epoch 00007: val_loss did not improve from 0.05915

Epoch 00008: val_loss did not improve from 0.05915

Epoch 00009: val_loss did not improve from 0.05915

Epoch 00010: val_loss did not improve from 0.05915





Epoch 00001: val_loss improved from inf to 0.09565, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09565

Epoch 00003: val_loss did not improve from 0.09565

Epoch 00004: val_loss did not improve from 0.09565

Epoch 00005: val_loss did not improve from 0.09565

Epoch 00006: val_loss did not improve from 0.09565

Epoch 00007: val_loss did not improve from 0.09565

Epoch 00008: val_loss did not improve from 0.09565

Epoch 00009: val_loss did not improve from 0.09565

Epoch 00010: val_loss did not improve from 0.09565





Epoch 00001: val_loss improved from inf to 0.04750, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04750

Epoch 00003: val_loss did not improve from 0.04750

Epoch 00004: val_loss did not improve from 0.04750

Epoch 00005: val_loss did not improve from 0.04750

Epoch 00006: val_loss did not improve from 0.04750

Epoch 00007: val_loss did not improve from 0.04750

Epoch 00008: val_loss did not improve from 0.04750

Epoch 00009: val_loss did not improve from 0.04750

Epoch 00010: val_loss did not improve from 0.04750





Epoch 00001: val_loss improved from inf to 0.09382, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09382

Epoch 00003: val_loss did not improve from 0.09382

Epoch 00004: val_loss did not improve from 0.09382

Epoch 00005: val_loss did not improve from 0.09382

Epoch 00006: val_loss did not improve from 0.09382

Epoch 00007: val_loss did not improve from 0.09382

Epoch 00008: val_loss did not improve from 0.09382

Epoch 00009: val_loss did not improve from 0.09382

Epoch 00010: val_loss did not improve from 0.09382





Epoch 00001: val_loss improved from inf to 0.03861, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.03861

Epoch 00003: val_loss did not improve from 0.03861

Epoch 00004: val_loss did not improve from 0.03861

Epoch 00005: val_loss did not improve from 0.03861

Epoch 00006: val_loss did not improve from 0.03861

Epoch 00007: val_loss did not improve from 0.03861

Epoch 00008: val_loss did not improve from 0.03861

Epoch 00009: val_loss did not improve from 0.03861

Epoch 00010: val_loss did not improve from 0.03861





Epoch 00001: val_loss improved from inf to 0.12058, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.12058

Epoch 00003: val_loss did not improve from 0.12058

Epoch 00004: val_loss did not improve from 0.12058

Epoch 00005: val_loss did not improve from 0.12058

Epoch 00006: val_loss did not improve from 0.12058

Epoch 00007: val_loss did not improve from 0.12058

Epoch 00008: val_loss did not improve from 0.12058

Epoch 00009: val_loss did not improve from 0.12058

Epoch 00010: val_loss did not improve from 0.12058





Epoch 00001: val_loss improved from inf to 0.09681, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09681

Epoch 00003: val_loss did not improve from 0.09681

Epoch 00004: val_loss did not improve from 0.09681

Epoch 00005: val_loss did not improve from 0.09681

Epoch 00006: val_loss did not improve from 0.09681

Epoch 00007: val_loss did not improve from 0.09681

Epoch 00008: val_loss did not improve from 0.09681

Epoch 00009: val_loss did not improve from 0.09681

Epoch 00010: val_loss did not improve from 0.09681




AlgoCrossValIter - 3
Model: "sequential_43"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.34775, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.34775

Epoch 00003: val_loss improved from 0.34775 to 0.26032, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.26032

Epoch 00005: val_loss improved from 0.26032 to 0.22267, saving mo




Epoch 00001: val_loss improved from inf to 0.10309, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.10309 to 0.09528, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.09528

Epoch 00004: val_loss did not improve from 0.09528

Epoch 00005: val_loss did not improve from 0.09528

Epoch 00006: val_loss did not improve from 0.09528

Epoch 00007: val_loss did not improve from 0.09528

Epoch 00008: val_loss did not improve from 0.09528

Epoch 00009: val_loss did not improve from 0.09528

Epoch 00010: val_loss did not improve from 0.09528





Epoch 00001: val_loss improved from inf to 0.12990, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.12990

Epoch 00003: val_loss did not improve from 0.12990

Epoch 00004: val_loss did not improve from 0.12990

Epoch 00005: val_loss did not improve from 0.12990

Epoch 00006: val_loss did not improve from 0.12990

Epoch 00007: val_loss did not improve from 0.12990

Epoch 00008: val_loss did not improve from 0.12990

Epoch 00009: val_loss did not improve from 0.12990

Epoch 00010: val_loss did not improve from 0.12990

Epoch 00001: val_loss improved from inf to 0.06510, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.06510 to 0.06487, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.06487

Epoch 00004: val_loss did not improve from 0.06487

Epoch 00005: val_loss did not improve from 0.06487

Epoch 00006: val_loss did not improve from 0.06487

Epoch 00007: val_loss did not improve from 0.0




Epoch 00001: val_loss improved from inf to 0.07182, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07182

Epoch 00003: val_loss did not improve from 0.07182

Epoch 00004: val_loss did not improve from 0.07182

Epoch 00005: val_loss did not improve from 0.07182

Epoch 00006: val_loss did not improve from 0.07182

Epoch 00007: val_loss did not improve from 0.07182

Epoch 00008: val_loss did not improve from 0.07182

Epoch 00009: val_loss did not improve from 0.07182

Epoch 00010: val_loss did not improve from 0.07182





Epoch 00001: val_loss improved from inf to 0.06082, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06082

Epoch 00003: val_loss did not improve from 0.06082

Epoch 00004: val_loss did not improve from 0.06082

Epoch 00005: val_loss did not improve from 0.06082

Epoch 00006: val_loss did not improve from 0.06082

Epoch 00007: val_loss did not improve from 0.06082

Epoch 00008: val_loss did not improve from 0.06082

Epoch 00009: val_loss did not improve from 0.06082

Epoch 00010: val_loss did not improve from 0.06082





Epoch 00001: val_loss improved from inf to 0.10223, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10223

Epoch 00003: val_loss did not improve from 0.10223

Epoch 00004: val_loss did not improve from 0.10223

Epoch 00005: val_loss did not improve from 0.10223

Epoch 00006: val_loss did not improve from 0.10223

Epoch 00007: val_loss did not improve from 0.10223

Epoch 00008: val_loss did not improve from 0.10223

Epoch 00009: val_loss did not improve from 0.10223

Epoch 00010: val_loss did not improve from 0.10223





Epoch 00001: val_loss improved from inf to 0.04722, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04722

Epoch 00003: val_loss did not improve from 0.04722

Epoch 00004: val_loss did not improve from 0.04722

Epoch 00005: val_loss did not improve from 0.04722

Epoch 00006: val_loss did not improve from 0.04722

Epoch 00007: val_loss did not improve from 0.04722

Epoch 00008: val_loss did not improve from 0.04722

Epoch 00009: val_loss did not improve from 0.04722

Epoch 00010: val_loss did not improve from 0.04722





Epoch 00001: val_loss improved from inf to 0.10446, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10446

Epoch 00003: val_loss did not improve from 0.10446

Epoch 00004: val_loss did not improve from 0.10446

Epoch 00005: val_loss did not improve from 0.10446

Epoch 00006: val_loss did not improve from 0.10446

Epoch 00007: val_loss did not improve from 0.10446

Epoch 00008: val_loss did not improve from 0.10446

Epoch 00009: val_loss did not improve from 0.10446

Epoch 00010: val_loss did not improve from 0.10446





Epoch 00001: val_loss improved from inf to 0.10397, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10397

Epoch 00003: val_loss did not improve from 0.10397

Epoch 00004: val_loss did not improve from 0.10397

Epoch 00005: val_loss did not improve from 0.10397

Epoch 00006: val_loss did not improve from 0.10397

Epoch 00007: val_loss did not improve from 0.10397

Epoch 00008: val_loss did not improve from 0.10397

Epoch 00009: val_loss did not improve from 0.10397

Epoch 00010: val_loss did not improve from 0.10397




AlgoCrossValIter - 4
Model: "sequential_44"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.39150, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.39150 to 0.30497, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.30497 to 0.28724, saving model to best-model-conll.hdfs

Epoch 00004: val_loss improved from 0.28724 to 0.24179, saving model to be




Epoch 00001: val_loss improved from inf to 0.07790, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07790

Epoch 00003: val_loss did not improve from 0.07790

Epoch 00004: val_loss did not improve from 0.07790

Epoch 00005: val_loss did not improve from 0.07790

Epoch 00006: val_loss did not improve from 0.07790

Epoch 00007: val_loss did not improve from 0.07790

Epoch 00008: val_loss did not improve from 0.07790

Epoch 00009: val_loss did not improve from 0.07790

Epoch 00010: val_loss did not improve from 0.07790





Epoch 00001: val_loss improved from inf to 0.08500, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08500

Epoch 00003: val_loss did not improve from 0.08500

Epoch 00004: val_loss did not improve from 0.08500

Epoch 00005: val_loss did not improve from 0.08500

Epoch 00006: val_loss did not improve from 0.08500

Epoch 00007: val_loss did not improve from 0.08500

Epoch 00008: val_loss did not improve from 0.08500

Epoch 00009: val_loss did not improve from 0.08500

Epoch 00010: val_loss did not improve from 0.08500





Epoch 00001: val_loss improved from inf to 0.05929, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05929

Epoch 00003: val_loss did not improve from 0.05929

Epoch 00004: val_loss did not improve from 0.05929

Epoch 00005: val_loss did not improve from 0.05929

Epoch 00006: val_loss did not improve from 0.05929

Epoch 00007: val_loss did not improve from 0.05929

Epoch 00008: val_loss did not improve from 0.05929

Epoch 00009: val_loss did not improve from 0.05929

Epoch 00010: val_loss did not improve from 0.05929





Epoch 00001: val_loss improved from inf to 0.11775, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.11775 to 0.11589, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.11589 to 0.10889, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.10889

Epoch 00005: val_loss did not improve from 0.10889

Epoch 00006: val_loss did not improve from 0.10889

Epoch 00007: val_loss did not improve from 0.10889

Epoch 00008: val_loss did not improve from 0.10889

Epoch 00009: val_loss did not improve from 0.10889

Epoch 00010: val_loss did not improve from 0.10889





Epoch 00001: val_loss improved from inf to 0.05882, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05882

Epoch 00003: val_loss did not improve from 0.05882

Epoch 00004: val_loss did not improve from 0.05882

Epoch 00005: val_loss did not improve from 0.05882

Epoch 00006: val_loss did not improve from 0.05882

Epoch 00007: val_loss did not improve from 0.05882

Epoch 00008: val_loss did not improve from 0.05882

Epoch 00009: val_loss did not improve from 0.05882

Epoch 00010: val_loss did not improve from 0.05882





Epoch 00001: val_loss improved from inf to 0.10648, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.10648 to 0.09832, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.09832

Epoch 00004: val_loss did not improve from 0.09832

Epoch 00005: val_loss did not improve from 0.09832

Epoch 00006: val_loss did not improve from 0.09832

Epoch 00007: val_loss did not improve from 0.09832

Epoch 00008: val_loss did not improve from 0.09832

Epoch 00009: val_loss did not improve from 0.09832

Epoch 00010: val_loss did not improve from 0.09832





Epoch 00001: val_loss improved from inf to 0.05309, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05309

Epoch 00003: val_loss did not improve from 0.05309

Epoch 00004: val_loss did not improve from 0.05309

Epoch 00005: val_loss did not improve from 0.05309

Epoch 00006: val_loss did not improve from 0.05309

Epoch 00007: val_loss did not improve from 0.05309

Epoch 00008: val_loss did not improve from 0.05309

Epoch 00009: val_loss did not improve from 0.05309

Epoch 00010: val_loss did not improve from 0.05309





Epoch 00001: val_loss improved from inf to 0.09440, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09440

Epoch 00003: val_loss did not improve from 0.09440

Epoch 00004: val_loss did not improve from 0.09440

Epoch 00005: val_loss did not improve from 0.09440

Epoch 00006: val_loss did not improve from 0.09440

Epoch 00007: val_loss did not improve from 0.09440

Epoch 00008: val_loss did not improve from 0.09440

Epoch 00009: val_loss did not improve from 0.09440

Epoch 00010: val_loss did not improve from 0.09440





Epoch 00001: val_loss improved from inf to 0.09852, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09852

Epoch 00003: val_loss did not improve from 0.09852

Epoch 00004: val_loss did not improve from 0.09852

Epoch 00005: val_loss did not improve from 0.09852

Epoch 00006: val_loss did not improve from 0.09852

Epoch 00007: val_loss did not improve from 0.09852

Epoch 00008: val_loss did not improve from 0.09852

Epoch 00009: val_loss did not improve from 0.09852

Epoch 00010: val_loss did not improve from 0.09852




AlgoCrossValIter - 5
Model: "sequential_45"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.42446, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.42446

Epoch 00003: val_loss improved from 0.42446 to 0.28053, saving model to best-model-conll.hdfs

Epoch 00004: val_loss improved from 0.28053 to 0.21197, saving model to best-model-conll.hdfs

Epoch 00005: val_loss 




Epoch 00001: val_loss improved from inf to 0.10935, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10935

Epoch 00003: val_loss did not improve from 0.10935

Epoch 00004: val_loss did not improve from 0.10935

Epoch 00005: val_loss did not improve from 0.10935

Epoch 00006: val_loss did not improve from 0.10935

Epoch 00007: val_loss did not improve from 0.10935

Epoch 00008: val_loss did not improve from 0.10935

Epoch 00009: val_loss did not improve from 0.10935

Epoch 00010: val_loss did not improve from 0.10935





Epoch 00001: val_loss improved from inf to 0.09479, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09479

Epoch 00003: val_loss did not improve from 0.09479

Epoch 00004: val_loss did not improve from 0.09479

Epoch 00005: val_loss did not improve from 0.09479

Epoch 00006: val_loss did not improve from 0.09479

Epoch 00007: val_loss did not improve from 0.09479

Epoch 00008: val_loss did not improve from 0.09479

Epoch 00009: val_loss did not improve from 0.09479

Epoch 00010: val_loss did not improve from 0.09479





Epoch 00001: val_loss improved from inf to 0.06675, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06675

Epoch 00003: val_loss did not improve from 0.06675

Epoch 00004: val_loss did not improve from 0.06675

Epoch 00005: val_loss did not improve from 0.06675

Epoch 00006: val_loss did not improve from 0.06675

Epoch 00007: val_loss did not improve from 0.06675

Epoch 00008: val_loss did not improve from 0.06675

Epoch 00009: val_loss did not improve from 0.06675

Epoch 00010: val_loss did not improve from 0.06675





Epoch 00001: val_loss improved from inf to 0.08684, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08684

Epoch 00003: val_loss did not improve from 0.08684

Epoch 00004: val_loss did not improve from 0.08684

Epoch 00005: val_loss did not improve from 0.08684

Epoch 00006: val_loss did not improve from 0.08684

Epoch 00007: val_loss did not improve from 0.08684

Epoch 00008: val_loss did not improve from 0.08684

Epoch 00009: val_loss did not improve from 0.08684

Epoch 00010: val_loss did not improve from 0.08684





Epoch 00001: val_loss improved from inf to 0.05902, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05902

Epoch 00003: val_loss did not improve from 0.05902

Epoch 00004: val_loss did not improve from 0.05902

Epoch 00005: val_loss did not improve from 0.05902

Epoch 00006: val_loss did not improve from 0.05902

Epoch 00007: val_loss did not improve from 0.05902

Epoch 00008: val_loss did not improve from 0.05902

Epoch 00009: val_loss did not improve from 0.05902

Epoch 00010: val_loss did not improve from 0.05902





Epoch 00001: val_loss improved from inf to 0.12073, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.12073

Epoch 00003: val_loss did not improve from 0.12073

Epoch 00004: val_loss did not improve from 0.12073

Epoch 00005: val_loss did not improve from 0.12073

Epoch 00006: val_loss did not improve from 0.12073

Epoch 00007: val_loss did not improve from 0.12073

Epoch 00008: val_loss did not improve from 0.12073

Epoch 00009: val_loss did not improve from 0.12073

Epoch 00010: val_loss did not improve from 0.12073





Epoch 00001: val_loss improved from inf to 0.04163, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04163

Epoch 00003: val_loss did not improve from 0.04163

Epoch 00004: val_loss did not improve from 0.04163

Epoch 00005: val_loss did not improve from 0.04163

Epoch 00006: val_loss did not improve from 0.04163

Epoch 00007: val_loss did not improve from 0.04163

Epoch 00008: val_loss did not improve from 0.04163

Epoch 00009: val_loss did not improve from 0.04163

Epoch 00010: val_loss did not improve from 0.04163





Epoch 00001: val_loss improved from inf to 0.11595, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.11595

Epoch 00003: val_loss did not improve from 0.11595

Epoch 00004: val_loss did not improve from 0.11595

Epoch 00005: val_loss did not improve from 0.11595

Epoch 00006: val_loss did not improve from 0.11595

Epoch 00007: val_loss did not improve from 0.11595

Epoch 00008: val_loss did not improve from 0.11595

Epoch 00009: val_loss did not improve from 0.11595

Epoch 00010: val_loss did not improve from 0.11595





Epoch 00001: val_loss improved from inf to 0.10945, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10945

Epoch 00003: val_loss did not improve from 0.10945

Epoch 00004: val_loss did not improve from 0.10945

Epoch 00005: val_loss did not improve from 0.10945

Epoch 00006: val_loss did not improve from 0.10945

Epoch 00007: val_loss did not improve from 0.10945

Epoch 00008: val_loss did not improve from 0.10945

Epoch 00009: val_loss did not improve from 0.10945

Epoch 00010: val_loss did not improve from 0.10945




AlgoCrossValIter - 6
Model: "sequential_46"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.51632, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.51632 to 0.29941, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.29941

Epoch 00004: val_loss improved from 0.29941 to 0.22207, saving model to best-model-conll.hdfs

Epoch 00005: val_loss 




Epoch 00001: val_loss improved from inf to 0.05999, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05999

Epoch 00003: val_loss did not improve from 0.05999

Epoch 00004: val_loss did not improve from 0.05999

Epoch 00005: val_loss did not improve from 0.05999

Epoch 00006: val_loss did not improve from 0.05999

Epoch 00007: val_loss did not improve from 0.05999

Epoch 00008: val_loss did not improve from 0.05999

Epoch 00009: val_loss did not improve from 0.05999

Epoch 00010: val_loss did not improve from 0.05999





Epoch 00001: val_loss improved from inf to 0.11253, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.11253

Epoch 00003: val_loss did not improve from 0.11253

Epoch 00004: val_loss did not improve from 0.11253

Epoch 00005: val_loss did not improve from 0.11253

Epoch 00006: val_loss did not improve from 0.11253

Epoch 00007: val_loss did not improve from 0.11253

Epoch 00008: val_loss did not improve from 0.11253

Epoch 00009: val_loss did not improve from 0.11253

Epoch 00010: val_loss did not improve from 0.11253





Epoch 00001: val_loss improved from inf to 0.06785, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06785

Epoch 00003: val_loss did not improve from 0.06785

Epoch 00004: val_loss did not improve from 0.06785

Epoch 00005: val_loss did not improve from 0.06785

Epoch 00006: val_loss did not improve from 0.06785

Epoch 00007: val_loss did not improve from 0.06785

Epoch 00008: val_loss did not improve from 0.06785

Epoch 00009: val_loss did not improve from 0.06785

Epoch 00010: val_loss did not improve from 0.06785





Epoch 00001: val_loss improved from inf to 0.07582, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07582

Epoch 00003: val_loss did not improve from 0.07582

Epoch 00004: val_loss did not improve from 0.07582

Epoch 00005: val_loss did not improve from 0.07582

Epoch 00006: val_loss did not improve from 0.07582

Epoch 00007: val_loss did not improve from 0.07582

Epoch 00008: val_loss did not improve from 0.07582

Epoch 00009: val_loss did not improve from 0.07582

Epoch 00010: val_loss did not improve from 0.07582





Epoch 00001: val_loss improved from inf to 0.05200, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05200

Epoch 00003: val_loss did not improve from 0.05200

Epoch 00004: val_loss did not improve from 0.05200

Epoch 00005: val_loss did not improve from 0.05200

Epoch 00006: val_loss did not improve from 0.05200

Epoch 00007: val_loss did not improve from 0.05200

Epoch 00008: val_loss did not improve from 0.05200

Epoch 00009: val_loss did not improve from 0.05200

Epoch 00010: val_loss did not improve from 0.05200





Epoch 00001: val_loss improved from inf to 0.11683, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.11683

Epoch 00003: val_loss did not improve from 0.11683

Epoch 00004: val_loss did not improve from 0.11683

Epoch 00005: val_loss did not improve from 0.11683

Epoch 00006: val_loss did not improve from 0.11683

Epoch 00007: val_loss did not improve from 0.11683

Epoch 00008: val_loss did not improve from 0.11683

Epoch 00009: val_loss did not improve from 0.11683

Epoch 00010: val_loss did not improve from 0.11683





Epoch 00001: val_loss improved from inf to 0.05600, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05600

Epoch 00003: val_loss did not improve from 0.05600

Epoch 00004: val_loss did not improve from 0.05600

Epoch 00005: val_loss did not improve from 0.05600

Epoch 00006: val_loss did not improve from 0.05600

Epoch 00007: val_loss did not improve from 0.05600

Epoch 00008: val_loss did not improve from 0.05600

Epoch 00009: val_loss did not improve from 0.05600

Epoch 00010: val_loss did not improve from 0.05600





Epoch 00001: val_loss improved from inf to 0.11855, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.11855

Epoch 00003: val_loss did not improve from 0.11855

Epoch 00004: val_loss did not improve from 0.11855

Epoch 00005: val_loss did not improve from 0.11855

Epoch 00006: val_loss did not improve from 0.11855

Epoch 00007: val_loss did not improve from 0.11855

Epoch 00008: val_loss did not improve from 0.11855

Epoch 00009: val_loss did not improve from 0.11855

Epoch 00010: val_loss did not improve from 0.11855





Epoch 00001: val_loss improved from inf to 0.10072, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10072

Epoch 00003: val_loss did not improve from 0.10072

Epoch 00004: val_loss did not improve from 0.10072

Epoch 00005: val_loss did not improve from 0.10072

Epoch 00006: val_loss did not improve from 0.10072

Epoch 00007: val_loss did not improve from 0.10072

Epoch 00008: val_loss did not improve from 0.10072

Epoch 00009: val_loss did not improve from 0.10072

Epoch 00010: val_loss did not improve from 0.10072




AlgoCrossValIter - 7
Model: "sequential_47"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.35594, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.35594 to 0.29249, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.29249 to 0.27143, saving model to best-model-conll.hdfs

Epoch 00004: val_loss improved from 0.27143 to 0.25682, saving model to be




Epoch 00001: val_loss improved from inf to 0.10726, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10726

Epoch 00003: val_loss did not improve from 0.10726

Epoch 00004: val_loss did not improve from 0.10726

Epoch 00005: val_loss did not improve from 0.10726

Epoch 00006: val_loss did not improve from 0.10726

Epoch 00007: val_loss did not improve from 0.10726

Epoch 00008: val_loss did not improve from 0.10726

Epoch 00009: val_loss did not improve from 0.10726

Epoch 00010: val_loss did not improve from 0.10726





Epoch 00001: val_loss improved from inf to 0.09532, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09532

Epoch 00003: val_loss did not improve from 0.09532

Epoch 00004: val_loss did not improve from 0.09532

Epoch 00005: val_loss did not improve from 0.09532

Epoch 00006: val_loss did not improve from 0.09532

Epoch 00007: val_loss did not improve from 0.09532

Epoch 00008: val_loss did not improve from 0.09532

Epoch 00009: val_loss did not improve from 0.09532

Epoch 00010: val_loss did not improve from 0.09532





Epoch 00001: val_loss improved from inf to 0.08503, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.08503 to 0.06169, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.06169

Epoch 00004: val_loss did not improve from 0.06169

Epoch 00005: val_loss did not improve from 0.06169

Epoch 00006: val_loss did not improve from 0.06169

Epoch 00007: val_loss did not improve from 0.06169

Epoch 00008: val_loss did not improve from 0.06169

Epoch 00009: val_loss did not improve from 0.06169

Epoch 00010: val_loss did not improve from 0.06169





Epoch 00001: val_loss improved from inf to 0.06540, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06540

Epoch 00003: val_loss did not improve from 0.06540

Epoch 00004: val_loss did not improve from 0.06540

Epoch 00005: val_loss did not improve from 0.06540

Epoch 00006: val_loss did not improve from 0.06540

Epoch 00007: val_loss did not improve from 0.06540

Epoch 00008: val_loss did not improve from 0.06540

Epoch 00009: val_loss did not improve from 0.06540

Epoch 00010: val_loss did not improve from 0.06540





Epoch 00001: val_loss improved from inf to 0.06561, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.06561 to 0.05258, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.05258

Epoch 00004: val_loss did not improve from 0.05258

Epoch 00005: val_loss did not improve from 0.05258

Epoch 00006: val_loss did not improve from 0.05258

Epoch 00007: val_loss did not improve from 0.05258

Epoch 00008: val_loss did not improve from 0.05258

Epoch 00009: val_loss did not improve from 0.05258

Epoch 00010: val_loss did not improve from 0.05258





Epoch 00001: val_loss improved from inf to 0.10401, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10401

Epoch 00003: val_loss did not improve from 0.10401

Epoch 00004: val_loss did not improve from 0.10401

Epoch 00005: val_loss did not improve from 0.10401

Epoch 00006: val_loss did not improve from 0.10401

Epoch 00007: val_loss did not improve from 0.10401

Epoch 00008: val_loss did not improve from 0.10401

Epoch 00009: val_loss did not improve from 0.10401

Epoch 00010: val_loss did not improve from 0.10401





Epoch 00001: val_loss improved from inf to 0.05613, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.05613 to 0.05594, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.05594

Epoch 00004: val_loss did not improve from 0.05594

Epoch 00005: val_loss did not improve from 0.05594

Epoch 00006: val_loss did not improve from 0.05594

Epoch 00007: val_loss did not improve from 0.05594

Epoch 00008: val_loss did not improve from 0.05594

Epoch 00009: val_loss did not improve from 0.05594

Epoch 00010: val_loss did not improve from 0.05594





Epoch 00001: val_loss improved from inf to 0.11404, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.11404

Epoch 00003: val_loss did not improve from 0.11404

Epoch 00004: val_loss did not improve from 0.11404

Epoch 00005: val_loss did not improve from 0.11404

Epoch 00006: val_loss did not improve from 0.11404

Epoch 00007: val_loss did not improve from 0.11404

Epoch 00008: val_loss did not improve from 0.11404

Epoch 00009: val_loss did not improve from 0.11404

Epoch 00010: val_loss did not improve from 0.11404





Epoch 00001: val_loss improved from inf to 0.09663, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09663

Epoch 00003: val_loss did not improve from 0.09663

Epoch 00004: val_loss did not improve from 0.09663

Epoch 00005: val_loss did not improve from 0.09663

Epoch 00006: val_loss did not improve from 0.09663

Epoch 00007: val_loss did not improve from 0.09663

Epoch 00008: val_loss did not improve from 0.09663

Epoch 00009: val_loss did not improve from 0.09663

Epoch 00010: val_loss did not improve from 0.09663




AlgoCrossValIter - 8
Model: "sequential_48"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.36099, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.36099 to 0.31543, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.31543 to 0.24334, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.24334

Epoch 00005: val_loss 




Epoch 00001: val_loss improved from inf to 0.12230, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.12230 to 0.08992, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.08992

Epoch 00004: val_loss did not improve from 0.08992

Epoch 00005: val_loss did not improve from 0.08992

Epoch 00006: val_loss did not improve from 0.08992

Epoch 00007: val_loss did not improve from 0.08992

Epoch 00008: val_loss did not improve from 0.08992

Epoch 00009: val_loss did not improve from 0.08992

Epoch 00010: val_loss did not improve from 0.08992





Epoch 00001: val_loss improved from inf to 0.14865, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.14865 to 0.11853, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.11853

Epoch 00004: val_loss did not improve from 0.11853

Epoch 00005: val_loss did not improve from 0.11853

Epoch 00006: val_loss did not improve from 0.11853

Epoch 00007: val_loss did not improve from 0.11853

Epoch 00008: val_loss did not improve from 0.11853

Epoch 00009: val_loss did not improve from 0.11853

Epoch 00010: val_loss did not improve from 0.11853





Epoch 00001: val_loss improved from inf to 0.05411, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05411

Epoch 00003: val_loss did not improve from 0.05411

Epoch 00004: val_loss did not improve from 0.05411

Epoch 00005: val_loss did not improve from 0.05411

Epoch 00006: val_loss did not improve from 0.05411

Epoch 00007: val_loss did not improve from 0.05411

Epoch 00008: val_loss did not improve from 0.05411

Epoch 00009: val_loss did not improve from 0.05411

Epoch 00010: val_loss did not improve from 0.05411





Epoch 00001: val_loss improved from inf to 0.07285, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07285

Epoch 00003: val_loss did not improve from 0.07285

Epoch 00004: val_loss did not improve from 0.07285

Epoch 00005: val_loss did not improve from 0.07285

Epoch 00006: val_loss did not improve from 0.07285

Epoch 00007: val_loss did not improve from 0.07285

Epoch 00008: val_loss did not improve from 0.07285

Epoch 00009: val_loss did not improve from 0.07285

Epoch 00010: val_loss did not improve from 0.07285





Epoch 00001: val_loss improved from inf to 0.05294, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05294

Epoch 00003: val_loss did not improve from 0.05294

Epoch 00004: val_loss did not improve from 0.05294

Epoch 00005: val_loss did not improve from 0.05294

Epoch 00006: val_loss did not improve from 0.05294

Epoch 00007: val_loss did not improve from 0.05294

Epoch 00008: val_loss did not improve from 0.05294

Epoch 00009: val_loss did not improve from 0.05294

Epoch 00010: val_loss did not improve from 0.05294





Epoch 00001: val_loss improved from inf to 0.11812, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.11812

Epoch 00003: val_loss improved from 0.11812 to 0.11527, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.11527

Epoch 00005: val_loss did not improve from 0.11527

Epoch 00006: val_loss did not improve from 0.11527

Epoch 00007: val_loss did not improve from 0.11527

Epoch 00008: val_loss did not improve from 0.11527

Epoch 00009: val_loss did not improve from 0.11527

Epoch 00010: val_loss did not improve from 0.11527





Epoch 00001: val_loss improved from inf to 0.05902, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05902

Epoch 00003: val_loss did not improve from 0.05902

Epoch 00004: val_loss did not improve from 0.05902

Epoch 00005: val_loss did not improve from 0.05902

Epoch 00006: val_loss did not improve from 0.05902

Epoch 00007: val_loss did not improve from 0.05902

Epoch 00008: val_loss did not improve from 0.05902

Epoch 00009: val_loss did not improve from 0.05902

Epoch 00010: val_loss did not improve from 0.05902





Epoch 00001: val_loss improved from inf to 0.10985, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10985

Epoch 00003: val_loss did not improve from 0.10985

Epoch 00004: val_loss did not improve from 0.10985

Epoch 00005: val_loss did not improve from 0.10985

Epoch 00006: val_loss did not improve from 0.10985

Epoch 00007: val_loss did not improve from 0.10985

Epoch 00008: val_loss did not improve from 0.10985

Epoch 00009: val_loss did not improve from 0.10985

Epoch 00010: val_loss did not improve from 0.10985





Epoch 00001: val_loss improved from inf to 0.10813, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10813

Epoch 00003: val_loss did not improve from 0.10813

Epoch 00004: val_loss did not improve from 0.10813

Epoch 00005: val_loss did not improve from 0.10813

Epoch 00006: val_loss did not improve from 0.10813

Epoch 00007: val_loss did not improve from 0.10813

Epoch 00008: val_loss did not improve from 0.10813

Epoch 00009: val_loss did not improve from 0.10813

Epoch 00010: val_loss did not improve from 0.10813




AlgoCrossValIter - 9
Model: "sequential_49"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.37315, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.37315 to 0.36440, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.36440 to 0.26072, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.26072

Epoch 00005: val_loss 




Epoch 00001: val_loss improved from inf to 0.10470, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10470

Epoch 00003: val_loss did not improve from 0.10470

Epoch 00004: val_loss did not improve from 0.10470

Epoch 00005: val_loss did not improve from 0.10470

Epoch 00006: val_loss did not improve from 0.10470

Epoch 00007: val_loss did not improve from 0.10470

Epoch 00008: val_loss did not improve from 0.10470

Epoch 00009: val_loss did not improve from 0.10470

Epoch 00010: val_loss did not improve from 0.10470





Epoch 00001: val_loss improved from inf to 0.13643, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.13643 to 0.11787, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.11787

Epoch 00004: val_loss did not improve from 0.11787

Epoch 00005: val_loss did not improve from 0.11787

Epoch 00006: val_loss did not improve from 0.11787

Epoch 00007: val_loss did not improve from 0.11787

Epoch 00008: val_loss did not improve from 0.11787

Epoch 00009: val_loss did not improve from 0.11787

Epoch 00010: val_loss did not improve from 0.11787





Epoch 00001: val_loss improved from inf to 0.05400, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05400

Epoch 00003: val_loss did not improve from 0.05400

Epoch 00004: val_loss did not improve from 0.05400

Epoch 00005: val_loss did not improve from 0.05400

Epoch 00006: val_loss did not improve from 0.05400

Epoch 00007: val_loss did not improve from 0.05400

Epoch 00008: val_loss did not improve from 0.05400

Epoch 00009: val_loss did not improve from 0.05400

Epoch 00010: val_loss did not improve from 0.05400





Epoch 00001: val_loss improved from inf to 0.10371, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10371

Epoch 00003: val_loss did not improve from 0.10371

Epoch 00004: val_loss did not improve from 0.10371

Epoch 00005: val_loss did not improve from 0.10371

Epoch 00006: val_loss did not improve from 0.10371

Epoch 00007: val_loss did not improve from 0.10371

Epoch 00008: val_loss did not improve from 0.10371

Epoch 00009: val_loss did not improve from 0.10371

Epoch 00010: val_loss did not improve from 0.10371





Epoch 00001: val_loss improved from inf to 0.03737, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.03737

Epoch 00003: val_loss did not improve from 0.03737

Epoch 00004: val_loss did not improve from 0.03737

Epoch 00005: val_loss did not improve from 0.03737

Epoch 00006: val_loss did not improve from 0.03737

Epoch 00007: val_loss did not improve from 0.03737

Epoch 00008: val_loss did not improve from 0.03737

Epoch 00009: val_loss did not improve from 0.03737

Epoch 00010: val_loss did not improve from 0.03737





Epoch 00001: val_loss improved from inf to 0.08403, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08403

Epoch 00003: val_loss did not improve from 0.08403

Epoch 00004: val_loss did not improve from 0.08403

Epoch 00005: val_loss did not improve from 0.08403

Epoch 00006: val_loss did not improve from 0.08403

Epoch 00007: val_loss did not improve from 0.08403

Epoch 00008: val_loss did not improve from 0.08403

Epoch 00009: val_loss did not improve from 0.08403

Epoch 00010: val_loss did not improve from 0.08403





Epoch 00001: val_loss improved from inf to 0.05106, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05106

Epoch 00003: val_loss did not improve from 0.05106

Epoch 00004: val_loss did not improve from 0.05106

Epoch 00005: val_loss did not improve from 0.05106

Epoch 00006: val_loss did not improve from 0.05106

Epoch 00007: val_loss did not improve from 0.05106

Epoch 00008: val_loss did not improve from 0.05106

Epoch 00009: val_loss did not improve from 0.05106

Epoch 00010: val_loss did not improve from 0.05106





Epoch 00001: val_loss improved from inf to 0.11594, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.11594

Epoch 00003: val_loss did not improve from 0.11594

Epoch 00004: val_loss did not improve from 0.11594

Epoch 00005: val_loss did not improve from 0.11594

Epoch 00006: val_loss did not improve from 0.11594

Epoch 00007: val_loss did not improve from 0.11594

Epoch 00008: val_loss did not improve from 0.11594

Epoch 00009: val_loss did not improve from 0.11594

Epoch 00010: val_loss did not improve from 0.11594





Epoch 00001: val_loss improved from inf to 0.10937, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10937

Epoch 00003: val_loss did not improve from 0.10937

Epoch 00004: val_loss did not improve from 0.10937

Epoch 00005: val_loss did not improve from 0.10937

Epoch 00006: val_loss did not improve from 0.10937

Epoch 00007: val_loss did not improve from 0.10937

Epoch 00008: val_loss did not improve from 0.10937

Epoch 00009: val_loss did not improve from 0.10937

Epoch 00010: val_loss did not improve from 0.10937




AlgoCrossValIter - 10
Model: "sequential_50"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.40509, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.40509 to 0.37090, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.37090 to 0.24475, saving model to best-model-conll.hdfs

Epoch 00004: val_loss improved from 0.24475 to 0.22375, saving model to b




Epoch 00001: val_loss improved from inf to 0.13771, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.13771 to 0.11004, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.11004

Epoch 00004: val_loss did not improve from 0.11004

Epoch 00005: val_loss did not improve from 0.11004

Epoch 00006: val_loss did not improve from 0.11004

Epoch 00007: val_loss did not improve from 0.11004

Epoch 00008: val_loss did not improve from 0.11004

Epoch 00009: val_loss did not improve from 0.11004

Epoch 00010: val_loss did not improve from 0.11004





Epoch 00001: val_loss improved from inf to 0.09921, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09921

Epoch 00003: val_loss did not improve from 0.09921

Epoch 00004: val_loss did not improve from 0.09921

Epoch 00005: val_loss did not improve from 0.09921

Epoch 00006: val_loss did not improve from 0.09921

Epoch 00007: val_loss did not improve from 0.09921

Epoch 00008: val_loss did not improve from 0.09921

Epoch 00009: val_loss did not improve from 0.09921

Epoch 00010: val_loss did not improve from 0.09921





Epoch 00001: val_loss improved from inf to 0.11420, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.11420 to 0.06860, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.06860

Epoch 00004: val_loss did not improve from 0.06860

Epoch 00005: val_loss did not improve from 0.06860

Epoch 00006: val_loss did not improve from 0.06860

Epoch 00007: val_loss did not improve from 0.06860

Epoch 00008: val_loss did not improve from 0.06860

Epoch 00009: val_loss did not improve from 0.06860

Epoch 00010: val_loss did not improve from 0.06860





Epoch 00001: val_loss improved from inf to 0.07700, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07700

Epoch 00003: val_loss did not improve from 0.07700

Epoch 00004: val_loss did not improve from 0.07700

Epoch 00005: val_loss did not improve from 0.07700

Epoch 00006: val_loss did not improve from 0.07700

Epoch 00007: val_loss did not improve from 0.07700

Epoch 00008: val_loss did not improve from 0.07700

Epoch 00009: val_loss did not improve from 0.07700

Epoch 00010: val_loss did not improve from 0.07700





Epoch 00001: val_loss improved from inf to 0.05103, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05103

Epoch 00003: val_loss did not improve from 0.05103

Epoch 00004: val_loss did not improve from 0.05103

Epoch 00005: val_loss did not improve from 0.05103

Epoch 00006: val_loss did not improve from 0.05103

Epoch 00007: val_loss did not improve from 0.05103

Epoch 00008: val_loss did not improve from 0.05103

Epoch 00009: val_loss did not improve from 0.05103

Epoch 00010: val_loss did not improve from 0.05103





Epoch 00001: val_loss improved from inf to 0.10273, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.10273 to 0.09907, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.09907

Epoch 00004: val_loss did not improve from 0.09907

Epoch 00005: val_loss did not improve from 0.09907

Epoch 00006: val_loss did not improve from 0.09907

Epoch 00007: val_loss did not improve from 0.09907

Epoch 00008: val_loss did not improve from 0.09907

Epoch 00009: val_loss did not improve from 0.09907

Epoch 00010: val_loss did not improve from 0.09907





Epoch 00001: val_loss improved from inf to 0.07299, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.07299 to 0.06267, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.06267 to 0.05911, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.05911

Epoch 00005: val_loss did not improve from 0.05911

Epoch 00006: val_loss did not improve from 0.05911

Epoch 00007: val_loss did not improve from 0.05911

Epoch 00008: val_loss did not improve from 0.05911

Epoch 00009: val_loss did not improve from 0.05911

Epoch 00010: val_loss did not improve from 0.05911





Epoch 00001: val_loss improved from inf to 0.09712, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09712

Epoch 00003: val_loss did not improve from 0.09712

Epoch 00004: val_loss did not improve from 0.09712

Epoch 00005: val_loss did not improve from 0.09712

Epoch 00006: val_loss did not improve from 0.09712

Epoch 00007: val_loss did not improve from 0.09712

Epoch 00008: val_loss did not improve from 0.09712

Epoch 00009: val_loss did not improve from 0.09712

Epoch 00010: val_loss did not improve from 0.09712





Epoch 00001: val_loss improved from inf to 0.09797, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09797

Epoch 00003: val_loss did not improve from 0.09797

Epoch 00004: val_loss did not improve from 0.09797

Epoch 00005: val_loss did not improve from 0.09797

Epoch 00006: val_loss did not improve from 0.09797

Epoch 00007: val_loss did not improve from 0.09797

Epoch 00008: val_loss did not improve from 0.09797

Epoch 00009: val_loss did not improve from 0.09797

Epoch 00010: val_loss did not improve from 0.09797




In [448]:
normalization_strategy

In [449]:
resultCrossVal.to_csv("results.csv")
resultCrossVal

Unnamed: 0,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9
P_val,85.865,87.766,84.466,82.757,86.123,84.233,85.199,83.937,84.483,83.136
P_train,87.662,88.522,86.38,87.818,87.784,87.509,85.594,85.634,87.593,86.859
P_ewo,75.604,78.046,75.42,76.948,74.239,76.541,73.083,76.064,77.637,73.709
R_val,79.609,78.958,81.823,80.76,79.053,77.871,81.255,79.701,80.776,81.164
R_train,83.441,81.365,84.298,82.62,81.812,84.027,85.503,85.431,83.0,84.455
R_ewo,65.556,61.946,68.426,66.111,65.832,68.796,68.981,69.352,67.686,69.167
F1-val,81.635,82.026,82.473,81.072,81.043,80.172,82.349,81.156,81.528,81.21
F1-train,85.361,84.37,85.15,84.876,84.468,85.346,85.462,85.438,85.15,85.542
F1-ewo,69.74,68.042,71.344,70.823,69.137,71.84,70.672,72.35,72.209,71.104


In [450]:
resultCrossVal.mean(axis=1).to_frame()

Unnamed: 0,0
P_val,84.7965
P_train,87.1355
P_ewo,75.7291
R_val,80.097
R_train,83.5952
R_ewo,67.1853
F1-val,81.4664
F1-train,85.1163
F1-ewo,70.7261


In [451]:
resultCrossVal.std(axis=1).to_frame()

Unnamed: 0,0
P_val,1.493015
P_train,0.98391
P_ewo,1.655946
R_val,1.251507
R_train,1.410758
R_ewo,2.340966
F1-val,0.692016
F1-train,0.415549
F1-ewo,1.387074


In [452]:
# trainByTagResult.to_csv("results/train-by-tag.csv")
# trainByTagResult

In [453]:
# trainByTagResult.mean(axis=1).to_frame()

In [454]:
# trainByTagResult.std(axis=1).to_frame()

In [455]:
# testByTagResult.to_csv("results/test-by-tag.csv")
# testByTagResult

In [456]:
# testByTagResult.mean(axis=1).to_frame()

In [457]:
# testByTagResult.std(axis=1).to_frame()

In [458]:
# ewoByTagResult.to_csv("results/ewo-by-tag.csv")

In [459]:
# ewoByTagResult = pd.read_csv("results/ewo-by-tag.csv", index_col=0)
# ewoByTagResult

In [460]:
# ewoByTagResult.mean(axis=1).to_frame()

In [461]:
# ewoByTagResult.std(axis=1).to_frame()

In [462]:
# columns = en_fingerprints.columns

# print("Pred", "Real", "Freq", "Word", sep="\t")
# for c in columns:
#     prediction = model.predict(en_fingerprints[c].values.reshape((1, 210)))
#     pred_tag = int2tag[np.argmax(prediction)]
#     real_tag = en_corpus[en_corpus.word == c].iloc[0]['ne-tag']
    
#     if pred_tag != real_tag:
#         print(pred_tag, real_tag, en_fingerprints[c].max(), c, sep="\t")