In [1]:
# import
import keras
import sys
import numpy as np
import string
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import SimpleRNN, Dense
from keras.utils import np_utils, plot_model
from keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn import model_selection
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, precision_recall_fscore_support
import h5py as h5py

Using TensorFlow backend.


In [197]:
# if we are doeing binary classification. That means say if a token is a named entity or not
BINARY = False

# number of epochs for training
epochs = 10 

# the english side of the corpus
en_corpus_file = "corpus-en.txt"

# the ewondo side of the corpus
ewo_corpus_file = "corpus-ewo.txt"

# name of the file to same the model 
best_model_file = "best-model-conll.hdfs"

# The maximal number of phrases to use
max_nb_of_phrases =  -1

# the maximal number of duplicates for each word in the corpus
duplication = 1

# wether we are using only the vocabulary, ro redundancy
is_only_vocab = True

# if word should be shuffle or not
shuffle = is_only_vocab

# normalization strategy
# log, max, mean_log, log_inv, max_inv or mean_log_inv, tf
# None: for no normalization => 1/tf = nbWC/nbOcc(w)
normalization_strategy = None

# if we are using the Zennaki et al. signature
is_zennaki = False

# the number of neurons in the first layer
h1_size = 640

# number of neurons in the second layer
h2_size = 160  

In [198]:
def getTag(aString):
    """
        convert a string to a tag
    """
    tag = "O"
    if BINARY:
        if aString != "O":
            return "NE"
    else:
        tag = aString
    return tag
     

In [199]:
def load_corpus(file, max_nb_of_phrases):
    """
    Load a corpus stored in a file
    Input:
        - file: the name of the file of the corpus
        - max_nb_of_phases: maximal number of phrases to load
    
    Return:
        - a DataFrame representing the corpus
        - the number of phrases in the corpus
    """
    nb_of_phrases = 0
    dataset = {"word": [], "ne-tag": []}
    with open(file) as f:
        prev_line = None
        for cpt, line in enumerate(f):
            if cpt == 0:
                continue
            if nb_of_phrases == max_nb_of_phrases:
                break;

            l = line.strip()
            if len(l) == 0 and len(prev_line) != 0:
                nb_of_phrases += 1
                dataset["word"].append(line)
                dataset["ne-tag"].append(None)
            else:
                l = l.split("\t")
                if l[0] not in string.punctuation:
                    dataset["word"].append(l[0])
                    dataset["ne-tag"].append(ne_type(l[1]))
            prev_line = line.strip()
        
    return pd.DataFrame(dataset), nb_of_phrases+1

In [200]:
def log_normalization(fingerprints):
    fingerprints[fingerprints > 0] = 1 / fingerprints[fingerprints > 0] # get tf = nbOcc(w)/nbWC
    fingerprints[fingerprints > 0] = 1 + np.log(fingerprints[fingerprints > 0])
    return fingerprints

In [201]:
def max_normalization(fingerprints):
    fingerprints[fingerprints > 0] = 1 / fingerprints[fingerprints > 0] # get tf = nbOcc(w)/nbWC
    maxis = fingerprints.max(axis = 1)
    fingerprints[fingerprints > 0] = fingerprints[fingerprints > 0].apply(lambda row: 0.5 + 0.5 * row / maxis[row.index])
    return fingerprints

In [202]:
def mean_log_normalization(fingerprints):
    fingerprints[fingerprints > 0] = 1 / fingerprints[fingerprints > 0] # get tf = nbOcc(w)/nbWC
    means = fingerprints.mean(axis=1)
    fingerprints[fingerprints > 0] = fingerprints[fingerprints > 0].apply(lambda row: (1 + np.log(row)) / 1 + np.log(means[row.index]))
    return fingerprints

In [203]:
def normalize(fingerprints):
    if normalization_strategy == "log":
        return log_normalization(fingerprints)
    elif normalization_strategy == "max":
        return max_normalization(fingerprints)
    elif normalization_strategy == "mean_log":
        return mean_log_normalization(fingerprints)
    elif normalization_strategy == "log_inv":
        fp = log_normalization(fingerprints)
        fp = 1 / (1 + fp)
        return fp
    elif normalization_strategy == "max_inv":
        fp = max_normalization(fingerprints)
        fp = 1 / (1 + fp)
        return fp
    elif normalization_strategy == "mean_log_inv":
        fp = mean_log_normalization(fingerprints)
        fp = 1 / (1 + fp)
        return fp
    elif normalization_strategy == "tf":
        fp = fingerprints
        fp[fp > 0] = 1 / fp[fp > 0]
        return fp
    else:
        return fingerprints

In [204]:
def corpus_fingerprint(aDataframe, nb_of_biphrases):
    """
    Create the distributionnal signature of each word in the corpus
    Input:
        -aDataFrame: the corpus DataFrame
        -nb_of_biphrases: number of phrases in the corpus
    Return:
        a DataFrame: corpus fingerprint, the columns are the words in the corpus
    """
    print("Normalization strategy:", normalization_strategy)
    fingerprints = {}
    current_bi_phrase_index = 0
    nb_word_in_corpus = aDataframe[aDataframe.word != "\n"].word.size
    words_in_current_phrase = []
    for index, row in aDataframe.iterrows():
        if current_bi_phrase_index > nb_of_biphrases:
            break
            
        word = row['word']
        
        if word != "\n":
            words_in_current_phrase.append(word)
            if word not in fingerprints:
                fingerprints[word] = np.zeros(nb_of_biphrases, dtype=np.float32)
            fingerprints[word][current_bi_phrase_index] += 1
        else:
            nb_word_in_current_phrase = len(words_in_current_phrase)
            current_bi_phrase_index += 1
            words_in_current_phrase = []
        
    for word in fingerprints:
        for i in range(nb_of_biphrases):
            if fingerprints[word][i] != 0:
                fingerprints[word][i] = nb_word_in_corpus / fingerprints[word][i]
    ret = pd.DataFrame(fingerprints)
    
    if is_zennaki:
        ret[ret > 0] = 1
        
    return normalize(ret)

In [205]:
def corpus2trainingdata(aDataframe, fingerprintsDataFrame):
    """
    Convert corpus to training data => numpy array
    
    Input:
        -aDataFrame: Corpus dataframe
        -fingerprintsDataFrame: distributionnal signature of words in the corpus
    Return:
        (X, y): X is the array of words (signature) in the corpus and y is the corresponding labels (NE tags)
    """
    X = np.zeros((aDataframe.shape[0], fingerprintsDataFrame.shape[0]), dtype=np.int8)
    y = np.zeros(aDataframe.shape[0], dtype=np.int8)
    i = 0
    for row in aDataframe.iterrows():
        X[i] = fingerprintsDataFrame[row[1]['word']].values
        y[i] = tag2int[getTag(row[1]['ne-tag'])]
        i += 1
    return X, y

In [206]:
# A utility function to convert NE tags
def ne_type(aType):
    aType = aType.lower()
    if 'per' in aType:
        t =  'NE' if BINARY else 'PER' 
    elif 'loc' in aType:
        t =  'NE' if BINARY else 'LOC'
    elif 'org' in aType:
        t =  'NE' if BINARY else 'ORG'
    elif 'hour' in aType:
        t =  'NE' if BINARY else 'MISC'
    elif aType != 'o' and len(aType) > 0 :
        t =  'NE' if BINARY else 'MISC'
    else:
        t = 'O'
    return t

In [207]:
def P_R_F1(y_pred, y_true, neg_class):
    same = y_pred[y_true==y_pred]
    tp = same[same != neg_class].size
    nb_of_pos_exple = y_true[y_true != neg_class].size
    nb_of_pos_pred = y_pred[y_pred != neg_class].size
    p = r = f1 = 0
    try:
        p = np.round(tp*100/nb_of_pos_pred, 2)
    except ZeroDivisionError:
        print("number of correct positive predictions is 0")
        
    try:
        r = np.round(tp*100/nb_of_pos_exple, 2)
    except ZeroDivisionError:
        print("number of position exple is 0")
        
    try:
        f1 = np.round(2*r*p/(r+p), 2)
    except ZeroDivisionError:
        print("Recall and precision are 0")

    return p, r, f1

In [208]:
def shuffle_data(X, y):
    indices = [i for i in  range(X.shape[0])]
    np.random.shuffle(indices)
    return X[indices], y[indices]

In [209]:
def create_model(input_dim, output_dim):
    model = Sequential()
    model.add(Dense(h1_size, input_dim=input_dim, activation='sigmoid', name="hidden1"))
    model.add(Dense(h2_size, activation='sigmoid', name="hidden2"))
    if BINARY:
        model.add(Dense(1, activation='sigmoid', name="outputlayer"))
        model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['binary_accuracy'])
    else:
        model.add(Dense(output_dim, activation='softmax', name="outputlayer"))
        model.compile(loss='categorical_crossentropy', optimizer="rmsprop", metrics=['accuracy'])
    model.summary()
    return model

In [210]:
def train_model(model, X_train, y_train, X_val, y_val, epochs=epochs):
    # stop learning if the error is the same between two consecutive epochs
    early_stop = EarlyStopping(patience=20, verbose=2)
    
    # saving best model
    best_model_cp = ModelCheckpoint(best_model_file, save_best_only=True, verbose=1)
    
    model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=epochs, verbose=0, shuffle=shuffle, callbacks=[best_model_cp, early_stop])
    
    #loading and returning the best model
    return keras.models.load_model(best_model_file)

In [211]:
def predict(model, X, y, binary=BINARY):
    if BINARY:
        y_pred = np.round(model.predict(X))
        y_true = y
    else:
        predictions = model.predict(X)
        y_pred = np.array([np.argmax(p) for p in predictions])
        y_true = np.array([np.argmax(t) for t in y ])
    return y_true, y_pred

In [212]:
def model_performance(y_true, y_pred):
    return P_R_F1(y_pred, y_true, tag2int['O']) #precision, recall, f1-score

In [213]:
def model_performace_by_tag(y_true, y_pred, tag):
    p, r, f1 = 0, 0, 0
    
    eq = y_pred[y_pred==y_true]
    correctly_pred = eq[eq==tag].size
    try:
        p = np.round(100 * correctly_pred / y_pred[y_pred==tag].size, 2)
    except ZeroDivisionError:
        pass
    
    try:
        r = np.round(100 * correctly_pred / y_true[y_true==tag].size, 2)
    except ZeroDivisionError:
        pass
    
    try:
        f1 = np.round(2 * r * p / (r + p), 2)
    except ZeroDivisionError:
        pass
    
    return p, r, f1

In [214]:
def algoEval(X_train, y_train, X_val, y_val, X_ewo, y_ewo, epochs=epochs, model=None):
    """
    Train a model on (X, y) and validate on (X_val, y_val) then project on (X_ewo)
    """
    test_precision, train_precision, ewo_precision = [], [], []
    test_recall, train_recall, ewo_recall = [], [], []
    test_fscore, train_fscore, ewo_fscore = [], [], []
    
    test_result_by_tag = {}
    train_result_by_tag = {}
    ewo_result_by_tag = {}
    for t in tagSet:
        f1_key = "F1-"+t
        p_key = "P-"+t
        r_key = "R-"+t
        train_result_by_tag[f1_key], train_result_by_tag[p_key], train_result_by_tag[r_key] = [], [], []
        test_result_by_tag[f1_key], test_result_by_tag[p_key], test_result_by_tag[r_key] = [], [], []
        ewo_result_by_tag[f1_key], ewo_result_by_tag[p_key], ewo_result_by_tag[r_key] = [], [], []

    m = train_model(model, X_train, y_train, X_val, y_val, epochs=epochs)
        
    y_true, y_pred = predict(m, X_train, y_train)
    p_train, r_train, f1_train = model_performance(y_true, y_pred)
        
    y_true_val, y_pred_val = predict(m, X_val, y_val)
    p_val, r_val, f1_val = model_performance(y_true_val, y_pred_val)
        
    y_true_ewo, y_pred_ewo = predict(m, X_ewo, y_ewo) 
    p_ewo, r_ewo, f1_ewo = model_performance(y_true_ewo, y_pred_ewo)
        
    for t in range(len(int2tag)):
        f1_key = "F1-" + int2tag[t]
        p_key = "P-" + int2tag[t]
        r_key = "R-" + int2tag[t]
            
        p, r, f1 = model_performace_by_tag(y_true, y_pred, t)
        train_result_by_tag[p_key].append(p)
        train_result_by_tag[r_key].append(r)
        train_result_by_tag[f1_key].append(f1)
            
        p, r, f1 = model_performace_by_tag(y_true_val, y_pred_val, t)
        test_result_by_tag[p_key].append(p)
        test_result_by_tag[r_key].append(r)
        test_result_by_tag[f1_key].append(f1)
            
        p, r, f1 = model_performace_by_tag(y_true_ewo, y_pred_ewo, t)
        ewo_result_by_tag[p_key].append(p)
        ewo_result_by_tag[r_key].append(r)
        ewo_result_by_tag[f1_key].append(f1)
                
    test_precision.append(p_val)
    train_precision.append(p_train)
    ewo_precision.append(p_ewo)
        
    test_recall.append(r_val)
    train_recall.append(r_train)
    ewo_recall.append(r_ewo)
        
    test_fscore.append(f1_val)
    train_fscore.append(f1_train)
    ewo_fscore.append(f1_ewo)
    return pd.DataFrame({
        'P_val': test_precision, 
        'P_train': train_precision, 
        'P_ewo': ewo_precision, 'R_val': test_recall, 'R_train': train_recall, 
        'R_ewo': ewo_recall, 'F1-val': test_fscore, 'F1-train': train_fscore, 'F1-ewo': ewo_fscore}), pd.DataFrame(train_result_by_tag), pd.DataFrame(test_result_by_tag), pd.DataFrame(ewo_result_by_tag)

In [215]:
def algoCrossVal(X, y, X_ewo, y_ewo, k = 10, repeat=1): 
    """
    Traing a model with k-fold cross validation
    We train the model `repeat` times to check it's stability
    """
    block_size = int(X.shape[0] / k)   
    output = None
    model = None
    train_by_tags, test_by_tags, ewo_by_tags = None, None, None
    for it in range(repeat):
        print("AlgoCrossValIter -", it+1)
        model = create_model(X.shape[1], len(tagSet))
        results = None
        train_by_tagsTmp, test_by_tagsTmp, ewo_by_tagsTmp = None, None, None
        for i in range(k):
            X_val, y_val = X[i*block_size:i*block_size+block_size], y[i*block_size:i*block_size+block_size]
            X_train = np.concatenate((X[0:i*block_size], X[i*block_size+block_size:]))
            y_train = np.concatenate((y[0:i*block_size], y[i*block_size+block_size:]))

            X_train = X_train.reshape(X_train.shape[0], X_train.shape[1])
            X_val = X_val.reshape(X_val.shape[0], X_val.shape[1])

            result, train_by_tag, test_by_tag, ewo_by_tag = algoEval(X_train, y_train, X_val, y_val, X_ewo, y_ewo, model=model)
            if results is None:
                results = result.copy()
                train_by_tagsTmp, test_by_tagsTmp, ewo_by_tagsTmp = train_by_tag.copy(), test_by_tag.copy(), ewo_by_tag.copy()
            else:
                results = pd.concat([results, result], ignore_index=True)
                train_by_tagsTmp = pd.concat([train_by_tagsTmp, train_by_tag], ignore_index=True)
                test_by_tagsTmp = pd.concat([test_by_tagsTmp, test_by_tag], ignore_index=True)
                ewo_by_tagsTmp = pd.concat([ewo_by_tagsTmp, ewo_by_tag], ignore_index=True)
        
        if output is None:
            output = results.mean(axis=0).to_frame()
            train_by_tags = train_by_tagsTmp.mean(axis=0).to_frame()
            test_by_tags = test_by_tagsTmp.mean(axis=0).to_frame()
            ewo_by_tags = ewo_by_tagsTmp.mean(axis=0).to_frame()
        else:
            output = pd.concat([output, results.mean(axis=0).to_frame()], axis=1)
            train_by_tags = pd.concat([train_by_tags, train_by_tagsTmp.mean(axis=0).to_frame()], axis=1)
            test_by_tags = pd.concat([test_by_tags, test_by_tagsTmp.mean(axis=0).to_frame()], axis=1)
            ewo_by_tags = pd.concat([ewo_by_tags, ewo_by_tagsTmp.mean(axis=0).to_frame()], axis=1)

    return output, train_by_tags, test_by_tags, ewo_by_tags, model

In [216]:
en_corpus, en_nb_of_phrases = load_corpus(en_corpus_file, max_nb_of_phrases)

In [217]:
nb_word_in_corpus = en_corpus[en_corpus.word != "\n"].word.size
print("Nb word in corpus", nb_word_in_corpus)

Nb word in corpus 4170


In [218]:
en_corpus.head()
en_corpus.loc[en_corpus['ne-tag'] == 'ORG']

Unnamed: 0,word,ne-tag
1335,Sadducees,ORG


In [219]:
tagSet = en_corpus["ne-tag"].dropna().unique()
if BINARY:
    tagSet = ['NE', 'O']
tag2int = {j: i for i, j in enumerate(tagSet)}
int2tag = {i: j for i, j in enumerate(tagSet)}
print(tag2int)

{'O': 0, 'MISC': 1, 'PER': 2, 'LOC': 3, 'ORG': 4}


In [220]:
en_nb_of_phrases

210

In [221]:
en_corpus.describe()

Unnamed: 0,word,ne-tag
count,4379,4170
unique,904,5
top,the,O
freq,313,3779


In [222]:
en_corpus.head(10)

Unnamed: 0,word,ne-tag
0,The,O
1,Promise,O
2,of,O
3,the,O
4,Holy,MISC
5,Spirit,MISC
6,\n,
7,In,O
8,the,O
9,first,O


In [223]:
for tag in tagSet:
    print("{0} % = {1} %".format(tag, np.round(en_corpus[en_corpus['ne-tag']==tag].shape[0] * 100 / en_corpus[en_corpus['ne-tag']!='\n'].shape[0], 2)))

O % = 86.3 %
MISC % = 2.4 %
PER % = 5.59 %
LOC % = 0.91 %
ORG % = 0.02 %


In [224]:
for tag in tagSet:
    print("{0} % = {1} %".format(tag, np.round(en_corpus[en_corpus['ne-tag']==tag].word.unique().shape[0] * 100 / en_corpus[en_corpus['ne-tag']!='\n'].word.unique().shape[0], 2)))

O % = 89.16 %
MISC % = 1.88 %
PER % = 8.96 %
LOC % = 1.99 %
ORG % = 0.11 %


In [225]:
en_corpus[en_corpus.word == "\n"].shape

(209, 2)

In [226]:
print("Nb of bi-phrases", en_nb_of_phrases)

Nb of bi-phrases 210


In [227]:
en_fingerprints = corpus_fingerprint(en_corpus, en_nb_of_phrases)

Normalization strategy: None


In [228]:
en_fingerprints.head(10)

Unnamed: 0,The,Promise,of,the,Holy,Spirit,In,first,book,O,...,considered,dream,She,save,fulfill,Immanuel,us),woke,sleep,knew
0,4170.0,4170.0,4170.0,4170.0,4170.0,4170.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,4170.0,0.0,0.0,4170.0,4170.0,4170.0,4170.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,1390.0,4170.0,4170.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,4170.0,4170.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,4170.0,2085.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,4170.0,4170.0,4170.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,4170.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,4170.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,0.0,0.0,4170.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,4170.0,1390.0,4170.0,4170.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [229]:
(4170 / nb_word_in_corpus)

1.0

In [230]:
en_corpus.shape

(4379, 2)

In [231]:
en_fingerprints['you'].values.shape

(210,)

In [232]:
en_corpus[en_corpus.word != "\n"].shape

(4170, 2)

In [233]:
if is_only_vocab:
    text = list(en_corpus[en_corpus.word != "\n"].word.unique())
else:
    text = list(en_corpus[en_corpus.word != "\n"].word)
en_vocab = pd.DataFrame({'text': text})
en_vocab.describe()

Unnamed: 0,text
count,903
unique,903
top,enemies
freq,1


In [234]:
if is_only_vocab:
    X = np.zeros((en_vocab.shape[0] * duplication, en_nb_of_phrases))
    target = np.zeros((en_vocab.shape[0] * duplication))
    p=0
    for i, row in en_vocab.iterrows():
        c = row.text
        for j in range(duplication):
            X[p] = en_fingerprints[c.split(" ")[0]]
            target[p] = tag2int[getTag(en_corpus[en_corpus.word == c.split(" ")[-1:][0]]['ne-tag'].iloc[0])]
            p+=1
    X, target = shuffle_data(X, target)
    print(X.shape, en_fingerprints.shape, target.shape)

(903, 210) (210, 903) (903,)


In [235]:
en_vocab[-20:]

Unnamed: 0,text
883,Eliud
884,Eleazar
885,Matthan
886,husband
887,fourteen
888,unwilling
889,shame
890,resolved
891,divorce
892,quietly


In [236]:
if not is_only_vocab:
    X, target = corpus2trainingdata(en_corpus[en_corpus.word != "\n"], en_fingerprints)

In [237]:
if shuffle:
    X, target = shuffle_data(X, target)

In [238]:
y = target.copy()
y[0:100]
if not BINARY:
    y = np_utils.to_categorical(y, len(tagSet))
y.shape

(903, 5)

In [239]:
from sklearn.decomposition import PCA

def visualize(X, y):
    pca = PCA(n_components=2)
    X_embeded = pca.fit_transform(X)
    plt.figure(figsize=(5, 5))
    plt.scatter(X_embeded[:, 0], X_embeded[:, 1], c=y)
    plt.legend()
    plt.show()

In [240]:
# visualize(X, target)

In [241]:
X_train, X_val, y_train, y_val = model_selection.train_test_split(X, y, test_size=0.33)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1])
X_val = X_val.reshape(X_val.shape[0], X_val.shape[1])
print("X_train.shape =", X_train.shape)
print("y_train.shape =", y_train.shape)
print("X_val.shape =", X_val.shape)
print("y_val.shape =", y_val.shape)

tTarget = np.array([np.argmax(yy) for yy in y_train])
vTarget = np.array([np.argmax(yy) for yy in y_val])

for tag in tagSet:
    print("{0} % in training data = {1} %".format(tag, np.round(tTarget[tTarget==tag2int[tag]].size * 100 / tTarget.shape[0], 2)))
    print("{0} % in validation data = {1} %".format(tag, np.round(vTarget[vTarget==tag2int[tag]].size * 100 / vTarget.shape[0], 2)))

X_train.shape = (605, 210)
y_train.shape = (605, 5)
X_val.shape = (298, 210)
y_val.shape = (298, 5)
O % in training data = 89.26 %
O % in validation data = 86.24 %
MISC % in training data = 0.99 %
MISC % in validation data = 1.68 %
PER % in training data = 8.1 %
PER % in validation data = 9.4 %
LOC % in training data = 1.49 %
LOC % in validation data = 2.68 %
ORG % in training data = 0.17 %
ORG % in validation data = 0.0 %


In [242]:
ewo_corpus, ewo_nb_of_phrases = load_corpus(ewo_corpus_file, max_nb_of_phrases)

In [243]:
ewo_corpus.loc[ewo_corpus['ne-tag'] == 'PER']

Unnamed: 0,word,ne-tag
6,Teofil,PER
15,Yesus,PER
86,Yohannes,PER
104,Yesus,PER
230,Yesus,PER
...,...,...
3676,Maria,PER
3697,Yesus,PER
3740,Emmanuel,PER
3750,Yosef,PER


In [244]:
ewo_nb_of_phrases

210

In [245]:
for tag in tagSet:
    print("{0} % = {1} %".format(tag, np.round(ewo_corpus[ewo_corpus['ne-tag']==tag].shape[0] * 100 / ewo_corpus[ewo_corpus['ne-tag']!='\n'].shape[0], 2)))

O % = 84.15 %
MISC % = 2.54 %
PER % = 6.69 %
LOC % = 1.03 %
ORG % = 0.05 %


In [246]:
for tag in tagSet:
    print("{0} % = {1} %".format(tag, np.round(ewo_corpus[ewo_corpus['ne-tag']==tag].word.unique().shape[0] * 100 / ewo_corpus[ewo_corpus['ne-tag']!='\n'].word.unique().shape[0], 2)))

O % = 89.94 %
MISC % = 1.17 %
PER % = 8.3 %
LOC % = 1.86 %
ORG % = 0.2 %


In [247]:
ewo_corpus.describe()

Unnamed: 0,word,ne-tag
count,3779,3570
unique,1024,5
top,\n,O
freq,209,3180


In [248]:
ewo_corpus.head()

Unnamed: 0,word,ne-tag
0,Mfufub,MISC
1,Nsisim,MISC
2,ayi,O
3,sò,O
4,\n,


In [249]:
ewo_fingerprints = corpus_fingerprint(ewo_corpus, en_nb_of_phrases)

Normalization strategy: None


In [250]:
if is_only_vocab:
    text = list(ewo_corpus[ewo_corpus.word != "\n"].word.unique())
else:
    text = list(ewo_corpus[ewo_corpus.word != "\n"].word)
ewo_vocab = pd.DataFrame({"text":text})

In [251]:
if is_only_vocab:
    X_ewo = np.zeros((ewo_vocab.shape[0] * duplication, en_nb_of_phrases))
    ewo_target = np.zeros((ewo_vocab.shape[0] * duplication))
    p=0
    for i, row in ewo_vocab.iterrows():
        c = row.text
        for j in range(duplication):
            X_ewo[p] = ewo_fingerprints[c.split(" ")[0]]
            ewo_target[p] = tag2int[getTag(ewo_corpus[ewo_corpus.word == c.split(" ")[-1:][0]]['ne-tag'].iloc[0])]
            p+=1
    X_ewo, ewo_target = shuffle_data(X_ewo, ewo_target)

In [252]:
ewo_vocab[-10:]

Unnamed: 0,text
1013,sik
1014,Ntud
1015,bëyole
1016,Emmanuel
1017,Avëbë
1018,angavëbë
1019,oyò
1020,angabende
1021,anganòṅ
1022,angayole


In [253]:
if not is_only_vocab:
    X_ewo, ewo_target = corpus2trainingdata(ewo_corpus[ewo_corpus.word != "\n"], ewo_fingerprints)

In [254]:
if shuffle:
    X_ewo, ewo_target = shuffle_data(X_ewo, ewo_target)

In [255]:
y_ewo = ewo_target.copy()
print(y_ewo.shape, len(ewo_vocab))

(1023,) 1023


In [256]:
X_ewo.shape

(1023, 210)

In [257]:
y_ewo = ewo_target.copy()
y_ewo[:20]
if not BINARY:
    y_ewo = np_utils.to_categorical(y_ewo)

In [258]:
X_ewo = X_ewo.reshape((X_ewo.shape[0], en_nb_of_phrases))

In [259]:
# model = create_model(X.shape[1], len(tagSet))
# resultEval, train_by_tag, test_by_tag, ewo_by_tag = algoEval(X_train, y_train, X_val, y_val, X_ewo, y_ewo, model=model)

In [260]:
# resultEval

In [261]:
# train_by_tag

In [262]:
# test_by_tag

In [263]:
# ewo_by_tag

In [264]:
# resultEval.mean()

In [265]:
# resultEval.std()

In [266]:
resultCrossVal, trainByTagResult, testByTagResult, ewoByTagResult, model = algoCrossVal(X, y, X_ewo, y_ewo, repeat=10)

AlgoCrossValIter - 1
Model: "sequential_21"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.60744, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.60744 to 0.46227, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.46227 to 0.41973, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.41973

Epoch 00005: val_loss 




Epoch 00001: val_loss improved from inf to 0.22644, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.22644 to 0.20648, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.20648 to 0.19487, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.19487

Epoch 00005: val_loss did not improve from 0.19487

Epoch 00006: val_loss did not improve from 0.19487

Epoch 00007: val_loss did not improve from 0.19487

Epoch 00008: val_loss did not improve from 0.19487

Epoch 00009: val_loss did not improve from 0.19487

Epoch 00010: val_loss did not improve from 0.19487





Epoch 00001: val_loss improved from inf to 0.05689, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05689

Epoch 00003: val_loss did not improve from 0.05689

Epoch 00004: val_loss did not improve from 0.05689

Epoch 00005: val_loss did not improve from 0.05689

Epoch 00006: val_loss did not improve from 0.05689

Epoch 00007: val_loss did not improve from 0.05689

Epoch 00008: val_loss did not improve from 0.05689

Epoch 00009: val_loss did not improve from 0.05689

Epoch 00010: val_loss did not improve from 0.05689





Epoch 00001: val_loss improved from inf to 0.09552, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09552

Epoch 00003: val_loss did not improve from 0.09552

Epoch 00004: val_loss did not improve from 0.09552

Epoch 00005: val_loss did not improve from 0.09552

Epoch 00006: val_loss did not improve from 0.09552

Epoch 00007: val_loss did not improve from 0.09552

Epoch 00008: val_loss did not improve from 0.09552

Epoch 00009: val_loss did not improve from 0.09552

Epoch 00010: val_loss did not improve from 0.09552

Epoch 00001: val_loss improved from inf to 0.08718, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.08718 to 0.08350, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.08350

Epoch 00004: val_loss did not improve from 0.08350

Epoch 00005: val_loss did not improve from 0.08350

Epoch 00006: val_loss did not improve from 0.08350

Epoch 00007: val_loss did not improve from 0.0




Epoch 00001: val_loss improved from inf to 0.09041, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.09041 to 0.06749, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.06749

Epoch 00004: val_loss did not improve from 0.06749

Epoch 00005: val_loss did not improve from 0.06749

Epoch 00006: val_loss did not improve from 0.06749

Epoch 00007: val_loss did not improve from 0.06749

Epoch 00008: val_loss did not improve from 0.06749

Epoch 00009: val_loss did not improve from 0.06749

Epoch 00010: val_loss did not improve from 0.06749





Epoch 00001: val_loss improved from inf to 0.05030, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05030

Epoch 00003: val_loss did not improve from 0.05030

Epoch 00004: val_loss did not improve from 0.05030

Epoch 00005: val_loss did not improve from 0.05030

Epoch 00006: val_loss did not improve from 0.05030

Epoch 00007: val_loss did not improve from 0.05030

Epoch 00008: val_loss did not improve from 0.05030

Epoch 00009: val_loss did not improve from 0.05030

Epoch 00010: val_loss did not improve from 0.05030





Epoch 00001: val_loss improved from inf to 0.05509, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05509

Epoch 00003: val_loss did not improve from 0.05509

Epoch 00004: val_loss did not improve from 0.05509

Epoch 00005: val_loss did not improve from 0.05509

Epoch 00006: val_loss did not improve from 0.05509

Epoch 00007: val_loss did not improve from 0.05509

Epoch 00008: val_loss did not improve from 0.05509

Epoch 00009: val_loss did not improve from 0.05509

Epoch 00010: val_loss did not improve from 0.05509





Epoch 00001: val_loss improved from inf to 0.03703, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.03703

Epoch 00003: val_loss did not improve from 0.03703

Epoch 00004: val_loss did not improve from 0.03703

Epoch 00005: val_loss did not improve from 0.03703

Epoch 00006: val_loss did not improve from 0.03703

Epoch 00007: val_loss did not improve from 0.03703

Epoch 00008: val_loss did not improve from 0.03703

Epoch 00009: val_loss did not improve from 0.03703

Epoch 00010: val_loss did not improve from 0.03703





Epoch 00001: val_loss improved from inf to 0.10282, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10282

Epoch 00003: val_loss did not improve from 0.10282

Epoch 00004: val_loss did not improve from 0.10282

Epoch 00005: val_loss did not improve from 0.10282

Epoch 00006: val_loss did not improve from 0.10282

Epoch 00007: val_loss did not improve from 0.10282

Epoch 00008: val_loss did not improve from 0.10282

Epoch 00009: val_loss did not improve from 0.10282

Epoch 00010: val_loss did not improve from 0.10282




AlgoCrossValIter - 2
Model: "sequential_22"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.46917, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.46917 to 0.42567, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.42567

Epoch 00004: val_loss improved from 0.42567 to 0.38304, saving model to best-model-conll.hdfs

Epoch 00005: val_loss 




Epoch 00001: val_loss improved from inf to 0.31158, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.31158 to 0.20130, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.20130

Epoch 00004: val_loss did not improve from 0.20130

Epoch 00005: val_loss did not improve from 0.20130

Epoch 00006: val_loss did not improve from 0.20130

Epoch 00007: val_loss did not improve from 0.20130

Epoch 00008: val_loss did not improve from 0.20130

Epoch 00009: val_loss did not improve from 0.20130

Epoch 00010: val_loss did not improve from 0.20130





Epoch 00001: val_loss improved from inf to 0.09339, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09339

Epoch 00003: val_loss improved from 0.09339 to 0.07881, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.07881

Epoch 00005: val_loss did not improve from 0.07881

Epoch 00006: val_loss did not improve from 0.07881

Epoch 00007: val_loss did not improve from 0.07881

Epoch 00008: val_loss did not improve from 0.07881

Epoch 00009: val_loss did not improve from 0.07881

Epoch 00010: val_loss did not improve from 0.07881





Epoch 00001: val_loss improved from inf to 0.10033, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10033

Epoch 00003: val_loss did not improve from 0.10033

Epoch 00004: val_loss did not improve from 0.10033

Epoch 00005: val_loss did not improve from 0.10033

Epoch 00006: val_loss did not improve from 0.10033

Epoch 00007: val_loss did not improve from 0.10033

Epoch 00008: val_loss did not improve from 0.10033

Epoch 00009: val_loss did not improve from 0.10033

Epoch 00010: val_loss did not improve from 0.10033

Epoch 00001: val_loss improved from inf to 0.08705, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08705

Epoch 00003: val_loss did not improve from 0.08705

Epoch 00004: val_loss did not improve from 0.08705

Epoch 00005: val_loss did not improve from 0.08705

Epoch 00006: val_loss did not improve from 0.08705

Epoch 00007: val_loss did not improve from 0.08705

Epoch 00008: val_loss did not improve




Epoch 00001: val_loss improved from inf to 0.10457, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.10457 to 0.06934, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.06934

Epoch 00004: val_loss did not improve from 0.06934

Epoch 00005: val_loss did not improve from 0.06934

Epoch 00006: val_loss did not improve from 0.06934

Epoch 00007: val_loss did not improve from 0.06934

Epoch 00008: val_loss did not improve from 0.06934

Epoch 00009: val_loss did not improve from 0.06934

Epoch 00010: val_loss did not improve from 0.06934





Epoch 00001: val_loss improved from inf to 0.04754, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04754

Epoch 00003: val_loss did not improve from 0.04754

Epoch 00004: val_loss did not improve from 0.04754

Epoch 00005: val_loss did not improve from 0.04754

Epoch 00006: val_loss did not improve from 0.04754

Epoch 00007: val_loss did not improve from 0.04754

Epoch 00008: val_loss did not improve from 0.04754

Epoch 00009: val_loss did not improve from 0.04754

Epoch 00010: val_loss did not improve from 0.04754





Epoch 00001: val_loss improved from inf to 0.04626, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04626

Epoch 00003: val_loss did not improve from 0.04626

Epoch 00004: val_loss did not improve from 0.04626

Epoch 00005: val_loss did not improve from 0.04626

Epoch 00006: val_loss did not improve from 0.04626

Epoch 00007: val_loss did not improve from 0.04626

Epoch 00008: val_loss did not improve from 0.04626

Epoch 00009: val_loss did not improve from 0.04626

Epoch 00010: val_loss did not improve from 0.04626

Epoch 00001: val_loss improved from inf to 0.04505, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04505

Epoch 00003: val_loss did not improve from 0.04505

Epoch 00004: val_loss did not improve from 0.04505

Epoch 00005: val_loss did not improve from 0.04505

Epoch 00006: val_loss did not improve from 0.04505

Epoch 00007: val_loss did not improve from 0.04505

Epoch 00008: val_loss did not improve




Epoch 00001: val_loss improved from inf to 0.17176, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.17176

Epoch 00003: val_loss did not improve from 0.17176

Epoch 00004: val_loss did not improve from 0.17176

Epoch 00005: val_loss did not improve from 0.17176

Epoch 00006: val_loss did not improve from 0.17176

Epoch 00007: val_loss did not improve from 0.17176

Epoch 00008: val_loss did not improve from 0.17176

Epoch 00009: val_loss did not improve from 0.17176

Epoch 00010: val_loss did not improve from 0.17176





Epoch 00001: val_loss improved from inf to 0.08651, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.08651 to 0.08531, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.08531

Epoch 00004: val_loss did not improve from 0.08531

Epoch 00005: val_loss did not improve from 0.08531

Epoch 00006: val_loss did not improve from 0.08531

Epoch 00007: val_loss did not improve from 0.08531

Epoch 00008: val_loss did not improve from 0.08531

Epoch 00009: val_loss did not improve from 0.08531

Epoch 00010: val_loss did not improve from 0.08531





Epoch 00001: val_loss improved from inf to 0.14193, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.14193 to 0.13058, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.13058 to 0.10329, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.10329

Epoch 00005: val_loss did not improve from 0.10329

Epoch 00006: val_loss did not improve from 0.10329

Epoch 00007: val_loss did not improve from 0.10329

Epoch 00008: val_loss did not improve from 0.10329

Epoch 00009: val_loss did not improve from 0.10329

Epoch 00010: val_loss did not improve from 0.10329





Epoch 00001: val_loss improved from inf to 0.10517, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.10517 to 0.09700, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.09700

Epoch 00004: val_loss did not improve from 0.09700

Epoch 00005: val_loss did not improve from 0.09700

Epoch 00006: val_loss did not improve from 0.09700

Epoch 00007: val_loss did not improve from 0.09700

Epoch 00008: val_loss did not improve from 0.09700

Epoch 00009: val_loss did not improve from 0.09700

Epoch 00010: val_loss did not improve from 0.09700





Epoch 00001: val_loss improved from inf to 0.08508, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08508

Epoch 00003: val_loss did not improve from 0.08508

Epoch 00004: val_loss did not improve from 0.08508

Epoch 00005: val_loss did not improve from 0.08508

Epoch 00006: val_loss did not improve from 0.08508

Epoch 00007: val_loss did not improve from 0.08508

Epoch 00008: val_loss did not improve from 0.08508

Epoch 00009: val_loss did not improve from 0.08508

Epoch 00010: val_loss did not improve from 0.08508





Epoch 00001: val_loss improved from inf to 0.05174, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05174

Epoch 00003: val_loss did not improve from 0.05174

Epoch 00004: val_loss did not improve from 0.05174

Epoch 00005: val_loss did not improve from 0.05174

Epoch 00006: val_loss did not improve from 0.05174

Epoch 00007: val_loss did not improve from 0.05174

Epoch 00008: val_loss did not improve from 0.05174

Epoch 00009: val_loss did not improve from 0.05174

Epoch 00010: val_loss did not improve from 0.05174





Epoch 00001: val_loss improved from inf to 0.04963, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04963

Epoch 00003: val_loss did not improve from 0.04963

Epoch 00004: val_loss did not improve from 0.04963

Epoch 00005: val_loss did not improve from 0.04963

Epoch 00006: val_loss did not improve from 0.04963

Epoch 00007: val_loss did not improve from 0.04963

Epoch 00008: val_loss did not improve from 0.04963

Epoch 00009: val_loss did not improve from 0.04963

Epoch 00010: val_loss did not improve from 0.04963





Epoch 00001: val_loss improved from inf to 0.05405, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.05405 to 0.04741, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.04741 to 0.03759, saving model to best-model-conll.hdfs

Epoch 00004: val_loss improved from 0.03759 to 0.03756, saving model to best-model-conll.hdfs

Epoch 00005: val_loss did not improve from 0.03756

Epoch 00006: val_loss did not improve from 0.03756

Epoch 00007: val_loss did not improve from 0.03756

Epoch 00008: val_loss did not improve from 0.03756

Epoch 00009: val_loss did not improve from 0.03756

Epoch 00010: val_loss did not improve from 0.03756





Epoch 00001: val_loss improved from inf to 0.10433, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10433

Epoch 00003: val_loss did not improve from 0.10433

Epoch 00004: val_loss did not improve from 0.10433

Epoch 00005: val_loss did not improve from 0.10433

Epoch 00006: val_loss did not improve from 0.10433

Epoch 00007: val_loss did not improve from 0.10433

Epoch 00008: val_loss did not improve from 0.10433

Epoch 00009: val_loss did not improve from 0.10433

Epoch 00010: val_loss did not improve from 0.10433




AlgoCrossValIter - 4
Model: "sequential_24"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.51602, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.51602 to 0.48308, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.48308 to 0.46271, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.46271

Epoch 00005: val_loss 




Epoch 00001: val_loss improved from inf to 0.15484, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.15484

Epoch 00003: val_loss did not improve from 0.15484

Epoch 00004: val_loss did not improve from 0.15484

Epoch 00005: val_loss did not improve from 0.15484

Epoch 00006: val_loss did not improve from 0.15484

Epoch 00007: val_loss did not improve from 0.15484

Epoch 00008: val_loss did not improve from 0.15484

Epoch 00009: val_loss did not improve from 0.15484

Epoch 00010: val_loss did not improve from 0.15484





Epoch 00001: val_loss improved from inf to 0.05230, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05230

Epoch 00003: val_loss did not improve from 0.05230

Epoch 00004: val_loss did not improve from 0.05230

Epoch 00005: val_loss did not improve from 0.05230

Epoch 00006: val_loss did not improve from 0.05230

Epoch 00007: val_loss did not improve from 0.05230

Epoch 00008: val_loss did not improve from 0.05230

Epoch 00009: val_loss did not improve from 0.05230

Epoch 00010: val_loss did not improve from 0.05230





Epoch 00001: val_loss improved from inf to 0.08341, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08341

Epoch 00003: val_loss did not improve from 0.08341

Epoch 00004: val_loss did not improve from 0.08341

Epoch 00005: val_loss did not improve from 0.08341

Epoch 00006: val_loss did not improve from 0.08341

Epoch 00007: val_loss did not improve from 0.08341

Epoch 00008: val_loss did not improve from 0.08341

Epoch 00009: val_loss did not improve from 0.08341

Epoch 00010: val_loss did not improve from 0.08341





Epoch 00001: val_loss improved from inf to 0.08650, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08650

Epoch 00003: val_loss did not improve from 0.08650

Epoch 00004: val_loss improved from 0.08650 to 0.08452, saving model to best-model-conll.hdfs

Epoch 00005: val_loss did not improve from 0.08452

Epoch 00006: val_loss did not improve from 0.08452

Epoch 00007: val_loss did not improve from 0.08452

Epoch 00008: val_loss did not improve from 0.08452

Epoch 00009: val_loss did not improve from 0.08452

Epoch 00010: val_loss did not improve from 0.08452





Epoch 00001: val_loss improved from inf to 0.07649, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.07649 to 0.07369, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.07369

Epoch 00004: val_loss did not improve from 0.07369

Epoch 00005: val_loss did not improve from 0.07369

Epoch 00006: val_loss did not improve from 0.07369

Epoch 00007: val_loss did not improve from 0.07369

Epoch 00008: val_loss did not improve from 0.07369

Epoch 00009: val_loss did not improve from 0.07369

Epoch 00010: val_loss did not improve from 0.07369





Epoch 00001: val_loss improved from inf to 0.04988, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04988

Epoch 00003: val_loss did not improve from 0.04988

Epoch 00004: val_loss did not improve from 0.04988

Epoch 00005: val_loss did not improve from 0.04988

Epoch 00006: val_loss did not improve from 0.04988

Epoch 00007: val_loss did not improve from 0.04988

Epoch 00008: val_loss did not improve from 0.04988

Epoch 00009: val_loss did not improve from 0.04988

Epoch 00010: val_loss did not improve from 0.04988





Epoch 00001: val_loss improved from inf to 0.05674, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05674

Epoch 00003: val_loss improved from 0.05674 to 0.04874, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.04874

Epoch 00005: val_loss did not improve from 0.04874

Epoch 00006: val_loss did not improve from 0.04874

Epoch 00007: val_loss did not improve from 0.04874

Epoch 00008: val_loss did not improve from 0.04874

Epoch 00009: val_loss did not improve from 0.04874

Epoch 00010: val_loss did not improve from 0.04874

Epoch 00001: val_loss improved from inf to 0.03578, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.03578

Epoch 00003: val_loss did not improve from 0.03578

Epoch 00004: val_loss did not improve from 0.03578

Epoch 00005: val_loss did not improve from 0.03578

Epoch 00006: val_loss did not improve from 0.03578

Epoch 00007: val_loss did not improve from 0.0




Epoch 00001: val_loss improved from inf to 0.18001, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.18001

Epoch 00003: val_loss did not improve from 0.18001

Epoch 00004: val_loss did not improve from 0.18001

Epoch 00005: val_loss did not improve from 0.18001

Epoch 00006: val_loss did not improve from 0.18001

Epoch 00007: val_loss did not improve from 0.18001

Epoch 00008: val_loss did not improve from 0.18001

Epoch 00009: val_loss did not improve from 0.18001

Epoch 00010: val_loss did not improve from 0.18001





Epoch 00001: val_loss improved from inf to 0.06174, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06174

Epoch 00003: val_loss did not improve from 0.06174

Epoch 00004: val_loss did not improve from 0.06174

Epoch 00005: val_loss did not improve from 0.06174

Epoch 00006: val_loss did not improve from 0.06174

Epoch 00007: val_loss did not improve from 0.06174

Epoch 00008: val_loss did not improve from 0.06174

Epoch 00009: val_loss did not improve from 0.06174

Epoch 00010: val_loss did not improve from 0.06174





Epoch 00001: val_loss improved from inf to 0.18147, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.18147 to 0.10121, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.10121

Epoch 00004: val_loss did not improve from 0.10121

Epoch 00005: val_loss did not improve from 0.10121

Epoch 00006: val_loss did not improve from 0.10121

Epoch 00007: val_loss did not improve from 0.10121

Epoch 00008: val_loss did not improve from 0.10121

Epoch 00009: val_loss did not improve from 0.10121

Epoch 00010: val_loss did not improve from 0.10121





Epoch 00001: val_loss improved from inf to 0.08272, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08272

Epoch 00003: val_loss did not improve from 0.08272

Epoch 00004: val_loss did not improve from 0.08272

Epoch 00005: val_loss did not improve from 0.08272

Epoch 00006: val_loss did not improve from 0.08272

Epoch 00007: val_loss did not improve from 0.08272

Epoch 00008: val_loss did not improve from 0.08272

Epoch 00009: val_loss did not improve from 0.08272

Epoch 00010: val_loss did not improve from 0.08272





Epoch 00001: val_loss improved from inf to 0.06308, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06308

Epoch 00003: val_loss did not improve from 0.06308

Epoch 00004: val_loss did not improve from 0.06308

Epoch 00005: val_loss did not improve from 0.06308

Epoch 00006: val_loss did not improve from 0.06308

Epoch 00007: val_loss did not improve from 0.06308

Epoch 00008: val_loss did not improve from 0.06308

Epoch 00009: val_loss did not improve from 0.06308

Epoch 00010: val_loss did not improve from 0.06308





Epoch 00001: val_loss improved from inf to 0.05577, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05577

Epoch 00003: val_loss improved from 0.05577 to 0.05310, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.05310

Epoch 00005: val_loss did not improve from 0.05310

Epoch 00006: val_loss did not improve from 0.05310

Epoch 00007: val_loss did not improve from 0.05310

Epoch 00008: val_loss did not improve from 0.05310

Epoch 00009: val_loss did not improve from 0.05310

Epoch 00010: val_loss did not improve from 0.05310





Epoch 00001: val_loss improved from inf to 0.05087, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05087

Epoch 00003: val_loss did not improve from 0.05087

Epoch 00004: val_loss did not improve from 0.05087

Epoch 00005: val_loss did not improve from 0.05087

Epoch 00006: val_loss did not improve from 0.05087

Epoch 00007: val_loss did not improve from 0.05087

Epoch 00008: val_loss did not improve from 0.05087

Epoch 00009: val_loss did not improve from 0.05087

Epoch 00010: val_loss did not improve from 0.05087

Epoch 00001: val_loss improved from inf to 0.04901, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.04901 to 0.04204, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.04204

Epoch 00004: val_loss did not improve from 0.04204

Epoch 00005: val_loss did not improve from 0.04204

Epoch 00006: val_loss did not improve from 0.04204

Epoch 00007: val_loss did not improve from 0.0




Epoch 00001: val_loss improved from inf to 0.21449, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.21449 to 0.19232, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.19232

Epoch 00004: val_loss did not improve from 0.19232

Epoch 00005: val_loss did not improve from 0.19232

Epoch 00006: val_loss did not improve from 0.19232

Epoch 00007: val_loss did not improve from 0.19232

Epoch 00008: val_loss did not improve from 0.19232

Epoch 00009: val_loss did not improve from 0.19232

Epoch 00010: val_loss did not improve from 0.19232





Epoch 00001: val_loss improved from inf to 0.06438, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06438

Epoch 00003: val_loss did not improve from 0.06438

Epoch 00004: val_loss did not improve from 0.06438

Epoch 00005: val_loss did not improve from 0.06438

Epoch 00006: val_loss did not improve from 0.06438

Epoch 00007: val_loss did not improve from 0.06438

Epoch 00008: val_loss did not improve from 0.06438

Epoch 00009: val_loss did not improve from 0.06438

Epoch 00010: val_loss did not improve from 0.06438





Epoch 00001: val_loss improved from inf to 0.13674, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.13674 to 0.11803, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.11803 to 0.10857, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.10857

Epoch 00005: val_loss did not improve from 0.10857

Epoch 00006: val_loss did not improve from 0.10857

Epoch 00007: val_loss did not improve from 0.10857

Epoch 00008: val_loss improved from 0.10857 to 0.10846, saving model to best-model-conll.hdfs

Epoch 00009: val_loss did not improve from 0.10846

Epoch 00010: val_loss did not improve from 0.10846





Epoch 00001: val_loss improved from inf to 0.10015, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10015

Epoch 00003: val_loss improved from 0.10015 to 0.09810, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.09810

Epoch 00005: val_loss improved from 0.09810 to 0.09708, saving model to best-model-conll.hdfs

Epoch 00006: val_loss did not improve from 0.09708

Epoch 00007: val_loss did not improve from 0.09708

Epoch 00008: val_loss did not improve from 0.09708

Epoch 00009: val_loss did not improve from 0.09708

Epoch 00010: val_loss did not improve from 0.09708





Epoch 00001: val_loss improved from inf to 0.06814, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06814

Epoch 00003: val_loss did not improve from 0.06814

Epoch 00004: val_loss did not improve from 0.06814

Epoch 00005: val_loss did not improve from 0.06814

Epoch 00006: val_loss did not improve from 0.06814

Epoch 00007: val_loss did not improve from 0.06814

Epoch 00008: val_loss did not improve from 0.06814

Epoch 00009: val_loss did not improve from 0.06814

Epoch 00010: val_loss did not improve from 0.06814





Epoch 00001: val_loss improved from inf to 0.04688, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04688

Epoch 00003: val_loss did not improve from 0.04688

Epoch 00004: val_loss did not improve from 0.04688

Epoch 00005: val_loss did not improve from 0.04688

Epoch 00006: val_loss did not improve from 0.04688

Epoch 00007: val_loss did not improve from 0.04688

Epoch 00008: val_loss did not improve from 0.04688

Epoch 00009: val_loss did not improve from 0.04688

Epoch 00010: val_loss did not improve from 0.04688





Epoch 00001: val_loss improved from inf to 0.05327, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05327

Epoch 00003: val_loss did not improve from 0.05327

Epoch 00004: val_loss did not improve from 0.05327

Epoch 00005: val_loss did not improve from 0.05327

Epoch 00006: val_loss did not improve from 0.05327

Epoch 00007: val_loss did not improve from 0.05327

Epoch 00008: val_loss did not improve from 0.05327

Epoch 00009: val_loss did not improve from 0.05327

Epoch 00010: val_loss did not improve from 0.05327

Epoch 00001: val_loss improved from inf to 0.04631, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.04631 to 0.04563, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.04563

Epoch 00004: val_loss improved from 0.04563 to 0.04126, saving model to best-model-conll.hdfs

Epoch 00005: val_loss did not improve from 0.04126

Epoch 00006: val_loss did not improve from 0.04126

Epo




Epoch 00001: val_loss improved from inf to 0.10682, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10682

Epoch 00003: val_loss did not improve from 0.10682

Epoch 00004: val_loss did not improve from 0.10682

Epoch 00005: val_loss did not improve from 0.10682

Epoch 00006: val_loss did not improve from 0.10682

Epoch 00007: val_loss did not improve from 0.10682

Epoch 00008: val_loss did not improve from 0.10682

Epoch 00009: val_loss did not improve from 0.10682

Epoch 00010: val_loss did not improve from 0.10682
AlgoCrossValIter - 7
Model: "sequential_27"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
o




Epoch 00001: val_loss improved from inf to 0.16014, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.16014

Epoch 00003: val_loss did not improve from 0.16014

Epoch 00004: val_loss did not improve from 0.16014

Epoch 00005: val_loss did not improve from 0.16014

Epoch 00006: val_loss did not improve from 0.16014

Epoch 00007: val_loss did not improve from 0.16014

Epoch 00008: val_loss did not improve from 0.16014

Epoch 00009: val_loss did not improve from 0.16014

Epoch 00010: val_loss did not improve from 0.16014





Epoch 00001: val_loss improved from inf to 0.10861, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.10861 to 0.09394, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.09394 to 0.07865, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.07865

Epoch 00005: val_loss did not improve from 0.07865

Epoch 00006: val_loss did not improve from 0.07865

Epoch 00007: val_loss did not improve from 0.07865

Epoch 00008: val_loss did not improve from 0.07865

Epoch 00009: val_loss did not improve from 0.07865

Epoch 00010: val_loss did not improve from 0.07865





Epoch 00001: val_loss improved from inf to 0.09659, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.09659 to 0.08915, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.08915

Epoch 00004: val_loss did not improve from 0.08915

Epoch 00005: val_loss did not improve from 0.08915

Epoch 00006: val_loss did not improve from 0.08915

Epoch 00007: val_loss did not improve from 0.08915

Epoch 00008: val_loss did not improve from 0.08915

Epoch 00009: val_loss did not improve from 0.08915

Epoch 00010: val_loss did not improve from 0.08915

Epoch 00001: val_loss improved from inf to 0.08213, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08213

Epoch 00003: val_loss did not improve from 0.08213

Epoch 00004: val_loss did not improve from 0.08213

Epoch 00005: val_loss did not improve from 0.08213

Epoch 00006: val_loss did not improve from 0.08213

Epoch 00007: val_loss did not improve from 0.0




Epoch 00001: val_loss improved from inf to 0.07985, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07985

Epoch 00003: val_loss did not improve from 0.07985

Epoch 00004: val_loss did not improve from 0.07985

Epoch 00005: val_loss did not improve from 0.07985

Epoch 00006: val_loss did not improve from 0.07985

Epoch 00007: val_loss did not improve from 0.07985

Epoch 00008: val_loss did not improve from 0.07985

Epoch 00009: val_loss did not improve from 0.07985

Epoch 00010: val_loss did not improve from 0.07985





Epoch 00001: val_loss improved from inf to 0.06258, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.06258 to 0.05396, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.05396

Epoch 00004: val_loss did not improve from 0.05396

Epoch 00005: val_loss did not improve from 0.05396

Epoch 00006: val_loss did not improve from 0.05396

Epoch 00007: val_loss did not improve from 0.05396

Epoch 00008: val_loss did not improve from 0.05396

Epoch 00009: val_loss did not improve from 0.05396

Epoch 00010: val_loss did not improve from 0.05396





Epoch 00001: val_loss improved from inf to 0.04759, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04759

Epoch 00003: val_loss did not improve from 0.04759

Epoch 00004: val_loss did not improve from 0.04759

Epoch 00005: val_loss did not improve from 0.04759

Epoch 00006: val_loss did not improve from 0.04759

Epoch 00007: val_loss did not improve from 0.04759

Epoch 00008: val_loss did not improve from 0.04759

Epoch 00009: val_loss did not improve from 0.04759

Epoch 00010: val_loss did not improve from 0.04759

Epoch 00001: val_loss improved from inf to 0.04325, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04325

Epoch 00003: val_loss did not improve from 0.04325

Epoch 00004: val_loss did not improve from 0.04325

Epoch 00005: val_loss did not improve from 0.04325

Epoch 00006: val_loss did not improve from 0.04325

Epoch 00007: val_loss did not improve from 0.04325

Epoch 00008: val_loss did not improve




Epoch 00001: val_loss improved from inf to 0.17764, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.17764

Epoch 00003: val_loss did not improve from 0.17764

Epoch 00004: val_loss did not improve from 0.17764

Epoch 00005: val_loss did not improve from 0.17764

Epoch 00006: val_loss did not improve from 0.17764

Epoch 00007: val_loss did not improve from 0.17764

Epoch 00008: val_loss did not improve from 0.17764

Epoch 00009: val_loss did not improve from 0.17764

Epoch 00010: val_loss did not improve from 0.17764





Epoch 00001: val_loss improved from inf to 0.05935, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05935

Epoch 00003: val_loss did not improve from 0.05935

Epoch 00004: val_loss did not improve from 0.05935

Epoch 00005: val_loss did not improve from 0.05935

Epoch 00006: val_loss did not improve from 0.05935

Epoch 00007: val_loss did not improve from 0.05935

Epoch 00008: val_loss did not improve from 0.05935

Epoch 00009: val_loss did not improve from 0.05935

Epoch 00010: val_loss did not improve from 0.05935





Epoch 00001: val_loss improved from inf to 0.10338, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10338

Epoch 00003: val_loss did not improve from 0.10338

Epoch 00004: val_loss did not improve from 0.10338

Epoch 00005: val_loss did not improve from 0.10338

Epoch 00006: val_loss did not improve from 0.10338

Epoch 00007: val_loss did not improve from 0.10338

Epoch 00008: val_loss did not improve from 0.10338

Epoch 00009: val_loss did not improve from 0.10338

Epoch 00010: val_loss did not improve from 0.10338





Epoch 00001: val_loss improved from inf to 0.09903, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.09903 to 0.09514, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.09514

Epoch 00004: val_loss did not improve from 0.09514

Epoch 00005: val_loss did not improve from 0.09514

Epoch 00006: val_loss did not improve from 0.09514

Epoch 00007: val_loss did not improve from 0.09514

Epoch 00008: val_loss did not improve from 0.09514

Epoch 00009: val_loss did not improve from 0.09514

Epoch 00010: val_loss did not improve from 0.09514





Epoch 00001: val_loss improved from inf to 0.07904, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07904

Epoch 00003: val_loss did not improve from 0.07904

Epoch 00004: val_loss did not improve from 0.07904

Epoch 00005: val_loss did not improve from 0.07904

Epoch 00006: val_loss did not improve from 0.07904

Epoch 00007: val_loss did not improve from 0.07904

Epoch 00008: val_loss did not improve from 0.07904

Epoch 00009: val_loss did not improve from 0.07904

Epoch 00010: val_loss did not improve from 0.07904





Epoch 00001: val_loss improved from inf to 0.04823, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04823

Epoch 00003: val_loss did not improve from 0.04823

Epoch 00004: val_loss did not improve from 0.04823

Epoch 00005: val_loss did not improve from 0.04823

Epoch 00006: val_loss did not improve from 0.04823

Epoch 00007: val_loss did not improve from 0.04823

Epoch 00008: val_loss did not improve from 0.04823

Epoch 00009: val_loss did not improve from 0.04823

Epoch 00010: val_loss did not improve from 0.04823





Epoch 00001: val_loss improved from inf to 0.04680, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04680

Epoch 00003: val_loss did not improve from 0.04680

Epoch 00004: val_loss did not improve from 0.04680

Epoch 00005: val_loss did not improve from 0.04680

Epoch 00006: val_loss did not improve from 0.04680

Epoch 00007: val_loss did not improve from 0.04680

Epoch 00008: val_loss did not improve from 0.04680

Epoch 00009: val_loss did not improve from 0.04680

Epoch 00010: val_loss did not improve from 0.04680





Epoch 00001: val_loss improved from inf to 0.04607, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04607

Epoch 00003: val_loss did not improve from 0.04607

Epoch 00004: val_loss did not improve from 0.04607

Epoch 00005: val_loss did not improve from 0.04607

Epoch 00006: val_loss did not improve from 0.04607

Epoch 00007: val_loss did not improve from 0.04607

Epoch 00008: val_loss did not improve from 0.04607

Epoch 00009: val_loss did not improve from 0.04607

Epoch 00010: val_loss did not improve from 0.04607





Epoch 00001: val_loss improved from inf to 0.10843, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10843

Epoch 00003: val_loss did not improve from 0.10843

Epoch 00004: val_loss did not improve from 0.10843

Epoch 00005: val_loss did not improve from 0.10843

Epoch 00006: val_loss did not improve from 0.10843

Epoch 00007: val_loss did not improve from 0.10843

Epoch 00008: val_loss did not improve from 0.10843

Epoch 00009: val_loss did not improve from 0.10843

Epoch 00010: val_loss did not improve from 0.10843




AlgoCrossValIter - 9
Model: "sequential_29"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.47745, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.47745 to 0.44828, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.44828

Epoch 00004: val_loss improved from 0.44828 to 0.41002, saving model to best-model-conll.hdfs

Epoch 00005: val_loss 




Epoch 00001: val_loss improved from inf to 0.24164, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.24164 to 0.22360, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.22360 to 0.21767, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.21767

Epoch 00005: val_loss did not improve from 0.21767

Epoch 00006: val_loss did not improve from 0.21767

Epoch 00007: val_loss did not improve from 0.21767

Epoch 00008: val_loss did not improve from 0.21767

Epoch 00009: val_loss did not improve from 0.21767

Epoch 00010: val_loss did not improve from 0.21767





Epoch 00001: val_loss improved from inf to 0.07076, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.07076 to 0.06599, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.06599

Epoch 00004: val_loss did not improve from 0.06599

Epoch 00005: val_loss did not improve from 0.06599

Epoch 00006: val_loss did not improve from 0.06599

Epoch 00007: val_loss did not improve from 0.06599

Epoch 00008: val_loss did not improve from 0.06599

Epoch 00009: val_loss did not improve from 0.06599

Epoch 00010: val_loss did not improve from 0.06599





Epoch 00001: val_loss improved from inf to 0.08173, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08173

Epoch 00003: val_loss did not improve from 0.08173

Epoch 00004: val_loss did not improve from 0.08173

Epoch 00005: val_loss did not improve from 0.08173

Epoch 00006: val_loss did not improve from 0.08173

Epoch 00007: val_loss did not improve from 0.08173

Epoch 00008: val_loss did not improve from 0.08173

Epoch 00009: val_loss did not improve from 0.08173

Epoch 00010: val_loss did not improve from 0.08173

Epoch 00001: val_loss improved from inf to 0.08898, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08898

Epoch 00003: val_loss did not improve from 0.08898

Epoch 00004: val_loss did not improve from 0.08898

Epoch 00005: val_loss did not improve from 0.08898

Epoch 00006: val_loss did not improve from 0.08898

Epoch 00007: val_loss did not improve from 0.08898

Epoch 00008: val_loss did not improve




Epoch 00001: val_loss improved from inf to 0.07099, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07099

Epoch 00003: val_loss did not improve from 0.07099

Epoch 00004: val_loss did not improve from 0.07099

Epoch 00005: val_loss did not improve from 0.07099

Epoch 00006: val_loss did not improve from 0.07099

Epoch 00007: val_loss did not improve from 0.07099

Epoch 00008: val_loss did not improve from 0.07099

Epoch 00009: val_loss did not improve from 0.07099

Epoch 00010: val_loss did not improve from 0.07099





Epoch 00001: val_loss improved from inf to 0.06802, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.06802 to 0.05518, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.05518

Epoch 00004: val_loss did not improve from 0.05518

Epoch 00005: val_loss did not improve from 0.05518

Epoch 00006: val_loss did not improve from 0.05518

Epoch 00007: val_loss did not improve from 0.05518

Epoch 00008: val_loss did not improve from 0.05518

Epoch 00009: val_loss did not improve from 0.05518

Epoch 00010: val_loss did not improve from 0.05518





Epoch 00001: val_loss improved from inf to 0.04797, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04797

Epoch 00003: val_loss did not improve from 0.04797

Epoch 00004: val_loss did not improve from 0.04797

Epoch 00005: val_loss did not improve from 0.04797

Epoch 00006: val_loss did not improve from 0.04797

Epoch 00007: val_loss did not improve from 0.04797

Epoch 00008: val_loss did not improve from 0.04797

Epoch 00009: val_loss did not improve from 0.04797

Epoch 00010: val_loss did not improve from 0.04797

Epoch 00001: val_loss improved from inf to 0.03740, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.03740

Epoch 00003: val_loss did not improve from 0.03740

Epoch 00004: val_loss did not improve from 0.03740

Epoch 00005: val_loss did not improve from 0.03740

Epoch 00006: val_loss did not improve from 0.03740

Epoch 00007: val_loss did not improve from 0.03740

Epoch 00008: val_loss did not improve




Epoch 00001: val_loss improved from inf to 0.09799, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09799

Epoch 00003: val_loss did not improve from 0.09799

Epoch 00004: val_loss did not improve from 0.09799

Epoch 00005: val_loss did not improve from 0.09799

Epoch 00006: val_loss did not improve from 0.09799

Epoch 00007: val_loss did not improve from 0.09799

Epoch 00008: val_loss did not improve from 0.09799

Epoch 00009: val_loss did not improve from 0.09799

Epoch 00010: val_loss did not improve from 0.09799




AlgoCrossValIter - 10
Model: "sequential_30"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.55352, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.55352 to 0.53244, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.53244 to 0.43062, saving model to best-model-conll.hdfs

Epoch 00004: val_loss improved from 0.43062 to 0.42248, saving model to b




Epoch 00001: val_loss improved from inf to 0.21609, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.21609 to 0.21300, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.21300

Epoch 00004: val_loss did not improve from 0.21300

Epoch 00005: val_loss did not improve from 0.21300

Epoch 00006: val_loss did not improve from 0.21300

Epoch 00007: val_loss did not improve from 0.21300

Epoch 00008: val_loss did not improve from 0.21300

Epoch 00009: val_loss did not improve from 0.21300

Epoch 00010: val_loss did not improve from 0.21300





Epoch 00001: val_loss improved from inf to 0.06880, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06880

Epoch 00003: val_loss did not improve from 0.06880

Epoch 00004: val_loss did not improve from 0.06880

Epoch 00005: val_loss did not improve from 0.06880

Epoch 00006: val_loss did not improve from 0.06880

Epoch 00007: val_loss did not improve from 0.06880

Epoch 00008: val_loss did not improve from 0.06880

Epoch 00009: val_loss did not improve from 0.06880

Epoch 00010: val_loss did not improve from 0.06880





Epoch 00001: val_loss improved from inf to 0.12734, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.12734 to 0.09604, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.09604 to 0.08953, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.08953

Epoch 00005: val_loss did not improve from 0.08953

Epoch 00006: val_loss did not improve from 0.08953

Epoch 00007: val_loss did not improve from 0.08953

Epoch 00008: val_loss did not improve from 0.08953

Epoch 00009: val_loss did not improve from 0.08953

Epoch 00010: val_loss did not improve from 0.08953

Epoch 00001: val_loss improved from inf to 0.07532, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07532

Epoch 00003: val_loss did not improve from 0.07532

Epoch 00004: val_loss did not improve from 0.07532

Epoch 00005: val_loss did not improve from 0.07532

Epoch 00006: val_loss did not improve from 0.07532

Epo




Epoch 00001: val_loss improved from inf to 0.07806, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07806

Epoch 00003: val_loss did not improve from 0.07806

Epoch 00004: val_loss did not improve from 0.07806

Epoch 00005: val_loss did not improve from 0.07806

Epoch 00006: val_loss did not improve from 0.07806

Epoch 00007: val_loss did not improve from 0.07806

Epoch 00008: val_loss did not improve from 0.07806

Epoch 00009: val_loss did not improve from 0.07806

Epoch 00010: val_loss did not improve from 0.07806





Epoch 00001: val_loss improved from inf to 0.05201, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05201

Epoch 00003: val_loss did not improve from 0.05201

Epoch 00004: val_loss did not improve from 0.05201

Epoch 00005: val_loss did not improve from 0.05201

Epoch 00006: val_loss did not improve from 0.05201

Epoch 00007: val_loss did not improve from 0.05201

Epoch 00008: val_loss did not improve from 0.05201

Epoch 00009: val_loss did not improve from 0.05201

Epoch 00010: val_loss did not improve from 0.05201





Epoch 00001: val_loss improved from inf to 0.05445, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05445

Epoch 00003: val_loss did not improve from 0.05445

Epoch 00004: val_loss did not improve from 0.05445

Epoch 00005: val_loss did not improve from 0.05445

Epoch 00006: val_loss did not improve from 0.05445

Epoch 00007: val_loss did not improve from 0.05445

Epoch 00008: val_loss did not improve from 0.05445

Epoch 00009: val_loss did not improve from 0.05445

Epoch 00010: val_loss did not improve from 0.05445





Epoch 00001: val_loss improved from inf to 0.03698, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.03698

Epoch 00003: val_loss did not improve from 0.03698

Epoch 00004: val_loss did not improve from 0.03698

Epoch 00005: val_loss did not improve from 0.03698

Epoch 00006: val_loss did not improve from 0.03698

Epoch 00007: val_loss did not improve from 0.03698

Epoch 00008: val_loss did not improve from 0.03698

Epoch 00009: val_loss did not improve from 0.03698

Epoch 00010: val_loss did not improve from 0.03698





Epoch 00001: val_loss improved from inf to 0.10621, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10621

Epoch 00003: val_loss did not improve from 0.10621

Epoch 00004: val_loss did not improve from 0.10621

Epoch 00005: val_loss did not improve from 0.10621

Epoch 00006: val_loss did not improve from 0.10621

Epoch 00007: val_loss did not improve from 0.10621

Epoch 00008: val_loss did not improve from 0.10621

Epoch 00009: val_loss did not improve from 0.10621

Epoch 00010: val_loss did not improve from 0.10621




In [267]:
normalization_strategy

In [268]:
resultCrossVal.to_csv("results.csv")
resultCrossVal

Unnamed: 0,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9
P_val,89.06,86.577,92.092,84.141,86.104,86.521,87.817,86.726,85.135,87.637
P_train,89.089,85.269,89.372,85.863,85.934,86.667,87.058,87.736,87.955,87.583
P_ewo,79.738,73.715,82.125,74.442,78.039,75.209,78.959,79.006,78.318,77.78
R_val,79.481,82.418,76.039,83.187,80.364,79.63,77.665,79.379,80.418,80.418
R_train,81.697,86.08,82.44,86.501,85.776,83.587,81.059,83.125,83.062,83.916
R_ewo,65.186,72.5,64.722,71.668,69.074,67.036,66.295,67.315,67.316,67.87
F1-val,82.125,82.77,81.875,82.89,80.947,80.459,79.109,80.361,81.734,82.359
F1-train,84.924,85.28,85.345,85.939,85.595,84.349,82.806,85.069,85.048,85.325
F1-ewo,71.363,72.323,71.778,72.415,72.736,69.763,70.749,72.265,71.718,71.807


In [269]:
resultCrossVal.mean(axis=1).to_frame()

Unnamed: 0,0
P_val,87.181
P_train,87.2526
P_ewo,77.7331
R_val,79.8999
R_train,83.7243
R_ewo,67.8982
F1-val,81.4629
F1-train,84.968
F1-ewo,71.6917


In [270]:
resultCrossVal.std(axis=1).to_frame()

Unnamed: 0,0
P_val,2.207464
P_train,1.3616
P_ewo,2.586653
R_val,2.063414
R_train,1.861465
R_ewo,2.543144
F1-val,1.213755
F1-train,0.867873
F1-ewo,0.886879


In [271]:
# trainByTagResult.to_csv("results/train-by-tag.csv")
# trainByTagResult

In [272]:
# trainByTagResult.mean(axis=1).to_frame()

In [273]:
# trainByTagResult.std(axis=1).to_frame()

In [274]:
# testByTagResult.to_csv("results/test-by-tag.csv")
# testByTagResult

In [275]:
# testByTagResult.mean(axis=1).to_frame()

In [276]:
# testByTagResult.std(axis=1).to_frame()

In [277]:
# ewoByTagResult.to_csv("results/ewo-by-tag.csv")

In [278]:
# ewoByTagResult = pd.read_csv("results/ewo-by-tag.csv", index_col=0)
# ewoByTagResult

In [279]:
# ewoByTagResult.mean(axis=1).to_frame()

In [280]:
# ewoByTagResult.std(axis=1).to_frame()

In [281]:
# columns = en_fingerprints.columns

# print("Pred", "Real", "Freq", "Word", sep="\t")
# for c in columns:
#     prediction = model.predict(en_fingerprints[c].values.reshape((1, 210)))
#     pred_tag = int2tag[np.argmax(prediction)]
#     real_tag = en_corpus[en_corpus.word == c].iloc[0]['ne-tag']
    
#     if pred_tag != real_tag:
#         print(pred_tag, real_tag, en_fingerprints[c].max(), c, sep="\t")