In [1]:
# import
import keras
import sys
import numpy as np
import string
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import SimpleRNN, Dense
from keras.utils import np_utils, plot_model
from keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn import model_selection
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, precision_recall_fscore_support
import h5py as h5py

Using TensorFlow backend.


In [2]:
# if we are doeing binary classification. That means say if a token is a named entity or not
BINARY = False

# number of epochs for training
epochs = 10 

# the english side of the corpus
en_corpus_file = "corpus-en.txt"

# the ewondo side of the corpus
ewo_corpus_file = "corpus-ewo.txt"

# name of the file to same the model 
best_model_file = "best-model-conll.hdfs"

# The maximal number of phrases to use
max_nb_of_phrases =  -1

# the maximal number of duplicates for each word in the corpus
duplication = 1

# wether we are using only the vocabulary, ro redundancy
is_only_vocab = True

# if word should be shuffle or not
shuffle = is_only_vocab

# normalization strategy
# log, max, mean_log, log_inv, max_inv or mean_log_inv, tf
# None: for no normalization => 1/tf = nbWC/nbOcc(w)
normalization_strategy = None

# if we are using the Zennaki et al. signature
is_zennaki = False

# the number of neurons in the first layer
h1_size = 640

# number of neurons in the second layer
h2_size = 160  

In [3]:
def getTag(aString):
    """
        convert a string to a tag
    """
    tag = "O"
    if BINARY:
        if aString != "O":
            return "NE"
    else:
        tag = aString
    return tag
     

In [4]:
def load_corpus(file, max_nb_of_phrases):
    """
    Load a corpus stored in a file
    Input:
        - file: the name of the file of the corpus
        - max_nb_of_phases: maximal number of phrases to load
    
    Return:
        - a DataFrame representing the corpus
        - the number of phrases in the corpus
    """
    nb_of_phrases = 0
    dataset = {"word": [], "ne-tag": []}
    with open(file) as f:
        prev_line = None
        for cpt, line in enumerate(f):
            if cpt == 0:
                continue
            if nb_of_phrases == max_nb_of_phrases:
                break;

            l = line.strip()
            if len(l) == 0 and len(prev_line) != 0:
                nb_of_phrases += 1
                dataset["word"].append(line)
                dataset["ne-tag"].append(None)
            else:
                l = l.split("\t")
                if l[0] not in string.punctuation:
                    dataset["word"].append(l[0])
                    dataset["ne-tag"].append(ne_type(l[1]))
            prev_line = line.strip()
        
    return pd.DataFrame(dataset), nb_of_phrases+1

In [5]:
def log_normalization(fingerprints):
    fingerprints[fingerprints > 0] = 1 / fingerprints[fingerprints > 0] # get tf = nbOcc(w)/nbWC
    fingerprints[fingerprints > 0] = 1 + np.log(fingerprints[fingerprints > 0])
    return fingerprints

In [6]:
def max_normalization(fingerprints):
    fingerprints[fingerprints > 0] = 1 / fingerprints[fingerprints > 0] # get tf = nbOcc(w)/nbWC
    maxis = fingerprints.max(axis = 1)
    fingerprints[fingerprints > 0] = fingerprints[fingerprints > 0].apply(lambda row: 0.5 + 0.5 * row / maxis[row.index])
    return fingerprints

In [7]:
def mean_log_normalization(fingerprints):
    fingerprints[fingerprints > 0] = 1 / fingerprints[fingerprints > 0] # get tf = nbOcc(w)/nbWC
    means = fingerprints.mean(axis=1)
    fingerprints[fingerprints > 0] = fingerprints[fingerprints > 0].apply(lambda row: (1 + np.log(row)) / 1 + np.log(means[row.index]))
    return fingerprints

In [8]:
def normalize(fingerprints):
    if normalization_strategy == "log":
        return log_normalization(fingerprints)
    elif normalization_strategy == "max":
        return max_normalization(fingerprints)
    elif normalization_strategy == "mean_log":
        return mean_log_normalization(fingerprints)
    elif normalization_strategy == "log_inv":
        fp = log_normalization(fingerprints)
        fp = 1 / (1 + fp)
        return fp
    elif normalization_strategy == "max_inv":
        fp = max_normalization(fingerprints)
        fp = 1 / (1 + fp)
        return fp
    elif normalization_strategy == "mean_log_inv":
        fp = mean_log_normalization(fingerprints)
        fp = 1 / (1 + fp)
        return fp
    elif normalization_strategy == "tf":
        fp = fingerprints
        fp[fp > 0] = 1 / fp[fp > 0]
        return fp
    else:
        return fingerprints

In [72]:
def corpus_fingerprint(aDataframe, nb_of_biphrases):
    """
    Create the distributionnal signature of each word in the corpus
    Input:
        -aDataFrame: the corpus DataFrame
        -nb_of_biphrases: number of phrases in the corpus
    Return:
        a DataFrame: corpus fingerprint, the columns are the words in the corpus
    """
    print("Normalization strategy:", normalization_strategy)
    tf = {}
    fingerprints = {}
    current_bi_phrase_index = 0
    nb_word_in_corpus = aDataframe[aDataframe.word != "\n"].word.size
    words_in_current_phrase = []
    for index, row in aDataframe.iterrows():
        if current_bi_phrase_index > nb_of_biphrases:
            break
            
        word = row['word']
        
        if word != "\n":
            words_in_current_phrase.append(word)
            if word not in fingerprints:
                fingerprints[word] = np.zeros(nb_of_biphrases, dtype=np.float32)
                tf[word] = 0
            tf[word] += 1
            fingerprints[word][current_bi_phrase_index] = 1
        else:
            nb_word_in_current_phrase = len(words_in_current_phrase)
            current_bi_phrase_index += 1
            words_in_current_phrase = []
        
    if not is_zennaki:
        for word in fingerprints:
            for i in range(nb_of_biphrases):
                if fingerprints[word][i] != 0:
                    fingerprints[word][i] = nb_word_in_corpus / tf[word]
    ret = pd.DataFrame(fingerprints)
        
    return normalize(ret)

In [10]:
def corpus2trainingdata(aDataframe, fingerprintsDataFrame):
    """
    Convert corpus to training data => numpy array
    
    Input:
        -aDataFrame: Corpus dataframe
        -fingerprintsDataFrame: distributionnal signature of words in the corpus
    Return:
        (X, y): X is the array of words (signature) in the corpus and y is the corresponding labels (NE tags)
    """
    X = np.zeros((aDataframe.shape[0], fingerprintsDataFrame.shape[0]), dtype=np.int8)
    y = np.zeros(aDataframe.shape[0], dtype=np.int8)
    i = 0
    for row in aDataframe.iterrows():
        X[i] = fingerprintsDataFrame[row[1]['word']].values
        y[i] = tag2int[getTag(row[1]['ne-tag'])]
        i += 1
    return X, y

In [11]:
# A utility function to convert NE tags
def ne_type(aType):
    aType = aType.lower()
    if 'per' in aType:
        t =  'NE' if BINARY else 'PER' 
    elif 'loc' in aType:
        t =  'NE' if BINARY else 'LOC'
    elif 'org' in aType:
        t =  'NE' if BINARY else 'ORG'
    elif 'hour' in aType:
        t =  'NE' if BINARY else 'MISC'
    elif aType != 'o' and len(aType) > 0 :
        t =  'NE' if BINARY else 'MISC'
    else:
        t = 'O'
    return t

In [12]:
def P_R_F1(y_pred, y_true, neg_class):
    same = y_pred[y_true==y_pred]
    tp = same[same != neg_class].size
    nb_of_pos_exple = y_true[y_true != neg_class].size
    nb_of_pos_pred = y_pred[y_pred != neg_class].size
    p = r = f1 = 0
    try:
        p = np.round(tp*100/nb_of_pos_pred, 2)
    except ZeroDivisionError:
        print("number of correct positive predictions is 0")
        
    try:
        r = np.round(tp*100/nb_of_pos_exple, 2)
    except ZeroDivisionError:
        print("number of position exple is 0")
        
    try:
        f1 = np.round(2*r*p/(r+p), 2)
    except ZeroDivisionError:
        print("Recall and precision are 0")

    return p, r, f1

In [13]:
def shuffle_data(X, y):
    indices = [i for i in  range(X.shape[0])]
    np.random.shuffle(indices)
    return X[indices], y[indices]

In [14]:
def create_model(input_dim, output_dim):
    model = Sequential()
    model.add(Dense(h1_size, input_dim=input_dim, activation='sigmoid', name="hidden1"))
    model.add(Dense(h2_size, activation='sigmoid', name="hidden2"))
    if BINARY:
        model.add(Dense(1, activation='sigmoid', name="outputlayer"))
        model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['binary_accuracy'])
    else:
        model.add(Dense(output_dim, activation='softmax', name="outputlayer"))
        model.compile(loss='categorical_crossentropy', optimizer="rmsprop", metrics=['accuracy'])
    model.summary()
    return model

In [15]:
def train_model(model, X_train, y_train, X_val, y_val, epochs=epochs):
    # stop learning if the error is the same between two consecutive epochs
    early_stop = EarlyStopping(patience=20, verbose=2)
    
    # saving best model
    best_model_cp = ModelCheckpoint(best_model_file, save_best_only=True, verbose=1)
    
    model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=epochs, verbose=0, shuffle=shuffle, callbacks=[best_model_cp, early_stop])
    
    #loading and returning the best model
    return keras.models.load_model(best_model_file)

In [16]:
def predict(model, X, y, binary=BINARY):
    if BINARY:
        y_pred = np.round(model.predict(X))
        y_true = y
    else:
        predictions = model.predict(X)
        y_pred = np.array([np.argmax(p) for p in predictions])
        y_true = np.array([np.argmax(t) for t in y ])
    return y_true, y_pred

In [17]:
def model_performance(y_true, y_pred):
    return P_R_F1(y_pred, y_true, tag2int['O']) #precision, recall, f1-score

In [18]:
def model_performace_by_tag(y_true, y_pred, tag):
    p, r, f1 = 0, 0, 0
    
    eq = y_pred[y_pred==y_true]
    correctly_pred = eq[eq==tag].size
    try:
        p = np.round(100 * correctly_pred / y_pred[y_pred==tag].size, 2)
    except ZeroDivisionError:
        pass
    
    try:
        r = np.round(100 * correctly_pred / y_true[y_true==tag].size, 2)
    except ZeroDivisionError:
        pass
    
    try:
        f1 = np.round(2 * r * p / (r + p), 2)
    except ZeroDivisionError:
        pass
    
    return p, r, f1

In [19]:
def algoEval(X_train, y_train, X_val, y_val, X_ewo, y_ewo, epochs=epochs, model=None):
    """
    Train a model on (X, y) and validate on (X_val, y_val) then project on (X_ewo)
    """
    test_precision, train_precision, ewo_precision = [], [], []
    test_recall, train_recall, ewo_recall = [], [], []
    test_fscore, train_fscore, ewo_fscore = [], [], []
    
    test_result_by_tag = {}
    train_result_by_tag = {}
    ewo_result_by_tag = {}
    for t in tagSet:
        f1_key = "F1-"+t
        p_key = "P-"+t
        r_key = "R-"+t
        train_result_by_tag[f1_key], train_result_by_tag[p_key], train_result_by_tag[r_key] = [], [], []
        test_result_by_tag[f1_key], test_result_by_tag[p_key], test_result_by_tag[r_key] = [], [], []
        ewo_result_by_tag[f1_key], ewo_result_by_tag[p_key], ewo_result_by_tag[r_key] = [], [], []

    m = train_model(model, X_train, y_train, X_val, y_val, epochs=epochs)
        
    y_true, y_pred = predict(m, X_train, y_train)
    p_train, r_train, f1_train = model_performance(y_true, y_pred)
        
    y_true_val, y_pred_val = predict(m, X_val, y_val)
    p_val, r_val, f1_val = model_performance(y_true_val, y_pred_val)
        
    y_true_ewo, y_pred_ewo = predict(m, X_ewo, y_ewo) 
    p_ewo, r_ewo, f1_ewo = model_performance(y_true_ewo, y_pred_ewo)
        
    for t in range(len(int2tag)):
        f1_key = "F1-" + int2tag[t]
        p_key = "P-" + int2tag[t]
        r_key = "R-" + int2tag[t]
            
        p, r, f1 = model_performace_by_tag(y_true, y_pred, t)
        train_result_by_tag[p_key].append(p)
        train_result_by_tag[r_key].append(r)
        train_result_by_tag[f1_key].append(f1)
            
        p, r, f1 = model_performace_by_tag(y_true_val, y_pred_val, t)
        test_result_by_tag[p_key].append(p)
        test_result_by_tag[r_key].append(r)
        test_result_by_tag[f1_key].append(f1)
            
        p, r, f1 = model_performace_by_tag(y_true_ewo, y_pred_ewo, t)
        ewo_result_by_tag[p_key].append(p)
        ewo_result_by_tag[r_key].append(r)
        ewo_result_by_tag[f1_key].append(f1)
                
    test_precision.append(p_val)
    train_precision.append(p_train)
    ewo_precision.append(p_ewo)
        
    test_recall.append(r_val)
    train_recall.append(r_train)
    ewo_recall.append(r_ewo)
        
    test_fscore.append(f1_val)
    train_fscore.append(f1_train)
    ewo_fscore.append(f1_ewo)
    return pd.DataFrame({
        'P_val': test_precision, 
        'P_train': train_precision, 
        'P_ewo': ewo_precision, 'R_val': test_recall, 'R_train': train_recall, 
        'R_ewo': ewo_recall, 'F1-val': test_fscore, 'F1-train': train_fscore, 'F1-ewo': ewo_fscore}), pd.DataFrame(train_result_by_tag), pd.DataFrame(test_result_by_tag), pd.DataFrame(ewo_result_by_tag)

In [20]:
def algoCrossVal(X, y, X_ewo, y_ewo, k = 10, repeat=1): 
    """
    Traing a model with k-fold cross validation
    We train the model `repeat` times to check it's stability
    """
    block_size = int(X.shape[0] / k)   
    output = None
    model = None
    train_by_tags, test_by_tags, ewo_by_tags = None, None, None
    for it in range(repeat):
        print("AlgoCrossValIter -", it+1)
        model = create_model(X.shape[1], len(tagSet))
        results = None
        train_by_tagsTmp, test_by_tagsTmp, ewo_by_tagsTmp = None, None, None
        for i in range(k):
            X_val, y_val = X[i*block_size:i*block_size+block_size], y[i*block_size:i*block_size+block_size]
            X_train = np.concatenate((X[0:i*block_size], X[i*block_size+block_size:]))
            y_train = np.concatenate((y[0:i*block_size], y[i*block_size+block_size:]))

            X_train = X_train.reshape(X_train.shape[0], X_train.shape[1])
            X_val = X_val.reshape(X_val.shape[0], X_val.shape[1])

            result, train_by_tag, test_by_tag, ewo_by_tag = algoEval(X_train, y_train, X_val, y_val, X_ewo, y_ewo, model=model)
            if results is None:
                results = result.copy()
                train_by_tagsTmp, test_by_tagsTmp, ewo_by_tagsTmp = train_by_tag.copy(), test_by_tag.copy(), ewo_by_tag.copy()
            else:
                results = pd.concat([results, result], ignore_index=True)
                train_by_tagsTmp = pd.concat([train_by_tagsTmp, train_by_tag], ignore_index=True)
                test_by_tagsTmp = pd.concat([test_by_tagsTmp, test_by_tag], ignore_index=True)
                ewo_by_tagsTmp = pd.concat([ewo_by_tagsTmp, ewo_by_tag], ignore_index=True)
        
        if output is None:
            output = results.mean(axis=0).to_frame()
            train_by_tags = train_by_tagsTmp.mean(axis=0).to_frame()
            test_by_tags = test_by_tagsTmp.mean(axis=0).to_frame()
            ewo_by_tags = ewo_by_tagsTmp.mean(axis=0).to_frame()
        else:
            output = pd.concat([output, results.mean(axis=0).to_frame()], axis=1)
            train_by_tags = pd.concat([train_by_tags, train_by_tagsTmp.mean(axis=0).to_frame()], axis=1)
            test_by_tags = pd.concat([test_by_tags, test_by_tagsTmp.mean(axis=0).to_frame()], axis=1)
            ewo_by_tags = pd.concat([ewo_by_tags, ewo_by_tagsTmp.mean(axis=0).to_frame()], axis=1)

    return output, train_by_tags, test_by_tags, ewo_by_tags, model

In [21]:
en_corpus, en_nb_of_phrases = load_corpus(en_corpus_file, max_nb_of_phrases)

In [22]:
nb_word_in_corpus = en_corpus[en_corpus.word != "\n"].word.size
print("Nb word in corpus", nb_word_in_corpus)

Nb word in corpus 4170


In [23]:
en_corpus.head()
en_corpus.loc[en_corpus['ne-tag'] == 'ORG']

Unnamed: 0,word,ne-tag
1335,Sadducees,ORG


In [24]:
tagSet = en_corpus["ne-tag"].dropna().unique()
if BINARY:
    tagSet = ['NE', 'O']
tag2int = {j: i for i, j in enumerate(tagSet)}
int2tag = {i: j for i, j in enumerate(tagSet)}
print(tag2int)

{'O': 0, 'MISC': 1, 'PER': 2, 'LOC': 3, 'ORG': 4}


In [25]:
en_nb_of_phrases

210

In [26]:
en_corpus.describe()

Unnamed: 0,word,ne-tag
count,4379,4170
unique,904,5
top,the,O
freq,313,3779


In [27]:
en_corpus.head(10)

Unnamed: 0,word,ne-tag
0,The,O
1,Promise,O
2,of,O
3,the,O
4,Holy,MISC
5,Spirit,MISC
6,\n,
7,In,O
8,the,O
9,first,O


In [28]:
for tag in tagSet:
    print("{0} % = {1} %".format(tag, np.round(en_corpus[en_corpus['ne-tag']==tag].shape[0] * 100 / en_corpus[en_corpus['ne-tag']!='\n'].shape[0], 2)))

O % = 86.3 %
MISC % = 2.4 %
PER % = 5.59 %
LOC % = 0.91 %
ORG % = 0.02 %


In [29]:
for tag in tagSet:
    print("{0} % = {1} %".format(tag, np.round(en_corpus[en_corpus['ne-tag']==tag].word.unique().shape[0] * 100 / en_corpus[en_corpus['ne-tag']!='\n'].word.unique().shape[0], 2)))

O % = 89.16 %
MISC % = 1.88 %
PER % = 8.96 %
LOC % = 1.99 %
ORG % = 0.11 %


In [30]:
en_corpus[en_corpus.word == "\n"].shape

(209, 2)

In [31]:
print("Nb of bi-phrases", en_nb_of_phrases)

Nb of bi-phrases 210


In [73]:
en_fingerprints = corpus_fingerprint(en_corpus, en_nb_of_phrases)

Normalization strategy: None


In [75]:
en_fingerprints.head(10)

Unnamed: 0,The,Promise,of,the,Holy,Spirit,In,first,book,O,...,considered,dream,She,save,fulfill,Immanuel,us),woke,sleep,knew
0,379.090912,4170.0,22.180851,13.322683,278.0,297.857147,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,13.322683,0.0,0.0,695.0,2085.0,2085.0,2085.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,13.322683,278.0,297.857147,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,22.180851,13.322683,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,22.180851,13.322683,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,13.322683,278.0,297.857147,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,379.090912,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,13.322683,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,0.0,0.0,13.322683,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,22.180851,13.322683,278.0,297.857147,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [76]:
(4170 / nb_word_in_corpus)

1.0

In [77]:
en_corpus.shape

(4379, 2)

In [78]:
en_fingerprints['you'].values.shape

(210,)

In [79]:
en_corpus[en_corpus.word != "\n"].shape

(4170, 2)

In [80]:
if is_only_vocab:
    text = list(en_corpus[en_corpus.word != "\n"].word.unique())
else:
    text = list(en_corpus[en_corpus.word != "\n"].word)
en_vocab = pd.DataFrame({'text': text})
en_vocab.describe()

Unnamed: 0,text
count,903
unique,903
top,annoyed
freq,1


In [81]:
if is_only_vocab:
    X = np.zeros((en_vocab.shape[0] * duplication, en_nb_of_phrases))
    target = np.zeros((en_vocab.shape[0] * duplication))
    p=0
    for i, row in en_vocab.iterrows():
        c = row.text
        for j in range(duplication):
            X[p] = en_fingerprints[c.split(" ")[0]]
            target[p] = tag2int[getTag(en_corpus[en_corpus.word == c.split(" ")[-1:][0]]['ne-tag'].iloc[0])]
            p+=1
    X, target = shuffle_data(X, target)
    print(X.shape, en_fingerprints.shape, target.shape)

(903, 210) (210, 903) (903,)


In [82]:
en_vocab[-20:]

Unnamed: 0,text
883,Eliud
884,Eleazar
885,Matthan
886,husband
887,fourteen
888,unwilling
889,shame
890,resolved
891,divorce
892,quietly


In [83]:
if not is_only_vocab:
    X, target = corpus2trainingdata(en_corpus[en_corpus.word != "\n"], en_fingerprints)

In [84]:
if shuffle:
    X, target = shuffle_data(X, target)

In [85]:
y = target.copy()
y[0:100]
if not BINARY:
    y = np_utils.to_categorical(y, len(tagSet))
y.shape

(903, 5)

In [86]:
from sklearn.decomposition import PCA

def visualize(X, y):
    pca = PCA(n_components=2)
    X_embeded = pca.fit_transform(X)
    plt.figure(figsize=(5, 5))
    plt.scatter(X_embeded[:, 0], X_embeded[:, 1], c=y)
    plt.legend()
    plt.show()

In [87]:
# visualize(X, target)

In [88]:
X_train, X_val, y_train, y_val = model_selection.train_test_split(X, y, test_size=0.33)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1])
X_val = X_val.reshape(X_val.shape[0], X_val.shape[1])
print("X_train.shape =", X_train.shape)
print("y_train.shape =", y_train.shape)
print("X_val.shape =", X_val.shape)
print("y_val.shape =", y_val.shape)

tTarget = np.array([np.argmax(yy) for yy in y_train])
vTarget = np.array([np.argmax(yy) for yy in y_val])

for tag in tagSet:
    print("{0} % in training data = {1} %".format(tag, np.round(tTarget[tTarget==tag2int[tag]].size * 100 / tTarget.shape[0], 2)))
    print("{0} % in validation data = {1} %".format(tag, np.round(vTarget[vTarget==tag2int[tag]].size * 100 / vTarget.shape[0], 2)))

X_train.shape = (605, 210)
y_train.shape = (605, 5)
X_val.shape = (298, 210)
y_val.shape = (298, 5)
O % in training data = 87.77 %
O % in validation data = 89.26 %
MISC % in training data = 0.99 %
MISC % in validation data = 1.68 %
PER % in training data = 8.76 %
PER % in validation data = 8.05 %
LOC % in training data = 2.31 %
LOC % in validation data = 1.01 %
ORG % in training data = 0.17 %
ORG % in validation data = 0.0 %


In [89]:
ewo_corpus, ewo_nb_of_phrases = load_corpus(ewo_corpus_file, max_nb_of_phrases)

In [90]:
ewo_corpus.loc[ewo_corpus['ne-tag'] == 'PER']

Unnamed: 0,word,ne-tag
6,Teofil,PER
15,Yesus,PER
86,Yohannes,PER
104,Yesus,PER
230,Yesus,PER
...,...,...
3676,Maria,PER
3697,Yesus,PER
3740,Emmanuel,PER
3750,Yosef,PER


In [91]:
ewo_nb_of_phrases

210

In [92]:
for tag in tagSet:
    print("{0} % = {1} %".format(tag, np.round(ewo_corpus[ewo_corpus['ne-tag']==tag].shape[0] * 100 / ewo_corpus[ewo_corpus['ne-tag']!='\n'].shape[0], 2)))

O % = 84.15 %
MISC % = 2.54 %
PER % = 6.69 %
LOC % = 1.03 %
ORG % = 0.05 %


In [93]:
for tag in tagSet:
    print("{0} % = {1} %".format(tag, np.round(ewo_corpus[ewo_corpus['ne-tag']==tag].word.unique().shape[0] * 100 / ewo_corpus[ewo_corpus['ne-tag']!='\n'].word.unique().shape[0], 2)))

O % = 89.94 %
MISC % = 1.17 %
PER % = 8.3 %
LOC % = 1.86 %
ORG % = 0.2 %


In [94]:
ewo_corpus.describe()

Unnamed: 0,word,ne-tag
count,3779,3570
unique,1024,5
top,\n,O
freq,209,3180


In [95]:
ewo_corpus.head()

Unnamed: 0,word,ne-tag
0,Mfufub,MISC
1,Nsisim,MISC
2,ayi,O
3,sò,O
4,\n,


In [96]:
ewo_fingerprints = corpus_fingerprint(ewo_corpus, en_nb_of_phrases)

Normalization strategy: None


In [97]:
if is_only_vocab:
    text = list(ewo_corpus[ewo_corpus.word != "\n"].word.unique())
else:
    text = list(ewo_corpus[ewo_corpus.word != "\n"].word)
ewo_vocab = pd.DataFrame({"text":text})

In [98]:
if is_only_vocab:
    X_ewo = np.zeros((ewo_vocab.shape[0] * duplication, en_nb_of_phrases))
    ewo_target = np.zeros((ewo_vocab.shape[0] * duplication))
    p=0
    for i, row in ewo_vocab.iterrows():
        c = row.text
        for j in range(duplication):
            X_ewo[p] = ewo_fingerprints[c.split(" ")[0]]
            ewo_target[p] = tag2int[getTag(ewo_corpus[ewo_corpus.word == c.split(" ")[-1:][0]]['ne-tag'].iloc[0])]
            p+=1
    X_ewo, ewo_target = shuffle_data(X_ewo, ewo_target)

In [99]:
ewo_vocab[-10:]

Unnamed: 0,text
1013,sik
1014,Ntud
1015,bëyole
1016,Emmanuel
1017,Avëbë
1018,angavëbë
1019,oyò
1020,angabende
1021,anganòṅ
1022,angayole


In [100]:
if not is_only_vocab:
    X_ewo, ewo_target = corpus2trainingdata(ewo_corpus[ewo_corpus.word != "\n"], ewo_fingerprints)

In [101]:
if shuffle:
    X_ewo, ewo_target = shuffle_data(X_ewo, ewo_target)

In [102]:
y_ewo = ewo_target.copy()
print(y_ewo.shape, len(ewo_vocab))

(1023,) 1023


In [103]:
X_ewo.shape

(1023, 210)

In [104]:
y_ewo = ewo_target.copy()
y_ewo[:20]
if not BINARY:
    y_ewo = np_utils.to_categorical(y_ewo)

In [105]:
X_ewo = X_ewo.reshape((X_ewo.shape[0], en_nb_of_phrases))

In [106]:
# model = create_model(X.shape[1], len(tagSet))
# resultEval, train_by_tag, test_by_tag, ewo_by_tag = algoEval(X_train, y_train, X_val, y_val, X_ewo, y_ewo, model=model)

In [107]:
# resultEval

In [108]:
# train_by_tag

In [109]:
# test_by_tag

In [110]:
# ewo_by_tag

In [111]:
# resultEval.mean()

In [112]:
# resultEval.std()

In [113]:
resultCrossVal, trainByTagResult, testByTagResult, ewoByTagResult, model = algoCrossVal(X, y, X_ewo, y_ewo, repeat=10)

AlgoCrossValIter - 1
Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.48399, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.48399

Epoch 00003: val_loss did not improve from 0.48399

Epoch 00004: val_loss did not improve from 0.48399

Epoch 00005: val_loss did not improve from 0.48399

Epoch 00006: val_loss did not improve from 0.48399

Epoch




Epoch 00002: val_loss did not improve from 0.03967

Epoch 00003: val_loss did not improve from 0.03967

Epoch 00004: val_loss did not improve from 0.03967

Epoch 00005: val_loss did not improve from 0.03967

Epoch 00006: val_loss did not improve from 0.03967

Epoch 00007: val_loss did not improve from 0.03967

Epoch 00008: val_loss did not improve from 0.03967

Epoch 00009: val_loss did not improve from 0.03967

Epoch 00010: val_loss did not improve from 0.03967





Epoch 00001: val_loss improved from inf to 0.06763, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06763

Epoch 00003: val_loss did not improve from 0.06763

Epoch 00004: val_loss did not improve from 0.06763

Epoch 00005: val_loss did not improve from 0.06763

Epoch 00006: val_loss did not improve from 0.06763

Epoch 00007: val_loss did not improve from 0.06763

Epoch 00008: val_loss did not improve from 0.06763

Epoch 00009: val_loss did not improve from 0.06763

Epoch 00010: val_loss did not improve from 0.06763

Epoch 00001: val_loss improved from inf to 0.07362, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07362

Epoch 00003: val_loss did not improve from 0.07362

Epoch 00004: val_loss did not improve from 0.07362

Epoch 00005: val_loss did not improve from 0.07362

Epoch 00006: val_loss did not improve from 0.07362

Epoch 00007: val_loss did not improve from 0.07362

Epoch 00008: val_loss did not improve




Epoch 00001: val_loss improved from inf to 0.10068, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10068

Epoch 00003: val_loss did not improve from 0.10068

Epoch 00004: val_loss did not improve from 0.10068

Epoch 00005: val_loss did not improve from 0.10068

Epoch 00006: val_loss did not improve from 0.10068

Epoch 00007: val_loss did not improve from 0.10068

Epoch 00008: val_loss did not improve from 0.10068

Epoch 00009: val_loss did not improve from 0.10068

Epoch 00010: val_loss did not improve from 0.10068





Epoch 00001: val_loss improved from inf to 0.08038, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.08038 to 0.07938, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.07938 to 0.06440, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.06440

Epoch 00005: val_loss did not improve from 0.06440

Epoch 00006: val_loss did not improve from 0.06440

Epoch 00007: val_loss did not improve from 0.06440

Epoch 00008: val_loss did not improve from 0.06440

Epoch 00009: val_loss did not improve from 0.06440

Epoch 00010: val_loss did not improve from 0.06440





Epoch 00001: val_loss improved from inf to 0.06610, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06610

Epoch 00003: val_loss did not improve from 0.06610

Epoch 00004: val_loss did not improve from 0.06610

Epoch 00005: val_loss did not improve from 0.06610

Epoch 00006: val_loss did not improve from 0.06610

Epoch 00007: val_loss did not improve from 0.06610

Epoch 00008: val_loss did not improve from 0.06610

Epoch 00009: val_loss did not improve from 0.06610

Epoch 00010: val_loss did not improve from 0.06610





Epoch 00001: val_loss improved from inf to 0.04862, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04862

Epoch 00003: val_loss did not improve from 0.04862

Epoch 00004: val_loss did not improve from 0.04862

Epoch 00005: val_loss did not improve from 0.04862

Epoch 00006: val_loss did not improve from 0.04862

Epoch 00007: val_loss did not improve from 0.04862

Epoch 00008: val_loss did not improve from 0.04862

Epoch 00009: val_loss did not improve from 0.04862

Epoch 00010: val_loss did not improve from 0.04862

Epoch 00001: val_loss improved from inf to 0.07740, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.07740 to 0.06678, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.06678

Epoch 00004: val_loss did not improve from 0.06678

Epoch 00005: val_loss did not improve from 0.06678

Epoch 00006: val_loss did not improve from 0.06678

Epoch 00007: val_loss did not improve from 0.0




Epoch 00001: val_loss improved from inf to 0.06871, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06871

Epoch 00003: val_loss did not improve from 0.06871

Epoch 00004: val_loss did not improve from 0.06871

Epoch 00005: val_loss did not improve from 0.06871

Epoch 00006: val_loss did not improve from 0.06871

Epoch 00007: val_loss did not improve from 0.06871

Epoch 00008: val_loss did not improve from 0.06871

Epoch 00009: val_loss did not improve from 0.06871

Epoch 00010: val_loss did not improve from 0.06871




AlgoCrossValIter - 2
Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.49141, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.49141 to 0.48287, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.48287

Epoch 00004: val_loss did not improve from 0.48287

Epoch 00005: val_loss did not improve from 0.48287

Epoch 00006: v




Epoch 00001: val_loss improved from inf to 0.04278, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.04278 to 0.03717, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.03717

Epoch 00004: val_loss did not improve from 0.03717

Epoch 00005: val_loss did not improve from 0.03717

Epoch 00006: val_loss did not improve from 0.03717

Epoch 00007: val_loss did not improve from 0.03717

Epoch 00008: val_loss did not improve from 0.03717

Epoch 00009: val_loss did not improve from 0.03717

Epoch 00010: val_loss did not improve from 0.03717





Epoch 00001: val_loss improved from inf to 0.07052, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.07052 to 0.06914, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.06914

Epoch 00004: val_loss did not improve from 0.06914

Epoch 00005: val_loss did not improve from 0.06914

Epoch 00006: val_loss did not improve from 0.06914

Epoch 00007: val_loss did not improve from 0.06914

Epoch 00008: val_loss did not improve from 0.06914

Epoch 00009: val_loss did not improve from 0.06914

Epoch 00010: val_loss did not improve from 0.06914





Epoch 00001: val_loss improved from inf to 0.06902, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06902

Epoch 00003: val_loss did not improve from 0.06902

Epoch 00004: val_loss did not improve from 0.06902

Epoch 00005: val_loss did not improve from 0.06902

Epoch 00006: val_loss did not improve from 0.06902

Epoch 00007: val_loss did not improve from 0.06902

Epoch 00008: val_loss did not improve from 0.06902

Epoch 00009: val_loss did not improve from 0.06902

Epoch 00010: val_loss did not improve from 0.06902





Epoch 00001: val_loss improved from inf to 0.10495, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10495

Epoch 00003: val_loss did not improve from 0.10495

Epoch 00004: val_loss did not improve from 0.10495

Epoch 00005: val_loss did not improve from 0.10495

Epoch 00006: val_loss did not improve from 0.10495

Epoch 00007: val_loss did not improve from 0.10495

Epoch 00008: val_loss did not improve from 0.10495

Epoch 00009: val_loss did not improve from 0.10495

Epoch 00010: val_loss did not improve from 0.10495





Epoch 00001: val_loss improved from inf to 0.06034, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06034

Epoch 00003: val_loss did not improve from 0.06034

Epoch 00004: val_loss did not improve from 0.06034

Epoch 00005: val_loss did not improve from 0.06034

Epoch 00006: val_loss did not improve from 0.06034

Epoch 00007: val_loss did not improve from 0.06034

Epoch 00008: val_loss did not improve from 0.06034

Epoch 00009: val_loss did not improve from 0.06034

Epoch 00010: val_loss did not improve from 0.06034





Epoch 00001: val_loss improved from inf to 0.06468, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06468

Epoch 00003: val_loss did not improve from 0.06468

Epoch 00004: val_loss did not improve from 0.06468

Epoch 00005: val_loss did not improve from 0.06468

Epoch 00006: val_loss did not improve from 0.06468

Epoch 00007: val_loss did not improve from 0.06468

Epoch 00008: val_loss did not improve from 0.06468

Epoch 00009: val_loss did not improve from 0.06468

Epoch 00010: val_loss did not improve from 0.06468





Epoch 00001: val_loss improved from inf to 0.05208, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05208

Epoch 00003: val_loss did not improve from 0.05208

Epoch 00004: val_loss did not improve from 0.05208

Epoch 00005: val_loss did not improve from 0.05208

Epoch 00006: val_loss did not improve from 0.05208

Epoch 00007: val_loss did not improve from 0.05208

Epoch 00008: val_loss did not improve from 0.05208

Epoch 00009: val_loss did not improve from 0.05208

Epoch 00010: val_loss did not improve from 0.05208

Epoch 00001: val_loss improved from inf to 0.05497, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05497

Epoch 00003: val_loss did not improve from 0.05497

Epoch 00004: val_loss did not improve from 0.05497

Epoch 00005: val_loss did not improve from 0.05497

Epoch 00006: val_loss did not improve from 0.05497

Epoch 00007: val_loss did not improve from 0.05497

Epoch 00008: val_loss did not improve




Epoch 00001: val_loss improved from inf to 0.07366, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07366

Epoch 00003: val_loss did not improve from 0.07366

Epoch 00004: val_loss did not improve from 0.07366

Epoch 00005: val_loss did not improve from 0.07366

Epoch 00006: val_loss did not improve from 0.07366

Epoch 00007: val_loss did not improve from 0.07366

Epoch 00008: val_loss did not improve from 0.07366

Epoch 00009: val_loss did not improve from 0.07366

Epoch 00010: val_loss did not improve from 0.07366




AlgoCrossValIter - 3
Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.47694, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.47694

Epoch 00003: val_loss did not improve from 0.47694

Epoch 00004: val_loss did not improve from 0.47694

Epoch 00005: val_loss did not improve from 0.47694

Epoch 00006: val_loss did not improve from 0.47694

Epoch




Epoch 00002: val_loss did not improve from 0.04623

Epoch 00003: val_loss did not improve from 0.04623

Epoch 00004: val_loss did not improve from 0.04623

Epoch 00005: val_loss did not improve from 0.04623

Epoch 00006: val_loss did not improve from 0.04623

Epoch 00007: val_loss did not improve from 0.04623

Epoch 00008: val_loss did not improve from 0.04623

Epoch 00009: val_loss did not improve from 0.04623

Epoch 00010: val_loss did not improve from 0.04623





Epoch 00001: val_loss improved from inf to 0.06199, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06199

Epoch 00003: val_loss did not improve from 0.06199

Epoch 00004: val_loss did not improve from 0.06199

Epoch 00005: val_loss did not improve from 0.06199

Epoch 00006: val_loss did not improve from 0.06199

Epoch 00007: val_loss did not improve from 0.06199

Epoch 00008: val_loss did not improve from 0.06199

Epoch 00009: val_loss did not improve from 0.06199

Epoch 00010: val_loss did not improve from 0.06199





Epoch 00001: val_loss improved from inf to 0.07027, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07027

Epoch 00003: val_loss did not improve from 0.07027

Epoch 00004: val_loss did not improve from 0.07027

Epoch 00005: val_loss did not improve from 0.07027

Epoch 00006: val_loss did not improve from 0.07027

Epoch 00007: val_loss did not improve from 0.07027

Epoch 00008: val_loss did not improve from 0.07027

Epoch 00009: val_loss did not improve from 0.07027

Epoch 00010: val_loss did not improve from 0.07027





Epoch 00001: val_loss improved from inf to 0.09029, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.09029 to 0.08781, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.08781

Epoch 00004: val_loss did not improve from 0.08781

Epoch 00005: val_loss did not improve from 0.08781

Epoch 00006: val_loss did not improve from 0.08781

Epoch 00007: val_loss did not improve from 0.08781

Epoch 00008: val_loss did not improve from 0.08781

Epoch 00009: val_loss did not improve from 0.08781

Epoch 00010: val_loss did not improve from 0.08781





Epoch 00001: val_loss improved from inf to 0.08703, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.08703 to 0.07383, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.07383 to 0.04921, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.04921

Epoch 00005: val_loss did not improve from 0.04921

Epoch 00006: val_loss did not improve from 0.04921

Epoch 00007: val_loss did not improve from 0.04921

Epoch 00008: val_loss did not improve from 0.04921

Epoch 00009: val_loss did not improve from 0.04921

Epoch 00010: val_loss did not improve from 0.04921





Epoch 00001: val_loss improved from inf to 0.06089, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06089

Epoch 00003: val_loss did not improve from 0.06089

Epoch 00004: val_loss did not improve from 0.06089

Epoch 00005: val_loss did not improve from 0.06089

Epoch 00006: val_loss did not improve from 0.06089

Epoch 00007: val_loss did not improve from 0.06089

Epoch 00008: val_loss did not improve from 0.06089

Epoch 00009: val_loss did not improve from 0.06089

Epoch 00010: val_loss did not improve from 0.06089





Epoch 00001: val_loss improved from inf to 0.05014, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05014

Epoch 00003: val_loss did not improve from 0.05014

Epoch 00004: val_loss did not improve from 0.05014

Epoch 00005: val_loss did not improve from 0.05014

Epoch 00006: val_loss did not improve from 0.05014

Epoch 00007: val_loss did not improve from 0.05014

Epoch 00008: val_loss did not improve from 0.05014

Epoch 00009: val_loss did not improve from 0.05014

Epoch 00010: val_loss did not improve from 0.05014

Epoch 00001: val_loss improved from inf to 0.04776, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04776

Epoch 00003: val_loss did not improve from 0.04776

Epoch 00004: val_loss did not improve from 0.04776

Epoch 00005: val_loss did not improve from 0.04776

Epoch 00006: val_loss did not improve from 0.04776

Epoch 00007: val_loss did not improve from 0.04776

Epoch 00008: val_loss did not improve




Epoch 00001: val_loss improved from inf to 0.07694, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07694

Epoch 00003: val_loss did not improve from 0.07694

Epoch 00004: val_loss did not improve from 0.07694

Epoch 00005: val_loss did not improve from 0.07694

Epoch 00006: val_loss did not improve from 0.07694

Epoch 00007: val_loss did not improve from 0.07694

Epoch 00008: val_loss did not improve from 0.07694

Epoch 00009: val_loss did not improve from 0.07694

Epoch 00010: val_loss did not improve from 0.07694




AlgoCrossValIter - 4
Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.47377, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.47377

Epoch 00003: val_loss did not improve from 0.47377

Epoch 00004: val_loss did not improve from 0.47377

Epoch 00005: val_loss did not improve from 0.47377

Epoch 00006: val_loss did not improve from 0.47377

Epoch




Epoch 00001: val_loss improved from inf to 0.09882, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.09882 to 0.04078, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.04078

Epoch 00004: val_loss did not improve from 0.04078

Epoch 00005: val_loss did not improve from 0.04078

Epoch 00006: val_loss did not improve from 0.04078

Epoch 00007: val_loss did not improve from 0.04078

Epoch 00008: val_loss did not improve from 0.04078

Epoch 00009: val_loss did not improve from 0.04078

Epoch 00010: val_loss did not improve from 0.04078





Epoch 00001: val_loss improved from inf to 0.07896, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07896

Epoch 00003: val_loss did not improve from 0.07896

Epoch 00004: val_loss did not improve from 0.07896

Epoch 00005: val_loss did not improve from 0.07896

Epoch 00006: val_loss did not improve from 0.07896

Epoch 00007: val_loss did not improve from 0.07896

Epoch 00008: val_loss did not improve from 0.07896

Epoch 00009: val_loss did not improve from 0.07896

Epoch 00010: val_loss did not improve from 0.07896





Epoch 00001: val_loss improved from inf to 0.06624, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06624

Epoch 00003: val_loss did not improve from 0.06624

Epoch 00004: val_loss did not improve from 0.06624

Epoch 00005: val_loss did not improve from 0.06624

Epoch 00006: val_loss did not improve from 0.06624

Epoch 00007: val_loss did not improve from 0.06624

Epoch 00008: val_loss did not improve from 0.06624

Epoch 00009: val_loss did not improve from 0.06624

Epoch 00010: val_loss did not improve from 0.06624





Epoch 00001: val_loss improved from inf to 0.09017, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09017

Epoch 00003: val_loss did not improve from 0.09017

Epoch 00004: val_loss did not improve from 0.09017

Epoch 00005: val_loss did not improve from 0.09017

Epoch 00006: val_loss did not improve from 0.09017

Epoch 00007: val_loss did not improve from 0.09017

Epoch 00008: val_loss did not improve from 0.09017

Epoch 00009: val_loss did not improve from 0.09017

Epoch 00010: val_loss did not improve from 0.09017





Epoch 00001: val_loss improved from inf to 0.05077, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.05077 to 0.04557, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.04557

Epoch 00004: val_loss did not improve from 0.04557

Epoch 00005: val_loss did not improve from 0.04557

Epoch 00006: val_loss did not improve from 0.04557

Epoch 00007: val_loss did not improve from 0.04557

Epoch 00008: val_loss did not improve from 0.04557

Epoch 00009: val_loss did not improve from 0.04557

Epoch 00010: val_loss did not improve from 0.04557





Epoch 00001: val_loss improved from inf to 0.06570, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06570

Epoch 00003: val_loss did not improve from 0.06570

Epoch 00004: val_loss did not improve from 0.06570

Epoch 00005: val_loss did not improve from 0.06570

Epoch 00006: val_loss did not improve from 0.06570

Epoch 00007: val_loss did not improve from 0.06570

Epoch 00008: val_loss did not improve from 0.06570

Epoch 00009: val_loss did not improve from 0.06570

Epoch 00010: val_loss did not improve from 0.06570





Epoch 00001: val_loss improved from inf to 0.04781, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04781

Epoch 00003: val_loss did not improve from 0.04781

Epoch 00004: val_loss did not improve from 0.04781

Epoch 00005: val_loss did not improve from 0.04781

Epoch 00006: val_loss did not improve from 0.04781

Epoch 00007: val_loss did not improve from 0.04781

Epoch 00008: val_loss did not improve from 0.04781

Epoch 00009: val_loss did not improve from 0.04781

Epoch 00010: val_loss did not improve from 0.04781

Epoch 00001: val_loss improved from inf to 0.05781, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05781

Epoch 00003: val_loss did not improve from 0.05781

Epoch 00004: val_loss did not improve from 0.05781

Epoch 00005: val_loss did not improve from 0.05781

Epoch 00006: val_loss did not improve from 0.05781

Epoch 00007: val_loss did not improve from 0.05781

Epoch 00008: val_loss did not improve




Epoch 00001: val_loss improved from inf to 0.07343, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07343

Epoch 00003: val_loss did not improve from 0.07343

Epoch 00004: val_loss did not improve from 0.07343

Epoch 00005: val_loss did not improve from 0.07343

Epoch 00006: val_loss did not improve from 0.07343

Epoch 00007: val_loss did not improve from 0.07343

Epoch 00008: val_loss did not improve from 0.07343

Epoch 00009: val_loss did not improve from 0.07343

Epoch 00010: val_loss did not improve from 0.07343




AlgoCrossValIter - 5
Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.45678, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.45678

Epoch 00003: val_loss did not improve from 0.45678

Epoch 00004: val_loss did not improve from 0.45678

Epoch 00005: val_loss did not improve from 0.45678

Epoch 00006: val_loss did not improve from 0.45678

Epoch




Epoch 00001: val_loss improved from inf to 0.03057, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.03057

Epoch 00003: val_loss did not improve from 0.03057

Epoch 00004: val_loss did not improve from 0.03057

Epoch 00005: val_loss did not improve from 0.03057

Epoch 00006: val_loss did not improve from 0.03057

Epoch 00007: val_loss did not improve from 0.03057

Epoch 00008: val_loss did not improve from 0.03057

Epoch 00009: val_loss did not improve from 0.03057

Epoch 00010: val_loss did not improve from 0.03057





Epoch 00001: val_loss improved from inf to 0.09749, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09749

Epoch 00003: val_loss did not improve from 0.09749

Epoch 00004: val_loss improved from 0.09749 to 0.09405, saving model to best-model-conll.hdfs

Epoch 00005: val_loss improved from 0.09405 to 0.08935, saving model to best-model-conll.hdfs

Epoch 00006: val_loss did not improve from 0.08935

Epoch 00007: val_loss did not improve from 0.08935

Epoch 00008: val_loss did not improve from 0.08935

Epoch 00009: val_loss did not improve from 0.08935

Epoch 00010: val_loss did not improve from 0.08935

Epoch 00001: val_loss improved from inf to 0.07477, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.07477 to 0.06900, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.06900

Epoch 00004: val_loss did not improve from 0.06900

Epoch 00005: val_loss did not improve from 0.06900

Epoch 00006:




Epoch 00001: val_loss improved from inf to 0.08578, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08578

Epoch 00003: val_loss did not improve from 0.08578

Epoch 00004: val_loss did not improve from 0.08578

Epoch 00005: val_loss did not improve from 0.08578

Epoch 00006: val_loss did not improve from 0.08578

Epoch 00007: val_loss did not improve from 0.08578

Epoch 00008: val_loss did not improve from 0.08578

Epoch 00009: val_loss did not improve from 0.08578

Epoch 00010: val_loss did not improve from 0.08578





Epoch 00001: val_loss improved from inf to 0.05908, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.05908 to 0.05358, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.05358

Epoch 00004: val_loss did not improve from 0.05358

Epoch 00005: val_loss did not improve from 0.05358

Epoch 00006: val_loss did not improve from 0.05358

Epoch 00007: val_loss did not improve from 0.05358

Epoch 00008: val_loss did not improve from 0.05358

Epoch 00009: val_loss did not improve from 0.05358

Epoch 00010: val_loss did not improve from 0.05358





Epoch 00001: val_loss improved from inf to 0.06222, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06222

Epoch 00003: val_loss did not improve from 0.06222

Epoch 00004: val_loss did not improve from 0.06222

Epoch 00005: val_loss did not improve from 0.06222

Epoch 00006: val_loss did not improve from 0.06222

Epoch 00007: val_loss did not improve from 0.06222

Epoch 00008: val_loss did not improve from 0.06222

Epoch 00009: val_loss did not improve from 0.06222

Epoch 00010: val_loss did not improve from 0.06222





Epoch 00001: val_loss improved from inf to 0.04714, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04714

Epoch 00003: val_loss did not improve from 0.04714

Epoch 00004: val_loss did not improve from 0.04714

Epoch 00005: val_loss did not improve from 0.04714

Epoch 00006: val_loss did not improve from 0.04714

Epoch 00007: val_loss did not improve from 0.04714

Epoch 00008: val_loss did not improve from 0.04714

Epoch 00009: val_loss did not improve from 0.04714

Epoch 00010: val_loss did not improve from 0.04714

Epoch 00001: val_loss improved from inf to 0.06340, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06340

Epoch 00003: val_loss did not improve from 0.06340

Epoch 00004: val_loss did not improve from 0.06340

Epoch 00005: val_loss did not improve from 0.06340

Epoch 00006: val_loss did not improve from 0.06340

Epoch 00007: val_loss did not improve from 0.06340

Epoch 00008: val_loss did not improve




Epoch 00001: val_loss improved from inf to 0.06734, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06734

Epoch 00003: val_loss did not improve from 0.06734

Epoch 00004: val_loss did not improve from 0.06734

Epoch 00005: val_loss did not improve from 0.06734

Epoch 00006: val_loss did not improve from 0.06734

Epoch 00007: val_loss did not improve from 0.06734

Epoch 00008: val_loss did not improve from 0.06734

Epoch 00009: val_loss did not improve from 0.06734

Epoch 00010: val_loss did not improve from 0.06734




AlgoCrossValIter - 6
Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.46740, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.46740

Epoch 00003: val_loss did not improve from 0.46740

Epoch 00004: val_loss did not improve from 0.46740

Epoch 00005: val_loss did not improve from 0.46740

Epoch 00006: val_loss did not improve from 0.46740

Epoch




Epoch 00001: val_loss improved from inf to 0.05773, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.05773 to 0.04822, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.04822

Epoch 00004: val_loss did not improve from 0.04822

Epoch 00005: val_loss did not improve from 0.04822

Epoch 00006: val_loss did not improve from 0.04822

Epoch 00007: val_loss did not improve from 0.04822

Epoch 00008: val_loss did not improve from 0.04822

Epoch 00009: val_loss did not improve from 0.04822

Epoch 00010: val_loss did not improve from 0.04822





Epoch 00001: val_loss improved from inf to 0.06799, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06799

Epoch 00003: val_loss did not improve from 0.06799

Epoch 00004: val_loss did not improve from 0.06799

Epoch 00005: val_loss did not improve from 0.06799

Epoch 00006: val_loss did not improve from 0.06799

Epoch 00007: val_loss did not improve from 0.06799

Epoch 00008: val_loss did not improve from 0.06799

Epoch 00009: val_loss did not improve from 0.06799

Epoch 00010: val_loss did not improve from 0.06799





Epoch 00001: val_loss improved from inf to 0.07745, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07745

Epoch 00003: val_loss did not improve from 0.07745

Epoch 00004: val_loss did not improve from 0.07745

Epoch 00005: val_loss did not improve from 0.07745

Epoch 00006: val_loss did not improve from 0.07745

Epoch 00007: val_loss did not improve from 0.07745

Epoch 00008: val_loss did not improve from 0.07745

Epoch 00009: val_loss did not improve from 0.07745

Epoch 00010: val_loss did not improve from 0.07745





Epoch 00001: val_loss improved from inf to 0.08276, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08276

Epoch 00003: val_loss did not improve from 0.08276

Epoch 00004: val_loss did not improve from 0.08276

Epoch 00005: val_loss did not improve from 0.08276

Epoch 00006: val_loss did not improve from 0.08276

Epoch 00007: val_loss did not improve from 0.08276

Epoch 00008: val_loss did not improve from 0.08276

Epoch 00009: val_loss did not improve from 0.08276

Epoch 00010: val_loss did not improve from 0.08276





Epoch 00001: val_loss improved from inf to 0.10773, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.10773 to 0.06432, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.06432

Epoch 00004: val_loss did not improve from 0.06432

Epoch 00005: val_loss did not improve from 0.06432

Epoch 00006: val_loss did not improve from 0.06432

Epoch 00007: val_loss did not improve from 0.06432

Epoch 00008: val_loss did not improve from 0.06432

Epoch 00009: val_loss did not improve from 0.06432

Epoch 00010: val_loss did not improve from 0.06432





Epoch 00001: val_loss improved from inf to 0.06595, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06595

Epoch 00003: val_loss did not improve from 0.06595

Epoch 00004: val_loss did not improve from 0.06595

Epoch 00005: val_loss did not improve from 0.06595

Epoch 00006: val_loss did not improve from 0.06595

Epoch 00007: val_loss did not improve from 0.06595

Epoch 00008: val_loss did not improve from 0.06595

Epoch 00009: val_loss did not improve from 0.06595

Epoch 00010: val_loss did not improve from 0.06595





Epoch 00001: val_loss improved from inf to 0.05043, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05043

Epoch 00003: val_loss did not improve from 0.05043

Epoch 00004: val_loss did not improve from 0.05043

Epoch 00005: val_loss did not improve from 0.05043

Epoch 00006: val_loss did not improve from 0.05043

Epoch 00007: val_loss did not improve from 0.05043

Epoch 00008: val_loss did not improve from 0.05043

Epoch 00009: val_loss did not improve from 0.05043

Epoch 00010: val_loss did not improve from 0.05043

Epoch 00001: val_loss improved from inf to 0.05973, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05973

Epoch 00003: val_loss did not improve from 0.05973

Epoch 00004: val_loss did not improve from 0.05973

Epoch 00005: val_loss did not improve from 0.05973

Epoch 00006: val_loss did not improve from 0.05973

Epoch 00007: val_loss did not improve from 0.05973

Epoch 00008: val_loss did not improve




Epoch 00001: val_loss improved from inf to 0.07936, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07936

Epoch 00003: val_loss did not improve from 0.07936

Epoch 00004: val_loss did not improve from 0.07936

Epoch 00005: val_loss did not improve from 0.07936

Epoch 00006: val_loss did not improve from 0.07936

Epoch 00007: val_loss did not improve from 0.07936

Epoch 00008: val_loss did not improve from 0.07936

Epoch 00009: val_loss did not improve from 0.07936

Epoch 00010: val_loss did not improve from 0.07936




AlgoCrossValIter - 7
Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.45552, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.45552

Epoch 00003: val_loss did not improve from 0.45552

Epoch 00004: val_loss did not improve from 0.45552

Epoch 00005: val_loss did not improve from 0.45552

Epoch 00006: val_loss did not improve from 0.45552

Epoch




Epoch 00001: val_loss improved from inf to 0.03656, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.03656

Epoch 00003: val_loss did not improve from 0.03656

Epoch 00004: val_loss did not improve from 0.03656

Epoch 00005: val_loss did not improve from 0.03656

Epoch 00006: val_loss did not improve from 0.03656

Epoch 00007: val_loss did not improve from 0.03656

Epoch 00008: val_loss did not improve from 0.03656

Epoch 00009: val_loss did not improve from 0.03656

Epoch 00010: val_loss did not improve from 0.03656

Epoch 00001: val_loss improved from inf to 0.05967, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.05967 to 0.05631, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.05631

Epoch 00004: val_loss did not improve from 0.05631

Epoch 00005: val_loss did not improve from 0.05631

Epoch 00006: val_loss did not improve from 0.05631

Epoch 00007: val_loss did not improve from 0.0




Epoch 00001: val_loss improved from inf to 0.08590, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08590

Epoch 00003: val_loss did not improve from 0.08590

Epoch 00004: val_loss did not improve from 0.08590

Epoch 00005: val_loss did not improve from 0.08590

Epoch 00006: val_loss did not improve from 0.08590

Epoch 00007: val_loss did not improve from 0.08590

Epoch 00008: val_loss did not improve from 0.08590

Epoch 00009: val_loss did not improve from 0.08590

Epoch 00010: val_loss did not improve from 0.08590





Epoch 00001: val_loss improved from inf to 0.05427, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05427

Epoch 00003: val_loss did not improve from 0.05427

Epoch 00004: val_loss did not improve from 0.05427

Epoch 00005: val_loss did not improve from 0.05427

Epoch 00006: val_loss did not improve from 0.05427

Epoch 00007: val_loss did not improve from 0.05427

Epoch 00008: val_loss did not improve from 0.05427

Epoch 00009: val_loss did not improve from 0.05427

Epoch 00010: val_loss did not improve from 0.05427





Epoch 00001: val_loss improved from inf to 0.07042, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07042

Epoch 00003: val_loss did not improve from 0.07042

Epoch 00004: val_loss did not improve from 0.07042

Epoch 00005: val_loss did not improve from 0.07042

Epoch 00006: val_loss did not improve from 0.07042

Epoch 00007: val_loss did not improve from 0.07042

Epoch 00008: val_loss did not improve from 0.07042

Epoch 00009: val_loss did not improve from 0.07042

Epoch 00010: val_loss did not improve from 0.07042





Epoch 00001: val_loss improved from inf to 0.04610, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04610

Epoch 00003: val_loss did not improve from 0.04610

Epoch 00004: val_loss did not improve from 0.04610

Epoch 00005: val_loss did not improve from 0.04610

Epoch 00006: val_loss did not improve from 0.04610

Epoch 00007: val_loss did not improve from 0.04610

Epoch 00008: val_loss did not improve from 0.04610

Epoch 00009: val_loss did not improve from 0.04610

Epoch 00010: val_loss did not improve from 0.04610

Epoch 00001: val_loss improved from inf to 0.06485, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06485

Epoch 00003: val_loss did not improve from 0.06485

Epoch 00004: val_loss did not improve from 0.06485

Epoch 00005: val_loss did not improve from 0.06485

Epoch 00006: val_loss did not improve from 0.06485

Epoch 00007: val_loss did not improve from 0.06485

Epoch 00008: val_loss did not improve




Epoch 00001: val_loss improved from inf to 0.07308, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07308

Epoch 00003: val_loss did not improve from 0.07308

Epoch 00004: val_loss did not improve from 0.07308

Epoch 00005: val_loss did not improve from 0.07308

Epoch 00006: val_loss did not improve from 0.07308

Epoch 00007: val_loss did not improve from 0.07308

Epoch 00008: val_loss did not improve from 0.07308

Epoch 00009: val_loss did not improve from 0.07308

Epoch 00010: val_loss did not improve from 0.07308




AlgoCrossValIter - 8
Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.48981, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.48981

Epoch 00003: val_loss did not improve from 0.48981

Epoch 00004: val_loss did not improve from 0.48981

Epoch 00005: val_loss did not improve from 0.48981

Epoch 00006: val_loss did not improve from 0.48981

Epoch




Epoch 00002: val_loss improved from 0.04634 to 0.04445, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.04445

Epoch 00004: val_loss did not improve from 0.04445

Epoch 00005: val_loss did not improve from 0.04445

Epoch 00006: val_loss did not improve from 0.04445

Epoch 00007: val_loss did not improve from 0.04445

Epoch 00008: val_loss did not improve from 0.04445

Epoch 00009: val_loss did not improve from 0.04445

Epoch 00010: val_loss did not improve from 0.04445





Epoch 00001: val_loss improved from inf to 0.11530, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.11530 to 0.08016, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.08016

Epoch 00004: val_loss did not improve from 0.08016

Epoch 00005: val_loss did not improve from 0.08016

Epoch 00006: val_loss did not improve from 0.08016

Epoch 00007: val_loss did not improve from 0.08016

Epoch 00008: val_loss did not improve from 0.08016

Epoch 00009: val_loss did not improve from 0.08016

Epoch 00010: val_loss did not improve from 0.08016

Epoch 00001: val_loss improved from inf to 0.07248, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.07248 to 0.07220, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.07220

Epoch 00004: val_loss did not improve from 0.07220

Epoch 00005: val_loss did not improve from 0.07220

Epoch 00006: val_loss did not improve from 0.07220

Epo




Epoch 00001: val_loss improved from inf to 0.10192, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.10192 to 0.09523, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.09523

Epoch 00004: val_loss did not improve from 0.09523

Epoch 00005: val_loss did not improve from 0.09523

Epoch 00006: val_loss did not improve from 0.09523

Epoch 00007: val_loss did not improve from 0.09523

Epoch 00008: val_loss did not improve from 0.09523

Epoch 00009: val_loss did not improve from 0.09523

Epoch 00010: val_loss did not improve from 0.09523

Epoch 00001: val_loss improved from inf to 0.06503, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06503

Epoch 00003: val_loss did not improve from 0.06503

Epoch 00004: val_loss did not improve from 0.06503

Epoch 00005: val_loss did not improve from 0.06503

Epoch 00006: val_loss did not improve from 0.06503

Epoch 00007: val_loss improved from 0.06503 to




Epoch 00001: val_loss improved from inf to 0.07048, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07048

Epoch 00003: val_loss did not improve from 0.07048

Epoch 00004: val_loss did not improve from 0.07048

Epoch 00005: val_loss did not improve from 0.07048

Epoch 00006: val_loss did not improve from 0.07048

Epoch 00007: val_loss did not improve from 0.07048

Epoch 00008: val_loss did not improve from 0.07048

Epoch 00009: val_loss did not improve from 0.07048

Epoch 00010: val_loss did not improve from 0.07048





Epoch 00001: val_loss improved from inf to 0.04952, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04952

Epoch 00003: val_loss did not improve from 0.04952

Epoch 00004: val_loss did not improve from 0.04952

Epoch 00005: val_loss did not improve from 0.04952

Epoch 00006: val_loss did not improve from 0.04952

Epoch 00007: val_loss did not improve from 0.04952

Epoch 00008: val_loss did not improve from 0.04952

Epoch 00009: val_loss did not improve from 0.04952

Epoch 00010: val_loss did not improve from 0.04952

Epoch 00001: val_loss improved from inf to 0.06483, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06483

Epoch 00003: val_loss did not improve from 0.06483

Epoch 00004: val_loss did not improve from 0.06483

Epoch 00005: val_loss did not improve from 0.06483

Epoch 00006: val_loss did not improve from 0.06483

Epoch 00007: val_loss did not improve from 0.06483

Epoch 00008: val_loss did not improve




Epoch 00001: val_loss improved from inf to 0.07382, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07382

Epoch 00003: val_loss did not improve from 0.07382

Epoch 00004: val_loss did not improve from 0.07382

Epoch 00005: val_loss did not improve from 0.07382

Epoch 00006: val_loss did not improve from 0.07382

Epoch 00007: val_loss did not improve from 0.07382

Epoch 00008: val_loss did not improve from 0.07382

Epoch 00009: val_loss did not improve from 0.07382

Epoch 00010: val_loss did not improve from 0.07382




AlgoCrossValIter - 9
Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.48041, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.48041

Epoch 00003: val_loss did not improve from 0.48041

Epoch 00004: val_loss did not improve from 0.48041

Epoch 00005: val_loss did not improve from 0.48041

Epoch 00006: val_loss did not improve from 0.48041

Epoc




Epoch 00002: val_loss did not improve from 0.04422

Epoch 00003: val_loss did not improve from 0.04422

Epoch 00004: val_loss did not improve from 0.04422

Epoch 00005: val_loss did not improve from 0.04422

Epoch 00006: val_loss did not improve from 0.04422

Epoch 00007: val_loss did not improve from 0.04422

Epoch 00008: val_loss did not improve from 0.04422

Epoch 00009: val_loss did not improve from 0.04422

Epoch 00010: val_loss did not improve from 0.04422





Epoch 00001: val_loss improved from inf to 0.07546, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07546

Epoch 00003: val_loss did not improve from 0.07546

Epoch 00004: val_loss did not improve from 0.07546

Epoch 00005: val_loss did not improve from 0.07546

Epoch 00006: val_loss did not improve from 0.07546

Epoch 00007: val_loss did not improve from 0.07546

Epoch 00008: val_loss did not improve from 0.07546

Epoch 00009: val_loss did not improve from 0.07546

Epoch 00010: val_loss did not improve from 0.07546





Epoch 00001: val_loss improved from inf to 0.06340, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06340

Epoch 00003: val_loss did not improve from 0.06340

Epoch 00004: val_loss did not improve from 0.06340

Epoch 00005: val_loss did not improve from 0.06340

Epoch 00006: val_loss did not improve from 0.06340

Epoch 00007: val_loss did not improve from 0.06340

Epoch 00008: val_loss did not improve from 0.06340

Epoch 00009: val_loss did not improve from 0.06340

Epoch 00010: val_loss did not improve from 0.06340





Epoch 00001: val_loss improved from inf to 0.09359, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09359

Epoch 00003: val_loss did not improve from 0.09359

Epoch 00004: val_loss did not improve from 0.09359

Epoch 00005: val_loss did not improve from 0.09359

Epoch 00006: val_loss did not improve from 0.09359

Epoch 00007: val_loss did not improve from 0.09359

Epoch 00008: val_loss did not improve from 0.09359

Epoch 00009: val_loss did not improve from 0.09359

Epoch 00010: val_loss did not improve from 0.09359





Epoch 00001: val_loss improved from inf to 0.05883, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05883

Epoch 00003: val_loss improved from 0.05883 to 0.05221, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.05221

Epoch 00005: val_loss did not improve from 0.05221

Epoch 00006: val_loss did not improve from 0.05221

Epoch 00007: val_loss did not improve from 0.05221

Epoch 00008: val_loss did not improve from 0.05221

Epoch 00009: val_loss did not improve from 0.05221

Epoch 00010: val_loss did not improve from 0.05221





Epoch 00001: val_loss improved from inf to 0.07135, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.07135 to 0.06394, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.06394

Epoch 00004: val_loss did not improve from 0.06394

Epoch 00005: val_loss did not improve from 0.06394

Epoch 00006: val_loss did not improve from 0.06394

Epoch 00007: val_loss did not improve from 0.06394

Epoch 00008: val_loss did not improve from 0.06394

Epoch 00009: val_loss did not improve from 0.06394

Epoch 00010: val_loss did not improve from 0.06394





Epoch 00001: val_loss improved from inf to 0.04817, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04817

Epoch 00003: val_loss did not improve from 0.04817

Epoch 00004: val_loss did not improve from 0.04817

Epoch 00005: val_loss did not improve from 0.04817

Epoch 00006: val_loss did not improve from 0.04817

Epoch 00007: val_loss did not improve from 0.04817

Epoch 00008: val_loss did not improve from 0.04817

Epoch 00009: val_loss did not improve from 0.04817

Epoch 00010: val_loss did not improve from 0.04817

Epoch 00001: val_loss improved from inf to 0.05794, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05794

Epoch 00003: val_loss did not improve from 0.05794

Epoch 00004: val_loss did not improve from 0.05794

Epoch 00005: val_loss did not improve from 0.05794

Epoch 00006: val_loss did not improve from 0.05794

Epoch 00007: val_loss did not improve from 0.05794

Epoch 00008: val_loss did not improve




Epoch 00001: val_loss improved from inf to 0.07854, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07854

Epoch 00003: val_loss did not improve from 0.07854

Epoch 00004: val_loss did not improve from 0.07854

Epoch 00005: val_loss did not improve from 0.07854

Epoch 00006: val_loss did not improve from 0.07854

Epoch 00007: val_loss did not improve from 0.07854

Epoch 00008: val_loss did not improve from 0.07854

Epoch 00009: val_loss did not improve from 0.07854

Epoch 00010: val_loss did not improve from 0.07854




AlgoCrossValIter - 10
Model: "sequential_11"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.47409, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.47409

Epoch 00003: val_loss did not improve from 0.47409

Epoch 00004: val_loss did not improve from 0.47409

Epoch 00005: val_loss did not improve from 0.47409

Epoch 00006: val_loss did not improve from 0.47409

Epo




Epoch 00001: val_loss improved from inf to 0.03713, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.03713

Epoch 00003: val_loss did not improve from 0.03713

Epoch 00004: val_loss did not improve from 0.03713

Epoch 00005: val_loss did not improve from 0.03713

Epoch 00006: val_loss did not improve from 0.03713

Epoch 00007: val_loss did not improve from 0.03713

Epoch 00008: val_loss did not improve from 0.03713

Epoch 00009: val_loss did not improve from 0.03713

Epoch 00010: val_loss did not improve from 0.03713





Epoch 00001: val_loss improved from inf to 0.07783, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.07783 to 0.07658, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.07658

Epoch 00004: val_loss did not improve from 0.07658

Epoch 00005: val_loss did not improve from 0.07658

Epoch 00006: val_loss did not improve from 0.07658

Epoch 00007: val_loss did not improve from 0.07658

Epoch 00008: val_loss did not improve from 0.07658

Epoch 00009: val_loss did not improve from 0.07658

Epoch 00010: val_loss did not improve from 0.07658

Epoch 00001: val_loss improved from inf to 0.07630, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.07630 to 0.06665, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.06665

Epoch 00004: val_loss did not improve from 0.06665

Epoch 00005: val_loss did not improve from 0.06665

Epoch 00006: val_loss did not improve from 0.06665

Epo




Epoch 00001: val_loss improved from inf to 0.08515, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08515

Epoch 00003: val_loss did not improve from 0.08515

Epoch 00004: val_loss did not improve from 0.08515

Epoch 00005: val_loss did not improve from 0.08515

Epoch 00006: val_loss did not improve from 0.08515

Epoch 00007: val_loss did not improve from 0.08515

Epoch 00008: val_loss did not improve from 0.08515

Epoch 00009: val_loss did not improve from 0.08515

Epoch 00010: val_loss did not improve from 0.08515

Epoch 00001: val_loss improved from inf to 0.06884, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.06884 to 0.06237, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.06237

Epoch 00004: val_loss did not improve from 0.06237

Epoch 00005: val_loss did not improve from 0.06237

Epoch 00006: val_loss did not improve from 0.06237

Epoch 00007: val_loss did not improve from 0.0




Epoch 00001: val_loss improved from inf to 0.06188, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06188

Epoch 00003: val_loss did not improve from 0.06188

Epoch 00004: val_loss did not improve from 0.06188

Epoch 00005: val_loss did not improve from 0.06188

Epoch 00006: val_loss did not improve from 0.06188

Epoch 00007: val_loss did not improve from 0.06188

Epoch 00008: val_loss did not improve from 0.06188

Epoch 00009: val_loss did not improve from 0.06188

Epoch 00010: val_loss did not improve from 0.06188





Epoch 00001: val_loss improved from inf to 0.05141, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05141

Epoch 00003: val_loss did not improve from 0.05141

Epoch 00004: val_loss did not improve from 0.05141

Epoch 00005: val_loss did not improve from 0.05141

Epoch 00006: val_loss did not improve from 0.05141

Epoch 00007: val_loss did not improve from 0.05141

Epoch 00008: val_loss did not improve from 0.05141

Epoch 00009: val_loss did not improve from 0.05141

Epoch 00010: val_loss did not improve from 0.05141

Epoch 00001: val_loss improved from inf to 0.08059, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.08059 to 0.05677, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.05677

Epoch 00004: val_loss did not improve from 0.05677

Epoch 00005: val_loss did not improve from 0.05677

Epoch 00006: val_loss did not improve from 0.05677

Epoch 00007: val_loss did not improve from 0.0




Epoch 00001: val_loss improved from inf to 0.06838, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06838

Epoch 00003: val_loss did not improve from 0.06838

Epoch 00004: val_loss did not improve from 0.06838

Epoch 00005: val_loss did not improve from 0.06838

Epoch 00006: val_loss did not improve from 0.06838

Epoch 00007: val_loss did not improve from 0.06838

Epoch 00008: val_loss did not improve from 0.06838

Epoch 00009: val_loss did not improve from 0.06838

Epoch 00010: val_loss did not improve from 0.06838




In [114]:
normalization_strategy

In [115]:
resultCrossVal.to_csv("results.csv")
resultCrossVal

Unnamed: 0,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9
P_val,78.655,77.773,77.111,79.413,76.201,79.431,78.703,77.75,76.773,78.432
P_train,78.046,87.797,78.579,88.059,86.603,88.288,86.157,78.9,79.331,76.908
P_ewo,67.155,76.636,69.506,74.975,72.125,75.531,71.475,69.731,70.922,68.947
R_val,76.165,77.237,77.313,77.415,77.237,78.665,77.415,77.908,73.778,78.904
R_train,77.823,81.366,77.163,78.162,79.697,77.917,80.199,76.869,75.442,79.959
R_ewo,62.408,65.462,62.316,62.593,64.723,61.945,64.351,61.296,60.556,65.74
F1-val,85.595556,85.55,85.653333,86.75,84.857778,87.273333,86.386667,85.661111,82.886667,87.133333
F1-train,86.326667,83.086,86.13,79.614,79.698,79.71,79.699,86.144444,85.03,87.045556
F1-ewo,71.471111,69.539,72.306667,66.687,66.725,66.774,66.05,71.756667,71.094444,74.701111


In [116]:
resultCrossVal.mean(axis=1).to_frame()

Unnamed: 0,0
P_val,78.0242
P_train,82.8668
P_ewo,71.7003
R_val,77.2037
R_train,78.4597
R_ewo,63.139
F1-val,85.774778
F1-train,83.248367
F1-ewo,69.7105


In [117]:
resultCrossVal.std(axis=1).to_frame()

Unnamed: 0,0
P_val,1.095172
P_train,4.839764
P_ewo,3.118572
R_val,1.431403
R_train,1.806081
R_ewo,1.799491
F1-val,1.278419
F1-train,3.244106
F1-ewo,2.999159


In [118]:
# trainByTagResult.to_csv("results/train-by-tag.csv")
# trainByTagResult

In [119]:
# trainByTagResult.mean(axis=1).to_frame()

In [120]:
# trainByTagResult.std(axis=1).to_frame()

In [121]:
# testByTagResult.to_csv("results/test-by-tag.csv")
# testByTagResult

In [122]:
# testByTagResult.mean(axis=1).to_frame()

In [123]:
# testByTagResult.std(axis=1).to_frame()

In [124]:
# ewoByTagResult.to_csv("results/ewo-by-tag.csv")

In [125]:
# ewoByTagResult = pd.read_csv("results/ewo-by-tag.csv", index_col=0)
# ewoByTagResult

In [126]:
# ewoByTagResult.mean(axis=1).to_frame()

In [127]:
# ewoByTagResult.std(axis=1).to_frame()

In [128]:
# columns = en_fingerprints.columns

# print("Pred", "Real", "Freq", "Word", sep="\t")
# for c in columns:
#     prediction = model.predict(en_fingerprints[c].values.reshape((1, 210)))
#     pred_tag = int2tag[np.argmax(prediction)]
#     real_tag = en_corpus[en_corpus.word == c].iloc[0]['ne-tag']
    
#     if pred_tag != real_tag:
#         print(pred_tag, real_tag, en_fingerprints[c].max(), c, sep="\t")