In [1]:
# import
import keras
import sys
import numpy as np
import string
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import SimpleRNN, Dense
from keras.utils import np_utils, plot_model
from keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn import model_selection
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, precision_recall_fscore_support
import h5py as h5py

Using TensorFlow backend.


In [2]:
# if we are doeing binary classification. That means say if a token is a named entity or not
BINARY = False

# number of epochs for training
epochs = 10 

# the english side of the corpus
en_corpus_file = "corpus-en.txt"

# the ewondo side of the corpus
ewo_corpus_file = "corpus-ewo.txt"

# name of the file to same the model 
best_model_file = "best-model-conll.hdfs"

# The maximal number of phrases to use
max_nb_of_phrases =  -1

# the maximal number of duplicates for each word in the corpus
duplication = 1

# wether we are using only the vocabulary, ro redundancy
is_only_vocab = True

# if word should be shuffle or not
shuffle = is_only_vocab

# normalization strategy
# log, max, mean_log
# None: for no normalization
normalization_strategy = None

# if we are using the Zennaki et al. signature
is_zennaki = False

# the number of neurons in the first layer
h1_size = 640

# number of neurons in the second layer
h2_size = 160  

In [3]:
def getTag(aString):
    """
        convert a string to a tag
    """
    tag = "O"
    if BINARY:
        if aString != "O":
            return "NE"
    else:
        tag = aString
    return tag
     

In [4]:
def load_corpus(file, max_nb_of_phrases):
    """
    Load a corpus stored in a file
    Input:
        - file: the name of the file of the corpus
        - max_nb_of_phases: maximal number of phrases to load
    
    Return:
        - a DataFrame representing the corpus
        - the number of phrases in the corpus
    """
    nb_of_phrases = 0
    dataset = {"word": [], "ne-tag": []}
    with open(file) as f:
        prev_line = None
        for cpt, line in enumerate(f):
            if cpt == 0:
                continue
            if nb_of_phrases == max_nb_of_phrases:
                break;

            l = line.strip()
            if len(l) == 0 and len(prev_line) != 0:
                nb_of_phrases += 1
                dataset["word"].append(line)
                dataset["ne-tag"].append(None)
            else:
                l = l.split("\t")
                if l[0] not in string.punctuation:
                    dataset["word"].append(l[0])
                    dataset["ne-tag"].append(ne_type(l[1]))
            prev_line = line.strip()
        
    return pd.DataFrame(dataset), nb_of_phrases+1

In [5]:
def log_normalization(fingerprints):
    fingerprints[fingerprints > 0] = 1 + np.log(fingerprints[fingerprints > 0])
    return fingerprints

In [6]:
def max_normalization(fingerprints):
    maxis = fingerprints.max(axis = 1)
    fingerprints[fingerprints > 0] = fingerprints[fingerprints > 0].apply(lambda row: 0.5 + 0.5 * row / maxis[row.index])
    return fingerprints

In [7]:
def mean_log_normalization(fingerprints):
    means = fingerprints.mean(axis=1)
    fingerprints[fingerprints > 0] = fingerprints[fingerprints > 0].apply(lambda row: (1 + np.log(row)) / 1 + np.log(means[row.index]))
    return fingerprints

In [8]:
def normalize(fingerprints):
    if normalization_strategy == "log":
        return log_normalization(fingerprints)
    elif normalization_strategy == "max":
        return max_normalization(fingerprints)
    elif normalization_strategy == "mean_log":
        return mean_log_normalization(fingerprints)
    elif normalization_strategy == "log_inv":
        fp = log_normalization(fingerprints)
        fp = 1 / (1 + fp)
        return fp
    elif normalization_strategy == "max_inv":
        fp = max_normalization(fingerprints)
        fp = 1 / (1 + fp)
        return fp
    elif normalization_strategy == "mean_log_inv":
        fp = mean_log_normalization(fingerprints)
        fp = 1 / (1 + fp)
        return fp
    elif normalization_strategy == "tf":
        fp = fingerprints
        fp[fp > 0] = 1 / fp[fp > 0]
        return fp
    else:
        return fingerprints

In [9]:
def corpus_fingerprint(aDataframe, nb_of_biphrases, total_nb_of_words):
    """
    Create the distributionnal signature of each word in the corpus
    Input:
        -aDataFrame: the corpus DataFrame
        -nb_of_biphrases: number of phrases in the corpus
    Return:
        a DataFrame: corpus fingerprint, the columns are the words in the corpus
    """
    print("Normalization strategy:", normalization_strategy)
    tf = {}
    fingerprints = {}
    current_bi_phrase_index = 0
    nb_word_in_corpus = aDataframe[aDataframe.word != "\n"].word.size
    for index, row in aDataframe.iterrows():
        if current_bi_phrase_index > nb_of_biphrases:
            break
            
        word = row['word']
        
        if word != "\n":
            if word not in fingerprints:
                fingerprints[word] = np.zeros(nb_of_biphrases, dtype=np.float32)
                tf[word] = 0
            tf[word] += 1
            fingerprints[word][current_bi_phrase_index] = 1
        else:
            current_bi_phrase_index += 1
            words_in_current_phrase = []
        
    if not is_zennaki:
        for word in fingerprints:
            for i in range(nb_of_biphrases):
                if fingerprints[word][i] != 0:
                    fingerprints[word][i] = total_nb_of_words / tf[word]
    ret = pd.DataFrame(fingerprints)
        
    return normalize(ret)

In [10]:
def corpus2trainingdata(aDataframe, fingerprintsDataFrame):
    """
    Convert corpus to training data => numpy array
    
    Input:
        -aDataFrame: Corpus dataframe
        -fingerprintsDataFrame: distributionnal signature of words in the corpus
    Return:
        (X, y): X is the array of words (signature) in the corpus and y is the corresponding labels (NE tags)
    """
    X = np.zeros((aDataframe.shape[0], fingerprintsDataFrame.shape[0]), dtype=np.int8)
    y = np.zeros(aDataframe.shape[0], dtype=np.int8)
    i = 0
    for row in aDataframe.iterrows():
        X[i] = fingerprintsDataFrame[row[1]['word']].values
        y[i] = tag2int[getTag(row[1]['ne-tag'])]
        i += 1
    return X, y

In [11]:
# A utility function to convert NE tags
def ne_type(aType):
    aType = aType.lower()
    if 'per' in aType:
        t =  'NE' if BINARY else 'PER' 
    elif 'loc' in aType:
        t =  'NE' if BINARY else 'LOC'
    elif 'org' in aType:
        t =  'NE' if BINARY else 'ORG'
    elif 'hour' in aType:
        t =  'NE' if BINARY else 'MISC'
    elif aType != 'o' and len(aType) > 0 :
        t =  'NE' if BINARY else 'MISC'
    else:
        t = 'O'
    return t

In [12]:
def P_R_F1(y_pred, y_true, neg_class):
    same = y_pred[y_true==y_pred]
    tp = same[same != neg_class].size
    nb_of_pos_exple = y_true[y_true != neg_class].size
    nb_of_pos_pred = y_pred[y_pred != neg_class].size
    p = r = f1 = 0
    try:
        p = np.round(tp*100/nb_of_pos_pred, 2)
    except ZeroDivisionError:
        print("number of correct positive predictions is 0")
        
    try:
        r = np.round(tp*100/nb_of_pos_exple, 2)
    except ZeroDivisionError:
        print("number of position exple is 0")
        
    try:
        f1 = np.round(2*r*p/(r+p), 2)
    except ZeroDivisionError:
        print("Recall and precision are 0")

    return p, r, f1

In [13]:
def shuffle_data(X, y):
    indices = [i for i in  range(X.shape[0])]
    np.random.shuffle(indices)
    return X[indices], y[indices]

In [14]:
def create_model(input_dim, output_dim):
    model = Sequential()
    model.add(Dense(h1_size, input_dim=input_dim, activation='sigmoid', name="hidden1"))
    model.add(Dense(h2_size, activation='sigmoid', name="hidden2"))
    if BINARY:
        model.add(Dense(1, activation='sigmoid', name="outputlayer"))
        model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['binary_accuracy'])
    else:
        model.add(Dense(output_dim, activation='softmax', name="outputlayer"))
        model.compile(loss='categorical_crossentropy', optimizer="rmsprop", metrics=['accuracy'])
    model.summary()
    return model

In [15]:
def train_model(model, X_train, y_train, X_val, y_val, epochs=epochs):
    # stop learning if the error is the same between two consecutive epochs
    early_stop = EarlyStopping(patience=20, verbose=2)
    
    # saving best model
    best_model_cp = ModelCheckpoint(best_model_file, save_best_only=True, verbose=1)
    
    model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=epochs, verbose=0, shuffle=shuffle, callbacks=[best_model_cp, early_stop])
    
    #loading and returning the best model
    return keras.models.load_model(best_model_file)

In [16]:
def predict(model, X, y, binary=BINARY):
    if BINARY:
        y_pred = np.round(model.predict(X))
        y_true = y
    else:
        predictions = model.predict(X)
        y_pred = np.array([np.argmax(p) for p in predictions])
        y_true = np.array([np.argmax(t) for t in y ])
    return y_true, y_pred

In [17]:
def model_performance(y_true, y_pred):
    return P_R_F1(y_pred, y_true, tag2int['O']) #precision, recall, f1-score

In [18]:
def model_performace_by_tag(y_true, y_pred, tag):
    p, r, f1 = 0, 0, 0
    
    eq = y_pred[y_pred==y_true]
    correctly_pred = eq[eq==tag].size
    try:
        p = np.round(100 * correctly_pred / y_pred[y_pred==tag].size, 2)
    except ZeroDivisionError:
        pass
    
    try:
        r = np.round(100 * correctly_pred / y_true[y_true==tag].size, 2)
    except ZeroDivisionError:
        pass
    
    try:
        f1 = np.round(2 * r * p / (r + p), 2)
    except ZeroDivisionError:
        pass
    
    return p, r, f1

In [19]:
def algoEval(X_train, y_train, X_val, y_val, X_ewo, y_ewo, epochs=epochs, model=None):
    """
    Train a model on (X, y) and validate on (X_val, y_val) then project on (X_ewo)
    """
    test_precision, train_precision, ewo_precision = [], [], []
    test_recall, train_recall, ewo_recall = [], [], []
    test_fscore, train_fscore, ewo_fscore = [], [], []
    
    test_result_by_tag = {}
    train_result_by_tag = {}
    ewo_result_by_tag = {}
    for t in tagSet:
        f1_key = "F1-"+t
        p_key = "P-"+t
        r_key = "R-"+t
        train_result_by_tag[f1_key], train_result_by_tag[p_key], train_result_by_tag[r_key] = [], [], []
        test_result_by_tag[f1_key], test_result_by_tag[p_key], test_result_by_tag[r_key] = [], [], []
        ewo_result_by_tag[f1_key], ewo_result_by_tag[p_key], ewo_result_by_tag[r_key] = [], [], []

    m = train_model(model, X_train, y_train, X_val, y_val, epochs=epochs)
        
    y_true, y_pred = predict(m, X_train, y_train)
    p_train, r_train, f1_train = model_performance(y_true, y_pred)
        
    y_true_val, y_pred_val = predict(m, X_val, y_val)
    p_val, r_val, f1_val = model_performance(y_true_val, y_pred_val)
        
    y_true_ewo, y_pred_ewo = predict(m, X_ewo, y_ewo) 
    p_ewo, r_ewo, f1_ewo = model_performance(y_true_ewo, y_pred_ewo)
        
    for t in range(len(int2tag)):
        f1_key = "F1-" + int2tag[t]
        p_key = "P-" + int2tag[t]
        r_key = "R-" + int2tag[t]
            
        p, r, f1 = model_performace_by_tag(y_true, y_pred, t)
        train_result_by_tag[p_key].append(p)
        train_result_by_tag[r_key].append(r)
        train_result_by_tag[f1_key].append(f1)
            
        p, r, f1 = model_performace_by_tag(y_true_val, y_pred_val, t)
        test_result_by_tag[p_key].append(p)
        test_result_by_tag[r_key].append(r)
        test_result_by_tag[f1_key].append(f1)
            
        p, r, f1 = model_performace_by_tag(y_true_ewo, y_pred_ewo, t)
        ewo_result_by_tag[p_key].append(p)
        ewo_result_by_tag[r_key].append(r)
        ewo_result_by_tag[f1_key].append(f1)
                
    test_precision.append(p_val)
    train_precision.append(p_train)
    ewo_precision.append(p_ewo)
        
    test_recall.append(r_val)
    train_recall.append(r_train)
    ewo_recall.append(r_ewo)
        
    test_fscore.append(f1_val)
    train_fscore.append(f1_train)
    ewo_fscore.append(f1_ewo)
    return pd.DataFrame({
        'P_val': test_precision, 
        'P_train': train_precision, 
        'P_ewo': ewo_precision, 'R_val': test_recall, 'R_train': train_recall, 
        'R_ewo': ewo_recall, 'F1-val': test_fscore, 'F1-train': train_fscore, 'F1-ewo': ewo_fscore}), pd.DataFrame(train_result_by_tag), pd.DataFrame(test_result_by_tag), pd.DataFrame(ewo_result_by_tag)

In [20]:
def algoCrossVal(X, y, X_ewo, y_ewo, k = 10, repeat=1): 
    """
    Traing a model with k-fold cross validation
    We train the model `repeat` times to check it's stability
    """
    block_size = int(X.shape[0] / k)   
    output = None
    model = None
    train_by_tags, test_by_tags, ewo_by_tags = None, None, None
    for it in range(repeat):
        print("AlgoCrossValIter -", it+1)
        model = create_model(X.shape[1], len(tagSet))
        results = None
        train_by_tagsTmp, test_by_tagsTmp, ewo_by_tagsTmp = None, None, None
        for i in range(k):
            X_val, y_val = X[i*block_size:i*block_size+block_size], y[i*block_size:i*block_size+block_size]
            X_train = np.concatenate((X[0:i*block_size], X[i*block_size+block_size:]))
            y_train = np.concatenate((y[0:i*block_size], y[i*block_size+block_size:]))

            X_train = X_train.reshape(X_train.shape[0], X_train.shape[1])
            X_val = X_val.reshape(X_val.shape[0], X_val.shape[1])

            result, train_by_tag, test_by_tag, ewo_by_tag = algoEval(X_train, y_train, X_val, y_val, X_ewo, y_ewo, model=model)
            if results is None:
                results = result.copy()
                train_by_tagsTmp, test_by_tagsTmp, ewo_by_tagsTmp = train_by_tag.copy(), test_by_tag.copy(), ewo_by_tag.copy()
            else:
                results = pd.concat([results, result], ignore_index=True)
                train_by_tagsTmp = pd.concat([train_by_tagsTmp, train_by_tag], ignore_index=True)
                test_by_tagsTmp = pd.concat([test_by_tagsTmp, test_by_tag], ignore_index=True)
                ewo_by_tagsTmp = pd.concat([ewo_by_tagsTmp, ewo_by_tag], ignore_index=True)
        
        if output is None:
            output = results.mean(axis=0).to_frame()
            train_by_tags = train_by_tagsTmp.mean(axis=0).to_frame()
            test_by_tags = test_by_tagsTmp.mean(axis=0).to_frame()
            ewo_by_tags = ewo_by_tagsTmp.mean(axis=0).to_frame()
        else:
            output = pd.concat([output, results.mean(axis=0).to_frame()], axis=1)
            train_by_tags = pd.concat([train_by_tags, train_by_tagsTmp.mean(axis=0).to_frame()], axis=1)
            test_by_tags = pd.concat([test_by_tags, test_by_tagsTmp.mean(axis=0).to_frame()], axis=1)
            ewo_by_tags = pd.concat([ewo_by_tags, ewo_by_tagsTmp.mean(axis=0).to_frame()], axis=1)

    return output, train_by_tags, test_by_tags, ewo_by_tags, model

In [21]:
en_corpus, en_nb_of_phrases = load_corpus(en_corpus_file, max_nb_of_phrases)

In [22]:
ewo_corpus, ewo_nb_of_phrases = load_corpus(ewo_corpus_file, max_nb_of_phrases)

In [23]:
en_nb_word = en_corpus[en_corpus.word != "\n"].word.size
ewo_nb_word = ewo_corpus[ewo_corpus.word != "\n"].word.size
corpus_nb_word = en_nb_word + ewo_nb_word
print("Nb word in ewondo", ewo_nb_word)
print("Nb word in english", en_nb_word)
print("Nb word in corpus", corpus_nb_word)

Nb word in ewondo 3570
Nb word in english 4170
Nb word in corpus 7740


In [24]:
en_corpus.head()
en_corpus.loc[en_corpus['ne-tag'] == 'ORG']

Unnamed: 0,word,ne-tag
1335,Sadducees,ORG


In [25]:
tagSet = en_corpus["ne-tag"].dropna().unique()
if BINARY:
    tagSet = ['NE', 'O']
tag2int = {j: i for i, j in enumerate(tagSet)}
int2tag = {i: j for i, j in enumerate(tagSet)}
print(tag2int)

{'O': 0, 'MISC': 1, 'PER': 2, 'LOC': 3, 'ORG': 4}


In [26]:
en_nb_of_phrases

210

In [27]:
en_corpus.describe()

Unnamed: 0,word,ne-tag
count,4379,4170
unique,904,5
top,the,O
freq,313,3779


In [28]:
en_corpus.head(10)

Unnamed: 0,word,ne-tag
0,The,O
1,Promise,O
2,of,O
3,the,O
4,Holy,MISC
5,Spirit,MISC
6,\n,
7,In,O
8,the,O
9,first,O


In [29]:
print("---------Whole corpus stats-----------")
corpus = en_corpus[en_corpus['word']!='\n']
total=0
for tag in tagSet:
    percent = corpus[corpus['ne-tag']==tag].shape[0] *100 / corpus.shape[0]
    total += percent
    print("{0} % = {1} %".format(tag, percent))
print("Total:", total, "%\n")

print("---------Vocabulary stats-------------")
total = 0
percents = {}
for tag in tagSet:
    percent = corpus[corpus['ne-tag']==tag].word.unique().size # * 100 / corpus.word.unique().size
    total += percent
    percents[tag] = percent
for t in percents:
    print("{0} % = {1} %".format(t, percents[t] * 100 /total))

O % = 86.3 %
MISC % = 2.4 %
PER % = 5.59 %
LOC % = 0.91 %
ORG % = 0.02 %


In [31]:
en_corpus[en_corpus.word == "\n"].shape

(209, 2)

In [32]:
print("Nb of bi-phrases", en_nb_of_phrases)

Nb of bi-phrases 210


In [33]:
en_fingerprints = corpus_fingerprint(en_corpus, en_nb_of_phrases, corpus_nb_word)

Normalization strategy: None


In [34]:
en_fingerprints.head(10)

Unnamed: 0,The,Promise,of,the,Holy,Spirit,In,first,book,O,...,considered,dream,She,save,fulfill,Immanuel,us),woke,sleep,knew
0,703.636353,7740.0,41.170212,24.728434,516.0,552.857117,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,24.728434,0.0,0.0,1290.0,3870.0,3870.0,3870.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,24.728434,516.0,552.857117,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,41.170212,24.728434,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,41.170212,24.728434,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,24.728434,516.0,552.857117,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,703.636353,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,24.728434,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,0.0,0.0,24.728434,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,41.170212,24.728434,516.0,552.857117,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [35]:
(4170 / en_nb_word)

1.0

In [36]:
en_corpus.shape

(4379, 2)

In [37]:
en_fingerprints['you'].values.shape

(210,)

In [38]:
en_corpus[en_corpus.word != "\n"].shape

(4170, 2)

In [39]:
if is_only_vocab:
    text = list(en_corpus[en_corpus.word != "\n"].word.unique())
else:
    text = list(en_corpus[en_corpus.word != "\n"].word)
en_vocab = pd.DataFrame({'text': text})
en_vocab.describe()

Unnamed: 0,text
count,903
unique,903
top,Gentiles
freq,1


In [40]:
if is_only_vocab:
    X = np.zeros((en_vocab.shape[0] * duplication, en_nb_of_phrases))
    target = np.zeros((en_vocab.shape[0] * duplication))
    p=0
    for i, row in en_vocab.iterrows():
        c = row.text
        for j in range(duplication):
            X[p] = en_fingerprints[c.split(" ")[0]]
            target[p] = tag2int[getTag(en_corpus[en_corpus.word == c.split(" ")[-1:][0]]['ne-tag'].iloc[0])]
            p+=1
    X, target = shuffle_data(X, target)
    print(X.shape, en_fingerprints.shape, target.shape)

(903, 210) (210, 903) (903,)


In [41]:
en_vocab[-20:]

Unnamed: 0,text
883,Eliud
884,Eleazar
885,Matthan
886,husband
887,fourteen
888,unwilling
889,shame
890,resolved
891,divorce
892,quietly


In [42]:
if not is_only_vocab:
    X, target = corpus2trainingdata(en_corpus[en_corpus.word != "\n"], en_fingerprints)

In [43]:
if shuffle:
    X, target = shuffle_data(X, target)

In [44]:
y = target.copy()
y[0:100]
if not BINARY:
    y = np_utils.to_categorical(y, len(tagSet))
y.shape

(903, 5)

In [45]:
from sklearn.decomposition import PCA

def visualize(X, y):
    pca = PCA(n_components=2)
    X_embeded = pca.fit_transform(X)
    plt.figure(figsize=(5, 5))
    plt.scatter(X_embeded[:, 0], X_embeded[:, 1], c=y)
    plt.legend()
    plt.show()

In [46]:
# visualize(X, target)

In [47]:
X_train, X_val, y_train, y_val = model_selection.train_test_split(X, y, test_size=0.33)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1])
X_val = X_val.reshape(X_val.shape[0], X_val.shape[1])
print("X_train.shape =", X_train.shape)
print("y_train.shape =", y_train.shape)
print("X_val.shape =", X_val.shape)
print("y_val.shape =", y_val.shape)

tTarget = np.array([np.argmax(yy) for yy in y_train])
vTarget = np.array([np.argmax(yy) for yy in y_val])

for tag in tagSet:
    print("{0} % in training data = {1} %".format(tag, np.round(tTarget[tTarget==tag2int[tag]].size * 100 / tTarget.shape[0], 2)))
    print("{0} % in validation data = {1} %".format(tag, np.round(vTarget[vTarget==tag2int[tag]].size * 100 / vTarget.shape[0], 2)))

X_train.shape = (605, 210)
y_train.shape = (605, 5)
X_val.shape = (298, 210)
y_val.shape = (298, 5)
O % in training data = 89.26 %
O % in validation data = 86.24 %
MISC % in training data = 1.16 %
MISC % in validation data = 1.34 %
PER % in training data = 7.6 %
PER % in validation data = 10.4 %
LOC % in training data = 1.98 %
LOC % in validation data = 1.68 %
ORG % in training data = 0.0 %
ORG % in validation data = 0.34 %


In [48]:
ewo_corpus.loc[ewo_corpus['ne-tag'] == 'PER']

Unnamed: 0,word,ne-tag
6,Teofil,PER
15,Yesus,PER
86,Yohannes,PER
104,Yesus,PER
230,Yesus,PER
...,...,...
3676,Maria,PER
3697,Yesus,PER
3740,Emmanuel,PER
3750,Yosef,PER


In [49]:
ewo_nb_of_phrases

210

In [50]:
print("---------Whole corpus stats-----------")
corpus = ewo_corpus[ewo_corpus['word']!='\n']
total=0
for tag in tagSet:
    percent = corpus[corpus['ne-tag']==tag].shape[0] *100 / corpus.shape[0]
    total += percent
    print("{0} % = {1} %".format(tag, percent))
print("Total:", total, "%\n")

print("---------Vocabulary stats-------------")
total = 0
percents = {}
for tag in tagSet:
    percent = corpus[corpus['ne-tag']==tag].word.unique().size # * 100 / corpus.word.unique().size
    total += percent
    percents[tag] = percent
for t in percents:
    print("{0} % = {1} %".format(t, percents[t] * 100 /total))

O % = 84.15 %
MISC % = 2.54 %
PER % = 6.69 %
LOC % = 1.03 %
ORG % = 0.05 %


In [52]:
ewo_corpus.describe()

Unnamed: 0,word,ne-tag
count,3779,3570
unique,1024,5
top,\n,O
freq,209,3180


In [53]:
ewo_corpus.head()

Unnamed: 0,word,ne-tag
0,Mfufub,MISC
1,Nsisim,MISC
2,ayi,O
3,sò,O
4,\n,


In [54]:
ewo_fingerprints = corpus_fingerprint(ewo_corpus, en_nb_of_phrases, corpus_nb_word)

Normalization strategy: None


In [55]:
if is_only_vocab:
    text = list(ewo_corpus[ewo_corpus.word != "\n"].word.unique())
else:
    text = list(ewo_corpus[ewo_corpus.word != "\n"].word)
ewo_vocab = pd.DataFrame({"text":text})

In [56]:
if is_only_vocab:
    X_ewo = np.zeros((ewo_vocab.shape[0] * duplication, en_nb_of_phrases))
    ewo_target = np.zeros((ewo_vocab.shape[0] * duplication))
    p=0
    for i, row in ewo_vocab.iterrows():
        c = row.text
        for j in range(duplication):
            X_ewo[p] = ewo_fingerprints[c.split(" ")[0]]
            ewo_target[p] = tag2int[getTag(ewo_corpus[ewo_corpus.word == c.split(" ")[-1:][0]]['ne-tag'].iloc[0])]
            p+=1
    X_ewo, ewo_target = shuffle_data(X_ewo, ewo_target)

In [57]:
ewo_vocab[-10:]

Unnamed: 0,text
1013,sik
1014,Ntud
1015,bëyole
1016,Emmanuel
1017,Avëbë
1018,angavëbë
1019,oyò
1020,angabende
1021,anganòṅ
1022,angayole


In [58]:
if not is_only_vocab:
    X_ewo, ewo_target = corpus2trainingdata(ewo_corpus[ewo_corpus.word != "\n"], ewo_fingerprints)

In [59]:
if shuffle:
    X_ewo, ewo_target = shuffle_data(X_ewo, ewo_target)

In [60]:
y_ewo = ewo_target.copy()
print(y_ewo.shape, len(ewo_vocab))

(1023,) 1023


In [61]:
X_ewo.shape

(1023, 210)

In [62]:
y_ewo = ewo_target.copy()
y_ewo[:20]
if not BINARY:
    y_ewo = np_utils.to_categorical(y_ewo)

In [63]:
X_ewo = X_ewo.reshape((X_ewo.shape[0], en_nb_of_phrases))

In [64]:
# model = create_model(X.shape[1], len(tagSet))
# resultEval, train_by_tag, test_by_tag, ewo_by_tag = algoEval(X_train, y_train, X_val, y_val, X_ewo, y_ewo, model=model)

In [65]:
# resultEval

In [66]:
# train_by_tag

In [67]:
# test_by_tag

In [68]:
# ewo_by_tag

In [69]:
# resultEval.mean()

In [70]:
# resultEval.std()

In [71]:
resultCrossVal, trainByTagResult, testByTagResult, ewoByTagResult, model = algoCrossVal(X, y, X_ewo, y_ewo, repeat=10)

AlgoCrossValIter - 1
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.58747, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.58747 to 0.48451, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.48451 to 0.47639, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.47639

Epoch 00005: val_loss d




Epoch 00001: val_loss improved from inf to 0.10962, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.10962 to 0.10595, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.10595

Epoch 00004: val_loss did not improve from 0.10595

Epoch 00005: val_loss did not improve from 0.10595

Epoch 00006: val_loss did not improve from 0.10595

Epoch 00007: val_loss did not improve from 0.10595

Epoch 00008: val_loss did not improve from 0.10595

Epoch 00009: val_loss did not improve from 0.10595

Epoch 00010: val_loss did not improve from 0.10595





Epoch 00001: val_loss improved from inf to 0.07142, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07142

Epoch 00003: val_loss did not improve from 0.07142

Epoch 00004: val_loss did not improve from 0.07142

Epoch 00005: val_loss did not improve from 0.07142

Epoch 00006: val_loss did not improve from 0.07142

Epoch 00007: val_loss did not improve from 0.07142

Epoch 00008: val_loss did not improve from 0.07142

Epoch 00009: val_loss did not improve from 0.07142

Epoch 00010: val_loss did not improve from 0.07142





Epoch 00001: val_loss improved from inf to 0.05047, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.05047 to 0.04315, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.04315

Epoch 00004: val_loss did not improve from 0.04315

Epoch 00005: val_loss did not improve from 0.04315

Epoch 00006: val_loss did not improve from 0.04315

Epoch 00007: val_loss did not improve from 0.04315

Epoch 00008: val_loss did not improve from 0.04315

Epoch 00009: val_loss did not improve from 0.04315

Epoch 00010: val_loss did not improve from 0.04315





Epoch 00001: val_loss improved from inf to 0.04918, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04918

Epoch 00003: val_loss improved from 0.04918 to 0.04761, saving model to best-model-conll.hdfs

Epoch 00004: val_loss improved from 0.04761 to 0.04360, saving model to best-model-conll.hdfs

Epoch 00005: val_loss did not improve from 0.04360

Epoch 00006: val_loss did not improve from 0.04360

Epoch 00007: val_loss did not improve from 0.04360

Epoch 00008: val_loss did not improve from 0.04360

Epoch 00009: val_loss did not improve from 0.04360

Epoch 00010: val_loss did not improve from 0.04360





Epoch 00001: val_loss improved from inf to 0.05884, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.05884 to 0.04419, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.04419

Epoch 00004: val_loss did not improve from 0.04419

Epoch 00005: val_loss improved from 0.04419 to 0.04151, saving model to best-model-conll.hdfs

Epoch 00006: val_loss did not improve from 0.04151

Epoch 00007: val_loss improved from 0.04151 to 0.03909, saving model to best-model-conll.hdfs

Epoch 00008: val_loss did not improve from 0.03909

Epoch 00009: val_loss did not improve from 0.03909

Epoch 00010: val_loss did not improve from 0.03909





Epoch 00001: val_loss improved from inf to 0.07076, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.07076 to 0.06511, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.06511 to 0.06502, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.06502

Epoch 00005: val_loss did not improve from 0.06502

Epoch 00006: val_loss did not improve from 0.06502

Epoch 00007: val_loss did not improve from 0.06502

Epoch 00008: val_loss did not improve from 0.06502

Epoch 00009: val_loss did not improve from 0.06502

Epoch 00010: val_loss did not improve from 0.06502

Epoch 00001: val_loss improved from inf to 0.04698, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04698

Epoch 00003: val_loss did not improve from 0.04698

Epoch 00004: val_loss did not improve from 0.04698

Epoch 00005: val_loss did not improve from 0.04698

Epoch 00006: val_loss did not improve from 0.04698

Epo




Epoch 00001: val_loss improved from inf to 0.10261, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10261

Epoch 00003: val_loss did not improve from 0.10261

Epoch 00004: val_loss did not improve from 0.10261

Epoch 00005: val_loss did not improve from 0.10261

Epoch 00006: val_loss did not improve from 0.10261

Epoch 00007: val_loss did not improve from 0.10261

Epoch 00008: val_loss did not improve from 0.10261

Epoch 00009: val_loss did not improve from 0.10261

Epoch 00010: val_loss did not improve from 0.10261

Epoch 00001: val_loss improved from inf to 0.11875, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.11875

Epoch 00003: val_loss did not improve from 0.11875

Epoch 00004: val_loss did not improve from 0.11875

Epoch 00005: val_loss did not improve from 0.11875

Epoch 00006: val_loss did not improve from 0.11875

Epoch 00007: val_loss did not improve from 0.11875

Epoch 00008: val_loss did not improve



AlgoCrossValIter - 2
Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.53468, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.53468

Epoch 00003: val_loss improved from 0.53468 to 0.52557, saving model to best-model-conll.hdfs

Epoch 00004: val_loss improved from 0.52557 to 0.45809, saving model to best-model-conll.hdfs

Epoch 00005: val_loss d




Epoch 00001: val_loss improved from inf to 0.07355, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07355

Epoch 00003: val_loss did not improve from 0.07355

Epoch 00004: val_loss did not improve from 0.07355

Epoch 00005: val_loss did not improve from 0.07355

Epoch 00006: val_loss did not improve from 0.07355

Epoch 00007: val_loss did not improve from 0.07355

Epoch 00008: val_loss did not improve from 0.07355

Epoch 00009: val_loss did not improve from 0.07355

Epoch 00010: val_loss did not improve from 0.07355





Epoch 00001: val_loss improved from inf to 0.09185, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09185

Epoch 00003: val_loss did not improve from 0.09185

Epoch 00004: val_loss did not improve from 0.09185

Epoch 00005: val_loss did not improve from 0.09185

Epoch 00006: val_loss did not improve from 0.09185

Epoch 00007: val_loss did not improve from 0.09185

Epoch 00008: val_loss did not improve from 0.09185

Epoch 00009: val_loss did not improve from 0.09185

Epoch 00010: val_loss did not improve from 0.09185





Epoch 00001: val_loss improved from inf to 0.02902, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.02902

Epoch 00003: val_loss improved from 0.02902 to 0.02897, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.02897

Epoch 00005: val_loss did not improve from 0.02897

Epoch 00006: val_loss did not improve from 0.02897

Epoch 00007: val_loss did not improve from 0.02897

Epoch 00008: val_loss did not improve from 0.02897

Epoch 00009: val_loss did not improve from 0.02897

Epoch 00010: val_loss did not improve from 0.02897





Epoch 00001: val_loss improved from inf to 0.04357, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.04357 to 0.04301, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.04301

Epoch 00004: val_loss did not improve from 0.04301

Epoch 00005: val_loss did not improve from 0.04301

Epoch 00006: val_loss did not improve from 0.04301

Epoch 00007: val_loss did not improve from 0.04301

Epoch 00008: val_loss did not improve from 0.04301

Epoch 00009: val_loss did not improve from 0.04301

Epoch 00010: val_loss did not improve from 0.04301





Epoch 00001: val_loss improved from inf to 0.03381, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.03381

Epoch 00003: val_loss did not improve from 0.03381

Epoch 00004: val_loss did not improve from 0.03381

Epoch 00005: val_loss did not improve from 0.03381

Epoch 00006: val_loss did not improve from 0.03381

Epoch 00007: val_loss did not improve from 0.03381

Epoch 00008: val_loss did not improve from 0.03381

Epoch 00009: val_loss did not improve from 0.03381

Epoch 00010: val_loss did not improve from 0.03381

Epoch 00001: val_loss improved from inf to 0.05743, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05743

Epoch 00003: val_loss did not improve from 0.05743

Epoch 00004: val_loss did not improve from 0.05743

Epoch 00005: val_loss did not improve from 0.05743

Epoch 00006: val_loss did not improve from 0.05743

Epoch 00007: val_loss did not improve from 0.05743

Epoch 00008: val_loss did not improve




Epoch 00001: val_loss improved from inf to 0.05554, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.05554 to 0.05446, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.05446

Epoch 00004: val_loss did not improve from 0.05446

Epoch 00005: val_loss did not improve from 0.05446

Epoch 00006: val_loss did not improve from 0.05446

Epoch 00007: val_loss did not improve from 0.05446

Epoch 00008: val_loss did not improve from 0.05446

Epoch 00009: val_loss did not improve from 0.05446

Epoch 00010: val_loss did not improve from 0.05446





Epoch 00001: val_loss improved from inf to 0.10275, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10275

Epoch 00003: val_loss did not improve from 0.10275

Epoch 00004: val_loss did not improve from 0.10275

Epoch 00005: val_loss did not improve from 0.10275

Epoch 00006: val_loss did not improve from 0.10275

Epoch 00007: val_loss did not improve from 0.10275

Epoch 00008: val_loss did not improve from 0.10275

Epoch 00009: val_loss did not improve from 0.10275

Epoch 00010: val_loss did not improve from 0.10275

Epoch 00001: val_loss improved from inf to 0.12305, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.12305

Epoch 00003: val_loss did not improve from 0.12305

Epoch 00004: val_loss did not improve from 0.12305

Epoch 00005: val_loss did not improve from 0.12305

Epoch 00006: val_loss did not improve from 0.12305

Epoch 00007: val_loss did not improve from 0.12305

Epoch 00008: val_loss did not improve



AlgoCrossValIter - 3
Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.56499, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.56499 to 0.53004, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.53004 to 0.45065, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.45065

Epoch 00005: val_loss d




Epoch 00001: val_loss improved from inf to 0.21334, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.21334 to 0.10846, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.10846

Epoch 00004: val_loss did not improve from 0.10846

Epoch 00005: val_loss did not improve from 0.10846

Epoch 00006: val_loss did not improve from 0.10846

Epoch 00007: val_loss did not improve from 0.10846

Epoch 00008: val_loss did not improve from 0.10846

Epoch 00009: val_loss did not improve from 0.10846

Epoch 00010: val_loss did not improve from 0.10846





Epoch 00001: val_loss improved from inf to 0.06639, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06639

Epoch 00003: val_loss did not improve from 0.06639

Epoch 00004: val_loss did not improve from 0.06639

Epoch 00005: val_loss did not improve from 0.06639

Epoch 00006: val_loss did not improve from 0.06639

Epoch 00007: val_loss did not improve from 0.06639

Epoch 00008: val_loss did not improve from 0.06639

Epoch 00009: val_loss did not improve from 0.06639

Epoch 00010: val_loss did not improve from 0.06639





Epoch 00001: val_loss improved from inf to 0.02220, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.02220

Epoch 00003: val_loss did not improve from 0.02220

Epoch 00004: val_loss did not improve from 0.02220

Epoch 00005: val_loss did not improve from 0.02220

Epoch 00006: val_loss did not improve from 0.02220

Epoch 00007: val_loss did not improve from 0.02220

Epoch 00008: val_loss did not improve from 0.02220

Epoch 00009: val_loss did not improve from 0.02220

Epoch 00010: val_loss did not improve from 0.02220





Epoch 00001: val_loss improved from inf to 0.05410, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.05410 to 0.04345, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.04345

Epoch 00004: val_loss did not improve from 0.04345

Epoch 00005: val_loss did not improve from 0.04345

Epoch 00006: val_loss did not improve from 0.04345

Epoch 00007: val_loss did not improve from 0.04345

Epoch 00008: val_loss did not improve from 0.04345

Epoch 00009: val_loss did not improve from 0.04345

Epoch 00010: val_loss did not improve from 0.04345





Epoch 00001: val_loss improved from inf to 0.03413, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.03413

Epoch 00003: val_loss did not improve from 0.03413

Epoch 00004: val_loss did not improve from 0.03413

Epoch 00005: val_loss did not improve from 0.03413

Epoch 00006: val_loss did not improve from 0.03413

Epoch 00007: val_loss did not improve from 0.03413

Epoch 00008: val_loss did not improve from 0.03413

Epoch 00009: val_loss did not improve from 0.03413

Epoch 00010: val_loss did not improve from 0.03413

Epoch 00001: val_loss improved from inf to 0.06747, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06747

Epoch 00003: val_loss did not improve from 0.06747

Epoch 00004: val_loss did not improve from 0.06747

Epoch 00005: val_loss did not improve from 0.06747

Epoch 00006: val_loss improved from 0.06747 to 0.06450, saving model to best-model-conll.hdfs

Epoch 00007: val_loss did not improve from 0.0




Epoch 00001: val_loss improved from inf to 0.04871, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04871

Epoch 00003: val_loss did not improve from 0.04871

Epoch 00004: val_loss did not improve from 0.04871

Epoch 00005: val_loss did not improve from 0.04871

Epoch 00006: val_loss did not improve from 0.04871

Epoch 00007: val_loss did not improve from 0.04871

Epoch 00008: val_loss did not improve from 0.04871

Epoch 00009: val_loss did not improve from 0.04871

Epoch 00010: val_loss did not improve from 0.04871





Epoch 00001: val_loss improved from inf to 0.11526, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.11526

Epoch 00003: val_loss did not improve from 0.11526

Epoch 00004: val_loss did not improve from 0.11526

Epoch 00005: val_loss did not improve from 0.11526

Epoch 00006: val_loss did not improve from 0.11526

Epoch 00007: val_loss did not improve from 0.11526

Epoch 00008: val_loss did not improve from 0.11526

Epoch 00009: val_loss did not improve from 0.11526

Epoch 00010: val_loss did not improve from 0.11526

Epoch 00001: val_loss improved from inf to 0.12777, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.12777

Epoch 00003: val_loss did not improve from 0.12777

Epoch 00004: val_loss did not improve from 0.12777

Epoch 00005: val_loss did not improve from 0.12777

Epoch 00006: val_loss did not improve from 0.12777

Epoch 00007: val_loss did not improve from 0.12777

Epoch 00008: val_loss did not improve



AlgoCrossValIter - 4
Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.54489, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.54489 to 0.45970, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.45970 to 0.43469, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.43469

Epoch 00005: val_loss d




Epoch 00001: val_loss improved from inf to 0.09571, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09571

Epoch 00003: val_loss did not improve from 0.09571

Epoch 00004: val_loss did not improve from 0.09571

Epoch 00005: val_loss did not improve from 0.09571

Epoch 00006: val_loss did not improve from 0.09571

Epoch 00007: val_loss did not improve from 0.09571

Epoch 00008: val_loss did not improve from 0.09571

Epoch 00009: val_loss did not improve from 0.09571

Epoch 00010: val_loss did not improve from 0.09571





Epoch 00001: val_loss improved from inf to 0.11707, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.11707

Epoch 00003: val_loss improved from 0.11707 to 0.10422, saving model to best-model-conll.hdfs

Epoch 00004: val_loss improved from 0.10422 to 0.09756, saving model to best-model-conll.hdfs

Epoch 00005: val_loss did not improve from 0.09756

Epoch 00006: val_loss did not improve from 0.09756

Epoch 00007: val_loss did not improve from 0.09756

Epoch 00008: val_loss did not improve from 0.09756

Epoch 00009: val_loss did not improve from 0.09756

Epoch 00010: val_loss did not improve from 0.09756





Epoch 00001: val_loss improved from inf to 0.02989, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.02989

Epoch 00003: val_loss did not improve from 0.02989

Epoch 00004: val_loss did not improve from 0.02989

Epoch 00005: val_loss did not improve from 0.02989

Epoch 00006: val_loss did not improve from 0.02989

Epoch 00007: val_loss did not improve from 0.02989

Epoch 00008: val_loss did not improve from 0.02989

Epoch 00009: val_loss did not improve from 0.02989

Epoch 00010: val_loss did not improve from 0.02989





Epoch 00001: val_loss improved from inf to 0.05394, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.05394 to 0.04051, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.04051

Epoch 00004: val_loss did not improve from 0.04051

Epoch 00005: val_loss did not improve from 0.04051

Epoch 00006: val_loss improved from 0.04051 to 0.04032, saving model to best-model-conll.hdfs

Epoch 00007: val_loss did not improve from 0.04032

Epoch 00008: val_loss did not improve from 0.04032

Epoch 00009: val_loss did not improve from 0.04032

Epoch 00010: val_loss did not improve from 0.04032





Epoch 00001: val_loss improved from inf to 0.04716, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.04716 to 0.03916, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.03916

Epoch 00004: val_loss did not improve from 0.03916

Epoch 00005: val_loss did not improve from 0.03916

Epoch 00006: val_loss did not improve from 0.03916

Epoch 00007: val_loss did not improve from 0.03916

Epoch 00008: val_loss did not improve from 0.03916

Epoch 00009: val_loss did not improve from 0.03916

Epoch 00010: val_loss improved from 0.03916 to 0.03819, saving model to best-model-conll.hdfs

Epoch 00001: val_loss improved from inf to 0.06773, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.06773 to 0.06724, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.06724 to 0.06295, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.06295

Epoch 00005: val_loss




Epoch 00001: val_loss improved from inf to 0.09667, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09667

Epoch 00003: val_loss did not improve from 0.09667

Epoch 00004: val_loss did not improve from 0.09667

Epoch 00005: val_loss did not improve from 0.09667

Epoch 00006: val_loss did not improve from 0.09667

Epoch 00007: val_loss did not improve from 0.09667

Epoch 00008: val_loss did not improve from 0.09667

Epoch 00009: val_loss did not improve from 0.09667

Epoch 00010: val_loss did not improve from 0.09667

Epoch 00001: val_loss improved from inf to 0.11661, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.11661

Epoch 00003: val_loss did not improve from 0.11661

Epoch 00004: val_loss did not improve from 0.11661

Epoch 00005: val_loss did not improve from 0.11661

Epoch 00006: val_loss did not improve from 0.11661

Epoch 00007: val_loss did not improve from 0.11661

Epoch 00008: val_loss did not improve



AlgoCrossValIter - 5
Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.51717, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.51717 to 0.48990, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.48990 to 0.45943, saving model to best-model-conll.hdfs

Epoch 00004: val_loss improved from 0.45943 to 0.45727, saving model to bes




Epoch 00001: val_loss improved from inf to 0.13624, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.13624

Epoch 00003: val_loss improved from 0.13624 to 0.12421, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.12421

Epoch 00005: val_loss did not improve from 0.12421

Epoch 00006: val_loss did not improve from 0.12421

Epoch 00007: val_loss did not improve from 0.12421

Epoch 00008: val_loss did not improve from 0.12421

Epoch 00009: val_loss did not improve from 0.12421

Epoch 00010: val_loss did not improve from 0.12421





Epoch 00001: val_loss improved from inf to 0.07071, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.07071 to 0.06853, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.06853

Epoch 00004: val_loss did not improve from 0.06853

Epoch 00005: val_loss did not improve from 0.06853

Epoch 00006: val_loss did not improve from 0.06853

Epoch 00007: val_loss did not improve from 0.06853

Epoch 00008: val_loss did not improve from 0.06853

Epoch 00009: val_loss did not improve from 0.06853

Epoch 00010: val_loss did not improve from 0.06853





Epoch 00001: val_loss improved from inf to 0.02896, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.02896

Epoch 00003: val_loss did not improve from 0.02896

Epoch 00004: val_loss did not improve from 0.02896

Epoch 00005: val_loss did not improve from 0.02896

Epoch 00006: val_loss did not improve from 0.02896

Epoch 00007: val_loss did not improve from 0.02896

Epoch 00008: val_loss did not improve from 0.02896

Epoch 00009: val_loss did not improve from 0.02896

Epoch 00010: val_loss did not improve from 0.02896





Epoch 00001: val_loss improved from inf to 0.04092, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04092

Epoch 00003: val_loss did not improve from 0.04092

Epoch 00004: val_loss did not improve from 0.04092

Epoch 00005: val_loss did not improve from 0.04092

Epoch 00006: val_loss did not improve from 0.04092

Epoch 00007: val_loss did not improve from 0.04092

Epoch 00008: val_loss did not improve from 0.04092

Epoch 00009: val_loss did not improve from 0.04092

Epoch 00010: val_loss did not improve from 0.04092





Epoch 00001: val_loss improved from inf to 0.03561, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.03561

Epoch 00003: val_loss did not improve from 0.03561

Epoch 00004: val_loss did not improve from 0.03561

Epoch 00005: val_loss did not improve from 0.03561

Epoch 00006: val_loss did not improve from 0.03561

Epoch 00007: val_loss did not improve from 0.03561

Epoch 00008: val_loss did not improve from 0.03561

Epoch 00009: val_loss did not improve from 0.03561

Epoch 00010: val_loss did not improve from 0.03561





Epoch 00001: val_loss improved from inf to 0.06383, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06383

Epoch 00003: val_loss did not improve from 0.06383

Epoch 00004: val_loss did not improve from 0.06383

Epoch 00005: val_loss did not improve from 0.06383

Epoch 00006: val_loss did not improve from 0.06383

Epoch 00007: val_loss did not improve from 0.06383

Epoch 00008: val_loss did not improve from 0.06383

Epoch 00009: val_loss did not improve from 0.06383

Epoch 00010: val_loss did not improve from 0.06383





Epoch 00001: val_loss improved from inf to 0.05020, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05020

Epoch 00003: val_loss did not improve from 0.05020

Epoch 00004: val_loss did not improve from 0.05020

Epoch 00005: val_loss did not improve from 0.05020

Epoch 00006: val_loss did not improve from 0.05020

Epoch 00007: val_loss did not improve from 0.05020

Epoch 00008: val_loss did not improve from 0.05020

Epoch 00009: val_loss did not improve from 0.05020

Epoch 00010: val_loss did not improve from 0.05020





Epoch 00001: val_loss improved from inf to 0.10165, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10165

Epoch 00003: val_loss did not improve from 0.10165

Epoch 00004: val_loss did not improve from 0.10165

Epoch 00005: val_loss did not improve from 0.10165

Epoch 00006: val_loss did not improve from 0.10165

Epoch 00007: val_loss did not improve from 0.10165

Epoch 00008: val_loss did not improve from 0.10165

Epoch 00009: val_loss did not improve from 0.10165

Epoch 00010: val_loss did not improve from 0.10165

Epoch 00001: val_loss improved from inf to 0.12397, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.12397

Epoch 00003: val_loss did not improve from 0.12397

Epoch 00004: val_loss did not improve from 0.12397

Epoch 00005: val_loss did not improve from 0.12397

Epoch 00006: val_loss did not improve from 0.12397

Epoch 00007: val_loss did not improve from 0.12397

Epoch 00008: val_loss did not improve



AlgoCrossValIter - 6
Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.54845, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.54845 to 0.51248, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.51248

Epoch 00004: val_loss improved from 0.51248 to 0.48860, saving model to best-model-conll.hdfs

Epoch 00005: val_loss i




Epoch 00001: val_loss improved from inf to 0.14160, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.14160

Epoch 00003: val_loss improved from 0.14160 to 0.12970, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.12970

Epoch 00005: val_loss did not improve from 0.12970

Epoch 00006: val_loss did not improve from 0.12970

Epoch 00007: val_loss did not improve from 0.12970

Epoch 00008: val_loss did not improve from 0.12970

Epoch 00009: val_loss did not improve from 0.12970

Epoch 00010: val_loss did not improve from 0.12970





Epoch 00001: val_loss improved from inf to 0.08257, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08257

Epoch 00003: val_loss did not improve from 0.08257

Epoch 00004: val_loss did not improve from 0.08257

Epoch 00005: val_loss did not improve from 0.08257

Epoch 00006: val_loss did not improve from 0.08257

Epoch 00007: val_loss did not improve from 0.08257

Epoch 00008: val_loss did not improve from 0.08257

Epoch 00009: val_loss did not improve from 0.08257

Epoch 00010: val_loss did not improve from 0.08257





Epoch 00001: val_loss improved from inf to 0.03600, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.03600

Epoch 00003: val_loss did not improve from 0.03600

Epoch 00004: val_loss did not improve from 0.03600

Epoch 00005: val_loss did not improve from 0.03600

Epoch 00006: val_loss did not improve from 0.03600

Epoch 00007: val_loss did not improve from 0.03600

Epoch 00008: val_loss did not improve from 0.03600

Epoch 00009: val_loss did not improve from 0.03600

Epoch 00010: val_loss did not improve from 0.03600





Epoch 00001: val_loss improved from inf to 0.04526, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04526

Epoch 00003: val_loss improved from 0.04526 to 0.04283, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.04283

Epoch 00005: val_loss did not improve from 0.04283

Epoch 00006: val_loss did not improve from 0.04283

Epoch 00007: val_loss did not improve from 0.04283

Epoch 00008: val_loss did not improve from 0.04283

Epoch 00009: val_loss did not improve from 0.04283

Epoch 00010: val_loss did not improve from 0.04283





Epoch 00001: val_loss improved from inf to 0.03881, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.03881 to 0.03437, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.03437

Epoch 00004: val_loss did not improve from 0.03437

Epoch 00005: val_loss did not improve from 0.03437

Epoch 00006: val_loss did not improve from 0.03437

Epoch 00007: val_loss did not improve from 0.03437

Epoch 00008: val_loss did not improve from 0.03437

Epoch 00009: val_loss did not improve from 0.03437

Epoch 00010: val_loss did not improve from 0.03437

Epoch 00001: val_loss improved from inf to 0.06033, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06033

Epoch 00003: val_loss did not improve from 0.06033

Epoch 00004: val_loss did not improve from 0.06033

Epoch 00005: val_loss did not improve from 0.06033

Epoch 00006: val_loss did not improve from 0.06033

Epoch 00007: val_loss did not improve from 0.0




Epoch 00001: val_loss improved from inf to 0.04543, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04543

Epoch 00003: val_loss did not improve from 0.04543

Epoch 00004: val_loss did not improve from 0.04543

Epoch 00005: val_loss did not improve from 0.04543

Epoch 00006: val_loss did not improve from 0.04543

Epoch 00007: val_loss did not improve from 0.04543

Epoch 00008: val_loss did not improve from 0.04543

Epoch 00009: val_loss did not improve from 0.04543

Epoch 00010: val_loss did not improve from 0.04543





Epoch 00001: val_loss improved from inf to 0.10139, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10139

Epoch 00003: val_loss did not improve from 0.10139

Epoch 00004: val_loss did not improve from 0.10139

Epoch 00005: val_loss did not improve from 0.10139

Epoch 00006: val_loss did not improve from 0.10139

Epoch 00007: val_loss did not improve from 0.10139

Epoch 00008: val_loss did not improve from 0.10139

Epoch 00009: val_loss did not improve from 0.10139

Epoch 00010: val_loss did not improve from 0.10139

Epoch 00001: val_loss improved from inf to 0.11580, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.11580

Epoch 00003: val_loss did not improve from 0.11580

Epoch 00004: val_loss did not improve from 0.11580

Epoch 00005: val_loss did not improve from 0.11580

Epoch 00006: val_loss did not improve from 0.11580

Epoch 00007: val_loss did not improve from 0.11580

Epoch 00008: val_loss did not improve



AlgoCrossValIter - 7
Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.63886, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.63886 to 0.45998, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.45998

Epoch 00004: val_loss did not improve from 0.45998

Epoch 00005: val_loss did not improve from 0.45998

Epoch 00006: v




Epoch 00001: val_loss improved from inf to 0.14196, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.14196 to 0.11448, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.11448

Epoch 00004: val_loss did not improve from 0.11448

Epoch 00005: val_loss did not improve from 0.11448

Epoch 00006: val_loss did not improve from 0.11448

Epoch 00007: val_loss did not improve from 0.11448

Epoch 00008: val_loss did not improve from 0.11448

Epoch 00009: val_loss did not improve from 0.11448

Epoch 00010: val_loss did not improve from 0.11448

Epoch 00001: val_loss improved from inf to 0.06720, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06720

Epoch 00003: val_loss did not improve from 0.06720

Epoch 00004: val_loss did not improve from 0.06720

Epoch 00005: val_loss did not improve from 0.06720

Epoch 00006: val_loss did not improve from 0.06720

Epoch 00007: val_loss did not improve from 0.0




Epoch 00001: val_loss improved from inf to 0.02887, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.02887

Epoch 00003: val_loss did not improve from 0.02887

Epoch 00004: val_loss did not improve from 0.02887

Epoch 00005: val_loss did not improve from 0.02887

Epoch 00006: val_loss did not improve from 0.02887

Epoch 00007: val_loss did not improve from 0.02887

Epoch 00008: val_loss did not improve from 0.02887

Epoch 00009: val_loss did not improve from 0.02887

Epoch 00010: val_loss did not improve from 0.02887





Epoch 00001: val_loss improved from inf to 0.05881, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.05881 to 0.05002, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.05002 to 0.04213, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.04213

Epoch 00005: val_loss did not improve from 0.04213

Epoch 00006: val_loss did not improve from 0.04213

Epoch 00007: val_loss did not improve from 0.04213

Epoch 00008: val_loss did not improve from 0.04213

Epoch 00009: val_loss did not improve from 0.04213

Epoch 00010: val_loss did not improve from 0.04213





Epoch 00001: val_loss improved from inf to 0.04525, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04525

Epoch 00003: val_loss did not improve from 0.04525

Epoch 00004: val_loss did not improve from 0.04525

Epoch 00005: val_loss improved from 0.04525 to 0.04136, saving model to best-model-conll.hdfs

Epoch 00006: val_loss did not improve from 0.04136

Epoch 00007: val_loss did not improve from 0.04136

Epoch 00008: val_loss did not improve from 0.04136

Epoch 00009: val_loss did not improve from 0.04136

Epoch 00010: val_loss improved from 0.04136 to 0.03924, saving model to best-model-conll.hdfs





Epoch 00001: val_loss improved from inf to 0.05421, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05421

Epoch 00003: val_loss did not improve from 0.05421

Epoch 00004: val_loss did not improve from 0.05421

Epoch 00005: val_loss did not improve from 0.05421

Epoch 00006: val_loss did not improve from 0.05421

Epoch 00007: val_loss did not improve from 0.05421

Epoch 00008: val_loss did not improve from 0.05421

Epoch 00009: val_loss did not improve from 0.05421

Epoch 00010: val_loss did not improve from 0.05421





Epoch 00001: val_loss improved from inf to 0.05399, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05399

Epoch 00003: val_loss did not improve from 0.05399

Epoch 00004: val_loss did not improve from 0.05399

Epoch 00005: val_loss did not improve from 0.05399

Epoch 00006: val_loss did not improve from 0.05399

Epoch 00007: val_loss did not improve from 0.05399

Epoch 00008: val_loss did not improve from 0.05399

Epoch 00009: val_loss did not improve from 0.05399

Epoch 00010: val_loss did not improve from 0.05399





Epoch 00001: val_loss improved from inf to 0.09784, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09784

Epoch 00003: val_loss did not improve from 0.09784

Epoch 00004: val_loss did not improve from 0.09784

Epoch 00005: val_loss did not improve from 0.09784

Epoch 00006: val_loss did not improve from 0.09784

Epoch 00007: val_loss did not improve from 0.09784

Epoch 00008: val_loss did not improve from 0.09784

Epoch 00009: val_loss did not improve from 0.09784

Epoch 00010: val_loss did not improve from 0.09784

Epoch 00001: val_loss improved from inf to 0.11639, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.11639

Epoch 00003: val_loss did not improve from 0.11639

Epoch 00004: val_loss did not improve from 0.11639

Epoch 00005: val_loss did not improve from 0.11639

Epoch 00006: val_loss did not improve from 0.11639

Epoch 00007: val_loss did not improve from 0.11639

Epoch 00008: val_loss did not improve



AlgoCrossValIter - 8
Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.54522, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.54522

Epoch 00003: val_loss did not improve from 0.54522

Epoch 00004: val_loss improved from 0.54522 to 0.45443, saving model to best-model-conll.hdfs

Epoch 00005: val_loss did not improve from 0.45443

Epoch 00006: v




Epoch 00001: val_loss improved from inf to 0.09262, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09262

Epoch 00003: val_loss did not improve from 0.09262

Epoch 00004: val_loss did not improve from 0.09262

Epoch 00005: val_loss did not improve from 0.09262

Epoch 00006: val_loss did not improve from 0.09262

Epoch 00007: val_loss did not improve from 0.09262

Epoch 00008: val_loss did not improve from 0.09262

Epoch 00009: val_loss did not improve from 0.09262

Epoch 00010: val_loss did not improve from 0.09262





Epoch 00001: val_loss improved from inf to 0.07886, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07886

Epoch 00003: val_loss did not improve from 0.07886

Epoch 00004: val_loss did not improve from 0.07886

Epoch 00005: val_loss did not improve from 0.07886

Epoch 00006: val_loss did not improve from 0.07886

Epoch 00007: val_loss did not improve from 0.07886

Epoch 00008: val_loss did not improve from 0.07886

Epoch 00009: val_loss did not improve from 0.07886

Epoch 00010: val_loss did not improve from 0.07886





Epoch 00001: val_loss improved from inf to 0.06674, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.06674 to 0.03613, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.03613

Epoch 00004: val_loss improved from 0.03613 to 0.03472, saving model to best-model-conll.hdfs

Epoch 00005: val_loss did not improve from 0.03472

Epoch 00006: val_loss did not improve from 0.03472

Epoch 00007: val_loss did not improve from 0.03472

Epoch 00008: val_loss did not improve from 0.03472

Epoch 00009: val_loss did not improve from 0.03472

Epoch 00010: val_loss did not improve from 0.03472





Epoch 00001: val_loss improved from inf to 0.05694, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.05694 to 0.03690, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.03690

Epoch 00004: val_loss did not improve from 0.03690

Epoch 00005: val_loss did not improve from 0.03690

Epoch 00006: val_loss did not improve from 0.03690

Epoch 00007: val_loss did not improve from 0.03690

Epoch 00008: val_loss did not improve from 0.03690

Epoch 00009: val_loss did not improve from 0.03690

Epoch 00010: val_loss did not improve from 0.03690





Epoch 00001: val_loss improved from inf to 0.04647, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.04647 to 0.03868, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.03868 to 0.03702, saving model to best-model-conll.hdfs

Epoch 00004: val_loss improved from 0.03702 to 0.03563, saving model to best-model-conll.hdfs

Epoch 00005: val_loss did not improve from 0.03563

Epoch 00006: val_loss did not improve from 0.03563

Epoch 00007: val_loss did not improve from 0.03563

Epoch 00008: val_loss did not improve from 0.03563

Epoch 00009: val_loss did not improve from 0.03563

Epoch 00010: val_loss did not improve from 0.03563





Epoch 00001: val_loss improved from inf to 0.06088, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06088

Epoch 00003: val_loss did not improve from 0.06088

Epoch 00004: val_loss did not improve from 0.06088

Epoch 00005: val_loss did not improve from 0.06088

Epoch 00006: val_loss did not improve from 0.06088

Epoch 00007: val_loss did not improve from 0.06088

Epoch 00008: val_loss did not improve from 0.06088

Epoch 00009: val_loss did not improve from 0.06088

Epoch 00010: val_loss did not improve from 0.06088





Epoch 00001: val_loss improved from inf to 0.04426, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04426

Epoch 00003: val_loss did not improve from 0.04426

Epoch 00004: val_loss did not improve from 0.04426

Epoch 00005: val_loss did not improve from 0.04426

Epoch 00006: val_loss did not improve from 0.04426

Epoch 00007: val_loss did not improve from 0.04426

Epoch 00008: val_loss did not improve from 0.04426

Epoch 00009: val_loss did not improve from 0.04426

Epoch 00010: val_loss did not improve from 0.04426





Epoch 00001: val_loss improved from inf to 0.09508, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09508

Epoch 00003: val_loss did not improve from 0.09508

Epoch 00004: val_loss did not improve from 0.09508

Epoch 00005: val_loss did not improve from 0.09508

Epoch 00006: val_loss did not improve from 0.09508

Epoch 00007: val_loss did not improve from 0.09508

Epoch 00008: val_loss did not improve from 0.09508

Epoch 00009: val_loss did not improve from 0.09508

Epoch 00010: val_loss did not improve from 0.09508

Epoch 00001: val_loss improved from inf to 0.11871, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.11871

Epoch 00003: val_loss did not improve from 0.11871

Epoch 00004: val_loss did not improve from 0.11871

Epoch 00005: val_loss did not improve from 0.11871

Epoch 00006: val_loss did not improve from 0.11871

Epoch 00007: val_loss did not improve from 0.11871

Epoch 00008: val_loss did not improve



AlgoCrossValIter - 9
Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.64600, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.64600

Epoch 00003: val_loss improved from 0.64600 to 0.53437, saving model to best-model-conll.hdfs

Epoch 00004: val_loss improved from 0.53437 to 0.52661, saving model to best-model-conll.hdfs

Epoch 00005: val_loss d




Epoch 00001: val_loss improved from inf to 0.10278, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10278

Epoch 00003: val_loss did not improve from 0.10278

Epoch 00004: val_loss did not improve from 0.10278

Epoch 00005: val_loss did not improve from 0.10278

Epoch 00006: val_loss did not improve from 0.10278

Epoch 00007: val_loss did not improve from 0.10278

Epoch 00008: val_loss did not improve from 0.10278

Epoch 00009: val_loss did not improve from 0.10278

Epoch 00010: val_loss did not improve from 0.10278





Epoch 00001: val_loss improved from inf to 0.08392, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08392

Epoch 00003: val_loss did not improve from 0.08392

Epoch 00004: val_loss did not improve from 0.08392

Epoch 00005: val_loss did not improve from 0.08392

Epoch 00006: val_loss did not improve from 0.08392

Epoch 00007: val_loss did not improve from 0.08392

Epoch 00008: val_loss did not improve from 0.08392

Epoch 00009: val_loss did not improve from 0.08392

Epoch 00010: val_loss did not improve from 0.08392





Epoch 00001: val_loss improved from inf to 0.03399, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.03399 to 0.03173, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.03173

Epoch 00004: val_loss did not improve from 0.03173

Epoch 00005: val_loss did not improve from 0.03173

Epoch 00006: val_loss did not improve from 0.03173

Epoch 00007: val_loss did not improve from 0.03173

Epoch 00008: val_loss did not improve from 0.03173

Epoch 00009: val_loss did not improve from 0.03173

Epoch 00010: val_loss did not improve from 0.03173





Epoch 00001: val_loss improved from inf to 0.04373, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04373

Epoch 00003: val_loss did not improve from 0.04373

Epoch 00004: val_loss did not improve from 0.04373

Epoch 00005: val_loss did not improve from 0.04373

Epoch 00006: val_loss did not improve from 0.04373

Epoch 00007: val_loss did not improve from 0.04373

Epoch 00008: val_loss did not improve from 0.04373

Epoch 00009: val_loss did not improve from 0.04373

Epoch 00010: val_loss did not improve from 0.04373

Epoch 00001: val_loss improved from inf to 0.05035, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.05035 to 0.03571, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.03571

Epoch 00004: val_loss did not improve from 0.03571

Epoch 00005: val_loss did not improve from 0.03571

Epoch 00006: val_loss did not improve from 0.03571

Epoch 00007: val_loss did not improve from 0.0




Epoch 00001: val_loss improved from inf to 0.07649, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.07649 to 0.06545, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.06545

Epoch 00004: val_loss did not improve from 0.06545

Epoch 00005: val_loss improved from 0.06545 to 0.06429, saving model to best-model-conll.hdfs

Epoch 00006: val_loss did not improve from 0.06429

Epoch 00007: val_loss did not improve from 0.06429

Epoch 00008: val_loss did not improve from 0.06429

Epoch 00009: val_loss did not improve from 0.06429

Epoch 00010: val_loss did not improve from 0.06429

Epoch 00001: val_loss improved from inf to 0.05205, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05205

Epoch 00003: val_loss did not improve from 0.05205

Epoch 00004: val_loss did not improve from 0.05205

Epoch 00005: val_loss did not improve from 0.05205

Epoch 00006: val_loss did not improve from 0.05205

Epo




Epoch 00001: val_loss improved from inf to 0.09898, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09898

Epoch 00003: val_loss did not improve from 0.09898

Epoch 00004: val_loss did not improve from 0.09898

Epoch 00005: val_loss did not improve from 0.09898

Epoch 00006: val_loss did not improve from 0.09898

Epoch 00007: val_loss did not improve from 0.09898

Epoch 00008: val_loss did not improve from 0.09898

Epoch 00009: val_loss did not improve from 0.09898

Epoch 00010: val_loss did not improve from 0.09898

Epoch 00001: val_loss improved from inf to 0.11959, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.11959

Epoch 00003: val_loss did not improve from 0.11959

Epoch 00004: val_loss did not improve from 0.11959

Epoch 00005: val_loss did not improve from 0.11959

Epoch 00006: val_loss did not improve from 0.11959

Epoch 00007: val_loss did not improve from 0.11959

Epoch 00008: val_loss did not improve



AlgoCrossValIter - 10
Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.58274, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.58274 to 0.52556, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.52556

Epoch 00004: val_loss did not improve from 0.52556

Epoch 00005: val_loss improved from 0.52556 to 0.52432, saving m




Epoch 00001: val_loss improved from inf to 0.16262, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.16262

Epoch 00003: val_loss improved from 0.16262 to 0.14001, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.14001

Epoch 00005: val_loss did not improve from 0.14001

Epoch 00006: val_loss did not improve from 0.14001

Epoch 00007: val_loss did not improve from 0.14001

Epoch 00008: val_loss did not improve from 0.14001

Epoch 00009: val_loss did not improve from 0.14001

Epoch 00010: val_loss did not improve from 0.14001





Epoch 00001: val_loss improved from inf to 0.07016, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07016

Epoch 00003: val_loss did not improve from 0.07016

Epoch 00004: val_loss did not improve from 0.07016

Epoch 00005: val_loss did not improve from 0.07016

Epoch 00006: val_loss did not improve from 0.07016

Epoch 00007: val_loss did not improve from 0.07016

Epoch 00008: val_loss did not improve from 0.07016

Epoch 00009: val_loss did not improve from 0.07016

Epoch 00010: val_loss did not improve from 0.07016





Epoch 00001: val_loss improved from inf to 0.03755, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.03755 to 0.03597, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.03597

Epoch 00004: val_loss did not improve from 0.03597

Epoch 00005: val_loss did not improve from 0.03597

Epoch 00006: val_loss did not improve from 0.03597

Epoch 00007: val_loss did not improve from 0.03597

Epoch 00008: val_loss did not improve from 0.03597

Epoch 00009: val_loss did not improve from 0.03597

Epoch 00010: val_loss did not improve from 0.03597





Epoch 00001: val_loss improved from inf to 0.04222, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04222

Epoch 00003: val_loss did not improve from 0.04222

Epoch 00004: val_loss did not improve from 0.04222

Epoch 00005: val_loss did not improve from 0.04222

Epoch 00006: val_loss did not improve from 0.04222

Epoch 00007: val_loss did not improve from 0.04222

Epoch 00008: val_loss did not improve from 0.04222

Epoch 00009: val_loss did not improve from 0.04222

Epoch 00010: val_loss did not improve from 0.04222





Epoch 00001: val_loss improved from inf to 0.04060, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04060

Epoch 00003: val_loss did not improve from 0.04060

Epoch 00004: val_loss improved from 0.04060 to 0.03984, saving model to best-model-conll.hdfs

Epoch 00005: val_loss did not improve from 0.03984

Epoch 00006: val_loss did not improve from 0.03984

Epoch 00007: val_loss did not improve from 0.03984

Epoch 00008: val_loss did not improve from 0.03984

Epoch 00009: val_loss did not improve from 0.03984

Epoch 00010: val_loss did not improve from 0.03984





Epoch 00001: val_loss improved from inf to 0.06043, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06043

Epoch 00003: val_loss did not improve from 0.06043

Epoch 00004: val_loss did not improve from 0.06043

Epoch 00005: val_loss did not improve from 0.06043

Epoch 00006: val_loss did not improve from 0.06043

Epoch 00007: val_loss did not improve from 0.06043

Epoch 00008: val_loss did not improve from 0.06043

Epoch 00009: val_loss did not improve from 0.06043

Epoch 00010: val_loss did not improve from 0.06043

Epoch 00001: val_loss improved from inf to 0.05001, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05001

Epoch 00003: val_loss did not improve from 0.05001

Epoch 00004: val_loss did not improve from 0.05001

Epoch 00005: val_loss did not improve from 0.05001

Epoch 00006: val_loss did not improve from 0.05001

Epoch 00007: val_loss did not improve from 0.05001

Epoch 00008: val_loss did not improve




Epoch 00001: val_loss improved from inf to 0.10380, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10380

Epoch 00003: val_loss did not improve from 0.10380

Epoch 00004: val_loss did not improve from 0.10380

Epoch 00005: val_loss did not improve from 0.10380

Epoch 00006: val_loss did not improve from 0.10380

Epoch 00007: val_loss did not improve from 0.10380

Epoch 00008: val_loss did not improve from 0.10380

Epoch 00009: val_loss did not improve from 0.10380

Epoch 00010: val_loss did not improve from 0.10380

Epoch 00001: val_loss improved from inf to 0.12761, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.12761 to 0.12732, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.12732

Epoch 00004: val_loss did not improve from 0.12732

Epoch 00005: val_loss did not improve from 0.12732

Epoch 00006: val_loss did not improve from 0.12732

Epoch 00007: val_loss did not improve from 0.1



In [72]:
normalization_strategy

In [73]:
resultCrossVal.to_csv("results.csv")
resultCrossVal

Unnamed: 0,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9
P_val,88.188,84.92,87.376,86.733,84.036,85.633,85.049,85.174,82.936,84.236
P_train,88.851,87.733,88.107,89.069,87.499,87.093,86.906,89.862,87.655,89.861
P_ewo,78.954,77.999,79.563,79.548,77.792,78.583,77.31,80.685,76.642,79.751
R_val,76.335,81.077,78.168,77.161,79.919,81.078,78.987,76.32,82.745,75.933
R_train,82.602,85.522,80.375,81.262,85.777,86.255,79.313,82.775,85.961,83.619
R_ewo,64.628,66.296,62.223,63.332,66.852,67.594,62.501,63.797,67.592,64.722
F1-val,80.529,81.656,80.317,80.463,81.36,82.562,79.91,79.623,82.298,78.35
F1-train,85.205,86.435,83.001,84.361,86.427,86.625,81.17,85.836,86.715,86.37
F1-ewo,70.496,71.369,68.647,69.937,71.565,72.578,67.419,70.646,71.735,70.881


In [74]:
resultCrossVal.mean(axis=1).to_frame()

Unnamed: 0,0
P_val,85.4281
P_train,88.2636
P_ewo,78.6827
R_val,78.7723
R_train,83.3461
R_ewo,64.9537
F1-val,80.7068
F1-train,85.2145
F1-ewo,70.5273


In [75]:
resultCrossVal.std(axis=1).to_frame()

Unnamed: 0,0
P_val,1.605275
P_train,1.08396
P_ewo,1.250211
R_val,2.374601
R_train,2.501873
R_ewo,2.025866
F1-val,1.287044
F1-train,1.853176
F1-ewo,1.532423


In [76]:
# trainByTagResult.to_csv("results/train-by-tag.csv")
# trainByTagResult

In [77]:
# trainByTagResult.mean(axis=1).to_frame()

In [78]:
# trainByTagResult.std(axis=1).to_frame()

In [79]:
# testByTagResult.to_csv("results/test-by-tag.csv")
# testByTagResult

In [80]:
# testByTagResult.mean(axis=1).to_frame()

In [81]:
# testByTagResult.std(axis=1).to_frame()

In [82]:
# ewoByTagResult.to_csv("results/ewo-by-tag.csv")

In [83]:
# ewoByTagResult = pd.read_csv("results/ewo-by-tag.csv", index_col=0)
# ewoByTagResult

In [84]:
# ewoByTagResult.mean(axis=1).to_frame()

In [85]:
# ewoByTagResult.std(axis=1).to_frame()

In [86]:
# columns = en_fingerprints.columns

# print("Pred", "Real", "Freq", "Word", sep="\t")
# for c in columns:
#     prediction = model.predict(en_fingerprints[c].values.reshape((1, 210)))
#     pred_tag = int2tag[np.argmax(prediction)]
#     real_tag = en_corpus[en_corpus.word == c].iloc[0]['ne-tag']
    
#     if pred_tag != real_tag:
#         print(pred_tag, real_tag, en_fingerprints[c].max(), c, sep="\t")