In [1]:
# import
import keras
import sys
import numpy as np
import string
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import SimpleRNN, Dense
from keras.utils import np_utils, plot_model
from keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn import model_selection
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, precision_recall_fscore_support
import h5py as h5py

Using TensorFlow backend.


In [2]:
# if we are doeing binary classification. That means say if a token is a named entity or not
BINARY = False

# number of epochs for training
epochs = 10

# the english side of the corpus
en_corpus_file = "corpus-en.txt"

# the ewondo side of the corpus
ewo_corpus_file = "corpus-ewo.txt"

# name of the file to same the model 
best_model_file = "best-model-conll.hdfs"

# The maximal number of phrases to use
max_nb_of_phrases =  -1

# the maximal number of duplicates for each word in the corpus
duplication = 1

# wether we are using only the vocabulary, ro redundancy
is_only_vocab = True

# if word should be shuffle or not
shuffle = is_only_vocab

# normalization strategy
# log, max, mean_log, mean_log_inv, mean_inv
# max_inv, log_inv, tf
# None: for no normalization
normalization_strategy = None

# if we are using the Zennaki et al. signature
is_zennaki = False

# the number of neurons in the first layer
h1_size = 640

# number of neurons in the second layer
h2_size = 160

In [3]:
def getTag(aString):
    """
        convert a string to a tag
    """
    tag = "O"
    if BINARY:
        if aString != "O":
            return "NE"
    else:
        tag = aString
    return tag
     

In [4]:
def load_corpus(file, max_nb_of_phrases):
    """
    Load a corpus stored in a file
    Input:
        - file: the name of the file of the corpus
        - max_nb_of_phases: maximal number of phrases to load
    
    Return:
        - a DataFrame representing the corpus
        - the number of phrases in the corpus
    """
    nb_of_phrases = 0
    dataset = {"word": [], "ne-tag": []}
    with open(file) as f:
        prev_line = None
        for cpt, line in enumerate(f):
            if cpt == 0:
                continue
            if nb_of_phrases == max_nb_of_phrases:
                break;

            l = line.strip()
            if len(l) == 0 and len(prev_line) != 0:
                nb_of_phrases += 1
                dataset["word"].append(line)
                dataset["ne-tag"].append(None)
            else:
                l = l.split("\t")
                if l[0] not in string.punctuation:
                    dataset["word"].append(l[0])
                    dataset["ne-tag"].append(ne_type(l[1]))
            prev_line = line.strip()
        
    return pd.DataFrame(dataset), nb_of_phrases+1

In [5]:
def log_normalization(fingerprints):
    fingerprints[fingerprints > 0] = 1 + np.log(fingerprints[fingerprints > 0])
    return fingerprints

In [6]:
def max_normalization(fingerprints):
    maxis = fingerprints.max(axis = 1)
    fingerprints[fingerprints > 0] = fingerprints[fingerprints > 0].apply(lambda row: 0.5 + 0.5 * row / maxis[row.index])
    return fingerprints

In [7]:
def mean_log_normalization(fingerprints):
    means = fingerprints.mean(axis=1)
    fingerprints[fingerprints > 0] = fingerprints[fingerprints > 0].apply(lambda row: (1 + np.log(row)) / 1 + np.log(means[row.index]))
    return fingerprints

In [8]:
def normalize(fingerprints):
    if normalization_strategy == "log":
        return log_normalization(fingerprints)
    elif normalization_strategy == "max":
        return max_normalization(fingerprints)
    elif normalization_strategy == "mean_log":
        return mean_log_normalization(fingerprints)
    elif normalization_strategy == "log_inv":
        fp = log_normalization(fingerprints)
        fp = 1 / (1 + fp)
        return fp
    elif normalization_strategy == "max_inv":
        fp = max_normalization(fingerprints)
        fp = 1 / (1 + fp)
        return fp
    elif normalization_strategy == "mean_log_inv":
        fp = mean_log_normalization(fingerprints)
        fp = 1 / (1 + fp)
        return fp
    elif normalization_strategy == "tf":
        fp = fingerprints
        fp[fp > 0] = 1 / fp[fp > 0]
        return fp
    else:
        return fingerprints

In [9]:
def corpus_fingerprint(aDataframe, nb_of_biphrases, total_nb_of_words):
    """
    Create the distributionnal signature of each word in the corpus
    Input:
        -aDataFrame: the corpus DataFrame
        -nb_of_biphrases: number of phrases in the corpus
    Return:
        a DataFrame: corpus fingerprint, the columns are the words in the corpus
    """
    print("Normalization strategy:", normalization_strategy)
    tf = {}
    fingerprints = {}
    current_bi_phrase_index = 0
    nb_word_in_corpus = aDataframe[aDataframe.word != "\n"].word.size
    for index, row in aDataframe.iterrows():
        if current_bi_phrase_index > nb_of_biphrases:
            break
            
        word = row['word']
        
        if word != "\n":
            if word not in fingerprints:
                fingerprints[word] = np.zeros(nb_of_biphrases, dtype=np.float32)
                tf[word] = 0
            tf[word] += 1
            fingerprints[word][current_bi_phrase_index] = 1
        else:
            current_bi_phrase_index += 1
            words_in_current_phrase = []
        
    if not is_zennaki:
        for word in fingerprints:
            for i in range(nb_of_biphrases):
                if fingerprints[word][i] != 0:
                    fingerprints[word][i] = total_nb_of_words / tf[word]
    ret = pd.DataFrame(fingerprints)
        
    return normalize(ret)

In [10]:
def corpus2trainingdata(aDataframe, fingerprintsDataFrame):
    """
    Convert corpus to training data => numpy array
    
    Input:
        -aDataFrame: Corpus dataframe
        -fingerprintsDataFrame: distributionnal signature of words in the corpus
    Return:
        (X, y): X is the array of words (signature) in the corpus and y is the corresponding labels (NE tags)
    """
    X = np.zeros((aDataframe.shape[0], fingerprintsDataFrame.shape[0]), dtype=np.int8)
    y = np.zeros(aDataframe.shape[0], dtype=np.int8)
    i = 0
    for row in aDataframe.iterrows():
        X[i] = fingerprintsDataFrame[row[1]['word']].values
        y[i] = tag2int[getTag(row[1]['ne-tag'])]
        i += 1
    return X, y

In [11]:
# A utility function to convert NE tags
def ne_type(aType):
    aType = aType.lower()
    if 'per' in aType:
        t =  'NE' if BINARY else 'PER' 
    elif 'loc' in aType:
        t =  'NE' if BINARY else 'LOC'
    elif 'org' in aType:
        t =  'NE' if BINARY else 'ORG'
    elif 'hour' in aType:
        t =  'NE' if BINARY else 'MISC'
    elif aType != 'o' and len(aType) > 0 :
        t =  'NE' if BINARY else 'MISC'
    else:
        t = 'O'
    return t

In [12]:
def P_R_F1(y_pred, y_true, neg_class):
    same = y_pred[y_true==y_pred]
    tp = same[same != neg_class].size
    nb_of_pos_exple = y_true[y_true != neg_class].size
    nb_of_pos_pred = y_pred[y_pred != neg_class].size
    p = r = f1 = 0
    try:
        p = np.round(tp*100/nb_of_pos_pred, 2)
    except ZeroDivisionError:
        print("number of correct positive predictions is 0")
        
    try:
        r = np.round(tp*100/nb_of_pos_exple, 2)
    except ZeroDivisionError:
        print("number of position exple is 0")
        
    try:
        f1 = np.round(2*r*p/(r+p), 2)
    except ZeroDivisionError:
        print("Recall and precision are 0")

    return p, r, f1

In [13]:
def shuffle_data(X, y):
    indices = [i for i in  range(X.shape[0])]
    np.random.shuffle(indices)
    return X[indices], y[indices]

In [14]:
def create_model(input_dim, output_dim):
    model = Sequential()
    model.add(Dense(h1_size, input_dim=input_dim, activation='sigmoid', name="hidden1"))
    model.add(Dense(h2_size, activation='sigmoid', name="hidden2"))
    if BINARY:
        model.add(Dense(1, activation='sigmoid', name="outputlayer"))
        model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['binary_accuracy'])
    else:
        model.add(Dense(output_dim, activation='softmax', name="outputlayer"))
        model.compile(loss='categorical_crossentropy', optimizer="rmsprop", metrics=['accuracy'])
    model.summary()
    return model

In [15]:
def train_model(model, X_train, y_train, X_val, y_val, epochs=epochs):
    # stop learning if the error is the same between two consecutive epochs
    early_stop = EarlyStopping(patience=20, verbose=2)
    
    # saving best model
    best_model_cp = ModelCheckpoint(best_model_file, save_best_only=True, verbose=1)
    
    model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=epochs, verbose=0, shuffle=shuffle, callbacks=[best_model_cp, early_stop])
    
    #loading and returning the best model
    return keras.models.load_model(best_model_file)

In [16]:
def predict(model, X, y, binary=BINARY):
    if BINARY:
        y_pred = np.round(model.predict(X))
        y_true = y
    else:
        predictions = model.predict(X)
        y_pred = np.array([np.argmax(p) for p in predictions])
        y_true = np.array([np.argmax(t) for t in y ])
    return y_true, y_pred

In [17]:
def model_performance(y_true, y_pred):
    return P_R_F1(y_pred, y_true, tag2int['O']) #precision, recall, f1-score

In [18]:
def model_performace_by_tag(y_true, y_pred, tag):
    p, r, f1 = 0, 0, 0
    
    eq = y_pred[y_pred==y_true]
    correctly_pred = eq[eq==tag].size
    try:
        p = np.round(100 * correctly_pred / y_pred[y_pred==tag].size, 2)
    except ZeroDivisionError:
        pass
    
    try:
        r = np.round(100 * correctly_pred / y_true[y_true==tag].size, 2)
    except ZeroDivisionError:
        pass
    
    try:
        f1 = np.round(2 * r * p / (r + p), 2)
    except ZeroDivisionError:
        pass
    
    return p, r, f1

In [19]:
def algoEval(X_train, y_train, X_val, y_val, X_ewo, y_ewo, epochs=epochs, model=None):
    """
    Train a model on (X, y) and validate on (X_val, y_val) then project on (X_ewo)
    """
    test_precision, train_precision, ewo_precision = [], [], []
    test_recall, train_recall, ewo_recall = [], [], []
    test_fscore, train_fscore, ewo_fscore = [], [], []
    
    test_result_by_tag = {}
    train_result_by_tag = {}
    ewo_result_by_tag = {}
    for t in tagSet:
        f1_key = "F1-"+t
        p_key = "P-"+t
        r_key = "R-"+t
        train_result_by_tag[f1_key], train_result_by_tag[p_key], train_result_by_tag[r_key] = [], [], []
        test_result_by_tag[f1_key], test_result_by_tag[p_key], test_result_by_tag[r_key] = [], [], []
        ewo_result_by_tag[f1_key], ewo_result_by_tag[p_key], ewo_result_by_tag[r_key] = [], [], []

    m = train_model(model, X_train, y_train, X_val, y_val, epochs=epochs)
        
    y_true, y_pred = predict(m, X_train, y_train)
    p_train, r_train, f1_train = model_performance(y_true, y_pred)
        
    y_true_val, y_pred_val = predict(m, X_val, y_val)
    p_val, r_val, f1_val = model_performance(y_true_val, y_pred_val)
        
    y_true_ewo, y_pred_ewo = predict(m, X_ewo, y_ewo) 
    p_ewo, r_ewo, f1_ewo = model_performance(y_true_ewo, y_pred_ewo)
        
    for t in range(len(int2tag)):
        f1_key = "F1-" + int2tag[t]
        p_key = "P-" + int2tag[t]
        r_key = "R-" + int2tag[t]
            
        p, r, f1 = model_performace_by_tag(y_true, y_pred, t)
        train_result_by_tag[p_key].append(p)
        train_result_by_tag[r_key].append(r)
        train_result_by_tag[f1_key].append(f1)
            
        p, r, f1 = model_performace_by_tag(y_true_val, y_pred_val, t)
        test_result_by_tag[p_key].append(p)
        test_result_by_tag[r_key].append(r)
        test_result_by_tag[f1_key].append(f1)
            
        p, r, f1 = model_performace_by_tag(y_true_ewo, y_pred_ewo, t)
        ewo_result_by_tag[p_key].append(p)
        ewo_result_by_tag[r_key].append(r)
        ewo_result_by_tag[f1_key].append(f1)
                
    test_precision.append(p_val)
    train_precision.append(p_train)
    ewo_precision.append(p_ewo)
        
    test_recall.append(r_val)
    train_recall.append(r_train)
    ewo_recall.append(r_ewo)
        
    test_fscore.append(f1_val)
    train_fscore.append(f1_train)
    ewo_fscore.append(f1_ewo)
    return pd.DataFrame({
        'P_val': test_precision, 
        'P_train': train_precision, 
        'P_ewo': ewo_precision, 'R_val': test_recall, 'R_train': train_recall, 
        'R_ewo': ewo_recall, 'F1-val': test_fscore, 'F1-train': train_fscore, 'F1-ewo': ewo_fscore}), pd.DataFrame(train_result_by_tag), pd.DataFrame(test_result_by_tag), pd.DataFrame(ewo_result_by_tag)

In [20]:
def algoCrossVal(X, y, X_ewo, y_ewo, k = 10, repeat=1): 
    """
    Traing a model with k-fold cross validation
    We train the model `repeat` times to check it's stability
    """
    block_size = int(X.shape[0] / k)   
    output = None
    model = None
    train_by_tags, test_by_tags, ewo_by_tags = None, None, None
    for it in range(repeat):
        print("AlgoCrossValIter -", it+1)
        model = create_model(X.shape[1], len(tagSet))
        results = None
        train_by_tagsTmp, test_by_tagsTmp, ewo_by_tagsTmp = None, None, None
        for i in range(k):
            X_val, y_val = X[i*block_size:i*block_size+block_size], y[i*block_size:i*block_size+block_size]
            X_train = np.concatenate((X[0:i*block_size], X[i*block_size+block_size:]))
            y_train = np.concatenate((y[0:i*block_size], y[i*block_size+block_size:]))

            X_train = X_train.reshape(X_train.shape[0], X_train.shape[1])
            X_val = X_val.reshape(X_val.shape[0], X_val.shape[1])

            result, train_by_tag, test_by_tag, ewo_by_tag = algoEval(X_train, y_train, X_val, y_val, X_ewo, y_ewo, model=model)
            if results is None:
                results = result.copy()
                train_by_tagsTmp, test_by_tagsTmp, ewo_by_tagsTmp = train_by_tag.copy(), test_by_tag.copy(), ewo_by_tag.copy()
            else:
                results = pd.concat([results, result], ignore_index=True)
                train_by_tagsTmp = pd.concat([train_by_tagsTmp, train_by_tag], ignore_index=True)
                test_by_tagsTmp = pd.concat([test_by_tagsTmp, test_by_tag], ignore_index=True)
                ewo_by_tagsTmp = pd.concat([ewo_by_tagsTmp, ewo_by_tag], ignore_index=True)
        
        if output is None:
            output = results.mean(axis=0).to_frame()
            train_by_tags = train_by_tagsTmp.mean(axis=0).to_frame()
            test_by_tags = test_by_tagsTmp.mean(axis=0).to_frame()
            ewo_by_tags = ewo_by_tagsTmp.mean(axis=0).to_frame()
        else:
            output = pd.concat([output, results.mean(axis=0).to_frame()], axis=1)
            train_by_tags = pd.concat([train_by_tags, train_by_tagsTmp.mean(axis=0).to_frame()], axis=1)
            test_by_tags = pd.concat([test_by_tags, test_by_tagsTmp.mean(axis=0).to_frame()], axis=1)
            ewo_by_tags = pd.concat([ewo_by_tags, ewo_by_tagsTmp.mean(axis=0).to_frame()], axis=1)

    return output, train_by_tags, test_by_tags, ewo_by_tags, model

In [21]:
en_corpus, en_nb_of_phrases = load_corpus(en_corpus_file, max_nb_of_phrases)

In [22]:
ewo_corpus, ewo_nb_of_phrases = load_corpus(ewo_corpus_file, max_nb_of_phrases)

In [23]:
en_nb_word = en_corpus[en_corpus.word != "\n"].word.size
ewo_nb_word = ewo_corpus[ewo_corpus.word != "\n"].word.size
corpus_nb_word = en_nb_word + ewo_nb_word
print("Nb word in ewondo", ewo_nb_word)
print("Nb word in english", en_nb_word)
print("Nb word in corpus", corpus_nb_word)

Nb word in ewondo 3570
Nb word in english 4170
Nb word in corpus 7740


In [24]:
en_corpus.head()
en_corpus.loc[en_corpus['ne-tag'] == 'ORG']

Unnamed: 0,word,ne-tag
1335,Sadducees,ORG


In [25]:
tagSet = en_corpus["ne-tag"].dropna().unique()
if BINARY:
    tagSet = ['NE', 'O']
tag2int = {j: i for i, j in enumerate(tagSet)}
int2tag = {i: j for i, j in enumerate(tagSet)}
print(tag2int)

{'O': 0, 'MISC': 1, 'PER': 2, 'LOC': 3, 'ORG': 4}


In [26]:
en_nb_of_phrases

210

In [27]:
en_corpus.describe()

Unnamed: 0,word,ne-tag
count,4379,4170
unique,904,5
top,the,O
freq,313,3779


In [28]:
en_corpus.head(10)

Unnamed: 0,word,ne-tag
0,The,O
1,Promise,O
2,of,O
3,the,O
4,Holy,MISC
5,Spirit,MISC
6,\n,
7,In,O
8,the,O
9,first,O


In [29]:
print("---------Whole corpus stats-----------")
corpus = en_corpus[en_corpus['word']!='\n']
total=0
for tag in tagSet:
    percent = corpus[corpus['ne-tag']==tag].shape[0] *100 / corpus.shape[0]
    total += percent
    print("{0} % = {1} %".format(tag, percent))
print("Total:", total, "%\n")

print("---------Vocabulary stats-------------")
total = 0
percents = {}
for tag in tagSet:
    percent = corpus[corpus['ne-tag']==tag].word.unique().size # * 100 / corpus.word.unique().size
    total += percent
    percents[tag] = percent
for t in percents:
    print("{0} % = {1} %".format(t, percents[t] * 100 /total))

---------Whole corpus stats-----------
O % = 90.62350119904077 %
MISC % = 2.5179856115107913 %
PER % = 5.875299760191846 %
LOC % = 0.9592326139088729 %
ORG % = 0.023980815347721823 %
Total: 100.00000000000001 %

---------Vocabulary stats-------------
O % = 87.32394366197182 %
MISC % = 1.8418201516793067 %
PER % = 8.775731310942579 %
LOC % = 1.9501625135427951 %
ORG % = 0.10834236186348863 %


In [30]:
en_corpus[en_corpus.word == "\n"].shape

(209, 2)

In [31]:
print("Nb of bi-phrases", en_nb_of_phrases)

Nb of bi-phrases 210


In [32]:
en_fingerprints = corpus_fingerprint(en_corpus, en_nb_of_phrases, corpus_nb_word)

Normalization strategy: None


In [33]:
en_fingerprints.head(10)

Unnamed: 0,The,Promise,of,the,Holy,Spirit,In,first,book,O,...,considered,dream,She,save,fulfill,Immanuel,us),woke,sleep,knew
0,703.636353,7740.0,41.170212,24.728434,516.0,552.857117,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,24.728434,0.0,0.0,1290.0,3870.0,3870.0,3870.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,24.728434,516.0,552.857117,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,41.170212,24.728434,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,41.170212,24.728434,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,24.728434,516.0,552.857117,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,703.636353,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,24.728434,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,0.0,0.0,24.728434,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,41.170212,24.728434,516.0,552.857117,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [34]:
(4170 / en_nb_word)

1.0

In [35]:
en_corpus.shape

(4379, 2)

In [36]:
en_fingerprints['you'].values.shape

(210,)

In [37]:
en_corpus[en_corpus.word != "\n"].shape

(4170, 2)

In [38]:
if is_only_vocab:
    text = list(en_corpus[en_corpus.word != "\n"].word.unique())
else:
    text = list(en_corpus[en_corpus.word != "\n"].word)
en_vocab = pd.DataFrame({'text': text})
en_vocab.describe()

Unnamed: 0,text
count,903
unique,903
top,desolate
freq,1


In [39]:
if is_only_vocab:
    X = np.zeros((en_vocab.shape[0] * duplication, en_nb_of_phrases))
    target = np.zeros((en_vocab.shape[0] * duplication))
    p=0
    for i, row in en_vocab.iterrows():
        c = row.text
        for j in range(duplication):
            X[p] = en_fingerprints[c.split(" ")[0]]
            target[p] = tag2int[getTag(en_corpus[en_corpus.word == c.split(" ")[-1:][0]]['ne-tag'].iloc[0])]
            p+=1
    X, target = shuffle_data(X, target)
    print(X.shape, en_fingerprints.shape, target.shape)

(903, 210) (210, 903) (903,)


In [40]:
en_vocab[-20:]

Unnamed: 0,text
883,Eliud
884,Eleazar
885,Matthan
886,husband
887,fourteen
888,unwilling
889,shame
890,resolved
891,divorce
892,quietly


In [41]:
if not is_only_vocab:
    X, target = corpus2trainingdata(en_corpus[en_corpus.word != "\n"], en_fingerprints)

In [42]:
if shuffle:
    X, target = shuffle_data(X, target)

In [43]:
y = target.copy()
y[0:100]
if not BINARY:
    y = np_utils.to_categorical(y, len(tagSet))
y.shape

(903, 5)

In [44]:
from sklearn.decomposition import PCA

def visualize(X, y):
    pca = PCA(n_components=2)
    X_embeded = pca.fit_transform(X)
    plt.figure(figsize=(5, 5))
    plt.scatter(X_embeded[:, 0], X_embeded[:, 1], c=y)
    plt.legend()
    plt.show()

In [45]:
# visualize(X, target)

In [46]:
X_train, X_val, y_train, y_val = model_selection.train_test_split(X, y, test_size=0.33)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1])
X_val = X_val.reshape(X_val.shape[0], X_val.shape[1])
print("X_train.shape =", X_train.shape)
print("y_train.shape =", y_train.shape)
print("X_val.shape =", X_val.shape)
print("y_val.shape =", y_val.shape)

tTarget = np.array([np.argmax(yy) for yy in y_train])
vTarget = np.array([np.argmax(yy) for yy in y_val])

for tag in tagSet:
    print("{0} % in training data = {1} %".format(tag, np.round(tTarget[tTarget==tag2int[tag]].size * 100 / tTarget.shape[0], 2)))
    print("{0} % in validation data = {1} %".format(tag, np.round(vTarget[vTarget==tag2int[tag]].size * 100 / vTarget.shape[0], 2)))

X_train.shape = (605, 210)
y_train.shape = (605, 5)
X_val.shape = (298, 210)
y_val.shape = (298, 5)
O % in training data = 88.6 %
O % in validation data = 87.58 %
MISC % in training data = 0.99 %
MISC % in validation data = 1.68 %
PER % in training data = 7.93 %
PER % in validation data = 9.73 %
LOC % in training data = 2.31 %
LOC % in validation data = 1.01 %
ORG % in training data = 0.17 %
ORG % in validation data = 0.0 %


In [47]:
ewo_corpus.loc[ewo_corpus['ne-tag'] == 'PER']

Unnamed: 0,word,ne-tag
6,Teofil,PER
15,Yesus,PER
86,Yohannes,PER
104,Yesus,PER
230,Yesus,PER
...,...,...
3676,Maria,PER
3697,Yesus,PER
3740,Emmanuel,PER
3750,Yosef,PER


In [48]:
ewo_nb_of_phrases

210

In [49]:
print("---------Whole corpus stats-----------")
corpus = ewo_corpus[ewo_corpus['word']!='\n']
total=0
for tag in tagSet:
    percent = corpus[corpus['ne-tag']==tag].shape[0] *100 / corpus.shape[0]
    total += percent
    print("{0} % = {1} %".format(tag, percent))
print("Total:", total, "%\n")

print("---------Vocabulary stats-------------")
total = 0
percents = {}
for tag in tagSet:
    percent = corpus[corpus['ne-tag']==tag].word.unique().size # * 100 / corpus.word.unique().size
    total += percent
    percents[tag] = percent
for t in percents:
    print("{0} % = {1} %".format(t, percents[t] * 100 /total))

---------Whole corpus stats-----------
O % = 89.07563025210084 %
MISC % = 2.689075630252101 %
PER % = 7.086834733893557 %
LOC % = 1.0924369747899159 %
ORG % = 0.056022408963585436 %
Total: 100.0 %

---------Vocabulary stats-------------
O % = 88.64292589027912 %
MISC % = 1.1549566891241578 %
PER % = 8.180943214629451 %
LOC % = 1.8286814244465832 %
ORG % = 0.19249278152069296 %


In [50]:
ewo_corpus.describe()

Unnamed: 0,word,ne-tag
count,3779,3570
unique,1024,5
top,\n,O
freq,209,3180


In [51]:
ewo_corpus.head()

Unnamed: 0,word,ne-tag
0,Mfufub,MISC
1,Nsisim,MISC
2,ayi,O
3,sò,O
4,\n,


In [52]:
ewo_fingerprints = corpus_fingerprint(ewo_corpus, en_nb_of_phrases, corpus_nb_word)

Normalization strategy: None


In [53]:
if is_only_vocab:
    text = list(ewo_corpus[ewo_corpus.word != "\n"].word.unique())
else:
    text = list(ewo_corpus[ewo_corpus.word != "\n"].word)
ewo_vocab = pd.DataFrame({"text":text})

In [54]:
if is_only_vocab:
    X_ewo = np.zeros((ewo_vocab.shape[0] * duplication, en_nb_of_phrases))
    ewo_target = np.zeros((ewo_vocab.shape[0] * duplication))
    p=0
    for i, row in ewo_vocab.iterrows():
        c = row.text
        for j in range(duplication):
            X_ewo[p] = ewo_fingerprints[c.split(" ")[0]]
            ewo_target[p] = tag2int[getTag(ewo_corpus[ewo_corpus.word == c.split(" ")[-1:][0]]['ne-tag'].iloc[0])]
            p+=1
    X_ewo, ewo_target = shuffle_data(X_ewo, ewo_target)

In [55]:
ewo_vocab[-10:]

Unnamed: 0,text
1013,sik
1014,Ntud
1015,bëyole
1016,Emmanuel
1017,Avëbë
1018,angavëbë
1019,oyò
1020,angabende
1021,anganòṅ
1022,angayole


In [56]:
if not is_only_vocab:
    X_ewo, ewo_target = corpus2trainingdata(ewo_corpus[ewo_corpus.word != "\n"], ewo_fingerprints)

In [57]:
if shuffle:
    X_ewo, ewo_target = shuffle_data(X_ewo, ewo_target)

In [58]:
y_ewo = ewo_target.copy()
print(y_ewo.shape, len(ewo_vocab))

(1023,) 1023


In [59]:
X_ewo.shape

(1023, 210)

In [60]:
y_ewo = ewo_target.copy()
y_ewo[:20]
if not BINARY:
    y_ewo = np_utils.to_categorical(y_ewo)

In [61]:
X_ewo = X_ewo.reshape((X_ewo.shape[0], en_nb_of_phrases))

In [62]:
# model = create_model(X.shape[1], len(tagSet))
# resultEval, train_by_tag, test_by_tag, ewo_by_tag = algoEval(X_train, y_train, X_val, y_val, X_ewo, y_ewo, model=model)

In [63]:
# resultEval

In [64]:
# train_by_tag

In [65]:
# test_by_tag

In [66]:
# ewo_by_tag

In [67]:
# resultEval.mean()

In [68]:
# resultEval.std()

In [69]:
resultCrossVal, trainByTagResult, testByTagResult, ewoByTagResult, model = algoCrossVal(X, y, X_ewo, y_ewo, repeat=10)

AlgoCrossValIter - 1
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.57848, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.57848 to 0.56340, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.56340 to 0.52314, saving model to best-model-conll.hdfs

Epoch 00004: val_loss improved from 0.52314 to 0.46953, saving model to bes




Epoch 00001: val_loss improved from inf to 0.14643, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.14643

Epoch 00003: val_loss did not improve from 0.14643

Epoch 00004: val_loss did not improve from 0.14643

Epoch 00005: val_loss did not improve from 0.14643

Epoch 00006: val_loss did not improve from 0.14643

Epoch 00007: val_loss did not improve from 0.14643

Epoch 00008: val_loss did not improve from 0.14643

Epoch 00009: val_loss did not improve from 0.14643

Epoch 00010: val_loss did not improve from 0.14643





Epoch 00001: val_loss improved from inf to 0.02593, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.02593

Epoch 00003: val_loss did not improve from 0.02593

Epoch 00004: val_loss did not improve from 0.02593

Epoch 00005: val_loss did not improve from 0.02593

Epoch 00006: val_loss did not improve from 0.02593

Epoch 00007: val_loss did not improve from 0.02593

Epoch 00008: val_loss did not improve from 0.02593

Epoch 00009: val_loss did not improve from 0.02593

Epoch 00010: val_loss improved from 0.02593 to 0.02321, saving model to best-model-conll.hdfs





Epoch 00001: val_loss improved from inf to 0.09177, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09177

Epoch 00003: val_loss did not improve from 0.09177

Epoch 00004: val_loss did not improve from 0.09177

Epoch 00005: val_loss did not improve from 0.09177

Epoch 00006: val_loss did not improve from 0.09177

Epoch 00007: val_loss did not improve from 0.09177

Epoch 00008: val_loss did not improve from 0.09177

Epoch 00009: val_loss did not improve from 0.09177

Epoch 00010: val_loss did not improve from 0.09177





Epoch 00001: val_loss improved from inf to 0.13358, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.13358

Epoch 00003: val_loss did not improve from 0.13358

Epoch 00004: val_loss did not improve from 0.13358

Epoch 00005: val_loss did not improve from 0.13358

Epoch 00006: val_loss did not improve from 0.13358

Epoch 00007: val_loss did not improve from 0.13358

Epoch 00008: val_loss did not improve from 0.13358

Epoch 00009: val_loss did not improve from 0.13358

Epoch 00010: val_loss did not improve from 0.13358





Epoch 00001: val_loss improved from inf to 0.04649, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04649

Epoch 00003: val_loss did not improve from 0.04649

Epoch 00004: val_loss did not improve from 0.04649

Epoch 00005: val_loss did not improve from 0.04649

Epoch 00006: val_loss did not improve from 0.04649

Epoch 00007: val_loss did not improve from 0.04649

Epoch 00008: val_loss did not improve from 0.04649

Epoch 00009: val_loss did not improve from 0.04649

Epoch 00010: val_loss did not improve from 0.04649





Epoch 00001: val_loss improved from inf to 0.05779, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05779

Epoch 00003: val_loss did not improve from 0.05779

Epoch 00004: val_loss did not improve from 0.05779

Epoch 00005: val_loss did not improve from 0.05779

Epoch 00006: val_loss did not improve from 0.05779

Epoch 00007: val_loss did not improve from 0.05779

Epoch 00008: val_loss did not improve from 0.05779

Epoch 00009: val_loss did not improve from 0.05779

Epoch 00010: val_loss did not improve from 0.05779





Epoch 00001: val_loss improved from inf to 0.07027, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07027

Epoch 00003: val_loss did not improve from 0.07027

Epoch 00004: val_loss did not improve from 0.07027

Epoch 00005: val_loss did not improve from 0.07027

Epoch 00006: val_loss did not improve from 0.07027

Epoch 00007: val_loss did not improve from 0.07027

Epoch 00008: val_loss did not improve from 0.07027

Epoch 00009: val_loss did not improve from 0.07027

Epoch 00010: val_loss did not improve from 0.07027

Epoch 00001: val_loss improved from inf to 0.09615, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.09615 to 0.08745, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.08745

Epoch 00004: val_loss did not improve from 0.08745

Epoch 00005: val_loss did not improve from 0.08745

Epoch 00006: val_loss did not improve from 0.08745

Epoch 00007: val_loss did not improve from 0.0




Epoch 00001: val_loss improved from inf to 0.03149, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.03149 to 0.03110, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.03110 to 0.02966, saving model to best-model-conll.hdfs

Epoch 00004: val_loss improved from 0.02966 to 0.02907, saving model to best-model-conll.hdfs

Epoch 00005: val_loss did not improve from 0.02907

Epoch 00006: val_loss did not improve from 0.02907

Epoch 00007: val_loss improved from 0.02907 to 0.02813, saving model to best-model-conll.hdfs

Epoch 00008: val_loss did not improve from 0.02813

Epoch 00009: val_loss did not improve from 0.02813

Epoch 00010: val_loss did not improve from 0.02813
AlgoCrossValIter - 2
Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
____________________________




Epoch 00001: val_loss improved from inf to 0.19657, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.19657

Epoch 00003: val_loss did not improve from 0.19657

Epoch 00004: val_loss improved from 0.19657 to 0.17424, saving model to best-model-conll.hdfs

Epoch 00005: val_loss did not improve from 0.17424

Epoch 00006: val_loss did not improve from 0.17424

Epoch 00007: val_loss did not improve from 0.17424

Epoch 00008: val_loss did not improve from 0.17424

Epoch 00009: val_loss did not improve from 0.17424

Epoch 00010: val_loss did not improve from 0.17424





Epoch 00001: val_loss improved from inf to 0.01651, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.01651

Epoch 00003: val_loss did not improve from 0.01651

Epoch 00004: val_loss did not improve from 0.01651

Epoch 00005: val_loss did not improve from 0.01651

Epoch 00006: val_loss did not improve from 0.01651

Epoch 00007: val_loss did not improve from 0.01651

Epoch 00008: val_loss did not improve from 0.01651

Epoch 00009: val_loss did not improve from 0.01651

Epoch 00010: val_loss did not improve from 0.01651

Epoch 00001: val_loss improved from inf to 0.09413, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09413

Epoch 00003: val_loss did not improve from 0.09413

Epoch 00004: val_loss did not improve from 0.09413

Epoch 00005: val_loss did not improve from 0.09413

Epoch 00006: val_loss did not improve from 0.09413

Epoch 00007: val_loss did not improve from 0.09413

Epoch 00008: val_loss did not improve




Epoch 00001: val_loss improved from inf to 0.13963, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.13963

Epoch 00003: val_loss did not improve from 0.13963

Epoch 00004: val_loss did not improve from 0.13963

Epoch 00005: val_loss did not improve from 0.13963

Epoch 00006: val_loss did not improve from 0.13963

Epoch 00007: val_loss did not improve from 0.13963

Epoch 00008: val_loss did not improve from 0.13963

Epoch 00009: val_loss did not improve from 0.13963

Epoch 00010: val_loss did not improve from 0.13963





Epoch 00001: val_loss improved from inf to 0.04772, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04772

Epoch 00003: val_loss did not improve from 0.04772

Epoch 00004: val_loss did not improve from 0.04772

Epoch 00005: val_loss did not improve from 0.04772

Epoch 00006: val_loss did not improve from 0.04772

Epoch 00007: val_loss did not improve from 0.04772

Epoch 00008: val_loss did not improve from 0.04772

Epoch 00009: val_loss did not improve from 0.04772

Epoch 00010: val_loss did not improve from 0.04772





Epoch 00001: val_loss improved from inf to 0.05961, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05961

Epoch 00003: val_loss did not improve from 0.05961

Epoch 00004: val_loss did not improve from 0.05961

Epoch 00005: val_loss did not improve from 0.05961

Epoch 00006: val_loss did not improve from 0.05961

Epoch 00007: val_loss did not improve from 0.05961

Epoch 00008: val_loss did not improve from 0.05961

Epoch 00009: val_loss did not improve from 0.05961

Epoch 00010: val_loss did not improve from 0.05961





Epoch 00001: val_loss improved from inf to 0.10707, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.10707 to 0.08148, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.08148

Epoch 00004: val_loss did not improve from 0.08148

Epoch 00005: val_loss did not improve from 0.08148

Epoch 00006: val_loss did not improve from 0.08148

Epoch 00007: val_loss did not improve from 0.08148

Epoch 00008: val_loss did not improve from 0.08148

Epoch 00009: val_loss did not improve from 0.08148

Epoch 00010: val_loss did not improve from 0.08148

Epoch 00001: val_loss improved from inf to 0.10500, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.10500 to 0.08443, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.08443

Epoch 00004: val_loss did not improve from 0.08443

Epoch 00005: val_loss did not improve from 0.08443

Epoch 00006: val_loss did not improve from 0.08443

Epo




Epoch 00001: val_loss improved from inf to 0.02899, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.02899

Epoch 00003: val_loss did not improve from 0.02899

Epoch 00004: val_loss did not improve from 0.02899

Epoch 00005: val_loss did not improve from 0.02899

Epoch 00006: val_loss did not improve from 0.02899

Epoch 00007: val_loss did not improve from 0.02899

Epoch 00008: val_loss did not improve from 0.02899

Epoch 00009: val_loss did not improve from 0.02899

Epoch 00010: val_loss did not improve from 0.02899
AlgoCrossValIter - 3
Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
ou




Epoch 00001: val_loss improved from inf to 0.15638, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.15638 to 0.14839, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.14839

Epoch 00004: val_loss did not improve from 0.14839

Epoch 00005: val_loss did not improve from 0.14839

Epoch 00006: val_loss did not improve from 0.14839

Epoch 00007: val_loss did not improve from 0.14839

Epoch 00008: val_loss did not improve from 0.14839

Epoch 00009: val_loss did not improve from 0.14839

Epoch 00010: val_loss did not improve from 0.14839





Epoch 00001: val_loss improved from inf to 0.03000, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.03000

Epoch 00003: val_loss did not improve from 0.03000

Epoch 00004: val_loss did not improve from 0.03000

Epoch 00005: val_loss did not improve from 0.03000

Epoch 00006: val_loss did not improve from 0.03000

Epoch 00007: val_loss improved from 0.03000 to 0.02062, saving model to best-model-conll.hdfs

Epoch 00008: val_loss did not improve from 0.02062

Epoch 00009: val_loss did not improve from 0.02062

Epoch 00010: val_loss did not improve from 0.02062

Epoch 00001: val_loss improved from inf to 0.08462, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08462

Epoch 00003: val_loss did not improve from 0.08462

Epoch 00004: val_loss did not improve from 0.08462

Epoch 00005: val_loss did not improve from 0.08462

Epoch 00006: val_loss did not improve from 0.08462

Epoch 00007: val_loss did not improve from 0.0




Epoch 00001: val_loss improved from inf to 0.15160, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.15160 to 0.14800, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.14800

Epoch 00004: val_loss did not improve from 0.14800

Epoch 00005: val_loss did not improve from 0.14800

Epoch 00006: val_loss did not improve from 0.14800

Epoch 00007: val_loss did not improve from 0.14800

Epoch 00008: val_loss did not improve from 0.14800

Epoch 00009: val_loss did not improve from 0.14800

Epoch 00010: val_loss did not improve from 0.14800





Epoch 00001: val_loss improved from inf to 0.04610, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.04610

Epoch 00003: val_loss did not improve from 0.04610

Epoch 00004: val_loss did not improve from 0.04610

Epoch 00005: val_loss did not improve from 0.04610

Epoch 00006: val_loss did not improve from 0.04610

Epoch 00007: val_loss did not improve from 0.04610

Epoch 00008: val_loss did not improve from 0.04610

Epoch 00009: val_loss did not improve from 0.04610

Epoch 00010: val_loss did not improve from 0.04610





Epoch 00001: val_loss improved from inf to 0.05629, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05629

Epoch 00003: val_loss did not improve from 0.05629

Epoch 00004: val_loss did not improve from 0.05629

Epoch 00005: val_loss did not improve from 0.05629

Epoch 00006: val_loss did not improve from 0.05629

Epoch 00007: val_loss did not improve from 0.05629

Epoch 00008: val_loss did not improve from 0.05629

Epoch 00009: val_loss did not improve from 0.05629

Epoch 00010: val_loss did not improve from 0.05629





Epoch 00001: val_loss improved from inf to 0.07224, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07224

Epoch 00003: val_loss did not improve from 0.07224

Epoch 00004: val_loss did not improve from 0.07224

Epoch 00005: val_loss did not improve from 0.07224

Epoch 00006: val_loss did not improve from 0.07224

Epoch 00007: val_loss did not improve from 0.07224

Epoch 00008: val_loss did not improve from 0.07224

Epoch 00009: val_loss did not improve from 0.07224

Epoch 00010: val_loss did not improve from 0.07224

Epoch 00001: val_loss improved from inf to 0.07744, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07744

Epoch 00003: val_loss did not improve from 0.07744

Epoch 00004: val_loss did not improve from 0.07744

Epoch 00005: val_loss did not improve from 0.07744

Epoch 00006: val_loss did not improve from 0.07744

Epoch 00007: val_loss did not improve from 0.07744

Epoch 00008: val_loss did not improve




Epoch 00001: val_loss improved from inf to 0.03478, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.03478 to 0.02966, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.02966

Epoch 00004: val_loss did not improve from 0.02966

Epoch 00005: val_loss did not improve from 0.02966

Epoch 00006: val_loss did not improve from 0.02966

Epoch 00007: val_loss did not improve from 0.02966

Epoch 00008: val_loss did not improve from 0.02966

Epoch 00009: val_loss did not improve from 0.02966

Epoch 00010: val_loss did not improve from 0.02966
AlgoCrossValIter - 4
Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________




Epoch 00001: val_loss improved from inf to 0.13108, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.13108

Epoch 00003: val_loss did not improve from 0.13108

Epoch 00004: val_loss did not improve from 0.13108

Epoch 00005: val_loss did not improve from 0.13108

Epoch 00006: val_loss did not improve from 0.13108

Epoch 00007: val_loss did not improve from 0.13108

Epoch 00008: val_loss did not improve from 0.13108

Epoch 00009: val_loss did not improve from 0.13108

Epoch 00010: val_loss did not improve from 0.13108





Epoch 00001: val_loss improved from inf to 0.03020, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.03020 to 0.02289, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.02289

Epoch 00004: val_loss did not improve from 0.02289

Epoch 00005: val_loss did not improve from 0.02289

Epoch 00006: val_loss did not improve from 0.02289

Epoch 00007: val_loss did not improve from 0.02289

Epoch 00008: val_loss did not improve from 0.02289

Epoch 00009: val_loss did not improve from 0.02289

Epoch 00010: val_loss did not improve from 0.02289





Epoch 00001: val_loss improved from inf to 0.09933, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09933

Epoch 00003: val_loss did not improve from 0.09933

Epoch 00004: val_loss did not improve from 0.09933

Epoch 00005: val_loss did not improve from 0.09933

Epoch 00006: val_loss did not improve from 0.09933

Epoch 00007: val_loss did not improve from 0.09933

Epoch 00008: val_loss did not improve from 0.09933

Epoch 00009: val_loss did not improve from 0.09933

Epoch 00010: val_loss did not improve from 0.09933





Epoch 00001: val_loss improved from inf to 0.16090, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.16090 to 0.15555, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.15555

Epoch 00004: val_loss did not improve from 0.15555

Epoch 00005: val_loss did not improve from 0.15555

Epoch 00006: val_loss did not improve from 0.15555

Epoch 00007: val_loss did not improve from 0.15555

Epoch 00008: val_loss did not improve from 0.15555

Epoch 00009: val_loss did not improve from 0.15555

Epoch 00010: val_loss did not improve from 0.15555





Epoch 00001: val_loss improved from inf to 0.05003, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05003

Epoch 00003: val_loss did not improve from 0.05003

Epoch 00004: val_loss did not improve from 0.05003

Epoch 00005: val_loss did not improve from 0.05003

Epoch 00006: val_loss did not improve from 0.05003

Epoch 00007: val_loss did not improve from 0.05003

Epoch 00008: val_loss did not improve from 0.05003

Epoch 00009: val_loss did not improve from 0.05003

Epoch 00010: val_loss did not improve from 0.05003





Epoch 00001: val_loss improved from inf to 0.06466, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06466

Epoch 00003: val_loss improved from 0.06466 to 0.06393, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.06393

Epoch 00005: val_loss did not improve from 0.06393

Epoch 00006: val_loss did not improve from 0.06393

Epoch 00007: val_loss did not improve from 0.06393

Epoch 00008: val_loss did not improve from 0.06393

Epoch 00009: val_loss did not improve from 0.06393

Epoch 00010: val_loss did not improve from 0.06393





Epoch 00001: val_loss improved from inf to 0.06968, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06968

Epoch 00003: val_loss did not improve from 0.06968

Epoch 00004: val_loss did not improve from 0.06968

Epoch 00005: val_loss did not improve from 0.06968

Epoch 00006: val_loss did not improve from 0.06968

Epoch 00007: val_loss did not improve from 0.06968

Epoch 00008: val_loss did not improve from 0.06968

Epoch 00009: val_loss did not improve from 0.06968

Epoch 00010: val_loss did not improve from 0.06968

Epoch 00001: val_loss improved from inf to 0.08869, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08869

Epoch 00003: val_loss did not improve from 0.08869

Epoch 00004: val_loss did not improve from 0.08869

Epoch 00005: val_loss did not improve from 0.08869

Epoch 00006: val_loss did not improve from 0.08869

Epoch 00007: val_loss did not improve from 0.08869

Epoch 00008: val_loss did not improve




Epoch 00001: val_loss improved from inf to 0.03477, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.03477 to 0.03378, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.03378 to 0.03176, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.03176

Epoch 00005: val_loss did not improve from 0.03176

Epoch 00006: val_loss improved from 0.03176 to 0.03053, saving model to best-model-conll.hdfs

Epoch 00007: val_loss did not improve from 0.03053

Epoch 00008: val_loss did not improve from 0.03053

Epoch 00009: val_loss did not improve from 0.03053

Epoch 00010: val_loss did not improve from 0.03053
AlgoCrossValIter - 5
Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidde




Epoch 00001: val_loss improved from inf to 0.15291, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.15291

Epoch 00003: val_loss did not improve from 0.15291

Epoch 00004: val_loss did not improve from 0.15291

Epoch 00005: val_loss did not improve from 0.15291

Epoch 00006: val_loss did not improve from 0.15291

Epoch 00007: val_loss did not improve from 0.15291

Epoch 00008: val_loss did not improve from 0.15291

Epoch 00009: val_loss did not improve from 0.15291

Epoch 00010: val_loss did not improve from 0.15291





Epoch 00001: val_loss improved from inf to 0.01984, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.01984

Epoch 00003: val_loss did not improve from 0.01984

Epoch 00004: val_loss did not improve from 0.01984

Epoch 00005: val_loss did not improve from 0.01984

Epoch 00006: val_loss did not improve from 0.01984

Epoch 00007: val_loss did not improve from 0.01984

Epoch 00008: val_loss did not improve from 0.01984

Epoch 00009: val_loss did not improve from 0.01984

Epoch 00010: val_loss did not improve from 0.01984





Epoch 00001: val_loss improved from inf to 0.11739, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.11739 to 0.10309, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.10309

Epoch 00004: val_loss did not improve from 0.10309

Epoch 00005: val_loss did not improve from 0.10309

Epoch 00006: val_loss did not improve from 0.10309

Epoch 00007: val_loss did not improve from 0.10309

Epoch 00008: val_loss did not improve from 0.10309

Epoch 00009: val_loss did not improve from 0.10309

Epoch 00010: val_loss did not improve from 0.10309





Epoch 00001: val_loss improved from inf to 0.12529, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.12529

Epoch 00003: val_loss did not improve from 0.12529

Epoch 00004: val_loss did not improve from 0.12529

Epoch 00005: val_loss did not improve from 0.12529

Epoch 00006: val_loss did not improve from 0.12529

Epoch 00007: val_loss did not improve from 0.12529

Epoch 00008: val_loss did not improve from 0.12529

Epoch 00009: val_loss did not improve from 0.12529

Epoch 00010: val_loss did not improve from 0.12529





Epoch 00001: val_loss improved from inf to 0.05441, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05441

Epoch 00003: val_loss did not improve from 0.05441

Epoch 00004: val_loss did not improve from 0.05441

Epoch 00005: val_loss did not improve from 0.05441

Epoch 00006: val_loss did not improve from 0.05441

Epoch 00007: val_loss did not improve from 0.05441

Epoch 00008: val_loss did not improve from 0.05441

Epoch 00009: val_loss did not improve from 0.05441

Epoch 00010: val_loss did not improve from 0.05441





Epoch 00001: val_loss improved from inf to 0.06062, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06062

Epoch 00003: val_loss did not improve from 0.06062

Epoch 00004: val_loss did not improve from 0.06062

Epoch 00005: val_loss did not improve from 0.06062

Epoch 00006: val_loss did not improve from 0.06062

Epoch 00007: val_loss did not improve from 0.06062

Epoch 00008: val_loss did not improve from 0.06062

Epoch 00009: val_loss did not improve from 0.06062

Epoch 00010: val_loss did not improve from 0.06062





Epoch 00001: val_loss improved from inf to 0.10367, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.10367 to 0.08164, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.08164

Epoch 00004: val_loss did not improve from 0.08164

Epoch 00005: val_loss did not improve from 0.08164

Epoch 00006: val_loss did not improve from 0.08164

Epoch 00007: val_loss did not improve from 0.08164

Epoch 00008: val_loss did not improve from 0.08164

Epoch 00009: val_loss did not improve from 0.08164

Epoch 00010: val_loss did not improve from 0.08164

Epoch 00001: val_loss improved from inf to 0.09034, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.09034 to 0.08851, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.08851 to 0.08512, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.08512

Epoch 00005: val_loss did not improve from 0.08512

Epoch 00006:




Epoch 00001: val_loss improved from inf to 0.03213, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.03213

Epoch 00003: val_loss did not improve from 0.03213

Epoch 00004: val_loss improved from 0.03213 to 0.03131, saving model to best-model-conll.hdfs

Epoch 00005: val_loss did not improve from 0.03131

Epoch 00006: val_loss did not improve from 0.03131

Epoch 00007: val_loss did not improve from 0.03131

Epoch 00008: val_loss did not improve from 0.03131

Epoch 00009: val_loss did not improve from 0.03131

Epoch 00010: val_loss did not improve from 0.03131




AlgoCrossValIter - 6
Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.60332, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.60332 to 0.57645, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.57645 to 0.50567, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.50567

Epoch 00005: val_loss i




Epoch 00001: val_loss improved from inf to 0.15363, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.15363 to 0.15351, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.15351

Epoch 00004: val_loss did not improve from 0.15351

Epoch 00005: val_loss did not improve from 0.15351

Epoch 00006: val_loss did not improve from 0.15351

Epoch 00007: val_loss did not improve from 0.15351

Epoch 00008: val_loss did not improve from 0.15351

Epoch 00009: val_loss did not improve from 0.15351

Epoch 00010: val_loss did not improve from 0.15351





Epoch 00001: val_loss improved from inf to 0.02899, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.02899

Epoch 00003: val_loss did not improve from 0.02899

Epoch 00004: val_loss did not improve from 0.02899

Epoch 00005: val_loss did not improve from 0.02899

Epoch 00006: val_loss improved from 0.02899 to 0.02896, saving model to best-model-conll.hdfs

Epoch 00007: val_loss did not improve from 0.02896

Epoch 00008: val_loss improved from 0.02896 to 0.02513, saving model to best-model-conll.hdfs

Epoch 00009: val_loss did not improve from 0.02513

Epoch 00010: val_loss improved from 0.02513 to 0.02409, saving model to best-model-conll.hdfs

Epoch 00001: val_loss improved from inf to 0.08205, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08205

Epoch 00003: val_loss did not improve from 0.08205

Epoch 00004: val_loss did not improve from 0.08205

Epoch 00005: val_loss did not improve from 0.08205

Epoch 00006:




Epoch 00001: val_loss improved from inf to 0.13415, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.13415

Epoch 00003: val_loss did not improve from 0.13415

Epoch 00004: val_loss did not improve from 0.13415

Epoch 00005: val_loss did not improve from 0.13415

Epoch 00006: val_loss did not improve from 0.13415

Epoch 00007: val_loss did not improve from 0.13415

Epoch 00008: val_loss did not improve from 0.13415

Epoch 00009: val_loss did not improve from 0.13415

Epoch 00010: val_loss did not improve from 0.13415





Epoch 00001: val_loss improved from inf to 0.07146, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.07146 to 0.07005, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.07005

Epoch 00004: val_loss did not improve from 0.07005

Epoch 00005: val_loss did not improve from 0.07005

Epoch 00006: val_loss did not improve from 0.07005

Epoch 00007: val_loss did not improve from 0.07005

Epoch 00008: val_loss did not improve from 0.07005

Epoch 00009: val_loss did not improve from 0.07005

Epoch 00010: val_loss did not improve from 0.07005





Epoch 00001: val_loss improved from inf to 0.07678, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.07678 to 0.06004, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.06004

Epoch 00004: val_loss did not improve from 0.06004

Epoch 00005: val_loss did not improve from 0.06004

Epoch 00006: val_loss did not improve from 0.06004

Epoch 00007: val_loss did not improve from 0.06004

Epoch 00008: val_loss did not improve from 0.06004

Epoch 00009: val_loss did not improve from 0.06004

Epoch 00010: val_loss did not improve from 0.06004





Epoch 00001: val_loss improved from inf to 0.07292, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07292

Epoch 00003: val_loss did not improve from 0.07292

Epoch 00004: val_loss did not improve from 0.07292

Epoch 00005: val_loss did not improve from 0.07292

Epoch 00006: val_loss did not improve from 0.07292

Epoch 00007: val_loss did not improve from 0.07292

Epoch 00008: val_loss did not improve from 0.07292

Epoch 00009: val_loss did not improve from 0.07292

Epoch 00010: val_loss did not improve from 0.07292

Epoch 00001: val_loss improved from inf to 0.07199, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07199

Epoch 00003: val_loss did not improve from 0.07199

Epoch 00004: val_loss did not improve from 0.07199

Epoch 00005: val_loss did not improve from 0.07199

Epoch 00006: val_loss did not improve from 0.07199

Epoch 00007: val_loss did not improve from 0.07199

Epoch 00008: val_loss did not improve




Epoch 00001: val_loss improved from inf to 0.03099, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.03099 to 0.03047, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.03047

Epoch 00004: val_loss did not improve from 0.03047

Epoch 00005: val_loss did not improve from 0.03047

Epoch 00006: val_loss improved from 0.03047 to 0.02981, saving model to best-model-conll.hdfs

Epoch 00007: val_loss did not improve from 0.02981

Epoch 00008: val_loss did not improve from 0.02981

Epoch 00009: val_loss did not improve from 0.02981

Epoch 00010: val_loss did not improve from 0.02981
AlgoCrossValIter - 7
Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)        




Epoch 00001: val_loss improved from inf to 0.12827, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.12827

Epoch 00003: val_loss did not improve from 0.12827

Epoch 00004: val_loss did not improve from 0.12827

Epoch 00005: val_loss did not improve from 0.12827

Epoch 00006: val_loss did not improve from 0.12827

Epoch 00007: val_loss did not improve from 0.12827

Epoch 00008: val_loss did not improve from 0.12827

Epoch 00009: val_loss did not improve from 0.12827

Epoch 00010: val_loss did not improve from 0.12827





Epoch 00001: val_loss improved from inf to 0.03327, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.03327 to 0.02964, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.02964 to 0.02409, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.02409

Epoch 00005: val_loss improved from 0.02409 to 0.02113, saving model to best-model-conll.hdfs

Epoch 00006: val_loss did not improve from 0.02113

Epoch 00007: val_loss did not improve from 0.02113

Epoch 00008: val_loss did not improve from 0.02113

Epoch 00009: val_loss did not improve from 0.02113

Epoch 00010: val_loss did not improve from 0.02113





Epoch 00001: val_loss improved from inf to 0.10597, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.10597 to 0.09893, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.09893 to 0.09882, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.09882

Epoch 00005: val_loss did not improve from 0.09882

Epoch 00006: val_loss did not improve from 0.09882

Epoch 00007: val_loss did not improve from 0.09882

Epoch 00008: val_loss did not improve from 0.09882

Epoch 00009: val_loss did not improve from 0.09882

Epoch 00010: val_loss did not improve from 0.09882





Epoch 00001: val_loss improved from inf to 0.15421, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.15421

Epoch 00003: val_loss did not improve from 0.15421

Epoch 00004: val_loss did not improve from 0.15421

Epoch 00005: val_loss did not improve from 0.15421

Epoch 00006: val_loss did not improve from 0.15421

Epoch 00007: val_loss did not improve from 0.15421

Epoch 00008: val_loss did not improve from 0.15421

Epoch 00009: val_loss did not improve from 0.15421

Epoch 00010: val_loss did not improve from 0.15421





Epoch 00001: val_loss improved from inf to 0.03905, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.03905

Epoch 00003: val_loss did not improve from 0.03905

Epoch 00004: val_loss did not improve from 0.03905

Epoch 00005: val_loss did not improve from 0.03905

Epoch 00006: val_loss did not improve from 0.03905

Epoch 00007: val_loss did not improve from 0.03905

Epoch 00008: val_loss did not improve from 0.03905

Epoch 00009: val_loss did not improve from 0.03905

Epoch 00010: val_loss did not improve from 0.03905





Epoch 00001: val_loss improved from inf to 0.08117, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.08117 to 0.06688, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.06688

Epoch 00004: val_loss did not improve from 0.06688

Epoch 00005: val_loss improved from 0.06688 to 0.06443, saving model to best-model-conll.hdfs

Epoch 00006: val_loss did not improve from 0.06443

Epoch 00007: val_loss did not improve from 0.06443

Epoch 00008: val_loss did not improve from 0.06443

Epoch 00009: val_loss did not improve from 0.06443

Epoch 00010: val_loss did not improve from 0.06443

Epoch 00001: val_loss improved from inf to 0.07958, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.07958 to 0.07662, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.07662

Epoch 00004: val_loss did not improve from 0.07662

Epoch 00005: val_loss did not improve from 0.07662

Epoch 00006:




Epoch 00001: val_loss improved from inf to 0.02895, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.02895

Epoch 00003: val_loss did not improve from 0.02895

Epoch 00004: val_loss did not improve from 0.02895

Epoch 00005: val_loss did not improve from 0.02895

Epoch 00006: val_loss did not improve from 0.02895

Epoch 00007: val_loss did not improve from 0.02895

Epoch 00008: val_loss did not improve from 0.02895

Epoch 00009: val_loss did not improve from 0.02895

Epoch 00010: val_loss did not improve from 0.02895
AlgoCrossValIter - 8
Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
ou




Epoch 00001: val_loss improved from inf to 0.15071, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.15071 to 0.14987, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.14987

Epoch 00004: val_loss did not improve from 0.14987

Epoch 00005: val_loss did not improve from 0.14987

Epoch 00006: val_loss did not improve from 0.14987

Epoch 00007: val_loss did not improve from 0.14987

Epoch 00008: val_loss did not improve from 0.14987

Epoch 00009: val_loss did not improve from 0.14987

Epoch 00010: val_loss did not improve from 0.14987





Epoch 00001: val_loss improved from inf to 0.05096, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.05096 to 0.03390, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.03390

Epoch 00004: val_loss improved from 0.03390 to 0.02311, saving model to best-model-conll.hdfs

Epoch 00005: val_loss did not improve from 0.02311

Epoch 00006: val_loss did not improve from 0.02311

Epoch 00007: val_loss did not improve from 0.02311

Epoch 00008: val_loss did not improve from 0.02311

Epoch 00009: val_loss did not improve from 0.02311

Epoch 00010: val_loss did not improve from 0.02311

Epoch 00001: val_loss improved from inf to 0.09350, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09350

Epoch 00003: val_loss did not improve from 0.09350

Epoch 00004: val_loss did not improve from 0.09350

Epoch 00005: val_loss did not improve from 0.09350

Epoch 00006: val_loss did not improve from 0.09350

Epo




Epoch 00001: val_loss improved from inf to 0.15679, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.15679

Epoch 00003: val_loss did not improve from 0.15679

Epoch 00004: val_loss did not improve from 0.15679

Epoch 00005: val_loss did not improve from 0.15679

Epoch 00006: val_loss did not improve from 0.15679

Epoch 00007: val_loss did not improve from 0.15679

Epoch 00008: val_loss did not improve from 0.15679

Epoch 00009: val_loss did not improve from 0.15679

Epoch 00010: val_loss did not improve from 0.15679





Epoch 00001: val_loss improved from inf to 0.06152, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06152

Epoch 00003: val_loss did not improve from 0.06152

Epoch 00004: val_loss did not improve from 0.06152

Epoch 00005: val_loss did not improve from 0.06152

Epoch 00006: val_loss did not improve from 0.06152

Epoch 00007: val_loss did not improve from 0.06152

Epoch 00008: val_loss did not improve from 0.06152

Epoch 00009: val_loss did not improve from 0.06152

Epoch 00010: val_loss did not improve from 0.06152





Epoch 00001: val_loss improved from inf to 0.05470, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05470

Epoch 00003: val_loss did not improve from 0.05470

Epoch 00004: val_loss did not improve from 0.05470

Epoch 00005: val_loss did not improve from 0.05470

Epoch 00006: val_loss did not improve from 0.05470

Epoch 00007: val_loss did not improve from 0.05470

Epoch 00008: val_loss did not improve from 0.05470

Epoch 00009: val_loss did not improve from 0.05470

Epoch 00010: val_loss did not improve from 0.05470





Epoch 00001: val_loss improved from inf to 0.07039, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07039

Epoch 00003: val_loss did not improve from 0.07039

Epoch 00004: val_loss did not improve from 0.07039

Epoch 00005: val_loss did not improve from 0.07039

Epoch 00006: val_loss did not improve from 0.07039

Epoch 00007: val_loss did not improve from 0.07039

Epoch 00008: val_loss did not improve from 0.07039

Epoch 00009: val_loss did not improve from 0.07039

Epoch 00010: val_loss did not improve from 0.07039

Epoch 00001: val_loss improved from inf to 0.08837, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08837

Epoch 00003: val_loss did not improve from 0.08837

Epoch 00004: val_loss did not improve from 0.08837

Epoch 00005: val_loss did not improve from 0.08837

Epoch 00006: val_loss did not improve from 0.08837

Epoch 00007: val_loss did not improve from 0.08837

Epoch 00008: val_loss did not improve




Epoch 00001: val_loss improved from inf to 0.02847, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.02847

Epoch 00003: val_loss did not improve from 0.02847

Epoch 00004: val_loss did not improve from 0.02847

Epoch 00005: val_loss did not improve from 0.02847

Epoch 00006: val_loss did not improve from 0.02847

Epoch 00007: val_loss did not improve from 0.02847

Epoch 00008: val_loss did not improve from 0.02847

Epoch 00009: val_loss did not improve from 0.02847

Epoch 00010: val_loss did not improve from 0.02847
AlgoCrossValIter - 9
Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
ou




Epoch 00001: val_loss improved from inf to 0.16938, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.16938

Epoch 00003: val_loss did not improve from 0.16938

Epoch 00004: val_loss did not improve from 0.16938

Epoch 00005: val_loss did not improve from 0.16938

Epoch 00006: val_loss did not improve from 0.16938

Epoch 00007: val_loss did not improve from 0.16938

Epoch 00008: val_loss did not improve from 0.16938

Epoch 00009: val_loss did not improve from 0.16938

Epoch 00010: val_loss did not improve from 0.16938





Epoch 00001: val_loss improved from inf to 0.03083, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.03083

Epoch 00003: val_loss did not improve from 0.03083

Epoch 00004: val_loss did not improve from 0.03083

Epoch 00005: val_loss improved from 0.03083 to 0.02998, saving model to best-model-conll.hdfs

Epoch 00006: val_loss did not improve from 0.02998

Epoch 00007: val_loss improved from 0.02998 to 0.02976, saving model to best-model-conll.hdfs

Epoch 00008: val_loss improved from 0.02976 to 0.02445, saving model to best-model-conll.hdfs

Epoch 00009: val_loss did not improve from 0.02445

Epoch 00010: val_loss improved from 0.02445 to 0.02178, saving model to best-model-conll.hdfs

Epoch 00001: val_loss improved from inf to 0.08914, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08914

Epoch 00003: val_loss did not improve from 0.08914

Epoch 00004: val_loss did not improve from 0.08914

Epoch 00005: val_loss




Epoch 00001: val_loss improved from inf to 0.12476, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.12476

Epoch 00003: val_loss did not improve from 0.12476

Epoch 00004: val_loss did not improve from 0.12476

Epoch 00005: val_loss did not improve from 0.12476

Epoch 00006: val_loss did not improve from 0.12476

Epoch 00007: val_loss did not improve from 0.12476

Epoch 00008: val_loss did not improve from 0.12476

Epoch 00009: val_loss did not improve from 0.12476

Epoch 00010: val_loss did not improve from 0.12476





Epoch 00001: val_loss improved from inf to 0.05407, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05407

Epoch 00003: val_loss did not improve from 0.05407

Epoch 00004: val_loss did not improve from 0.05407

Epoch 00005: val_loss did not improve from 0.05407

Epoch 00006: val_loss did not improve from 0.05407

Epoch 00007: val_loss did not improve from 0.05407

Epoch 00008: val_loss did not improve from 0.05407

Epoch 00009: val_loss did not improve from 0.05407

Epoch 00010: val_loss did not improve from 0.05407





Epoch 00001: val_loss improved from inf to 0.05787, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05787

Epoch 00003: val_loss did not improve from 0.05787

Epoch 00004: val_loss did not improve from 0.05787

Epoch 00005: val_loss did not improve from 0.05787

Epoch 00006: val_loss did not improve from 0.05787

Epoch 00007: val_loss did not improve from 0.05787

Epoch 00008: val_loss did not improve from 0.05787

Epoch 00009: val_loss did not improve from 0.05787

Epoch 00010: val_loss did not improve from 0.05787





Epoch 00001: val_loss improved from inf to 0.07822, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.07822 to 0.07490, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.07490

Epoch 00004: val_loss did not improve from 0.07490

Epoch 00005: val_loss did not improve from 0.07490

Epoch 00006: val_loss did not improve from 0.07490

Epoch 00007: val_loss did not improve from 0.07490

Epoch 00008: val_loss did not improve from 0.07490

Epoch 00009: val_loss did not improve from 0.07490

Epoch 00010: val_loss did not improve from 0.07490

Epoch 00001: val_loss improved from inf to 0.08815, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08815

Epoch 00003: val_loss did not improve from 0.08815

Epoch 00004: val_loss did not improve from 0.08815

Epoch 00005: val_loss did not improve from 0.08815

Epoch 00006: val_loss did not improve from 0.08815

Epoch 00007: val_loss did not improve from 0.0




Epoch 00001: val_loss improved from inf to 0.02963, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.02963

Epoch 00003: val_loss improved from 0.02963 to 0.02947, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.02947

Epoch 00005: val_loss did not improve from 0.02947

Epoch 00006: val_loss did not improve from 0.02947

Epoch 00007: val_loss did not improve from 0.02947

Epoch 00008: val_loss did not improve from 0.02947

Epoch 00009: val_loss did not improve from 0.02947

Epoch 00010: val_loss did not improve from 0.02947
AlgoCrossValIter - 10
Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_______________________




Epoch 00001: val_loss improved from inf to 0.15861, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.15861

Epoch 00003: val_loss did not improve from 0.15861

Epoch 00004: val_loss did not improve from 0.15861

Epoch 00005: val_loss did not improve from 0.15861

Epoch 00006: val_loss did not improve from 0.15861

Epoch 00007: val_loss did not improve from 0.15861

Epoch 00008: val_loss did not improve from 0.15861

Epoch 00009: val_loss did not improve from 0.15861

Epoch 00010: val_loss did not improve from 0.15861





Epoch 00001: val_loss improved from inf to 0.01323, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.01323

Epoch 00003: val_loss did not improve from 0.01323

Epoch 00004: val_loss did not improve from 0.01323

Epoch 00005: val_loss did not improve from 0.01323

Epoch 00006: val_loss did not improve from 0.01323

Epoch 00007: val_loss did not improve from 0.01323

Epoch 00008: val_loss did not improve from 0.01323

Epoch 00009: val_loss did not improve from 0.01323

Epoch 00010: val_loss did not improve from 0.01323





Epoch 00001: val_loss improved from inf to 0.08176, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08176

Epoch 00003: val_loss did not improve from 0.08176

Epoch 00004: val_loss did not improve from 0.08176

Epoch 00005: val_loss did not improve from 0.08176

Epoch 00006: val_loss did not improve from 0.08176

Epoch 00007: val_loss did not improve from 0.08176

Epoch 00008: val_loss did not improve from 0.08176

Epoch 00009: val_loss did not improve from 0.08176

Epoch 00010: val_loss did not improve from 0.08176





Epoch 00001: val_loss improved from inf to 0.14562, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.14562

Epoch 00003: val_loss did not improve from 0.14562

Epoch 00004: val_loss did not improve from 0.14562

Epoch 00005: val_loss did not improve from 0.14562

Epoch 00006: val_loss did not improve from 0.14562

Epoch 00007: val_loss did not improve from 0.14562

Epoch 00008: val_loss did not improve from 0.14562

Epoch 00009: val_loss did not improve from 0.14562

Epoch 00010: val_loss did not improve from 0.14562





Epoch 00001: val_loss improved from inf to 0.05022, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05022

Epoch 00003: val_loss did not improve from 0.05022

Epoch 00004: val_loss did not improve from 0.05022

Epoch 00005: val_loss did not improve from 0.05022

Epoch 00006: val_loss did not improve from 0.05022

Epoch 00007: val_loss did not improve from 0.05022

Epoch 00008: val_loss did not improve from 0.05022

Epoch 00009: val_loss did not improve from 0.05022

Epoch 00010: val_loss did not improve from 0.05022





Epoch 00001: val_loss improved from inf to 0.06108, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.06108 to 0.06088, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.06088 to 0.05757, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.05757

Epoch 00005: val_loss did not improve from 0.05757

Epoch 00006: val_loss did not improve from 0.05757

Epoch 00007: val_loss did not improve from 0.05757

Epoch 00008: val_loss did not improve from 0.05757

Epoch 00009: val_loss did not improve from 0.05757

Epoch 00010: val_loss did not improve from 0.05757





Epoch 00001: val_loss improved from inf to 0.07364, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07364

Epoch 00003: val_loss did not improve from 0.07364

Epoch 00004: val_loss did not improve from 0.07364

Epoch 00005: val_loss did not improve from 0.07364

Epoch 00006: val_loss did not improve from 0.07364

Epoch 00007: val_loss did not improve from 0.07364

Epoch 00008: val_loss did not improve from 0.07364

Epoch 00009: val_loss did not improve from 0.07364

Epoch 00010: val_loss did not improve from 0.07364

Epoch 00001: val_loss improved from inf to 0.09905, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.09905 to 0.07983, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.07983

Epoch 00004: val_loss did not improve from 0.07983

Epoch 00005: val_loss did not improve from 0.07983

Epoch 00006: val_loss did not improve from 0.07983

Epoch 00007: val_loss did not improve from 0.0




Epoch 00001: val_loss improved from inf to 0.03173, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.03173

Epoch 00003: val_loss improved from 0.03173 to 0.03040, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.03040

Epoch 00005: val_loss did not improve from 0.03040

Epoch 00006: val_loss did not improve from 0.03040

Epoch 00007: val_loss did not improve from 0.03040

Epoch 00008: val_loss did not improve from 0.03040

Epoch 00009: val_loss did not improve from 0.03040

Epoch 00010: val_loss did not improve from 0.03040


In [70]:
resultCrossVal.to_csv("results.csv")
resultCrossVal

Unnamed: 0,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9
P_val,85.039,85.868,81.673,84.701,79.384,81.491,83.53,86.387,87.432,80.637
P_train,86.978,87.726,86.687,86.325,85.572,86.577,85.737,86.818,87.414,89.242
P_ewo,75.694,78.178,75.454,75.716,75.898,77.556,75.008,75.82,77.981,79.354
R_val,78.837,77.198,77.837,76.17,80.622,77.837,79.17,76.531,76.079,76.559
R_train,86.765,85.788,87.362,83.642,88.366,86.313,85.734,85.647,83.492,83.225
R_ewo,70.093,66.76,69.536,66.852,70.279,70.278,69.444,68.149,66.112,64.446
F1-val,77.304,77.811,78.213,75.868,79.043,78.294,76.827,77.281,76.85,77.413
F1-train,86.71,86.635,86.792,84.23,86.865,86.246,85.25,85.96,84.843,85.734
F1-ewo,72.469,71.735,71.927,70.004,72.809,73.222,71.41,71.132,70.477,70.482


In [71]:
normalization_strategy

In [72]:
resultCrossVal.mean(axis=1).to_frame().T

Unnamed: 0,P_val,P_train,P_ewo,R_val,R_train,R_ewo,F1-val,F1-train,F1-ewo
0,83.6142,86.9076,76.6659,77.684,85.6334,68.1949,77.4904,85.9265,71.5667


In [73]:
resultCrossVal.std(axis=1).to_frame().T

Unnamed: 0,P_val,P_train,P_ewo,R_val,R_train,R_ewo,F1-val,F1-train,F1-ewo
0,2.699721,1.056206,1.468315,1.487568,1.718636,2.05214,0.897419,0.906502,1.068498


In [74]:
# trainByTagResult.to_csv("results/train-by-tag.csv")
# trainByTagResult

# trainByTagResult.mean(axis=1).to_frame()

In [75]:
# trainByTagResult.std(axis=1).to_frame()

In [76]:
# testByTagResult.to_csv("results/test-by-tag.csv")
# testByTagResult

In [77]:
# testByTagResult.mean(axis=1).to_frame()

In [78]:
# testByTagResult.std(axis=1).to_frame()

In [79]:
# ewoByTagResult.to_csv("results/ewo-by-tag.csv")

In [80]:
# ewoByTagResult = pd.read_csv("results/ewo-by-tag.csv", index_col=0)
# ewoByTagResult

In [81]:
# ewoByTagResult.mean(axis=1).to_frame()

In [82]:
# ewoByTagResult.std(axis=1).to_frame()

In [83]:
# columns = en_fingerprints.columns

# print("Pred", "Real", "Freq", "Word", sep="\t")
# for c in columns:
#     prediction = model.predict(en_fingerprints[c].values.reshape((1, 210)))
#     pred_tag = int2tag[np.argmax(prediction)]
#     real_tag = en_corpus[en_corpus.word == c].iloc[0]['ne-tag']
    
#     if pred_tag != real_tag:
#         print(pred_tag, real_tag, en_fingerprints[c].max(), c, sep="\t")