In [1]:
# import
import keras
import sys
import numpy as np
import string
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import SimpleRNN, Dense
from keras.utils import np_utils, plot_model
from keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn import model_selection
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, precision_recall_fscore_support
import h5py as h5py

Using TensorFlow backend.


In [597]:
# if we are doeing binary classification. That means say if a token is a named entity or not
BINARY = False

# number of epochs for training
epochs = 10 

# the english side of the corpus
en_corpus_file = "corpus-en.txt"

# the ewondo side of the corpus
ewo_corpus_file = "corpus-ewo.txt"

# name of the file to same the model 
best_model_file = "best-model-conll.hdfs"

# The maximal number of phrases to use
max_nb_of_phrases =  -1

# the maximal number of duplicates for each word in the corpus
duplication = 1

# wether we are using only the vocabulary, ro redundancy
is_only_vocab = True

# if word should be shuffle or not
shuffle = is_only_vocab

# normalization strategy
# log, max, mean_log, log_inv, max_inv or mean_log_inv, tf
# None: for no normalization => 1/tf = nbWC/nbOcc(w)
normalization_strategy = None

# if we are using the Zennaki et al. signature
is_zennaki = False

# the number of neurons in the first layer
h1_size = 640

# number of neurons in the second layer
h2_size = 160  

In [598]:
def getTag(aString):
    """
        convert a string to a tag
    """
    tag = "O"
    if BINARY:
        if aString != "O":
            return "NE"
    else:
        tag = aString
    return tag
     

In [599]:
def load_corpus(file, max_nb_of_phrases):
    """
    Load a corpus stored in a file
    Input:
        - file: the name of the file of the corpus
        - max_nb_of_phases: maximal number of phrases to load
    
    Return:
        - a DataFrame representing the corpus
        - the number of phrases in the corpus
    """
    nb_of_phrases = 0
    dataset = {"word": [], "ne-tag": []}
    with open(file) as f:
        prev_line = None
        for cpt, line in enumerate(f):
            if cpt == 0:
                continue
            if nb_of_phrases == max_nb_of_phrases:
                break;

            l = line.strip()
            if len(l) == 0 and len(prev_line) != 0:
                nb_of_phrases += 1
                dataset["word"].append(line)
                dataset["ne-tag"].append(None)
            else:
                l = l.split("\t")
                if l[0] not in string.punctuation:
                    dataset["word"].append(l[0])
                    dataset["ne-tag"].append(ne_type(l[1]))
            prev_line = line.strip()
        
    return pd.DataFrame(dataset), nb_of_phrases+1

In [600]:
def log_normalization(fingerprints):
    fingerprints[fingerprints > 0] = 1 / fingerprints[fingerprints > 0] # get tf = nbOcc(w)/nbWC
    fingerprints[fingerprints > 0] = 1 + np.log(fingerprints[fingerprints > 0])
    return fingerprints

In [601]:
def max_normalization(fingerprints):
    fingerprints[fingerprints > 0] = 1 / fingerprints[fingerprints > 0] # get tf = nbOcc(w)/nbWC
    maxis = fingerprints.max(axis = 1)
    fingerprints[fingerprints > 0] = fingerprints[fingerprints > 0].apply(lambda row: 0.5 + 0.5 * row / maxis[row.index])
    return fingerprints

In [602]:
def mean_log_normalization(fingerprints):
    fingerprints[fingerprints > 0] = 1 / fingerprints[fingerprints > 0] # get tf = nbOcc(w)/nbWC
    means = fingerprints.mean(axis=1)
    fingerprints[fingerprints > 0] = fingerprints[fingerprints > 0].apply(lambda row: (1 + np.log(row)) / 1 + np.log(means[row.index]))
    return fingerprints

In [603]:
def normalize(fingerprints):
    if normalization_strategy == "log":
        return log_normalization(fingerprints)
    elif normalization_strategy == "max":
        return max_normalization(fingerprints)
    elif normalization_strategy == "mean_log":
        return mean_log_normalization(fingerprints)
    elif normalization_strategy == "log_inv":
        fp = log_normalization(fingerprints)
        fp = 1 / (1 + fp)
        return fp
    elif normalization_strategy == "max_inv":
        fp = max_normalization(fingerprints)
        fp = 1 / (1 + fp)
        return fp
    elif normalization_strategy == "mean_log_inv":
        fp = mean_log_normalization(fingerprints)
        fp = 1 / (1 + fp)
        return fp
    elif normalization_strategy == "tf":
        fp = fingerprints
        fp[fp > 0] = 1 / fp[fp > 0]
        return fp
    else:
        return fingerprints

In [604]:
def corpus_fingerprint(aDataframe, nb_of_biphrases, nb_of_words):
    """
    Create the distributionnal signature of each word in the corpus
    Input:
        -aDataFrame: the corpus DataFrame
        -nb_of_biphrases: number of phrases in the corpus
    Return:
        a DataFrame: corpus fingerprint, the columns are the words in the corpus
    """
    print("Normalization strategy:", normalization_strategy)
    fingerprints = {}
    current_bi_phrase_index = 0
    nb_word_in_corpus = aDataframe[aDataframe.word != "\n"].word.size
    words_in_current_phrase = []
    for index, row in aDataframe.iterrows():
        if current_bi_phrase_index > nb_of_biphrases:
            break
            
        word = row['word']
        
        if word != "\n":
            words_in_current_phrase.append(word)
            if word not in fingerprints:
                fingerprints[word] = np.zeros(nb_of_biphrases, dtype=np.float32)
            fingerprints[word][current_bi_phrase_index] = 1
        else:
            nb_word_in_current_phrase = len(words_in_current_phrase)
            current_bi_phrase_index += 1
            words_in_current_phrase = []
        
    if not is_zennaki:
        for word in fingerprints:
            for i in range(nb_of_biphrases):
                if fingerprints[word][i] != 0:
                    fingerprints[word][i] = nb_word_in_corpus / nb_of_words
    ret = pd.DataFrame(fingerprints)
        
    return normalize(ret)

In [605]:
def corpus2trainingdata(aDataframe, fingerprintsDataFrame):
    """
    Convert corpus to training data => numpy array
    
    Input:
        -aDataFrame: Corpus dataframe
        -fingerprintsDataFrame: distributionnal signature of words in the corpus
    Return:
        (X, y): X is the array of words (signature) in the corpus and y is the corresponding labels (NE tags)
    """
    X = np.zeros((aDataframe.shape[0], fingerprintsDataFrame.shape[0]), dtype=np.int8)
    y = np.zeros(aDataframe.shape[0], dtype=np.int8)
    i = 0
    for row in aDataframe.iterrows():
        X[i] = fingerprintsDataFrame[row[1]['word']].values
        y[i] = tag2int[getTag(row[1]['ne-tag'])]
        i += 1
    return X, y

In [606]:
# A utility function to convert NE tags
def ne_type(aType):
    aType = aType.lower()
    if 'per' in aType:
        t =  'NE' if BINARY else 'PER' 
    elif 'loc' in aType:
        t =  'NE' if BINARY else 'LOC'
    elif 'org' in aType:
        t =  'NE' if BINARY else 'ORG'
    elif 'hour' in aType:
        t =  'NE' if BINARY else 'MISC'
    elif aType != 'o' and len(aType) > 0 :
        t =  'NE' if BINARY else 'MISC'
    else:
        t = 'O'
    return t

In [607]:
def P_R_F1(y_pred, y_true, neg_class):
    same = y_pred[y_true==y_pred]
    tp = same[same != neg_class].size
    nb_of_pos_exple = y_true[y_true != neg_class].size
    nb_of_pos_pred = y_pred[y_pred != neg_class].size
    p = r = f1 = 0
    try:
        p = np.round(tp*100/nb_of_pos_pred, 2)
    except ZeroDivisionError:
        print("number of correct positive predictions is 0")
        
    try:
        r = np.round(tp*100/nb_of_pos_exple, 2)
    except ZeroDivisionError:
        print("number of position exple is 0")
        
    try:
        f1 = np.round(2*r*p/(r+p), 2)
    except ZeroDivisionError:
        print("Recall and precision are 0")

    return p, r, f1

In [608]:
def shuffle_data(X, y):
    indices = [i for i in  range(X.shape[0])]
    np.random.shuffle(indices)
    return X[indices], y[indices]

In [609]:
def create_model(input_dim, output_dim):
    model = Sequential()
    model.add(Dense(h1_size, input_dim=input_dim, activation='sigmoid', name="hidden1"))
    model.add(Dense(h2_size, activation='sigmoid', name="hidden2"))
    if BINARY:
        model.add(Dense(1, activation='sigmoid', name="outputlayer"))
        model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['binary_accuracy'])
    else:
        model.add(Dense(output_dim, activation='softmax', name="outputlayer"))
        model.compile(loss='categorical_crossentropy', optimizer="rmsprop", metrics=['accuracy'])
    model.summary()
    return model

In [610]:
def train_model(model, X_train, y_train, X_val, y_val, epochs=epochs):
    # stop learning if the error is the same between two consecutive epochs
    early_stop = EarlyStopping(patience=20, verbose=2)
    
    # saving best model
    best_model_cp = ModelCheckpoint(best_model_file, save_best_only=True, verbose=1)
    
    model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=epochs, verbose=0, shuffle=shuffle, callbacks=[best_model_cp, early_stop])
    
    #loading and returning the best model
    return keras.models.load_model(best_model_file)

In [611]:
def predict(model, X, y, binary=BINARY):
    if BINARY:
        y_pred = np.round(model.predict(X))
        y_true = y
    else:
        predictions = model.predict(X)
        y_pred = np.array([np.argmax(p) for p in predictions])
        y_true = np.array([np.argmax(t) for t in y ])
    return y_true, y_pred

In [612]:
def model_performance(y_true, y_pred):
    return P_R_F1(y_pred, y_true, tag2int['O']) #precision, recall, f1-score

In [613]:
def model_performace_by_tag(y_true, y_pred, tag):
    p, r, f1 = 0, 0, 0
    
    eq = y_pred[y_pred==y_true]
    correctly_pred = eq[eq==tag].size
    try:
        p = np.round(100 * correctly_pred / y_pred[y_pred==tag].size, 2)
    except ZeroDivisionError:
        pass
    
    try:
        r = np.round(100 * correctly_pred / y_true[y_true==tag].size, 2)
    except ZeroDivisionError:
        pass
    
    try:
        f1 = np.round(2 * r * p / (r + p), 2)
    except ZeroDivisionError:
        pass
    
    return p, r, f1

In [614]:
def algoEval(X_train, y_train, X_val, y_val, X_ewo, y_ewo, epochs=epochs, model=None):
    """
    Train a model on (X, y) and validate on (X_val, y_val) then project on (X_ewo)
    """
    test_precision, train_precision, ewo_precision = [], [], []
    test_recall, train_recall, ewo_recall = [], [], []
    test_fscore, train_fscore, ewo_fscore = [], [], []
    
    test_result_by_tag = {}
    train_result_by_tag = {}
    ewo_result_by_tag = {}
    for t in tagSet:
        f1_key = "F1-"+t
        p_key = "P-"+t
        r_key = "R-"+t
        train_result_by_tag[f1_key], train_result_by_tag[p_key], train_result_by_tag[r_key] = [], [], []
        test_result_by_tag[f1_key], test_result_by_tag[p_key], test_result_by_tag[r_key] = [], [], []
        ewo_result_by_tag[f1_key], ewo_result_by_tag[p_key], ewo_result_by_tag[r_key] = [], [], []

    m = train_model(model, X_train, y_train, X_val, y_val, epochs=epochs)
        
    y_true, y_pred = predict(m, X_train, y_train)
    p_train, r_train, f1_train = model_performance(y_true, y_pred)
        
    y_true_val, y_pred_val = predict(m, X_val, y_val)
    p_val, r_val, f1_val = model_performance(y_true_val, y_pred_val)
        
    y_true_ewo, y_pred_ewo = predict(m, X_ewo, y_ewo) 
    p_ewo, r_ewo, f1_ewo = model_performance(y_true_ewo, y_pred_ewo)
        
    for t in range(len(int2tag)):
        f1_key = "F1-" + int2tag[t]
        p_key = "P-" + int2tag[t]
        r_key = "R-" + int2tag[t]
            
        p, r, f1 = model_performace_by_tag(y_true, y_pred, t)
        train_result_by_tag[p_key].append(p)
        train_result_by_tag[r_key].append(r)
        train_result_by_tag[f1_key].append(f1)
            
        p, r, f1 = model_performace_by_tag(y_true_val, y_pred_val, t)
        test_result_by_tag[p_key].append(p)
        test_result_by_tag[r_key].append(r)
        test_result_by_tag[f1_key].append(f1)
            
        p, r, f1 = model_performace_by_tag(y_true_ewo, y_pred_ewo, t)
        ewo_result_by_tag[p_key].append(p)
        ewo_result_by_tag[r_key].append(r)
        ewo_result_by_tag[f1_key].append(f1)
                
    test_precision.append(p_val)
    train_precision.append(p_train)
    ewo_precision.append(p_ewo)
        
    test_recall.append(r_val)
    train_recall.append(r_train)
    ewo_recall.append(r_ewo)
        
    test_fscore.append(f1_val)
    train_fscore.append(f1_train)
    ewo_fscore.append(f1_ewo)
    return pd.DataFrame({
        'P_val': test_precision, 
        'P_train': train_precision, 
        'P_ewo': ewo_precision, 'R_val': test_recall, 'R_train': train_recall, 
        'R_ewo': ewo_recall, 'F1-val': test_fscore, 'F1-train': train_fscore, 'F1-ewo': ewo_fscore}), pd.DataFrame(train_result_by_tag), pd.DataFrame(test_result_by_tag), pd.DataFrame(ewo_result_by_tag)

In [615]:
def algoCrossVal(X, y, X_ewo, y_ewo, k = 10, repeat=1): 
    """
    Traing a model with k-fold cross validation
    We train the model `repeat` times to check it's stability
    """
    block_size = int(X.shape[0] / k)   
    output = None
    model = None
    train_by_tags, test_by_tags, ewo_by_tags = None, None, None
    for it in range(repeat):
        print("AlgoCrossValIter -", it+1)
        model = create_model(X.shape[1], len(tagSet))
        results = None
        train_by_tagsTmp, test_by_tagsTmp, ewo_by_tagsTmp = None, None, None
        for i in range(k):
            X_val, y_val = X[i*block_size:i*block_size+block_size], y[i*block_size:i*block_size+block_size]
            X_train = np.concatenate((X[0:i*block_size], X[i*block_size+block_size:]))
            y_train = np.concatenate((y[0:i*block_size], y[i*block_size+block_size:]))

            X_train = X_train.reshape(X_train.shape[0], X_train.shape[1])
            X_val = X_val.reshape(X_val.shape[0], X_val.shape[1])

            result, train_by_tag, test_by_tag, ewo_by_tag = algoEval(X_train, y_train, X_val, y_val, X_ewo, y_ewo, model=model)
            if results is None:
                results = result.copy()
                train_by_tagsTmp, test_by_tagsTmp, ewo_by_tagsTmp = train_by_tag.copy(), test_by_tag.copy(), ewo_by_tag.copy()
            else:
                results = pd.concat([results, result], ignore_index=True)
                train_by_tagsTmp = pd.concat([train_by_tagsTmp, train_by_tag], ignore_index=True)
                test_by_tagsTmp = pd.concat([test_by_tagsTmp, test_by_tag], ignore_index=True)
                ewo_by_tagsTmp = pd.concat([ewo_by_tagsTmp, ewo_by_tag], ignore_index=True)
        
        if output is None:
            output = results.mean(axis=0).to_frame()
            train_by_tags = train_by_tagsTmp.mean(axis=0).to_frame()
            test_by_tags = test_by_tagsTmp.mean(axis=0).to_frame()
            ewo_by_tags = ewo_by_tagsTmp.mean(axis=0).to_frame()
        else:
            output = pd.concat([output, results.mean(axis=0).to_frame()], axis=1)
            train_by_tags = pd.concat([train_by_tags, train_by_tagsTmp.mean(axis=0).to_frame()], axis=1)
            test_by_tags = pd.concat([test_by_tags, test_by_tagsTmp.mean(axis=0).to_frame()], axis=1)
            ewo_by_tags = pd.concat([ewo_by_tags, ewo_by_tagsTmp.mean(axis=0).to_frame()], axis=1)

    return output, train_by_tags, test_by_tags, ewo_by_tags, model

In [616]:
en_corpus, en_nb_of_phrases = load_corpus(en_corpus_file, max_nb_of_phrases)

In [617]:
ewo_corpus, ewo_nb_of_phrases = load_corpus(ewo_corpus_file, max_nb_of_phrases)

In [618]:
en_nb_word = en_corpus[en_corpus.word != "\n"].word.size
ewo_nb_word = ewo_corpus[ewo_corpus.word != "\n"].word.size
corpus_nb_word = en_nb_word + ewo_nb_word
print("Nb word in ewondo", ewo_nb_word)
print("Nb word in english", en_nb_word)
print("Nb word in corpus", corpus_nb_word)

Nb word in ewondo 3570
Nb word in english 4170
Nb word in corpus 7740


In [619]:
en_corpus.head()
en_corpus.loc[en_corpus['ne-tag'] == 'ORG']

Unnamed: 0,word,ne-tag
1335,Sadducees,ORG


In [620]:
tagSet = en_corpus["ne-tag"].dropna().unique()
if BINARY:
    tagSet = ['NE', 'O']
tag2int = {j: i for i, j in enumerate(tagSet)}
int2tag = {i: j for i, j in enumerate(tagSet)}
print(tag2int)

{'O': 0, 'MISC': 1, 'PER': 2, 'LOC': 3, 'ORG': 4}


In [621]:
en_nb_of_phrases

210

In [622]:
en_corpus.describe()

Unnamed: 0,word,ne-tag
count,4379,4170
unique,904,5
top,the,O
freq,313,3779


In [623]:
en_corpus.head(10)

Unnamed: 0,word,ne-tag
0,The,O
1,Promise,O
2,of,O
3,the,O
4,Holy,MISC
5,Spirit,MISC
6,\n,
7,In,O
8,the,O
9,first,O


In [624]:
for tag in tagSet:
    print("{0} % = {1} %".format(tag, np.round(en_corpus[en_corpus['ne-tag']==tag].shape[0] * 100 / en_corpus[en_corpus['ne-tag']!='\n'].shape[0], 2)))

O % = 86.3 %
MISC % = 2.4 %
PER % = 5.59 %
LOC % = 0.91 %
ORG % = 0.02 %


In [625]:
for tag in tagSet:
    print("{0} % = {1} %".format(tag, np.round(en_corpus[en_corpus['ne-tag']==tag].word.unique().shape[0] * 100 / en_corpus[en_corpus['ne-tag']!='\n'].word.unique().shape[0], 2)))

O % = 89.16 %
MISC % = 1.88 %
PER % = 8.96 %
LOC % = 1.99 %
ORG % = 0.11 %


In [626]:
en_corpus[en_corpus.word == "\n"].shape

(209, 2)

In [627]:
print("Nb of bi-phrases", en_nb_of_phrases)

Nb of bi-phrases 210


In [628]:
en_fingerprints = corpus_fingerprint(en_corpus, en_nb_of_phrases, corpus_nb_word)

Normalization strategy: None


In [629]:
en_fingerprints.head(10)

Unnamed: 0,The,Promise,of,the,Holy,Spirit,In,first,book,O,...,considered,dream,She,save,fulfill,Immanuel,us),woke,sleep,knew
0,0.53876,0.53876,0.53876,0.53876,0.53876,0.53876,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.53876,0.0,0.0,0.53876,0.53876,0.53876,0.53876,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.53876,0.53876,0.53876,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.53876,0.53876,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.53876,0.53876,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.53876,0.53876,0.53876,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.53876,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,0.53876,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,0.0,0.0,0.53876,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,0.53876,0.53876,0.53876,0.53876,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [630]:
(4170 / en_nb_word)

1.0

In [631]:
en_corpus.shape

(4379, 2)

In [632]:
en_fingerprints['you'].values.shape

(210,)

In [633]:
en_corpus[en_corpus.word != "\n"].shape

(4170, 2)

In [634]:
if is_only_vocab:
    text = list(en_corpus[en_corpus.word != "\n"].word.unique())
else:
    text = list(en_corpus[en_corpus.word != "\n"].word)
en_vocab = pd.DataFrame({'text': text})
en_vocab.describe()

Unnamed: 0,text
count,903
unique,903
top,has
freq,1


In [635]:
if is_only_vocab:
    X = np.zeros((en_vocab.shape[0] * duplication, en_nb_of_phrases))
    target = np.zeros((en_vocab.shape[0] * duplication))
    p=0
    for i, row in en_vocab.iterrows():
        c = row.text
        for j in range(duplication):
            X[p] = en_fingerprints[c.split(" ")[0]]
            target[p] = tag2int[getTag(en_corpus[en_corpus.word == c.split(" ")[-1:][0]]['ne-tag'].iloc[0])]
            p+=1
    X, target = shuffle_data(X, target)
    print(X.shape, en_fingerprints.shape, target.shape)

(903, 210) (210, 903) (903,)


In [636]:
en_vocab[-20:]

Unnamed: 0,text
883,Eliud
884,Eleazar
885,Matthan
886,husband
887,fourteen
888,unwilling
889,shame
890,resolved
891,divorce
892,quietly


In [637]:
if not is_only_vocab:
    X, target = corpus2trainingdata(en_corpus[en_corpus.word != "\n"], en_fingerprints)

In [638]:
if shuffle:
    X, target = shuffle_data(X, target)

In [639]:
y = target.copy()
y[0:100]
if not BINARY:
    y = np_utils.to_categorical(y, len(tagSet))
y.shape

(903, 5)

In [640]:
from sklearn.decomposition import PCA

def visualize(X, y):
    pca = PCA(n_components=2)
    X_embeded = pca.fit_transform(X)
    plt.figure(figsize=(5, 5))
    plt.scatter(X_embeded[:, 0], X_embeded[:, 1], c=y)
    plt.legend()
    plt.show()

In [641]:
# visualize(X, target)

In [642]:
X_train, X_val, y_train, y_val = model_selection.train_test_split(X, y, test_size=0.33)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1])
X_val = X_val.reshape(X_val.shape[0], X_val.shape[1])
print("X_train.shape =", X_train.shape)
print("y_train.shape =", y_train.shape)
print("X_val.shape =", X_val.shape)
print("y_val.shape =", y_val.shape)

tTarget = np.array([np.argmax(yy) for yy in y_train])
vTarget = np.array([np.argmax(yy) for yy in y_val])

for tag in tagSet:
    print("{0} % in training data = {1} %".format(tag, np.round(tTarget[tTarget==tag2int[tag]].size * 100 / tTarget.shape[0], 2)))
    print("{0} % in validation data = {1} %".format(tag, np.round(vTarget[vTarget==tag2int[tag]].size * 100 / vTarget.shape[0], 2)))

X_train.shape = (605, 210)
y_train.shape = (605, 5)
X_val.shape = (298, 210)
y_val.shape = (298, 5)
O % in training data = 88.26 %
O % in validation data = 88.26 %
MISC % in training data = 0.99 %
MISC % in validation data = 1.68 %
PER % in training data = 8.6 %
PER % in validation data = 8.39 %
LOC % in training data = 1.98 %
LOC % in validation data = 1.68 %
ORG % in training data = 0.17 %
ORG % in validation data = 0.0 %


In [643]:
ewo_corpus.loc[ewo_corpus['ne-tag'] == 'PER']

Unnamed: 0,word,ne-tag
6,Teofil,PER
15,Yesus,PER
86,Yohannes,PER
104,Yesus,PER
230,Yesus,PER
...,...,...
3676,Maria,PER
3697,Yesus,PER
3740,Emmanuel,PER
3750,Yosef,PER


In [644]:
ewo_nb_of_phrases

210

In [645]:
for tag in tagSet:
    print("{0} % = {1} %".format(tag, np.round(ewo_corpus[ewo_corpus['ne-tag']==tag].shape[0] * 100 / ewo_corpus[ewo_corpus['ne-tag']!='\n'].shape[0], 2)))

O % = 84.15 %
MISC % = 2.54 %
PER % = 6.69 %
LOC % = 1.03 %
ORG % = 0.05 %


In [646]:
for tag in tagSet:
    print("{0} % = {1} %".format(tag, np.round(ewo_corpus[ewo_corpus['ne-tag']==tag].word.unique().shape[0] * 100 / ewo_corpus[ewo_corpus['ne-tag']!='\n'].word.unique().shape[0], 2)))

O % = 89.94 %
MISC % = 1.17 %
PER % = 8.3 %
LOC % = 1.86 %
ORG % = 0.2 %


In [647]:
ewo_corpus.describe()

Unnamed: 0,word,ne-tag
count,3779,3570
unique,1024,5
top,\n,O
freq,209,3180


In [648]:
ewo_corpus.head()

Unnamed: 0,word,ne-tag
0,Mfufub,MISC
1,Nsisim,MISC
2,ayi,O
3,sò,O
4,\n,


In [649]:
ewo_fingerprints = corpus_fingerprint(ewo_corpus, en_nb_of_phrases, corpus_nb_word)

Normalization strategy: None


In [650]:
if is_only_vocab:
    text = list(ewo_corpus[ewo_corpus.word != "\n"].word.unique())
else:
    text = list(ewo_corpus[ewo_corpus.word != "\n"].word)
ewo_vocab = pd.DataFrame({"text":text})

In [651]:
if is_only_vocab:
    X_ewo = np.zeros((ewo_vocab.shape[0] * duplication, en_nb_of_phrases))
    ewo_target = np.zeros((ewo_vocab.shape[0] * duplication))
    p=0
    for i, row in ewo_vocab.iterrows():
        c = row.text
        for j in range(duplication):
            X_ewo[p] = ewo_fingerprints[c.split(" ")[0]]
            ewo_target[p] = tag2int[getTag(ewo_corpus[ewo_corpus.word == c.split(" ")[-1:][0]]['ne-tag'].iloc[0])]
            p+=1
    X_ewo, ewo_target = shuffle_data(X_ewo, ewo_target)

In [652]:
ewo_vocab[-10:]

Unnamed: 0,text
1013,sik
1014,Ntud
1015,bëyole
1016,Emmanuel
1017,Avëbë
1018,angavëbë
1019,oyò
1020,angabende
1021,anganòṅ
1022,angayole


In [653]:
if not is_only_vocab:
    X_ewo, ewo_target = corpus2trainingdata(ewo_corpus[ewo_corpus.word != "\n"], ewo_fingerprints)

In [654]:
if shuffle:
    X_ewo, ewo_target = shuffle_data(X_ewo, ewo_target)

In [655]:
y_ewo = ewo_target.copy()
print(y_ewo.shape, len(ewo_vocab))

(1023,) 1023


In [656]:
X_ewo.shape

(1023, 210)

In [657]:
y_ewo = ewo_target.copy()
y_ewo[:20]
if not BINARY:
    y_ewo = np_utils.to_categorical(y_ewo)

In [658]:
X_ewo = X_ewo.reshape((X_ewo.shape[0], en_nb_of_phrases))

In [659]:
# model = create_model(X.shape[1], len(tagSet))
# resultEval, train_by_tag, test_by_tag, ewo_by_tag = algoEval(X_train, y_train, X_val, y_val, X_ewo, y_ewo, model=model)

In [660]:
# resultEval

In [661]:
# train_by_tag

In [662]:
# test_by_tag

In [663]:
# ewo_by_tag

In [664]:
# resultEval.mean()

In [665]:
# resultEval.std()

In [666]:
resultCrossVal, trainByTagResult, testByTagResult, ewoByTagResult, model = algoCrossVal(X, y, X_ewo, y_ewo, repeat=10)

AlgoCrossValIter - 1
Model: "sequential_71"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.55725, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.55725 to 0.51825, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.51825

Epoch 00004: val_loss did not improve from 0.51825

Epoch 00005: val_loss did not improve from 0.51825

Epoch 00006: 




Epoch 00002: val_loss did not improve from 0.58479

Epoch 00003: val_loss improved from 0.58479 to 0.57928, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.57928

Epoch 00005: val_loss did not improve from 0.57928

Epoch 00006: val_loss did not improve from 0.57928

Epoch 00007: val_loss did not improve from 0.57928

Epoch 00008: val_loss did not improve from 0.57928

Epoch 00009: val_loss did not improve from 0.57928

Epoch 00010: val_loss did not improve from 0.57928




number of correct positive predictions is 0

Epoch 00001: val_loss improved from inf to 0.37485, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.37485

Epoch 00003: val_loss improved from 0.37485 to 0.36969, saving model to best-model-conll.hdfs

Epoch 00004: val_loss improved from 0.36969 to 0.36004, saving model to best-model-conll.hdfs

Epoch 00005: val_loss did not improve from 0.36004

Epoch 00006: val_loss did not improve from 0.36004

Epoch 00007: val_loss did not improve from 0.36004

Epoch 00008: val_loss did not improve from 0.36004

Epoch 00009: val_loss did not improve from 0.36004

Epoch 00010: val_loss did not improve from 0.36004





Epoch 00001: val_loss improved from inf to 0.18404, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.18404

Epoch 00003: val_loss did not improve from 0.18404

Epoch 00004: val_loss did not improve from 0.18404

Epoch 00005: val_loss did not improve from 0.18404

Epoch 00006: val_loss improved from 0.18404 to 0.17170, saving model to best-model-conll.hdfs

Epoch 00007: val_loss did not improve from 0.17170

Epoch 00008: val_loss improved from 0.17170 to 0.16546, saving model to best-model-conll.hdfs

Epoch 00009: val_loss improved from 0.16546 to 0.16269, saving model to best-model-conll.hdfs

Epoch 00010: val_loss did not improve from 0.16269





Epoch 00001: val_loss improved from inf to 0.16998, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.16998

Epoch 00003: val_loss did not improve from 0.16998

Epoch 00004: val_loss did not improve from 0.16998

Epoch 00005: val_loss did not improve from 0.16998

Epoch 00006: val_loss did not improve from 0.16998

Epoch 00007: val_loss did not improve from 0.16998

Epoch 00008: val_loss did not improve from 0.16998

Epoch 00009: val_loss did not improve from 0.16998

Epoch 00010: val_loss did not improve from 0.16998





Epoch 00001: val_loss improved from inf to 0.32297, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.32297 to 0.29047, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.29047 to 0.24367, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.24367

Epoch 00005: val_loss did not improve from 0.24367

Epoch 00006: val_loss did not improve from 0.24367

Epoch 00007: val_loss did not improve from 0.24367

Epoch 00008: val_loss did not improve from 0.24367

Epoch 00009: val_loss did not improve from 0.24367

Epoch 00010: val_loss did not improve from 0.24367





Epoch 00001: val_loss improved from inf to 0.28447, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.28447 to 0.17840, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.17840

Epoch 00004: val_loss did not improve from 0.17840

Epoch 00005: val_loss did not improve from 0.17840

Epoch 00006: val_loss improved from 0.17840 to 0.16694, saving model to best-model-conll.hdfs

Epoch 00007: val_loss improved from 0.16694 to 0.14086, saving model to best-model-conll.hdfs

Epoch 00008: val_loss did not improve from 0.14086

Epoch 00009: val_loss did not improve from 0.14086

Epoch 00010: val_loss did not improve from 0.14086





Epoch 00001: val_loss improved from inf to 0.11755, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.11755

Epoch 00003: val_loss did not improve from 0.11755

Epoch 00004: val_loss did not improve from 0.11755

Epoch 00005: val_loss did not improve from 0.11755

Epoch 00006: val_loss did not improve from 0.11755

Epoch 00007: val_loss did not improve from 0.11755

Epoch 00008: val_loss did not improve from 0.11755

Epoch 00009: val_loss did not improve from 0.11755

Epoch 00010: val_loss did not improve from 0.11755





Epoch 00001: val_loss improved from inf to 0.07315, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07315

Epoch 00003: val_loss did not improve from 0.07315

Epoch 00004: val_loss did not improve from 0.07315

Epoch 00005: val_loss improved from 0.07315 to 0.07218, saving model to best-model-conll.hdfs

Epoch 00006: val_loss did not improve from 0.07218

Epoch 00007: val_loss improved from 0.07218 to 0.06953, saving model to best-model-conll.hdfs

Epoch 00008: val_loss did not improve from 0.06953

Epoch 00009: val_loss did not improve from 0.06953

Epoch 00010: val_loss did not improve from 0.06953





Epoch 00001: val_loss improved from inf to 0.09974, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.09974 to 0.09879, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.09879 to 0.08456, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.08456

Epoch 00005: val_loss did not improve from 0.08456

Epoch 00006: val_loss did not improve from 0.08456

Epoch 00007: val_loss did not improve from 0.08456

Epoch 00008: val_loss did not improve from 0.08456

Epoch 00009: val_loss did not improve from 0.08456

Epoch 00010: val_loss did not improve from 0.08456




AlgoCrossValIter - 2
Model: "sequential_72"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.55876, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.55876 to 0.55291, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.55291 to 0.54784, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.54784

Epoch 00005: val_loss 




Epoch 00002: val_loss did not improve from 0.68257

Epoch 00003: val_loss did not improve from 0.68257

Epoch 00004: val_loss did not improve from 0.68257

Epoch 00005: val_loss did not improve from 0.68257

Epoch 00006: val_loss improved from 0.68257 to 0.64419, saving model to best-model-conll.hdfs

Epoch 00007: val_loss did not improve from 0.64419

Epoch 00008: val_loss improved from 0.64419 to 0.60650, saving model to best-model-conll.hdfs

Epoch 00009: val_loss did not improve from 0.60650

Epoch 00010: val_loss did not improve from 0.60650
number of correct positive predictions is 0

Epoch 00001: val_loss improved from inf to 0.43985, saving model to best-model-conll.hdfs





Epoch 00002: val_loss did not improve from 0.43985

Epoch 00003: val_loss did not improve from 0.43985

Epoch 00004: val_loss improved from 0.43985 to 0.40700, saving model to best-model-conll.hdfs

Epoch 00005: val_loss did not improve from 0.40700

Epoch 00006: val_loss improved from 0.40700 to 0.37605, saving model to best-model-conll.hdfs

Epoch 00007: val_loss did not improve from 0.37605

Epoch 00008: val_loss improved from 0.37605 to 0.34122, saving model to best-model-conll.hdfs

Epoch 00009: val_loss did not improve from 0.34122

Epoch 00010: val_loss did not improve from 0.34122





Epoch 00001: val_loss improved from inf to 0.18726, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.18726

Epoch 00003: val_loss did not improve from 0.18726

Epoch 00004: val_loss did not improve from 0.18726

Epoch 00005: val_loss did not improve from 0.18726

Epoch 00006: val_loss improved from 0.18726 to 0.15215, saving model to best-model-conll.hdfs

Epoch 00007: val_loss did not improve from 0.15215

Epoch 00008: val_loss did not improve from 0.15215

Epoch 00009: val_loss did not improve from 0.15215

Epoch 00010: val_loss did not improve from 0.15215





Epoch 00001: val_loss improved from inf to 0.22910, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.22910 to 0.18762, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.18762

Epoch 00004: val_loss did not improve from 0.18762

Epoch 00005: val_loss did not improve from 0.18762

Epoch 00006: val_loss improved from 0.18762 to 0.17020, saving model to best-model-conll.hdfs

Epoch 00007: val_loss did not improve from 0.17020

Epoch 00008: val_loss did not improve from 0.17020

Epoch 00009: val_loss did not improve from 0.17020

Epoch 00010: val_loss improved from 0.17020 to 0.16342, saving model to best-model-conll.hdfs





Epoch 00001: val_loss improved from inf to 0.24574, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.24574

Epoch 00003: val_loss did not improve from 0.24574

Epoch 00004: val_loss did not improve from 0.24574

Epoch 00005: val_loss did not improve from 0.24574

Epoch 00006: val_loss did not improve from 0.24574

Epoch 00007: val_loss did not improve from 0.24574

Epoch 00008: val_loss did not improve from 0.24574

Epoch 00009: val_loss did not improve from 0.24574

Epoch 00010: val_loss did not improve from 0.24574





Epoch 00001: val_loss improved from inf to 0.11159, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.11159

Epoch 00003: val_loss did not improve from 0.11159

Epoch 00004: val_loss did not improve from 0.11159

Epoch 00005: val_loss did not improve from 0.11159

Epoch 00006: val_loss did not improve from 0.11159

Epoch 00007: val_loss did not improve from 0.11159

Epoch 00008: val_loss did not improve from 0.11159

Epoch 00009: val_loss did not improve from 0.11159

Epoch 00010: val_loss did not improve from 0.11159





Epoch 00001: val_loss improved from inf to 0.15312, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.15312

Epoch 00003: val_loss improved from 0.15312 to 0.11999, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.11999

Epoch 00005: val_loss did not improve from 0.11999

Epoch 00006: val_loss did not improve from 0.11999

Epoch 00007: val_loss did not improve from 0.11999

Epoch 00008: val_loss did not improve from 0.11999

Epoch 00009: val_loss did not improve from 0.11999

Epoch 00010: val_loss did not improve from 0.11999





Epoch 00001: val_loss improved from inf to 0.05930, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.05930

Epoch 00003: val_loss improved from 0.05930 to 0.05768, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.05768

Epoch 00005: val_loss did not improve from 0.05768

Epoch 00006: val_loss did not improve from 0.05768

Epoch 00007: val_loss did not improve from 0.05768

Epoch 00008: val_loss did not improve from 0.05768

Epoch 00009: val_loss did not improve from 0.05768

Epoch 00010: val_loss did not improve from 0.05768





Epoch 00001: val_loss improved from inf to 0.14856, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.14856 to 0.12581, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.12581 to 0.10690, saving model to best-model-conll.hdfs

Epoch 00004: val_loss improved from 0.10690 to 0.09896, saving model to best-model-conll.hdfs

Epoch 00005: val_loss did not improve from 0.09896

Epoch 00006: val_loss improved from 0.09896 to 0.09004, saving model to best-model-conll.hdfs

Epoch 00007: val_loss did not improve from 0.09004

Epoch 00008: val_loss did not improve from 0.09004

Epoch 00009: val_loss did not improve from 0.09004

Epoch 00010: val_loss did not improve from 0.09004




AlgoCrossValIter - 3
Model: "sequential_73"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.55092, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.55092 to 0.52806, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.52806

Epoch 00004: val_loss did not improve from 0.52806

Epoch 00005: val_loss did not improve from 0.52806

Epoch 00006: 




Epoch 00002: val_loss improved from 0.67339 to 0.58594, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.58594

Epoch 00004: val_loss did not improve from 0.58594

Epoch 00005: val_loss did not improve from 0.58594

Epoch 00006: val_loss did not improve from 0.58594

Epoch 00007: val_loss did not improve from 0.58594

Epoch 00008: val_loss did not improve from 0.58594

Epoch 00009: val_loss did not improve from 0.58594

Epoch 00010: val_loss did not improve from 0.58594




number of correct positive predictions is 0

Epoch 00001: val_loss improved from inf to 0.62027, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.62027 to 0.48218, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.48218 to 0.37553, saving model to best-model-conll.hdfs

Epoch 00004: val_loss improved from 0.37553 to 0.36849, saving model to best-model-conll.hdfs

Epoch 00005: val_loss did not improve from 0.36849

Epoch 00006: val_loss did not improve from 0.36849

Epoch 00007: val_loss improved from 0.36849 to 0.34435, saving model to best-model-conll.hdfs

Epoch 00008: val_loss did not improve from 0.34435

Epoch 00009: val_loss did not improve from 0.34435

Epoch 00010: val_loss did not improve from 0.34435





Epoch 00001: val_loss improved from inf to 0.15665, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.15665

Epoch 00003: val_loss did not improve from 0.15665

Epoch 00004: val_loss improved from 0.15665 to 0.15352, saving model to best-model-conll.hdfs

Epoch 00005: val_loss did not improve from 0.15352

Epoch 00006: val_loss did not improve from 0.15352

Epoch 00007: val_loss did not improve from 0.15352

Epoch 00008: val_loss did not improve from 0.15352

Epoch 00009: val_loss did not improve from 0.15352

Epoch 00010: val_loss did not improve from 0.15352





Epoch 00001: val_loss improved from inf to 0.21948, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.21948

Epoch 00003: val_loss improved from 0.21948 to 0.17805, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.17805

Epoch 00005: val_loss improved from 0.17805 to 0.17002, saving model to best-model-conll.hdfs

Epoch 00006: val_loss improved from 0.17002 to 0.15851, saving model to best-model-conll.hdfs

Epoch 00007: val_loss did not improve from 0.15851

Epoch 00008: val_loss did not improve from 0.15851

Epoch 00009: val_loss did not improve from 0.15851

Epoch 00010: val_loss did not improve from 0.15851





Epoch 00001: val_loss improved from inf to 0.22747, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.22747

Epoch 00003: val_loss did not improve from 0.22747

Epoch 00004: val_loss did not improve from 0.22747

Epoch 00005: val_loss did not improve from 0.22747

Epoch 00006: val_loss did not improve from 0.22747

Epoch 00007: val_loss did not improve from 0.22747

Epoch 00008: val_loss did not improve from 0.22747

Epoch 00009: val_loss did not improve from 0.22747

Epoch 00010: val_loss did not improve from 0.22747





Epoch 00001: val_loss improved from inf to 0.14534, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.14534

Epoch 00003: val_loss improved from 0.14534 to 0.14232, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.14232

Epoch 00005: val_loss did not improve from 0.14232

Epoch 00006: val_loss did not improve from 0.14232

Epoch 00007: val_loss did not improve from 0.14232

Epoch 00008: val_loss did not improve from 0.14232

Epoch 00009: val_loss did not improve from 0.14232

Epoch 00010: val_loss did not improve from 0.14232





Epoch 00001: val_loss improved from inf to 0.21503, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.21503 to 0.11865, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.11865

Epoch 00004: val_loss did not improve from 0.11865

Epoch 00005: val_loss did not improve from 0.11865

Epoch 00006: val_loss did not improve from 0.11865

Epoch 00007: val_loss did not improve from 0.11865

Epoch 00008: val_loss did not improve from 0.11865

Epoch 00009: val_loss did not improve from 0.11865

Epoch 00010: val_loss did not improve from 0.11865





Epoch 00001: val_loss improved from inf to 0.06166, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.06166 to 0.05600, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.05600

Epoch 00004: val_loss did not improve from 0.05600

Epoch 00005: val_loss did not improve from 0.05600

Epoch 00006: val_loss did not improve from 0.05600

Epoch 00007: val_loss did not improve from 0.05600

Epoch 00008: val_loss did not improve from 0.05600

Epoch 00009: val_loss did not improve from 0.05600

Epoch 00010: val_loss did not improve from 0.05600





Epoch 00001: val_loss improved from inf to 0.08799, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08799

Epoch 00003: val_loss did not improve from 0.08799

Epoch 00004: val_loss did not improve from 0.08799

Epoch 00005: val_loss did not improve from 0.08799

Epoch 00006: val_loss did not improve from 0.08799

Epoch 00007: val_loss did not improve from 0.08799

Epoch 00008: val_loss did not improve from 0.08799

Epoch 00009: val_loss did not improve from 0.08799

Epoch 00010: val_loss did not improve from 0.08799




AlgoCrossValIter - 4
Model: "sequential_74"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.54845, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.54845 to 0.50511, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.50511

Epoch 00004: val_loss did not improve from 0.50511

Epoch 00005: val_loss did not improve from 0.50511

Epoch 00006: 




Epoch 00002: val_loss improved from 0.60519 to 0.59287, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.59287

Epoch 00004: val_loss did not improve from 0.59287

Epoch 00005: val_loss improved from 0.59287 to 0.57568, saving model to best-model-conll.hdfs

Epoch 00006: val_loss did not improve from 0.57568

Epoch 00007: val_loss did not improve from 0.57568

Epoch 00008: val_loss did not improve from 0.57568

Epoch 00009: val_loss did not improve from 0.57568

Epoch 00010: val_loss did not improve from 0.57568




number of correct positive predictions is 0

Epoch 00001: val_loss improved from inf to 0.40233, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.40233

Epoch 00003: val_loss did not improve from 0.40233

Epoch 00004: val_loss improved from 0.40233 to 0.36389, saving model to best-model-conll.hdfs

Epoch 00005: val_loss did not improve from 0.36389

Epoch 00006: val_loss did not improve from 0.36389

Epoch 00007: val_loss did not improve from 0.36389

Epoch 00008: val_loss improved from 0.36389 to 0.35773, saving model to best-model-conll.hdfs

Epoch 00009: val_loss did not improve from 0.35773

Epoch 00010: val_loss did not improve from 0.35773





Epoch 00001: val_loss improved from inf to 0.20176, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.20176

Epoch 00003: val_loss improved from 0.20176 to 0.17034, saving model to best-model-conll.hdfs

Epoch 00004: val_loss improved from 0.17034 to 0.16706, saving model to best-model-conll.hdfs

Epoch 00005: val_loss did not improve from 0.16706

Epoch 00006: val_loss did not improve from 0.16706

Epoch 00007: val_loss improved from 0.16706 to 0.15773, saving model to best-model-conll.hdfs

Epoch 00008: val_loss improved from 0.15773 to 0.15364, saving model to best-model-conll.hdfs

Epoch 00009: val_loss did not improve from 0.15364

Epoch 00010: val_loss did not improve from 0.15364





Epoch 00001: val_loss improved from inf to 0.16812, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.16812

Epoch 00003: val_loss did not improve from 0.16812

Epoch 00004: val_loss did not improve from 0.16812

Epoch 00005: val_loss did not improve from 0.16812

Epoch 00006: val_loss improved from 0.16812 to 0.16738, saving model to best-model-conll.hdfs

Epoch 00007: val_loss did not improve from 0.16738

Epoch 00008: val_loss did not improve from 0.16738

Epoch 00009: val_loss improved from 0.16738 to 0.16113, saving model to best-model-conll.hdfs

Epoch 00010: val_loss did not improve from 0.16113





Epoch 00001: val_loss improved from inf to 0.23889, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.23889

Epoch 00003: val_loss did not improve from 0.23889

Epoch 00004: val_loss did not improve from 0.23889

Epoch 00005: val_loss did not improve from 0.23889

Epoch 00006: val_loss did not improve from 0.23889

Epoch 00007: val_loss did not improve from 0.23889

Epoch 00008: val_loss did not improve from 0.23889

Epoch 00009: val_loss did not improve from 0.23889

Epoch 00010: val_loss did not improve from 0.23889





Epoch 00001: val_loss improved from inf to 0.14005, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.14005 to 0.11738, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.11738

Epoch 00004: val_loss did not improve from 0.11738

Epoch 00005: val_loss did not improve from 0.11738

Epoch 00006: val_loss did not improve from 0.11738

Epoch 00007: val_loss did not improve from 0.11738

Epoch 00008: val_loss did not improve from 0.11738

Epoch 00009: val_loss did not improve from 0.11738

Epoch 00010: val_loss did not improve from 0.11738





Epoch 00001: val_loss improved from inf to 0.12459, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.12459

Epoch 00003: val_loss did not improve from 0.12459

Epoch 00004: val_loss did not improve from 0.12459

Epoch 00005: val_loss did not improve from 0.12459

Epoch 00006: val_loss did not improve from 0.12459

Epoch 00007: val_loss did not improve from 0.12459

Epoch 00008: val_loss did not improve from 0.12459

Epoch 00009: val_loss did not improve from 0.12459

Epoch 00010: val_loss did not improve from 0.12459





Epoch 00001: val_loss improved from inf to 0.09735, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09735

Epoch 00003: val_loss improved from 0.09735 to 0.07676, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.07676

Epoch 00005: val_loss improved from 0.07676 to 0.07643, saving model to best-model-conll.hdfs

Epoch 00006: val_loss did not improve from 0.07643

Epoch 00007: val_loss did not improve from 0.07643

Epoch 00008: val_loss did not improve from 0.07643

Epoch 00009: val_loss did not improve from 0.07643

Epoch 00010: val_loss did not improve from 0.07643





Epoch 00001: val_loss improved from inf to 0.07986, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.07986

Epoch 00003: val_loss did not improve from 0.07986

Epoch 00004: val_loss did not improve from 0.07986

Epoch 00005: val_loss did not improve from 0.07986

Epoch 00006: val_loss did not improve from 0.07986

Epoch 00007: val_loss did not improve from 0.07986

Epoch 00008: val_loss did not improve from 0.07986

Epoch 00009: val_loss did not improve from 0.07986

Epoch 00010: val_loss did not improve from 0.07986




AlgoCrossValIter - 5
Model: "sequential_75"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.56228, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.56228 to 0.53980, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.53980

Epoch 00004: val_loss did not improve from 0.53980

Epoch 00005: val_loss improved from 0.53980 to 0.51970, saving mo




Epoch 00002: val_loss did not improve from 0.63026

Epoch 00003: val_loss improved from 0.63026 to 0.61718, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.61718

Epoch 00005: val_loss improved from 0.61718 to 0.61317, saving model to best-model-conll.hdfs

Epoch 00006: val_loss did not improve from 0.61317

Epoch 00007: val_loss did not improve from 0.61317

Epoch 00008: val_loss did not improve from 0.61317

Epoch 00009: val_loss improved from 0.61317 to 0.59904, saving model to best-model-conll.hdfs

Epoch 00010: val_loss did not improve from 0.59904
number of correct positive predictions is 0

Epoch 00001: val_loss improved from inf to 0.39873, saving model to best-model-conll.hdfs





Epoch 00002: val_loss did not improve from 0.39873

Epoch 00003: val_loss improved from 0.39873 to 0.37364, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.37364

Epoch 00005: val_loss did not improve from 0.37364

Epoch 00006: val_loss did not improve from 0.37364

Epoch 00007: val_loss did not improve from 0.37364

Epoch 00008: val_loss did not improve from 0.37364

Epoch 00009: val_loss improved from 0.37364 to 0.33719, saving model to best-model-conll.hdfs

Epoch 00010: val_loss did not improve from 0.33719





Epoch 00001: val_loss improved from inf to 0.21883, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.21883 to 0.16619, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.16619

Epoch 00004: val_loss improved from 0.16619 to 0.16543, saving model to best-model-conll.hdfs

Epoch 00005: val_loss improved from 0.16543 to 0.15840, saving model to best-model-conll.hdfs

Epoch 00006: val_loss improved from 0.15840 to 0.14827, saving model to best-model-conll.hdfs

Epoch 00007: val_loss did not improve from 0.14827

Epoch 00008: val_loss did not improve from 0.14827

Epoch 00009: val_loss did not improve from 0.14827

Epoch 00010: val_loss did not improve from 0.14827





Epoch 00001: val_loss improved from inf to 0.46993, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.46993 to 0.17243, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.17243

Epoch 00004: val_loss did not improve from 0.17243

Epoch 00005: val_loss improved from 0.17243 to 0.16903, saving model to best-model-conll.hdfs

Epoch 00006: val_loss did not improve from 0.16903

Epoch 00007: val_loss did not improve from 0.16903

Epoch 00008: val_loss did not improve from 0.16903

Epoch 00009: val_loss did not improve from 0.16903

Epoch 00010: val_loss improved from 0.16903 to 0.15865, saving model to best-model-conll.hdfs





Epoch 00001: val_loss improved from inf to 0.22712, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.22712 to 0.21774, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.21774

Epoch 00004: val_loss did not improve from 0.21774

Epoch 00005: val_loss did not improve from 0.21774

Epoch 00006: val_loss did not improve from 0.21774

Epoch 00007: val_loss did not improve from 0.21774

Epoch 00008: val_loss did not improve from 0.21774

Epoch 00009: val_loss did not improve from 0.21774

Epoch 00010: val_loss did not improve from 0.21774





Epoch 00001: val_loss improved from inf to 0.23690, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.23690 to 0.12695, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.12695

Epoch 00004: val_loss did not improve from 0.12695

Epoch 00005: val_loss did not improve from 0.12695

Epoch 00006: val_loss did not improve from 0.12695

Epoch 00007: val_loss did not improve from 0.12695

Epoch 00008: val_loss did not improve from 0.12695

Epoch 00009: val_loss did not improve from 0.12695

Epoch 00010: val_loss did not improve from 0.12695





Epoch 00001: val_loss improved from inf to 0.13396, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.13396

Epoch 00003: val_loss did not improve from 0.13396

Epoch 00004: val_loss did not improve from 0.13396

Epoch 00005: val_loss did not improve from 0.13396

Epoch 00006: val_loss did not improve from 0.13396

Epoch 00007: val_loss did not improve from 0.13396

Epoch 00008: val_loss did not improve from 0.13396

Epoch 00009: val_loss did not improve from 0.13396

Epoch 00010: val_loss did not improve from 0.13396





Epoch 00001: val_loss improved from inf to 0.18437, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.18437 to 0.06085, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.06085

Epoch 00004: val_loss did not improve from 0.06085

Epoch 00005: val_loss did not improve from 0.06085

Epoch 00006: val_loss did not improve from 0.06085

Epoch 00007: val_loss did not improve from 0.06085

Epoch 00008: val_loss did not improve from 0.06085

Epoch 00009: val_loss did not improve from 0.06085

Epoch 00010: val_loss did not improve from 0.06085





Epoch 00001: val_loss improved from inf to 0.13396, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.13396

Epoch 00003: val_loss did not improve from 0.13396

Epoch 00004: val_loss improved from 0.13396 to 0.09728, saving model to best-model-conll.hdfs

Epoch 00005: val_loss did not improve from 0.09728

Epoch 00006: val_loss did not improve from 0.09728

Epoch 00007: val_loss did not improve from 0.09728

Epoch 00008: val_loss did not improve from 0.09728

Epoch 00009: val_loss did not improve from 0.09728

Epoch 00010: val_loss did not improve from 0.09728




AlgoCrossValIter - 6
Model: "sequential_76"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.56172, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.56172 to 0.52491, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.52491

Epoch 00004: val_loss did not improve from 0.52491

Epoch 00005: val_loss did not improve from 0.52491

Epoch 00006: 




Epoch 00002: val_loss improved from 0.61464 to 0.59923, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.59923

Epoch 00004: val_loss improved from 0.59923 to 0.58811, saving model to best-model-conll.hdfs

Epoch 00005: val_loss did not improve from 0.58811

Epoch 00006: val_loss did not improve from 0.58811

Epoch 00007: val_loss did not improve from 0.58811

Epoch 00008: val_loss did not improve from 0.58811

Epoch 00009: val_loss did not improve from 0.58811

Epoch 00010: val_loss did not improve from 0.58811
number of correct positive predictions is 0

Epoch 00001: val_loss improved from inf to 0.45972, saving model to best-model-conll.hdfs





Epoch 00002: val_loss improved from 0.45972 to 0.39424, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.39424

Epoch 00004: val_loss did not improve from 0.39424

Epoch 00005: val_loss did not improve from 0.39424

Epoch 00006: val_loss improved from 0.39424 to 0.35862, saving model to best-model-conll.hdfs

Epoch 00007: val_loss did not improve from 0.35862

Epoch 00008: val_loss improved from 0.35862 to 0.34171, saving model to best-model-conll.hdfs

Epoch 00009: val_loss did not improve from 0.34171

Epoch 00010: val_loss did not improve from 0.34171





Epoch 00001: val_loss improved from inf to 0.22325, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.22325 to 0.16933, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.16933 to 0.16791, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.16791

Epoch 00005: val_loss did not improve from 0.16791

Epoch 00006: val_loss did not improve from 0.16791

Epoch 00007: val_loss did not improve from 0.16791

Epoch 00008: val_loss did not improve from 0.16791

Epoch 00009: val_loss did not improve from 0.16791

Epoch 00010: val_loss did not improve from 0.16791





Epoch 00001: val_loss improved from inf to 0.17238, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.17238

Epoch 00003: val_loss did not improve from 0.17238

Epoch 00004: val_loss did not improve from 0.17238

Epoch 00005: val_loss did not improve from 0.17238

Epoch 00006: val_loss did not improve from 0.17238

Epoch 00007: val_loss improved from 0.17238 to 0.16121, saving model to best-model-conll.hdfs

Epoch 00008: val_loss did not improve from 0.16121

Epoch 00009: val_loss did not improve from 0.16121

Epoch 00010: val_loss did not improve from 0.16121





Epoch 00001: val_loss improved from inf to 0.22057, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.22057

Epoch 00003: val_loss did not improve from 0.22057

Epoch 00004: val_loss did not improve from 0.22057

Epoch 00005: val_loss did not improve from 0.22057

Epoch 00006: val_loss did not improve from 0.22057

Epoch 00007: val_loss did not improve from 0.22057

Epoch 00008: val_loss did not improve from 0.22057

Epoch 00009: val_loss did not improve from 0.22057

Epoch 00010: val_loss did not improve from 0.22057





Epoch 00001: val_loss improved from inf to 0.14123, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.14123

Epoch 00003: val_loss did not improve from 0.14123

Epoch 00004: val_loss did not improve from 0.14123

Epoch 00005: val_loss did not improve from 0.14123

Epoch 00006: val_loss did not improve from 0.14123

Epoch 00007: val_loss did not improve from 0.14123

Epoch 00008: val_loss did not improve from 0.14123

Epoch 00009: val_loss did not improve from 0.14123

Epoch 00010: val_loss did not improve from 0.14123





Epoch 00001: val_loss improved from inf to 0.12461, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.12461 to 0.12216, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.12216

Epoch 00004: val_loss did not improve from 0.12216

Epoch 00005: val_loss did not improve from 0.12216

Epoch 00006: val_loss did not improve from 0.12216

Epoch 00007: val_loss did not improve from 0.12216

Epoch 00008: val_loss did not improve from 0.12216

Epoch 00009: val_loss did not improve from 0.12216

Epoch 00010: val_loss did not improve from 0.12216





Epoch 00001: val_loss improved from inf to 0.11926, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.11926 to 0.07658, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.07658

Epoch 00004: val_loss improved from 0.07658 to 0.07585, saving model to best-model-conll.hdfs

Epoch 00005: val_loss improved from 0.07585 to 0.07333, saving model to best-model-conll.hdfs

Epoch 00006: val_loss did not improve from 0.07333

Epoch 00007: val_loss did not improve from 0.07333

Epoch 00008: val_loss did not improve from 0.07333

Epoch 00009: val_loss did not improve from 0.07333

Epoch 00010: val_loss did not improve from 0.07333





Epoch 00001: val_loss improved from inf to 0.13324, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.13324 to 0.09298, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.09298

Epoch 00004: val_loss did not improve from 0.09298

Epoch 00005: val_loss did not improve from 0.09298

Epoch 00006: val_loss did not improve from 0.09298

Epoch 00007: val_loss did not improve from 0.09298

Epoch 00008: val_loss did not improve from 0.09298

Epoch 00009: val_loss did not improve from 0.09298

Epoch 00010: val_loss did not improve from 0.09298




AlgoCrossValIter - 7
Model: "sequential_77"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.58799, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.58799 to 0.53909, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.53909 to 0.52571, saving model to best-model-conll.hdfs

Epoch 00004: val_loss improved from 0.52571 to 0.52065, saving model to be




Epoch 00002: val_loss did not improve from 0.61896

Epoch 00003: val_loss did not improve from 0.61896

Epoch 00004: val_loss did not improve from 0.61896

Epoch 00005: val_loss improved from 0.61896 to 0.58464, saving model to best-model-conll.hdfs

Epoch 00006: val_loss did not improve from 0.58464

Epoch 00007: val_loss did not improve from 0.58464

Epoch 00008: val_loss did not improve from 0.58464

Epoch 00009: val_loss did not improve from 0.58464

Epoch 00010: val_loss did not improve from 0.58464
number of correct positive predictions is 0

Epoch 00001: val_loss improved from inf to 0.42317, saving model to best-model-conll.hdfs





Epoch 00002: val_loss improved from 0.42317 to 0.36397, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.36397

Epoch 00004: val_loss did not improve from 0.36397

Epoch 00005: val_loss improved from 0.36397 to 0.35930, saving model to best-model-conll.hdfs

Epoch 00006: val_loss did not improve from 0.35930

Epoch 00007: val_loss did not improve from 0.35930

Epoch 00008: val_loss did not improve from 0.35930

Epoch 00009: val_loss did not improve from 0.35930

Epoch 00010: val_loss did not improve from 0.35930





Epoch 00001: val_loss improved from inf to 0.16671, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.16671 to 0.16518, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.16518

Epoch 00004: val_loss did not improve from 0.16518

Epoch 00005: val_loss did not improve from 0.16518

Epoch 00006: val_loss improved from 0.16518 to 0.15337, saving model to best-model-conll.hdfs

Epoch 00007: val_loss did not improve from 0.15337

Epoch 00008: val_loss did not improve from 0.15337

Epoch 00009: val_loss did not improve from 0.15337

Epoch 00010: val_loss did not improve from 0.15337





Epoch 00001: val_loss improved from inf to 0.17880, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.17880

Epoch 00003: val_loss did not improve from 0.17880

Epoch 00004: val_loss did not improve from 0.17880

Epoch 00005: val_loss did not improve from 0.17880

Epoch 00006: val_loss did not improve from 0.17880

Epoch 00007: val_loss improved from 0.17880 to 0.15883, saving model to best-model-conll.hdfs

Epoch 00008: val_loss did not improve from 0.15883

Epoch 00009: val_loss did not improve from 0.15883

Epoch 00010: val_loss did not improve from 0.15883





Epoch 00001: val_loss improved from inf to 0.25748, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.25748

Epoch 00003: val_loss improved from 0.25748 to 0.24207, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.24207

Epoch 00005: val_loss did not improve from 0.24207

Epoch 00006: val_loss did not improve from 0.24207

Epoch 00007: val_loss did not improve from 0.24207

Epoch 00008: val_loss did not improve from 0.24207

Epoch 00009: val_loss did not improve from 0.24207

Epoch 00010: val_loss did not improve from 0.24207





Epoch 00001: val_loss improved from inf to 0.16067, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.16067

Epoch 00003: val_loss did not improve from 0.16067

Epoch 00004: val_loss did not improve from 0.16067

Epoch 00005: val_loss improved from 0.16067 to 0.15839, saving model to best-model-conll.hdfs

Epoch 00006: val_loss improved from 0.15839 to 0.13576, saving model to best-model-conll.hdfs

Epoch 00007: val_loss did not improve from 0.13576

Epoch 00008: val_loss did not improve from 0.13576

Epoch 00009: val_loss did not improve from 0.13576

Epoch 00010: val_loss did not improve from 0.13576





Epoch 00001: val_loss improved from inf to 0.10890, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10890

Epoch 00003: val_loss did not improve from 0.10890

Epoch 00004: val_loss did not improve from 0.10890

Epoch 00005: val_loss did not improve from 0.10890

Epoch 00006: val_loss did not improve from 0.10890

Epoch 00007: val_loss did not improve from 0.10890

Epoch 00008: val_loss did not improve from 0.10890

Epoch 00009: val_loss did not improve from 0.10890

Epoch 00010: val_loss did not improve from 0.10890





Epoch 00001: val_loss improved from inf to 0.20004, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.20004 to 0.04954, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.04954

Epoch 00004: val_loss did not improve from 0.04954

Epoch 00005: val_loss did not improve from 0.04954

Epoch 00006: val_loss did not improve from 0.04954

Epoch 00007: val_loss did not improve from 0.04954

Epoch 00008: val_loss did not improve from 0.04954

Epoch 00009: val_loss did not improve from 0.04954

Epoch 00010: val_loss did not improve from 0.04954





Epoch 00001: val_loss improved from inf to 0.13500, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.13500 to 0.09537, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.09537 to 0.09453, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.09453

Epoch 00005: val_loss did not improve from 0.09453

Epoch 00006: val_loss did not improve from 0.09453

Epoch 00007: val_loss did not improve from 0.09453

Epoch 00008: val_loss did not improve from 0.09453

Epoch 00009: val_loss did not improve from 0.09453

Epoch 00010: val_loss did not improve from 0.09453




AlgoCrossValIter - 8
Model: "sequential_78"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.60685, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.60685 to 0.52984, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.52984

Epoch 00004: val_loss did not improve from 0.52984

Epoch 00005: val_loss did not improve from 0.52984

Epoch 00006: 




Epoch 00002: val_loss improved from 0.65079 to 0.61977, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.61977

Epoch 00004: val_loss improved from 0.61977 to 0.58388, saving model to best-model-conll.hdfs

Epoch 00005: val_loss did not improve from 0.58388

Epoch 00006: val_loss did not improve from 0.58388

Epoch 00007: val_loss did not improve from 0.58388

Epoch 00008: val_loss did not improve from 0.58388

Epoch 00009: val_loss did not improve from 0.58388

Epoch 00010: val_loss did not improve from 0.58388




number of correct positive predictions is 0

Epoch 00001: val_loss improved from inf to 0.50576, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.50576 to 0.37718, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.37718

Epoch 00004: val_loss did not improve from 0.37718

Epoch 00005: val_loss improved from 0.37718 to 0.35458, saving model to best-model-conll.hdfs

Epoch 00006: val_loss did not improve from 0.35458

Epoch 00007: val_loss did not improve from 0.35458

Epoch 00008: val_loss did not improve from 0.35458

Epoch 00009: val_loss improved from 0.35458 to 0.34746, saving model to best-model-conll.hdfs

Epoch 00010: val_loss did not improve from 0.34746





Epoch 00001: val_loss improved from inf to 0.21069, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.21069 to 0.17485, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.17485 to 0.17023, saving model to best-model-conll.hdfs

Epoch 00004: val_loss improved from 0.17023 to 0.14668, saving model to best-model-conll.hdfs

Epoch 00005: val_loss did not improve from 0.14668

Epoch 00006: val_loss did not improve from 0.14668

Epoch 00007: val_loss did not improve from 0.14668

Epoch 00008: val_loss did not improve from 0.14668

Epoch 00009: val_loss did not improve from 0.14668

Epoch 00010: val_loss did not improve from 0.14668





Epoch 00001: val_loss improved from inf to 0.26888, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.26888 to 0.15661, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.15661

Epoch 00004: val_loss did not improve from 0.15661

Epoch 00005: val_loss did not improve from 0.15661

Epoch 00006: val_loss did not improve from 0.15661

Epoch 00007: val_loss did not improve from 0.15661

Epoch 00008: val_loss did not improve from 0.15661

Epoch 00009: val_loss did not improve from 0.15661

Epoch 00010: val_loss did not improve from 0.15661





Epoch 00001: val_loss improved from inf to 0.34644, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.34644 to 0.24784, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.24784

Epoch 00004: val_loss did not improve from 0.24784

Epoch 00005: val_loss did not improve from 0.24784

Epoch 00006: val_loss did not improve from 0.24784

Epoch 00007: val_loss did not improve from 0.24784

Epoch 00008: val_loss did not improve from 0.24784

Epoch 00009: val_loss did not improve from 0.24784

Epoch 00010: val_loss did not improve from 0.24784





Epoch 00001: val_loss improved from inf to 0.12213, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.12213

Epoch 00003: val_loss did not improve from 0.12213

Epoch 00004: val_loss did not improve from 0.12213

Epoch 00005: val_loss did not improve from 0.12213

Epoch 00006: val_loss did not improve from 0.12213

Epoch 00007: val_loss did not improve from 0.12213

Epoch 00008: val_loss did not improve from 0.12213

Epoch 00009: val_loss did not improve from 0.12213

Epoch 00010: val_loss did not improve from 0.12213





Epoch 00001: val_loss improved from inf to 0.10604, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10604

Epoch 00003: val_loss did not improve from 0.10604

Epoch 00004: val_loss did not improve from 0.10604

Epoch 00005: val_loss did not improve from 0.10604

Epoch 00006: val_loss did not improve from 0.10604

Epoch 00007: val_loss did not improve from 0.10604

Epoch 00008: val_loss did not improve from 0.10604

Epoch 00009: val_loss did not improve from 0.10604

Epoch 00010: val_loss did not improve from 0.10604





Epoch 00001: val_loss improved from inf to 0.08735, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.08735 to 0.07325, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.07325 to 0.06020, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.06020

Epoch 00005: val_loss did not improve from 0.06020

Epoch 00006: val_loss did not improve from 0.06020

Epoch 00007: val_loss did not improve from 0.06020

Epoch 00008: val_loss did not improve from 0.06020

Epoch 00009: val_loss did not improve from 0.06020

Epoch 00010: val_loss did not improve from 0.06020





Epoch 00001: val_loss improved from inf to 0.13567, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.13567 to 0.09788, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.09788 to 0.09666, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.09666

Epoch 00005: val_loss did not improve from 0.09666

Epoch 00006: val_loss did not improve from 0.09666

Epoch 00007: val_loss did not improve from 0.09666

Epoch 00008: val_loss did not improve from 0.09666

Epoch 00009: val_loss did not improve from 0.09666

Epoch 00010: val_loss did not improve from 0.09666




AlgoCrossValIter - 9
Model: "sequential_79"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.55643, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.55643 to 0.52666, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.52666

Epoch 00004: val_loss did not improve from 0.52666

Epoch 00005: val_loss improved from 0.52666 to 0.49695, saving mo




Epoch 00002: val_loss did not improve from 0.58909

Epoch 00003: val_loss did not improve from 0.58909

Epoch 00004: val_loss did not improve from 0.58909

Epoch 00005: val_loss did not improve from 0.58909

Epoch 00006: val_loss improved from 0.58909 to 0.58719, saving model to best-model-conll.hdfs

Epoch 00007: val_loss did not improve from 0.58719

Epoch 00008: val_loss did not improve from 0.58719

Epoch 00009: val_loss did not improve from 0.58719

Epoch 00010: val_loss did not improve from 0.58719
number of correct positive predictions is 0

Epoch 00001: val_loss improved from inf to 0.55380, saving model to best-model-conll.hdfs





Epoch 00002: val_loss improved from 0.55380 to 0.47923, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.47923 to 0.36543, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.36543

Epoch 00005: val_loss did not improve from 0.36543

Epoch 00006: val_loss did not improve from 0.36543

Epoch 00007: val_loss did not improve from 0.36543

Epoch 00008: val_loss did not improve from 0.36543

Epoch 00009: val_loss improved from 0.36543 to 0.32931, saving model to best-model-conll.hdfs

Epoch 00010: val_loss did not improve from 0.32931





Epoch 00001: val_loss improved from inf to 0.23632, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.23632

Epoch 00003: val_loss did not improve from 0.23632

Epoch 00004: val_loss improved from 0.23632 to 0.15157, saving model to best-model-conll.hdfs

Epoch 00005: val_loss improved from 0.15157 to 0.14990, saving model to best-model-conll.hdfs

Epoch 00006: val_loss did not improve from 0.14990

Epoch 00007: val_loss did not improve from 0.14990

Epoch 00008: val_loss did not improve from 0.14990

Epoch 00009: val_loss did not improve from 0.14990

Epoch 00010: val_loss did not improve from 0.14990





Epoch 00001: val_loss improved from inf to 0.20158, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.20158

Epoch 00003: val_loss did not improve from 0.20158

Epoch 00004: val_loss did not improve from 0.20158

Epoch 00005: val_loss did not improve from 0.20158

Epoch 00006: val_loss improved from 0.20158 to 0.16086, saving model to best-model-conll.hdfs

Epoch 00007: val_loss did not improve from 0.16086

Epoch 00008: val_loss did not improve from 0.16086

Epoch 00009: val_loss did not improve from 0.16086

Epoch 00010: val_loss did not improve from 0.16086





Epoch 00001: val_loss improved from inf to 0.23248, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.23248

Epoch 00003: val_loss did not improve from 0.23248

Epoch 00004: val_loss did not improve from 0.23248

Epoch 00005: val_loss did not improve from 0.23248

Epoch 00006: val_loss did not improve from 0.23248

Epoch 00007: val_loss did not improve from 0.23248

Epoch 00008: val_loss did not improve from 0.23248

Epoch 00009: val_loss did not improve from 0.23248

Epoch 00010: val_loss did not improve from 0.23248





Epoch 00001: val_loss improved from inf to 0.45758, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.45758 to 0.22609, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.22609 to 0.17877, saving model to best-model-conll.hdfs

Epoch 00004: val_loss improved from 0.17877 to 0.12828, saving model to best-model-conll.hdfs

Epoch 00005: val_loss did not improve from 0.12828

Epoch 00006: val_loss did not improve from 0.12828

Epoch 00007: val_loss did not improve from 0.12828

Epoch 00008: val_loss did not improve from 0.12828

Epoch 00009: val_loss did not improve from 0.12828

Epoch 00010: val_loss did not improve from 0.12828





Epoch 00001: val_loss improved from inf to 0.11316, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.11316

Epoch 00003: val_loss did not improve from 0.11316

Epoch 00004: val_loss did not improve from 0.11316

Epoch 00005: val_loss did not improve from 0.11316

Epoch 00006: val_loss did not improve from 0.11316

Epoch 00007: val_loss did not improve from 0.11316

Epoch 00008: val_loss did not improve from 0.11316

Epoch 00009: val_loss did not improve from 0.11316

Epoch 00010: val_loss did not improve from 0.11316





Epoch 00001: val_loss improved from inf to 0.10808, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.10808

Epoch 00003: val_loss improved from 0.10808 to 0.07420, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.07420

Epoch 00005: val_loss did not improve from 0.07420

Epoch 00006: val_loss did not improve from 0.07420

Epoch 00007: val_loss did not improve from 0.07420

Epoch 00008: val_loss did not improve from 0.07420

Epoch 00009: val_loss did not improve from 0.07420

Epoch 00010: val_loss did not improve from 0.07420





Epoch 00001: val_loss improved from inf to 0.08125, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.08125

Epoch 00003: val_loss did not improve from 0.08125

Epoch 00004: val_loss did not improve from 0.08125

Epoch 00005: val_loss did not improve from 0.08125

Epoch 00006: val_loss did not improve from 0.08125

Epoch 00007: val_loss did not improve from 0.08125

Epoch 00008: val_loss did not improve from 0.08125

Epoch 00009: val_loss did not improve from 0.08125

Epoch 00010: val_loss did not improve from 0.08125




AlgoCrossValIter - 10
Model: "sequential_80"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
hidden1 (Dense)              (None, 640)               135040    
_________________________________________________________________
hidden2 (Dense)              (None, 160)               102560    
_________________________________________________________________
outputlayer (Dense)          (None, 5)                 805       
Total params: 238,405
Trainable params: 238,405
Non-trainable params: 0
_________________________________________________________________

Epoch 00001: val_loss improved from inf to 0.53817, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.53817 to 0.53100, saving model to best-model-conll.hdfs

Epoch 00003: val_loss improved from 0.53100 to 0.52105, saving model to best-model-conll.hdfs

Epoch 00004: val_loss improved from 0.52105 to 0.49775, saving model to b




Epoch 00002: val_loss improved from 0.64542 to 0.64277, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.64277

Epoch 00004: val_loss improved from 0.64277 to 0.61420, saving model to best-model-conll.hdfs

Epoch 00005: val_loss did not improve from 0.61420

Epoch 00006: val_loss did not improve from 0.61420

Epoch 00007: val_loss did not improve from 0.61420

Epoch 00008: val_loss improved from 0.61420 to 0.60466, saving model to best-model-conll.hdfs

Epoch 00009: val_loss did not improve from 0.60466

Epoch 00010: val_loss did not improve from 0.60466
number of correct positive predictions is 0

Epoch 00001: val_loss improved from inf to 0.44373, saving model to best-model-conll.hdfs





Epoch 00002: val_loss improved from 0.44373 to 0.39089, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.39089

Epoch 00004: val_loss did not improve from 0.39089

Epoch 00005: val_loss improved from 0.39089 to 0.35359, saving model to best-model-conll.hdfs

Epoch 00006: val_loss did not improve from 0.35359

Epoch 00007: val_loss improved from 0.35359 to 0.34780, saving model to best-model-conll.hdfs

Epoch 00008: val_loss improved from 0.34780 to 0.34340, saving model to best-model-conll.hdfs

Epoch 00009: val_loss did not improve from 0.34340

Epoch 00010: val_loss did not improve from 0.34340





Epoch 00001: val_loss improved from inf to 0.15670, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.15670

Epoch 00003: val_loss did not improve from 0.15670

Epoch 00004: val_loss did not improve from 0.15670

Epoch 00005: val_loss did not improve from 0.15670

Epoch 00006: val_loss did not improve from 0.15670

Epoch 00007: val_loss did not improve from 0.15670

Epoch 00008: val_loss did not improve from 0.15670

Epoch 00009: val_loss did not improve from 0.15670

Epoch 00010: val_loss did not improve from 0.15670





Epoch 00001: val_loss improved from inf to 0.15966, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.15966

Epoch 00003: val_loss did not improve from 0.15966

Epoch 00004: val_loss did not improve from 0.15966

Epoch 00005: val_loss did not improve from 0.15966

Epoch 00006: val_loss did not improve from 0.15966

Epoch 00007: val_loss did not improve from 0.15966

Epoch 00008: val_loss did not improve from 0.15966

Epoch 00009: val_loss did not improve from 0.15966

Epoch 00010: val_loss did not improve from 0.15966





Epoch 00001: val_loss improved from inf to 0.23820, saving model to best-model-conll.hdfs

Epoch 00002: val_loss improved from 0.23820 to 0.22518, saving model to best-model-conll.hdfs

Epoch 00003: val_loss did not improve from 0.22518

Epoch 00004: val_loss did not improve from 0.22518

Epoch 00005: val_loss did not improve from 0.22518

Epoch 00006: val_loss did not improve from 0.22518

Epoch 00007: val_loss did not improve from 0.22518

Epoch 00008: val_loss did not improve from 0.22518

Epoch 00009: val_loss did not improve from 0.22518

Epoch 00010: val_loss did not improve from 0.22518





Epoch 00001: val_loss improved from inf to 0.30119, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.30119

Epoch 00003: val_loss improved from 0.30119 to 0.17289, saving model to best-model-conll.hdfs

Epoch 00004: val_loss did not improve from 0.17289

Epoch 00005: val_loss did not improve from 0.17289

Epoch 00006: val_loss improved from 0.17289 to 0.16322, saving model to best-model-conll.hdfs

Epoch 00007: val_loss did not improve from 0.16322

Epoch 00008: val_loss did not improve from 0.16322

Epoch 00009: val_loss did not improve from 0.16322

Epoch 00010: val_loss did not improve from 0.16322





Epoch 00001: val_loss improved from inf to 0.11652, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.11652

Epoch 00003: val_loss did not improve from 0.11652

Epoch 00004: val_loss did not improve from 0.11652

Epoch 00005: val_loss did not improve from 0.11652

Epoch 00006: val_loss did not improve from 0.11652

Epoch 00007: val_loss did not improve from 0.11652

Epoch 00008: val_loss did not improve from 0.11652

Epoch 00009: val_loss did not improve from 0.11652

Epoch 00010: val_loss did not improve from 0.11652





Epoch 00001: val_loss improved from inf to 0.06575, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.06575

Epoch 00003: val_loss did not improve from 0.06575

Epoch 00004: val_loss did not improve from 0.06575

Epoch 00005: val_loss did not improve from 0.06575

Epoch 00006: val_loss did not improve from 0.06575

Epoch 00007: val_loss did not improve from 0.06575

Epoch 00008: val_loss did not improve from 0.06575

Epoch 00009: val_loss did not improve from 0.06575

Epoch 00010: val_loss did not improve from 0.06575





Epoch 00001: val_loss improved from inf to 0.09443, saving model to best-model-conll.hdfs

Epoch 00002: val_loss did not improve from 0.09443

Epoch 00003: val_loss did not improve from 0.09443

Epoch 00004: val_loss did not improve from 0.09443

Epoch 00005: val_loss did not improve from 0.09443

Epoch 00006: val_loss did not improve from 0.09443

Epoch 00007: val_loss did not improve from 0.09443

Epoch 00008: val_loss did not improve from 0.09443

Epoch 00009: val_loss did not improve from 0.09443

Epoch 00010: val_loss did not improve from 0.09443




In [667]:
normalization_strategy

In [668]:
resultCrossVal.to_csv("results.csv")
resultCrossVal

Unnamed: 0,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9
P_val,63.75,72.404,67.81,66.536,67.865,68.0,64.167,63.686,68.893,68.96
P_train,69.095,79.091,69.584,70.673,77.774,77.265,76.103,69.372,79.425,77.137
P_ewo,64.048,74.806,65.209,64.075,69.413,66.189,64.608,64.554,73.83,69.71
R_val,49.684,54.018,48.159,53.109,53.541,48.606,49.279,49.662,53.239,51.217
R_train,47.794,50.405,48.73,50.237,49.644,47.651,46.848,48.133,48.347,50.267
R_ewo,37.778,37.499,36.666,37.963,38.334,37.037,36.481,37.315,36.574,38.241
F1-val,68.51375,75.54125,68.87625,72.6725,73.3825,67.9225,67.65,67.08,74.2625,71.5125
F1-train,68.8475,65.832222,71.23125,72.87875,64.277778,61.414444,61.405556,69.68125,64.088889,64.472222
F1-ewo,57.125,52.977778,58.26125,58.77375,52.271111,58.0125,56.64625,57.2075,51.534444,52.908889


In [669]:
resultCrossVal.mean(axis=1).to_frame()

Unnamed: 0,0
P_val,67.2071
P_train,74.5519
P_ewo,67.6442
R_val,51.0514
R_train,48.8056
R_ewo,37.3888
F1-val,70.741375
F1-train,66.412986
F1-ewo,55.571847


In [670]:
resultCrossVal.std(axis=1).to_frame()

Unnamed: 0,0
P_val,2.756608
P_train,4.314279
P_ewo,4.074844
R_val,2.24433
R_train,1.260628
R_ewo,0.687396
F1-val,3.092749
F1-train,4.024292
F1-ewo,2.802774


In [671]:
# trainByTagResult.to_csv("results/train-by-tag.csv")
# trainByTagResult

In [672]:
# trainByTagResult.mean(axis=1).to_frame()

In [673]:
# trainByTagResult.std(axis=1).to_frame()

In [674]:
# testByTagResult.to_csv("results/test-by-tag.csv")
# testByTagResult

In [675]:
# testByTagResult.mean(axis=1).to_frame()

In [676]:
# testByTagResult.std(axis=1).to_frame()

In [677]:
# ewoByTagResult.to_csv("results/ewo-by-tag.csv")

In [678]:
# ewoByTagResult = pd.read_csv("results/ewo-by-tag.csv", index_col=0)
# ewoByTagResult

In [679]:
# ewoByTagResult.mean(axis=1).to_frame()

In [680]:
# ewoByTagResult.std(axis=1).to_frame()

In [681]:
# columns = en_fingerprints.columns

# print("Pred", "Real", "Freq", "Word", sep="\t")
# for c in columns:
#     prediction = model.predict(en_fingerprints[c].values.reshape((1, 210)))
#     pred_tag = int2tag[np.argmax(prediction)]
#     real_tag = en_corpus[en_corpus.word == c].iloc[0]['ne-tag']
    
#     if pred_tag != real_tag:
#         print(pred_tag, real_tag, en_fingerprints[c].max(), c, sep="\t")