# deepSimDEF for Prediction of Protein-Protein Interactions (PPI)

## Importing the required libraries

In [1]:
import os
import sys
import random
import operator
import numpy as np
import keras.backend as K

from keras import regularizers
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.callbacks import EarlyStopping

from scipy.stats.stats import pearsonr, spearmanr
from sklearn.metrics import f1_score

from deepSimDEF.tools.PPI_data_provider import gene_pair_data_reader, input_data_maker
from deepSimDEF.tools.PPI_model_saver import save_model, save_embeddings
from deepSimDEF.netwroks.PPI_network import PPI_model_builder

np.random.seed(321)


Using TensorFlow backend.


## Setting variables, reading GO annotations of genes, and preparing them for networks

In [2]:
# Number of cross-validation folds: one model is built per fold and 1/FOLD of
# the gene pairs is held out as the test split.
FOLD = 10
# Dropout rate forwarded to PPI_model_builder.
DROPOUT = 0.3
# NOTE(review): not referenced by any other cell visible in this notebook.
MAX_POOL = True

# NOTE(review): in the visible cells these two flags only drive the settings
# printout; they are not passed to PPI_model_builder -- confirm they take effect.
PRE_TRAINED = True
UPDATABLE = True

# Activation names forwarded to PPI_model_builder.
ACTIVATION_HIDDEN = 'relu'
ACTIVATION_HIGHWAY = 'sigmoid'
ACTIVATION_OUTPUT = 'sigmoid'

# Embedding size handed to PPI_model_builder.
# NOTE(review): the embedding file names built below hard-code "100D", so this
# value presumably has to stay in sync with them -- confirm before changing.
EMBEDDING_DIM = 100
# Number of training rounds; each round fits every fold model for one epoch.
NB_EPOCH = 20
# Batch size passed to model.fit.
BATCH_SIZE = 256
# Optimizer name forwarded to PPI_model_builder.
OPTIMIZER = 'adadelta'

# True reads the "..._with_IEA" annotation files, False the "..._without_IEA" ones.
IEA = True
# True sweeps decision thresholds np.arange(0.11, 0.9, 0.01); False uses 0.5 only.
SEQ = False

# NOTE(review): not referenced by any other cell visible in this notebook.
TRANSFER_LEARNING = False

# Guards for the final cell that calls save_model / save_embeddings.
SAVE_MODEL = True
SAVE_EMBEDDINGS = True

# Sub-ontologies whose annotation files are read, and the subset handed to the
# data readers and the network builder.
SUB_ONTOLOGY = ['BP', 'CC', 'MF']
SUB_ONTOLOGY_work = ['BP', 'CC', 'MF']

# When True the high-throughput PPI dataset is read in addition to the
# manually curated one.
WITH_HIGH_THROUPUT = False

# Display title of every known sub-ontology code.
SBO_TITLES = {
    'BP': 'Biolobical Process (BP)',
    'CC': 'Cellular Component (CC)',
    'MF': 'Molecular Function (MF)',
}

# Titles of the sub-ontologies in use; unknown codes are left out.
SBOs = {}
for sbo in SUB_ONTOLOGY_work:
    if sbo in SBO_TITLES:
        SBOs[sbo] = SBO_TITLES[sbo]
    
# Per-sub-ontology state; every dict is keyed by a code from SUB_ONTOLOGY.
WE = {}                         # path of the pre-trained GO-term embedding file
embedding_save = {}             # name later handed to save_embeddings
MAX_SEQUENCE_LENGTH = {}        # largest number of GO terms on a single gene product
MAX_SEQUENCE_LENGTH_INDEX = {}  # 1-based position of that gene product in its file
sequences = {}                  # one list of integer term ids per gene product
word_indeces = {}               # GO term -> integer id assigned by the Tokenizer
protein_index = {}              # gene product name -> 1-based position in its file

# How many of the lowest-indexed (per the printout: most frequent) terms to list.
MOST_FREQUENT_LEVEL = 10

for sbo in SUB_ONTOLOGY:
    # NOTE(review): both names hard-code "100D" while EMBEDDING_DIM is set
    # separately -- keep the two in sync.
    WE[sbo] = 'deepSimDEF/embeddings/GO_' + sbo + '_Embeddings_100D.emb'
    embedding_save[sbo] = 'GO_' + sbo + '_Embeddings_100D_Updated'
    MAX_SEQUENCE_LENGTH[sbo] = 0
    MAX_SEQUENCE_LENGTH_INDEX[sbo] = None   # stays None when the file has no gene products
    protein_index[sbo] = {}

    if IEA:
        annotation_path = 'deepSimDEF/gene_annotations/gene_product_GO_terms_with_IEA' + '.' + sbo
    else:
        annotation_path = 'deepSimDEF/gene_annotations/gene_product_GO_terms_without_IEA' + '.' + sbo

    index_counter = 1
    texts = []
    # The context manager closes the file even if parsing raises.
    with open(annotation_path) as file_reader:
        for line in file_reader:
            # Each line: gene product name followed by its GO terms; ':' is
            # stripped so "GO:0006810" becomes the single token "GO0006810".
            values = line.rstrip().replace(':', '').split()
            if not values:
                # Blank line: skip it without consuming an index so that
                # protein_index stays aligned with the rows of `texts`.
                continue
            protein_index[sbo][values[0]] = index_counter
            if len(values[1:]) > MAX_SEQUENCE_LENGTH[sbo]:
                MAX_SEQUENCE_LENGTH[sbo] = len(values[1:])
                MAX_SEQUENCE_LENGTH_INDEX[sbo] = index_counter
            texts.append(' '.join(values[1:]))
            index_counter += 1

    # lower=False keeps the GO identifiers' case untouched.
    tokenizer = Tokenizer(lower=False, num_words=0)
    tokenizer.fit_on_texts(texts)
    sequences[sbo] = tokenizer.texts_to_sequences(texts)

    word_indeces[sbo] = tokenizer.word_index

    if sbo == 'BP':
        print("\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Biolobical Process (BP) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
    elif sbo == 'CC':
        print("\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Cellular Component (CC) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
    elif sbo == 'MF':
        print("\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Molecular Function (MF) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~")

    print("Found " + str(len(word_indeces[sbo])) + " unique tokens in " + sbo)

    print('Top ' + str(MOST_FREQUENT_LEVEL) + ' Most Frequent GO terms annotating sequences in ' + sbo + ":")
    for GO_ID, indx in sorted(word_indeces[sbo].items(), key=operator.itemgetter(1))[:MOST_FREQUENT_LEVEL]:
        print('  >>> %s     %s' % (GO_ID, indx))

    print("Number of annotated gene products by '" + sbo + "' terms: " + str(len(sequences[sbo])))
    print("Maximum annotation length of one gene product ('" + sbo + "' sub-ontology): " + str(MAX_SEQUENCE_LENGTH[sbo]))
    print("Index/line of the gene product with maximum annotations ('" + sbo + "' sub-ontology): " + str(MAX_SEQUENCE_LENGTH_INDEX[sbo]))
    print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n")
    
    
# Keep only the gene products that have an annotation in every sub-ontology
# listed in SUB_ONTOLOGY.
names_per_ontology = [protein_index[ontology].keys() for ontology in SUB_ONTOLOGY]
fully_annotated_sequences = list(set(names_per_ontology[0]).intersection(*names_per_ontology))
print("Number of fully annotated gene products: " + str(len(fully_annotated_sequences)))



~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Biolobical Process (BP) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Found 3054 unique tokens in BP
Top 10 Most Frequent GO terms annotating sequences in BP:
  >>> GO0006810     1
  >>> GO0006351     2
  >>> GO0006355     3
  >>> GO0015031     4
  >>> GO0055114     5
  >>> GO0007049     6
  >>> GO0006414     7
  >>> GO0008152     8
  >>> GO0006412     9
  >>> GO0055085     10
Number of annotated gene products by 'BP' terms: 5680
Maximum annotation length of one gene product ('BP' sub-ontology): 44
Index/line of the gene product with maximum annotations ('BP' sub-ontology): 294
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Cellular Component (CC) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Found 782 unique tokens in CC
Top 10 Most Frequent GO terms annotating sequences in CC:
  >>> GO0005737     1
  >>> GO0005634     2
  >>> GO0016020     3
  >>> GO0016021     4
  >>> GO0005739     5
  >>> GO0005829     6
  >>> GO0005783 

## Reading the gene-pair PPIs: manually curated PPIs and high-throughput PPIs (optional)

In [3]:

# Manually curated physical interactions: per-sub-ontology annotation tensors
# for both genes of every pair, plus the interaction labels.
input_data_dir = 'deepSimDEF/datasets/PPI_data/PPI_FULL_physical_interactions_manually_curated'
(annotation_G1_dic_MC,
 annotation_G2_dic_MC,
 interaction_pr_list_MC) = gene_pair_data_reader(
    data_dir=input_data_dir,
    SUB_ONTOLOGY_work=SUB_ONTOLOGY_work,
    fully_annotated_sequences=fully_annotated_sequences,
    sequences=sequences,
    protein_index=protein_index,
    MAX_SEQUENCE_LENGTH=MAX_SEQUENCE_LENGTH,
)


# Fraction of the gene pairs held out as the test split of each fold.
VALIDATION_SPLIT = 1.0/FOLD

# Size the split from the first sub-ontology actually in use. The previous
# version indexed with `sbo`, a loop variable left over from an earlier cell,
# which only worked while that leftover value happened to be a key of
# annotation_G1_dic_MC.
# NOTE(review): assumes every sub-ontology tensor has the same number of rows,
# as the shapes printed by the reader suggest -- confirm.
n_gene_pairs = annotation_G1_dic_MC[SUB_ONTOLOGY_work[0]].shape[0]

# Shuffled row order shared by all folds (reproducible through np.random.seed).
indices = np.arange(n_gene_pairs)
np.random.shuffle(indices)
test_size = int(VALIDATION_SPLIT * n_gene_pairs)


# High-throughput interactions are optional; the placeholders stay empty
# lists unless WITH_HIGH_THROUPUT is switched on.
annotation_G1_dic_HT, annotation_G2_dic_HT, interaction_pr_list_HT = [], [], []

if WITH_HIGH_THROUPUT:
    input_data_dir = 'deepSimDEF/datasets/PPI_data/PPI_FULL_physical_interactions_high_throughput'
    (annotation_G1_dic_HT,
     annotation_G2_dic_HT,
     interaction_pr_list_HT) = gene_pair_data_reader(
        data_dir=input_data_dir,
        SUB_ONTOLOGY_work=SUB_ONTOLOGY_work,
        fully_annotated_sequences=fully_annotated_sequences,
        sequences=sequences,
        protein_index=protein_index,
        MAX_SEQUENCE_LENGTH=MAX_SEQUENCE_LENGTH,
    )


Shape of data tensor 1 (BP): (32956, 44)
Shape of data tensor 2 (BP): (32956, 44)
Shape of similarity tensor (BP): (32956,) 

Shape of data tensor 1 (CC): (32956, 17)
Shape of data tensor 2 (CC): (32956, 17)
Shape of similarity tensor (CC): (32956,) 

Shape of data tensor 1 (MF): (32956, 33)
Shape of data tensor 2 (MF): (32956, 33)
Shape of similarity tensor (MF): (32956,) 

Number of positive classes/interactions: 16478


## Printing the settings of the network and the experiment

In [4]:
# One banner line per sub-ontology in use.
for sbo in SUB_ONTOLOGY_work:
    print("@@@ " + SBOs[sbo] + " @@@")

# Which annotation set was loaded.
print("^^^ With IEA ^^^" if IEA else "^^^ Without IEA ^^^")

print("%%% Optimizer: " + str(OPTIMIZER) + " %%%")

# Embedding initialisation mode.
if not PRE_TRAINED:
    print("+++ NOT Pre-trained +++")
elif UPDATABLE:
    print("+++ Pre-trained (updatable) +++")
else:
    print("+++ Pre-trained (not updatable) +++")

@@@ Biolobical Process (BP) @@@
@@@ Cellular Component (CC) @@@
@@@ Molecular Function (MF) @@@
^^^ With IEA ^^^
%%% Optimizer: adadelta %%%
+++ Pre-trained (updatable) +++


## Making a 10-fold cross-validation experiment

In [5]:
# One independently initialised network (and its embedding layers) per fold.
models = []
embedding_layers = []

for m in range(FOLD):
    network = PPI_model_builder(
        EMBEDDING_DIM,
        model_ind=m,
        MAX_SEQUENCE_LENGTH=MAX_SEQUENCE_LENGTH,
        WORD_EMBEDDINGS=WE,
        SUB_ONTOLOGY_work=SUB_ONTOLOGY_work,
        word_indeces=word_indeces,
        ACTIVATION_HIDDEN=ACTIVATION_HIDDEN,
        ACTIVATION_HIGHWAY=ACTIVATION_HIGHWAY,
        ACTIVATION_OUTPUT=ACTIVATION_OUTPUT,
        DROPOUT=DROPOUT,
        OPTIMIZER=OPTIMIZER,
    )
    models.append(network[0])
    embedding_layers.append(network[1])

# Per-fold bookkeeping filled in during training:
bests = [0] * FOLD                       # best F1-score seen so far
thresholds = [0] * FOLD                  # threshold with the top F1 in the latest epoch
B = [{} for fold_no in range(FOLD)]      # threshold -> F1-score of the latest epoch

Loaded 29375 word vectors for BP (Model 1)
Loaded 4046 word vectors for CC (Model 1)
Loaded 10541 word vectors for MF (Model 1)




Model for Fold Number 1 Instantiated!!

Loaded 29375 word vectors for BP (Model 2)
Loaded 4046 word vectors for CC (Model 2)
Loaded 10541 word vectors for MF (Model 2)
Model for Fold Number 2 Instantiated!!

Loaded 29375 word vectors for BP (Model 3)
Loaded 4046 word vectors for CC (Model 3)
Loaded 10541 word vectors for MF (Model 3)
Model for Fold Number 3 Instantiated!!

Loaded 29375 word vectors for BP (Model 4)
Loaded 4046 word vectors for CC (Model 4)
Loaded 10541 word vectors for MF (Model 4)
Model for Fold Number 4 Instantiated!!

Loaded 29375 word vectors for BP (Model 5)
Loaded 4046 word vectors for CC (Model 5)
Loaded 10541 word vectors for MF (Model 5)
Model for Fold Number 5 Instantiated!!

Loaded 29375 word vectors for BP (Model 6)
Loaded 4046 word vectors for CC (Model 6)
Loaded 10541 word vectors for MF (Model 6)
Model for Fold Number 6 Instantiated!!

Loaded 29375 word vectors for BP (Model 7)
Loaded 4046 word vectors for CC (Model 7)
Loaded 10541 word vectors for MF (M

## Training the deepSimDEF network for the PPI task

In [6]:
# threshold -> F1-score averaged over the folds; filled by get_results.
RES = {}
# NOTE(review): these two names are not read anywhere in the visible code.
best_total_f1 = 0
best_threshold = 0

# NOTE(review): this callback is created but never passed to model.fit in the
# visible code, so no early stopping takes place.
early_stopping = EarlyStopping(monitor='val_loss', patience = 3)
# 1-based epoch number -> best fold-averaged F1-score of that epoch.
cor = {}

# NOTE(review): the functions below assign their own local `best_epoch`, so
# this module-level value is never updated by them.
best_epoch = 0

def pred(A, treshold = 0.5):
    """Turn scores into binary labels.

    Args:
        A: iterable of scores (for example the rows of a model's prediction).
        treshold: decision cut-off; a score must be strictly greater than it
            to be labelled 1.

    Returns:
        A list with one int (0 or 1) per element of ``A``.
    """
    return [1 if treshold < score else 0 for score in A]

def run_my_model(model_index, seq):
    """Train one fold's model for a single epoch and score it on its test split.

    The F1-score is computed for every threshold in ``seq`` and stored in
    ``B[model_index]``; the threshold with the highest F1 this epoch goes to
    ``thresholds[model_index]`` and ``bests[model_index]`` is raised when the
    epoch improves on it. A one-line summary is printed.

    Args:
        model_index: 0-based fold number; selects the model in ``models`` and
            is passed to ``input_data_maker`` as ``model_id``.
        seq: iterable of decision thresholds to evaluate.
    """
    X_train, y_train, X_test, y_test = input_data_maker(
        model_id=model_index,
        test_size=test_size,
        indices=indices,
        annotation_G1_dic_MC=annotation_G1_dic_MC,
        annotation_G2_dic_MC=annotation_G2_dic_MC,
        interaction_pr_list_MC=interaction_pr_list_MC,
        annotation_G1_dic_HT=annotation_G1_dic_HT,
        annotation_G2_dic_HT=annotation_G2_dic_HT,
        interaction_pr_list_HT=interaction_pr_list_HT,
        SUB_ONTOLOGY_work=SUB_ONTOLOGY_work,
        # Honour the notebook-level switch; this used to be hard-coded to
        # False, so the high-throughput data was ignored even when loaded.
        WITH_HIGH_THROUPUT=WITH_HIGH_THROUPUT)

    model = models[model_index]
    model.fit(X_train, y_train, batch_size=BATCH_SIZE, epochs=1, validation_data=(X_test, y_test))
    scores = model.predict(X_test)

    fold_scores = B[model_index]
    for cutoff in seq:
        predictions = np.asarray(pred(scores, cutoff))
        fold_scores[cutoff] = np.round(f1_score(y_test, predictions, average='binary'), 5)

    # Best (threshold, F1) pair of this epoch, looked up once.
    best_cutoff, pr = max(fold_scores.items(), key=operator.itemgetter(1))
    thresholds[model_index] = best_cutoff

    previous_best = bests[model_index]
    if previous_best < pr:
        bests[model_index] = pr
        st = "+ " + str(bests[model_index])
    else:
        st = "- " + str(bests[model_index])

    fold_label = "(" + str(model_index + 1) + "):"
    print(">>> F1-score " + fold_label + " " + str(pr)
          + " Best " + fold_label + " " + st
          + " (" + str(best_cutoff) + " : " + str(np.round(pr - previous_best, 5)) + ")" + "\n")

# 1-based epoch number -> threshold that gave that epoch's best averaged F1.
cor_threshold = {}

def get_results(epoch_no):
    """Average the per-fold F1-scores of the current epoch and report progress.

    For every threshold in the module-level ``seq`` the fold scores in ``B``
    are averaged into ``RES``; the best average is stored in
    ``cor[epoch_no + 1]`` together with its threshold in ``cor_threshold``.
    The printed summary compares this epoch with the best epoch so far
    (earliest epoch wins ties).

    Args:
        epoch_no: 0-based epoch counter of the training loop.
    """
    for cutoff in seq:
        RES[cutoff] = 0
        for fold in range(FOLD):
            RES[cutoff] += B[fold][cutoff] / float(FOLD)

    threshold_res, res = max(RES.items(), key=operator.itemgetter(1))
    cor[epoch_no + 1] = res
    cor_threshold[epoch_no + 1] = threshold_res

    # Pick the best epoch directly from `cor`: this stays defined even when
    # every score is 0, and the reported threshold is the one recorded for
    # that epoch rather than the current epoch's.
    best_epoch, total_max = max(sorted(cor.items()), key=operator.itemgetter(1))
    threshold_best = cor_threshold.get(best_epoch)

    print(' '.join([
        "F1-score for this epoch:", str(res),
        "(", str(threshold_res), ")-- Best F1-score::==>", str(total_max),
        "(", str(threshold_best), ")  (for epoch #", str(best_epoch),
        "of", str(NB_EPOCH), "epochs)" + "\n"]))

def get_final_result():
    """Print the epoch with the highest fold-averaged F1-score recorded in ``cor``.

    The earliest epoch wins ties. When ``cor`` is empty, epoch 0 with a score
    of 0 is reported instead of failing.
    """
    if cor:
        best_epoch, final_max = max(sorted(cor.items()), key=operator.itemgetter(1))
    else:
        best_epoch, final_max = 0, 0

    print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FINAL RESULT ~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + "\n")
    # Report best_epoch; the previous version printed the leftover loop
    # variable instead.
    print("For embedding size '" + str(EMBEDDING_DIM) + "' best number of epochs is '" + str(best_epoch) + "' with F1-score of: " + str(final_max) + "\n")
    
for e in range(NB_EPOCH):

    print("~~~~~~~~~ " + '/'.join(SUB_ONTOLOGY_work) + " ~~~~~~~~~~~~~~ EPOCH " + str(e + 1) + "/" + str(NB_EPOCH) + " (Embedding dimention: " + str(EMBEDDING_DIM) + ") ~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n")

    # Decision thresholds to score: a sweep when SEQ is on, otherwise 0.5 only.
    seq = np.arange(0.11, 0.9, 0.01) if SEQ else [0.5]

    # One more epoch for every fold's model, then the cross-fold summary.
    for index in range(len(models)):
        run_my_model(index, seq)

    get_results(e)

get_final_result()

~~~~~~~~~ BP/CC/MF ~~~~~~~~~~~~~~ EPOCH 1/20 (Embedding dimention: 100) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (1): 0.80753 Best (1): + 0.80753 (0.5 : 0.80753)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (2): 0.8257 Best (2): + 0.8257 (0.5 : 0.8257)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (3): 0.79813 Best (3): + 0.79813 (0.5 : 0.79813)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (4): 0.82992 Best (4): + 0.82992 (0.5 : 0.82992)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (5): 0.80704 Best (5): + 0.80704 (0.5 : 0.80704)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (6): 0.8267 Best (6): + 0.8267 (0.5 : 0.8267)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (7): 0.81512 Best (7): + 0.81512 (0.5 : 0.81512)

Train on 59322 samples, validate on 3295 samples
E

>>> F1-score (1): 0.8592 Best (1): + 0.8592 (0.5 : 0.00665)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (2): 0.85951 Best (2): + 0.85951 (0.5 : 0.00105)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (3): 0.86059 Best (3): + 0.86059 (0.5 : 0.00571)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (4): 0.87562 Best (4): + 0.87562 (0.5 : 0.01321)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (5): 0.85884 Best (5): + 0.85884 (0.5 : 0.01436)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (6): 0.87346 Best (6): + 0.87346 (0.5 : 0.00713)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (7): 0.86327 Best (7): + 0.86327 (0.5 : 0.01315)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (8): 0.86659 Best (8): + 0.86659 (0.5 : 0.01042)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (9): 0.86889 

>>> F1-score (1): 0.87624 Best (1): - 0.88169 (0.5 : -0.00545)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (2): 0.87786 Best (2): - 0.87826 (0.5 : -0.0004)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (3): 0.87667 Best (3): + 0.87667 (0.5 : 0.00325)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (4): 0.89663 Best (4): + 0.89663 (0.5 : 0.01311)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (5): 0.879 Best (5): + 0.879 (0.5 : 0.00211)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (6): 0.8944 Best (6): + 0.8944 (0.5 : 0.00849)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (7): 0.88173 Best (7): + 0.88173 (0.5 : 0.01003)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (8): 0.87799 Best (8): - 0.88129 (0.5 : -0.0033)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (9): 0.88136 Bes

>>> F1-score (2): 0.89022 Best (2): + 0.89022 (0.5 : 0.00185)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (3): 0.88379 Best (3): + 0.88379 (0.5 : 0.002)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (4): 0.90376 Best (4): + 0.90376 (0.5 : 0.00316)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (5): 0.88994 Best (5): + 0.88994 (0.5 : 0.00415)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (6): 0.90338 Best (6): + 0.90338 (0.5 : 0.00362)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (7): 0.88736 Best (7): + 0.88736 (0.5 : 0.00396)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (8): 0.89244 Best (8): - 0.89358 (0.5 : -0.00114)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (9): 0.89262 Best (9): + 0.89262 (0.5 : 0.00015)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (10): 0.8927

>>> F1-score (2): 0.89922 Best (2): + 0.89922 (0.5 : 0.00115)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (3): 0.89409 Best (3): + 0.89409 (0.5 : 0.00687)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (4): 0.90775 Best (4): - 0.9078 (0.5 : -5e-05)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (5): 0.89642 Best (5): + 0.89642 (0.5 : 0.00146)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (6): 0.90806 Best (6): + 0.90806 (0.5 : 0.00468)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (7): 0.89763 Best (7): + 0.89763 (0.5 : 0.00573)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (8): 0.90049 Best (8): - 0.90255 (0.5 : -0.00206)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (9): 0.89751 Best (9): - 0.89757 (0.5 : -6e-05)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (10): 0.90045

>>> F1-score (2): 0.90058 Best (2): - 0.90666 (0.5 : -0.00608)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (3): 0.8979 Best (3): - 0.89968 (0.5 : -0.00178)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (4): 0.91115 Best (4): - 0.91543 (0.5 : -0.00428)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (5): 0.90233 Best (5): - 0.90296 (0.5 : -0.00063)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (6): 0.90488 Best (6): - 0.90973 (0.5 : -0.00485)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (7): 0.90024 Best (7): + 0.90024 (0.5 : 0.00064)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (8): 0.89521 Best (8): - 0.91146 (0.5 : -0.01625)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (9): 0.9049 Best (9): + 0.9049 (0.5 : 0.00506)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (10): 0.

>>> F1-score (2): 0.90882 Best (2): + 0.90882 (0.5 : 0.00124)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (3): 0.90107 Best (3): - 0.90109 (0.5 : -2e-05)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (4): 0.9109 Best (4): - 0.91543 (0.5 : -0.00453)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (5): 0.90524 Best (5): + 0.90524 (0.5 : 0.00228)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (6): 0.91194 Best (6): - 0.9128 (0.5 : -0.00086)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (7): 0.90219 Best (7): - 0.90315 (0.5 : -0.00096)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (8): 0.90936 Best (8): - 0.91146 (0.5 : -0.0021)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (9): 0.90564 Best (9): + 0.90564 (0.5 : 0.00074)

Train on 59322 samples, validate on 3295 samples
Epoch 1/1
>>> F1-score (10): 0.901

## Saving the model and the embeddings

In [7]:
# Persist the trained fold models.
if SAVE_MODEL:
    save_model(FOLD=FOLD, models=models)

# Persist the GO-term embeddings taken from the models' embedding layers.
if SAVE_EMBEDDINGS:
    save_embeddings(
        FOLD=FOLD,
        embedding_layers=embedding_layers,
        word_indeces=word_indeces,
        SUB_ONTOLOGY_work=SUB_ONTOLOGY_work,
        embedding_save=embedding_save,
    )
    

Saving model 1 to disk ...
The Model and its Weights Are Saved!!

Saving model 2 to disk ...
The Model and its Weights Are Saved!!

Saving model 3 to disk ...
The Model and its Weights Are Saved!!

Saving model 4 to disk ...
The Model and its Weights Are Saved!!

Saving model 5 to disk ...
The Model and its Weights Are Saved!!

Saving model 6 to disk ...
The Model and its Weights Are Saved!!

Saving model 7 to disk ...
The Model and its Weights Are Saved!!

Saving model 8 to disk ...
The Model and its Weights Are Saved!!

Saving model 9 to disk ...
The Model and its Weights Are Saved!!

Saving model 10 to disk ...
The Model and its Weights Are Saved!!

The Word Embeddings Are Saved!!
