# Neural Network

### Utility functions

In [1]:
import json
from flair.data import Sentence
import progressbar
import pickle
import sys

# Project root that contains the local `data` module imported in the next cell.
# NOTE(review): this is a machine-specific absolute path — adjust it per checkout.
path_dir = "/mnt/d/ASE/HLTProject/BioASQ2020"

# Register the project root only once so that re-running the cell does not
# keep growing sys.path.
already_registered = path_dir in sys.path
if not already_registered:
    sys.path.append(path_dir)


In [2]:
from data import load_data_yesno, generate_embeddings_yesno, generate_embeddings_yesno_pooling, load_embeddings

### Importing training data

In [3]:
# Location of the training file, relative to the notebook's working directory.
training_json = "../data/training8b.json"
# Load the yes/no examples with the project helper.
data = load_data_yesno(training_json)

### Creating the embeddings

In [4]:
from flair.embeddings import ELMoEmbeddings

# Instantiate flair's ELMo embedder with the 'pubmed' model name
# (presumably weights trained on PubMed text — confirm in the flair docs).
embeddings_elmo_pubmed = ELMoEmbeddings('pubmed') 

In [5]:
from flair.embeddings import DocumentPoolEmbeddings, Sentence

# Wrap the ELMo embedder in a document-level pooling embedder; this is the
# model handed to generate_embeddings_yesno_pooling below.
pooling_model = DocumentPoolEmbeddings([embeddings_elmo_pubmed])

In [6]:
# Embed only the first 10 loaded examples with the pooled ELMo model.
# NOTE(review): everything downstream (split, training) therefore runs on
# these 10 examples only — widen the slice for a real training run.
embeddings = generate_embeddings_yesno_pooling(pooling_model, data[:10])

0% |                                                                        | 11% |########                                                                | 22% |################                                                        | 33% |########################                                                | 44% |################################                                        | 55% |########################################                                | 66% |################################################                        | 77% |########################################################                | 88% |################################################################        |100% |########################################################################|100% |########################################################################|


In [7]:
# Sanity check: show the tensor stored in the first slot of the first
# generated row (rendered as the cell output).
embeddings[0][0].data

tensor([-0.1104, -0.1032, -0.0877,  ..., -0.3252, -0.6331,  0.4622])

### Train test split

In [8]:
import numpy as np
# Fraction of the samples held out for validation when the data is split below.
VALIDATION_SPLIT = 0.33

In [9]:
# Fixed seed so the shuffle (and therefore the train/validation split) is the
# same on every Restart & Run All.
SHUFFLE_SEED = 42

# Convert the generated rows to an array so they can be indexed by column.
emb_numpy = np.array(embeddings)

# Random row order drawn from a private generator: reproducible, and it does
# not touch NumPy's global random state.
indices = np.random.RandomState(SHUFFLE_SEED).permutation(emb_numpy.shape[0])

In [130]:
# Re-order the rows with the shuffled indices. `0::2` keeps every second
# column starting at 0 and column 1 is read as the label.
# NOTE(review): presumably each row is [embedding, label, embedding] — verify
# against generate_embeddings_yesno_pooling.
# NOTE(review): `data` previously held the raw examples; it is rebound here, so
# the embedding cell cannot be re-run after this one without reloading.
data = emb_numpy[indices, 0::2]
# Builtin `float` replaces `np.float`, a deprecated alias of the same type that
# was removed in NumPy 1.24.
labels = np.array(emb_numpy[indices, 1], dtype=float)
nb_validation_samples = int(VALIDATION_SPLIT * data.shape[0])

# One flat feature vector per sample: the two selected embeddings concatenated.
data = np.array([np.concatenate([el[0].data, el[1].data]) for el in data])

# Split on an explicit index. Slicing with `-nb_validation_samples` breaks when
# the count rounds down to 0 (`[:-0]` is empty and `[-0:]` is everything).
split_at = data.shape[0] - nb_validation_samples

x_train = data[:split_at]
y_train = labels[:split_at]
x_val = data[split_at:]
y_val = labels[split_at:]

In [114]:
# NOTE(review): disabled leftover. The same per-sample concatenation is already
# applied to `data` in the split cell above, before x_train is sliced out.
# x_train = [np.concatenate([el[0].data, el[1].data]) for el in x_train]
# x_train = np.array(x_train, dtype=np.float)
# x_train


## Build the model

In [131]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.losses import binary_crossentropy, categorical_crossentropy
from tensorflow.python.keras.callbacks import TensorBoard

In [132]:
# NOTE(review): HIDDEN_SIZE is defined but never referenced — the hidden layer
# below is built with 32 units. Decide which value is intended.
HIDDEN_SIZE = 128

# Binary classifier: one ReLU hidden layer feeding a single sigmoid unit.
# input_dim=6144 has to equal the length of each concatenated feature vector
# (presumably two 3072-dimensional embeddings — TODO confirm).
model = Sequential([
    Dense(32, input_dim=6144, activation='relu'),
    Dense(1, activation='sigmoid'),
])

In [133]:
# Optional architecture diagram, currently disabled.
# NOTE(review): this imports from standalone `keras`, whereas the model is built
# with `tensorflow.keras` — confirm the import before re-enabling.
# from keras.utils import plot_model
# plot_model(model, show_shapes=True)

In [134]:
# Configure training: Adam optimizer, binary cross-entropy loss (matches the
# single sigmoid output), accuracy reported as the metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

### To display the evaluation metrics of our model, we create a TensorBoard callback

In [142]:
import datetime
import os

# One sub-directory per run, named after the start time, so successive runs do
# not write into the same folder.
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

# Pass the path components separately. Calling os.path.join on one
# pre-concatenated string is a no-op, and a hard-coded separator is what made a
# second Windows-specific variant of this line necessary.
log_dir = os.path.join(".", "logsTB", run_stamp)

# histogram_freq=1 asks Keras to log weight histograms every epoch.
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

### Creating the callback that saves the model weights every 5 epochs

In [148]:
# Checkpoint file template; `{epoch:04d}` is a placeholder that gets filled in
# with the zero-padded epoch number when a checkpoint is written.
checkpoint_path = "my_model/model-{epoch:04d}.ckpt"
# Directory part of the template (everything before the last path separator).
checkpoint_dir, _ = os.path.split(checkpoint_path)

In [150]:
import math

# How often (in epochs) the weights should be written to disk.
CHECKPOINT_EVERY_N_EPOCHS = 5

# An integer save_freq is counted in *batches*, not epochs (TF >= 2.1), so the
# epoch interval has to be converted. The original `save_freq=5` only looked
# like "every 5 epochs" because the tiny training set fits in one batch.
# 32 mirrors the batch_size passed to model.fit further down (BATCH_SIZE) —
# keep the two values in sync.
batches_per_epoch = max(1, math.ceil(len(x_train) / 32))

# Weights-only checkpoints written to `checkpoint_path`; verbose=1 prints a
# line each time a file is saved.
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    verbose=1,
    save_weights_only=True,
    save_freq=CHECKPOINT_EVERY_N_EPOCHS * batches_per_epoch)

### Fits the model, saves it every 5 epochs and displays it on tensorboard

In [152]:
# Training hyper-parameters.
BATCH_SIZE = 32  # also the Keras default
EPOCHS = 10

# Callbacks run during training: TensorBoard logging and periodic checkpoints.
training_callbacks = [tensorboard_callback, cp_callback]

# Train on the training split and evaluate on the held-out split after every
# epoch. The returned History object is the cell's output.
model.fit(
    x_train,
    y_train,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_data=(x_val, y_val),
    callbacks=training_callbacks,
    verbose=True,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10

Epoch 00005: saving model to my_model/model-0005.ckpt
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Epoch 00010: saving model to my_model/model-0010.ckpt


<tensorflow.python.keras.callbacks.History at 0x7fa5d4625290>

To display the results in the browser, run the following command:

*tensorboard --logdir _selected_path_*

It will show on *localhost:6006* 


In [144]:
!tensorboard --logdir log_dir

Serving TensorBoard on localhost; to expose to the network, use a proxy or pass --bind_all
TensorBoard 2.2.1 at http://localhost:6006/ (Press CTRL+C to quit)
^C


## Saving the model

In [146]:
# Persist the trained model to a single file in the working directory.
# NOTE(review): the .h5 extension presumably selects Keras' HDF5 format —
# confirm for the installed TensorFlow version.
model.save('my_model.h5')