## Neural Network Training - Pretrained Embedding Model

### Balanced Dataset

In [None]:
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.losses import BinaryCrossentropy

Load the data

In [None]:
# Common root of the balanced train/test split; the feature and label CSVs
# sit in sibling sub-directories beneath it.
_balanced_root = "~/scratch/datasets/yale_new_haven/training_test_sets/balanced_dataset"

# Feature CSV (the path points at the PubMedBERT pretrained-embedding set).
training_feats_filepath = (
    f"{_balanced_root}/features/pretrained_embeddings/PubMedBERT/regression_nn/"
    "balanced_training_set.csv"
)
# Label CSV for the same balanced training split.
training_labels_filepath = (
    f"{_balanced_root}/labels/yale_new_haven_balanced_training_labels.csv"
)

In [None]:
# Read the training features and the training labels into DataFrames, in that
# order.
# NOTE(review): the label file's columns are not visible here; if it also
# carries an identifier column it would need to be dropped before fitting —
# confirm against the CSV.
X_train, y_train = (
    pd.read_csv(csv_path)
    for csv_path in (training_feats_filepath, training_labels_filepath)
)

In [None]:
# Set the record identifiers aside as 32-bit integers, then remove the 'ID'
# column so X_train holds only the remaining (feature) columns.
train_ids = X_train['ID'].astype('int32')
X_train = X_train.drop(columns=['ID'])

Set up the network

In [None]:
# Loss object handed to `compile` below; constructed with default arguments
# through the class imported at the top of the notebook.
binary_crossentropy = BinaryCrossentropy()

In [None]:
# Feed-forward binary classifier: two ReLU hidden layers (512 and 256 units),
# each followed by dropout (rates 0.3 and 0.2), ending in one sigmoid unit.
# No input shape is declared, so the input width is taken from the data the
# model is first called on.
model_1 = Sequential()
model_1.add(Dense(512, activation="relu"))
model_1.add(Dropout(0.3))
model_1.add(Dense(256, activation="relu"))
model_1.add(Dropout(0.2))
model_1.add(Dense(1, activation="sigmoid"))

# Adam optimizer with the binary cross-entropy loss defined above; accuracy is
# tracked as an additional metric.
model_1.compile(
    optimizer="adam",
    loss=binary_crossentropy,
    metrics=['accuracy'],
)

In [None]:
# Stop training once the validation loss has failed to improve for 3
# consecutive epochs.
# restore_best_weights=True rolls the model back to the weights of the epoch
# with the lowest val_loss when training is stopped early; without it the model
# keeps the weights from `patience` epochs after the best one, and those are
# the weights that would later be evaluated and saved.
earlyStopping = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

In [None]:
# Train for at most 10 epochs in mini-batches of 64, holding out 10% of the
# training rows for validation; the early-stopping callback watches the
# resulting validation loss. The returned object is kept in `history`.
# NOTE(review): per the Keras docs, validation_split takes the held-out rows
# from the end of the data before shuffling — confirm the CSV row order does
# not group rows by class.
history = model_1.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=10,
    validation_split=0.1,
    callbacks=[earlyStopping],
)

In [None]:
# Score the fitted model on the full training data (this includes the rows
# used for validation during fitting), so the result is not a held-out
# estimate.
model_1.evaluate(x=X_train, y=y_train)

Save the model

In [None]:
# Destination for the trained network (absolute path on the cluster scratch
# space; trailing slash kept as-is).
nn_filepath = (
    "/home/mila/d/david.hobson/scratch/models/balanced/experiments/"
    "pretrained/PubMedBERT/pretrained_embedding/"
)

In [None]:
# Persist the trained model at the configured location.
# NOTE(review): the target has no file extension; how Keras treats such a path
# differs between versions (directory-style SavedModel vs. a required
# .keras/.h5 suffix) — confirm against the installed Keras version.
model_1.save(filepath=nn_filepath)