# Google Colab Preamble
Imports necessary libraries and performs basic GPU and memory checks on Colab instance. Ends with mounting Google Drive for use as datastore

In [None]:
#change your dependencies as you see fit
import tensorflow as tf
import numpy as np
import pandas as pd
from PIL import Image
import glob 
import matplotlib.pyplot as plt
import h5py 
import pandas as pd
import random 
from sklearn.metrics import classification_report

%matplotlib inline

from IPython.core.interactiveshell import InteractiveShell
# Echo the value of every top-level expression in a cell, not only the last one.
# Side effect: calls such as shutil.copy(...), model.fit(...) and
# model.evaluate(...) print their return values in the cells below.
InteractiveShell.ast_node_interactivity = "all"

# Tell the Keras backend to use the 'channels_last' image data format.
tf.keras.backend.set_image_data_format('channels_last')

In [None]:
# Run `nvidia-smi` through the IPython shell escape; the captured output is a
# list of lines, which is joined back into a single string for searching/printing.
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
# The substring 'failed' in the command output is treated as "no GPU attached".
# NOTE(review): other failure messages (e.g. command not found) fall through to
# the else branch and are printed as-is.
if gpu_info.find('failed') >= 0:
  print('Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell.')
else:
  print(gpu_info)

In [None]:
from psutil import virtual_memory

# Total memory reported by psutil, expressed in decimal gigabytes (bytes / 1e9).
total_bytes = virtual_memory().total
ram_gb = total_bytes / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

# 20 GB is the cut-off this notebook uses to tell a high-RAM runtime apart.
if ram_gb >= 20:
  print('You are using a high-RAM runtime!')
else:
  print('To enable a high-RAM runtime, select the Runtime > "Change runtime type"')
  print('menu, and then select High-RAM in the Runtime shape dropdown. Then, ')
  print('re-execute this cell.')

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; force_remount=True requests a fresh
# mount even if the drive is already mounted, so the cell can be re-run.
drive.mount('/content/drive', force_remount=True)

# Copying Data
Loads lightly processed data from mounted Google Drive. ```lowva-extrinsicdata.csv``` is a CSV containing patient ID, outcome of low VA progression, and unstructured EMR text. ```lowva-structured-02.csv``` is a CSV containing patient ID, outcome of low VA progression, and all structured data fields from EMR (ask Sophia re: definitions). Note: the first row of ```lowva-structured-02.csv``` holds the headers for the structured fields (```lowva-extrinsicdata.csv``` does not have such a row).

```pubmed_cbow_vocabulary.txt``` and ```pubmed_cbow_embeddings.h5``` hold the information necessary for the 300-dimensional CBOW trained embeddings on Pubmed Abstracts pertaining to Ophthalmology. ```pubmed_cbow_vocabulary.txt``` is the vocabulary list (listed in order of index they appear in and include the end-padding used by CBOW for the end / beginning of a line). ```pubmed_cbow_embeddings.h5``` is the HDF5 format saved Tensorflow model corresponding to the CBOW training. The embeddings layer can be accessed following model load at index 1 (not 0).

In [None]:
import shutil 
# Copy the two cohort CSVs and the two PubMed CBOW artifacts from the mounted
# shared drive into the current working directory, keeping the same file names.
# Each call is left as its own top-level expression so the notebook echoes the
# destination path returned by shutil.copy.
shutil.copy("/content/drive/Shared drives/clinicalmodels/data/lowva/lowva-extrinsicdata.csv", "lowva-extrinsicdata.csv")
shutil.copy("/content/drive/Shared drives/clinicalmodels/data/lowva/lowva-structured-02.csv", "lowva-structured-02.csv")
shutil.copy("/content/drive/Shared drives/clinicalmodels/data/pubmed_cbow_vocabulary.txt", "pubmed_cbow_vocabulary.txt")
shutil.copy("/content/drive/Shared drives/clinicalmodels/data/pubmed_cbow_embeddings.h5", "pubmed_cbow_embeddings.h5")

# Transformer
Code defining Transformer layer, borrowed from [Keras examples: "Text classification with Transformer" tutorial](https://keras.io/examples/nlp/text_classification_with_transformer/). Main modification is ```TokenAndPositionPreTrainedEmbedding``` layer which was created to support pre-trained embeddings (i.e. the Pubmed embeddings)

In [None]:
from tensorflow import keras
from tensorflow.keras import layers
class MultiHeadSelfAttention(layers.Layer):
    """Multi-head scaled dot-product self-attention over a sequence.

    Args:
        embed_dim: Size of the feature axis of the input and of the output;
            must be divisible by ``num_heads``.
        num_heads: Number of attention heads (default 8).
        **kwargs: Standard Keras layer arguments (e.g. ``name``, ``dtype``),
            forwarded to ``layers.Layer``.

    Raises:
        ValueError: If ``embed_dim`` is not divisible by ``num_heads``.
    """

    def __init__(self, embed_dim, num_heads=8, **kwargs):
        # Forward the base-layer arguments instead of dropping them, so that
        # name/dtype are honoured and the config produced by get_config()
        # can be fed back through from_config().
        super(MultiHeadSelfAttention, self).__init__(**kwargs)
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        if embed_dim % num_heads != 0:
            raise ValueError(
                f"embedding dimension = {embed_dim} should be divisible by number of heads = {num_heads}"
            )
        # Per-head feature size after the embedding axis is split across heads.
        self.projection_dim = embed_dim // num_heads
        self.query_dense = layers.Dense(embed_dim)
        self.key_dense = layers.Dense(embed_dim)
        self.value_dense = layers.Dense(embed_dim)
        self.combine_heads = layers.Dense(embed_dim)

    def attention(self, query, key, value):
        """Return ``(output, weights)`` of scaled dot-product attention."""
        score = tf.matmul(query, key, transpose_b=True)
        # Scale by sqrt(d_k) in the dtype of the scores themselves, so the
        # division also works when the layer does not compute in float32.
        dim_key = tf.cast(tf.shape(key)[-1], score.dtype)
        scaled_score = score / tf.math.sqrt(dim_key)
        weights = tf.nn.softmax(scaled_score, axis=-1)
        output = tf.matmul(weights, value)
        return output, weights

    def separate_heads(self, x, batch_size):
        """Reshape to (batch_size, num_heads, seq_len, projection_dim)."""
        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.projection_dim))
        return tf.transpose(x, perm=[0, 2, 1, 3])

    def call(self, inputs):
        """Apply self-attention to ``inputs`` of shape (batch, seq_len, embed_dim)."""
        batch_size = tf.shape(inputs)[0]
        query = self.query_dense(inputs)  # (batch_size, seq_len, embed_dim)
        key = self.key_dense(inputs)  # (batch_size, seq_len, embed_dim)
        value = self.value_dense(inputs)  # (batch_size, seq_len, embed_dim)
        # Each of the following is (batch_size, num_heads, seq_len, projection_dim).
        query = self.separate_heads(query, batch_size)
        key = self.separate_heads(key, batch_size)
        value = self.separate_heads(value, batch_size)
        # The attention weights are computed but not returned by this layer.
        attention, _ = self.attention(query, key, value)
        attention = tf.transpose(
            attention, perm=[0, 2, 1, 3]
        )  # (batch_size, seq_len, num_heads, projection_dim)
        concat_attention = tf.reshape(
            attention, (batch_size, -1, self.embed_dim)
        )  # (batch_size, seq_len, embed_dim)
        output = self.combine_heads(
            concat_attention
        )  # (batch_size, seq_len, embed_dim)
        return output

    def get_config(self):
        """Return the base layer config extended with the constructor arguments."""
        config = super(MultiHeadSelfAttention, self).get_config()
        config.update({'embed_dim': self.embed_dim, 'num_heads': self.num_heads})
        return config

class TransformerBlock(layers.Layer):
    """Transformer encoder block: self-attention and a feed-forward network,
    each followed by dropout, a residual connection and layer normalization.

    Args:
        embed_dim: Size of the feature axis of the input and of the output.
        num_heads: Number of attention heads.
        ff_dim: Hidden size of the feed-forward network.
        rate: Dropout rate applied after attention and after the
            feed-forward network (default 0.1).
        **kwargs: Standard Keras layer arguments (e.g. ``name``, ``dtype``),
            forwarded to ``layers.Layer``.
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        # Forward base-layer arguments instead of silently discarding them.
        super(TransformerBlock, self).__init__(**kwargs)
        self.att = MultiHeadSelfAttention(embed_dim, num_heads)
        self.ffn = keras.Sequential(
            [layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim),]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)
        self.ff_dim = ff_dim
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        # Kept so that get_config() can report the dropout rate as well.
        self.rate = rate

    def call(self, inputs, training=None):
        """Run the block; ``training`` is passed to both dropout layers."""
        attn_output = self.att(inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):
        """Return the base layer config extended with the constructor arguments."""
        config = super(TransformerBlock, self).get_config()
        config.update({
            'embed_dim': self.embed_dim,
            'num_heads': self.num_heads,
            'ff_dim': self.ff_dim,
            'rate': self.rate,
        })
        return config

class TokenAndPositionPreTrainedEmbedding(layers.Layer):
    """Sum of a token embedding initialised from a pre-trained matrix and a
    position embedding with default initialisation.

    Args:
        maxlen: Number of positions the position embedding can represent.
        vocab_size: Number of rows of the token embedding.
        embed_dim: Embedding width.
        pretrained_emb: Values used to initialise the token embedding (passed
            to a ``Constant`` initializer).
        trainable: Whether the *token* embedding is trainable (default False).
            It is applied to the token embedding only; the position embedding
            and the layer itself keep their default trainable state.
        **kwargs: Other standard Keras layer arguments (e.g. ``name``,
            ``dtype``), forwarded to ``layers.Layer``.
    """

    def __init__(self, maxlen, vocab_size, embed_dim, pretrained_emb, trainable=False, **kwargs):
        # `trainable` is deliberately NOT forwarded to the base class: doing so
        # would also freeze the position embedding.
        super(TokenAndPositionPreTrainedEmbedding, self).__init__(**kwargs)
        self.token_emb = layers.Embedding(
            input_dim=vocab_size,
            output_dim=embed_dim,
            embeddings_initializer=tf.keras.initializers.Constant(pretrained_emb),
            trainable=trainable,
        )
        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)
        self.embed_dim = embed_dim
        self.maxlen = maxlen
        self.vocab_size = vocab_size

    def call(self, x):
        """Embed token ids ``x`` and add the embedding of each position index."""
        # Positions are derived from the actual length of the last axis of x.
        maxlen = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=maxlen, delta=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions

    def get_config(self):
        """Return the base layer config plus embed_dim, maxlen and vocab_size.

        The pre-trained matrix is not included in the config.
        NOTE(review): because `pretrained_emb` is a required constructor
        argument, rebuilding this layer from its config alone is not expected
        to work — confirm before relying on from_config().
        """
        config = super(TokenAndPositionPreTrainedEmbedding, self).get_config()
        config.update({'embed_dim': self.embed_dim, 'maxlen': self.maxlen, 'vocab_size': self.vocab_size})
        return config

class TokenAndPositionEmbedding(layers.Layer):
    """Sum of a token embedding and a position embedding, both with default
    initialisation.

    Args:
        maxlen: Number of positions the position embedding can represent.
        vocab_size: Number of rows of the token embedding.
        embed_dim: Embedding width.
        **kwargs: Standard Keras layer arguments (e.g. ``name``, ``dtype``),
            forwarded to ``layers.Layer``.
    """

    def __init__(self, maxlen, vocab_size, embed_dim, **kwargs):
        # Forward base-layer arguments so name/dtype are honoured and the
        # config returned by get_config() can be passed back to from_config().
        super(TokenAndPositionEmbedding, self).__init__(**kwargs)
        self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)
        self.embed_dim = embed_dim
        self.maxlen = maxlen
        self.vocab_size = vocab_size

    def call(self, x):
        """Embed token ids ``x`` and add the embedding of each position index."""
        # Positions are derived from the actual length of the last axis of x.
        maxlen = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=maxlen, delta=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions

    def get_config(self):
        """Return the base layer config extended with the constructor arguments."""
        config = super(TokenAndPositionEmbedding, self).get_config()
        config.update({'embed_dim': self.embed_dim, 'maxlen': self.maxlen, 'vocab_size': self.vocab_size})
        return config

# Loading Data
The next block loads the data from the CSVs into three separate numpy arrays (```tokenArray``` for unstructured data, ```structuredArray``` for structured data, and ```outputArray``` for all the output values) and uses the patient ID to ensure that they are all in the exact same order (i.e. ```tokenArray[100]``` is the same patient as ```structuredArray[100]``` and had an outcome of ```outputArray[100]```)

This block also loads the Pubmed embeddings into ```embedding_matrix``` and the corresponding vocabulary is loaded into the tokenizer which is used to tokenize the unstructured text which feeds into ```tokenArray``` (for consistency uses the same tokenizer used in the embedding training, a now-deprecated tokenizer from the library ```tensorflow_datasets``` called ```TokenTextEncoder``` which takes an ordered vocabulary list as an argument)

In [None]:
# code to load the pre-trained embeddings and dataset
import tensorflow_datasets as tfds
import numpy as np
import csv

def isfloat(value):
    """Return True if ``float(value)`` succeeds, False if it raises ValueError."""
    try:
        float(value)
    except ValueError:
        return False
    else:
        return True

# One vocabulary entry per line of the file, in file order, whitespace-stripped.
with open('pubmed_cbow_vocabulary.txt', 'r') as vocab_file:
    vocabulary = [line.strip() for line in vocab_file]

# Fixed token-sequence length and embedding width used by the cells below.
width = 1000
embedding_dimension = 300

# The encoder maps text to integer ids using the ordered vocabulary list.
tokenizer = tfds.deprecated.text.Tokenizer()
encoder = tfds.deprecated.text.TokenTextEncoder(vocabulary, tokenizer=tokenizer)

# Two ids more than the word list (presumably the padding id 0 and one
# out-of-vocabulary id — TODO confirm against TokenTextEncoder.vocab_size).
vocabulary_size = len(vocabulary) + 2

# Raise the csv field size limit so that long text fields can be parsed.
csv.field_size_limit(1310720)
print('\nloading structured data')
# newline='' is what the csv module asks for, so that line breaks embedded in
# quoted fields are parsed by the csv reader rather than by the file object.
with open('lowva-structured-02.csv', 'r', newline='') as f:
    r = csv.reader(f)
    # The first row holds the column headers; discard it.
    next(r, None)
    # Maps stripped patient ID (column 0) -> float array of structured values.
    structuredDict = {}
    for row in r:
        if not row:
            # Blank line: csv.reader yields an empty list, nothing to index.
            continue
        key = row[0].strip()
        # Columns 0 and 1 are skipped; of the rest, only fields that parse as
        # floats are kept.
        # NOTE(review): dropping non-numeric fields can leave rows with
        # different lengths — confirm the file is fully numeric from column 2 on.
        values = np.array([float(field) for field in row[2:] if isfloat(field)])
        structuredDict[key] = values

print('loading and joining with unstructured data')
# newline='' is what the csv module asks for, so that line breaks embedded in
# quoted text fields are handled by the csv reader itself.
with open('lowva-extrinsicdata.csv', 'r', newline='') as f:
    r = csv.reader(f)
    tokenArray = []
    outputArray = []
    structuredArray = []
    for row in r:
        if not row:
            # Blank line: csv.reader yields an empty list, nothing to index.
            continue
        # Inner join on patient ID (column 0): rows without structured data
        # are skipped, so the three lists stay index-aligned.
        structvalues = structuredDict.get(row[0].strip())
        if structvalues is None:
            continue
        output = np.array([int(row[1])])
        # Encode the text (column 2), truncate to `width` ids, then right-pad
        # with id 0 so every sequence has exactly `width` entries.
        tokens = encoder.encode(row[2])[:width]
        tokens = tokens + [0] * (width - len(tokens))

        structuredArray.append(structvalues)
        outputArray.append(output)
        tokenArray.append(tokens)

# The lookup table is no longer needed once the join is done.
del structuredDict
tokenArray = np.array(tokenArray)
outputArray = np.array(outputArray)
structuredArray = np.array(structuredArray)

print(tokenArray.shape, structuredArray.shape, outputArray.shape)

print('loading Pubmed EMR vectors')
# Load the saved CBOW model only to extract its embedding weights.
model = tf.keras.models.load_model('pubmed_cbow_embeddings.h5')
# The embedding layer sits at index 1 of the loaded model (not 0).
e = model.layers[1]
# The first weight array of that layer is the embedding matrix. (The zero
# matrix that used to be allocated here was overwritten immediately, so the
# dead allocation has been removed.)
embedding_matrix = e.get_weights()[0]
print(embedding_matrix.shape)
del model

# The Models
The rest of this notebook is highly repetitive and lists every example / corresponding data that was run. The basic structure includes:

*   Using ```tf.data``` to generate the correct data pipelines to feed the model
*   Setting up the model in Keras, training & evaluating the model, and then saving the weights for later
*   Loading the best performance (by AUROC) model and then using the validation set (called ```validation_dataset```, also used in model training for early-stopping) to determine the decision threshold with the best F1 score
*   Evaluating the best models (based on the validation set) on the holdout set (called ```test_dataset```)

Will provide detailed commentary for the first model (1) Structured and will then just have blocks of code afterwards. This is structured so you can just keep running through and it shouldn't break anything

## (1) Structured --> 1024-D-64-1
### Using ```tf.data``` to prep datasets for model
The code uses Tensorflow's ```tf.data``` paradigm to load data into Keras. Because this code is used for multiple configurations of model, the code comments out the data pipelines that are not relevant (in this case, because the first model is Structured data only, the input datasets that include the unstructured data are unnecessary). 

Afterwards, the dataset is loaded from memory and cut up
*    The first 300 items are used as holdout set
*    The second 300 items are used as a validation set for early-stopping and interim analysis of overfitting
*    The remainder of the data is used for training

In [None]:
# Input for this model: structured fields only. The two commented lines are the
# alternative inputs (text only / text + structured) used by other models.
total_input = tf.data.Dataset.from_tensor_slices(structuredArray)
#total_input = tf.data.Dataset.from_tensor_slices(tokenArray)
#total_input = tf.data.Dataset.from_tensor_slices((tokenArray, structuredArray))
total_output = tf.data.Dataset.from_tensor_slices(outputArray)
total_dataset = tf.data.Dataset.zip((total_input, total_output))

# Positional split: items 0-299 holdout, 300-599 validation, the rest training.
holdout_part = total_dataset.take(300)
validation_part = total_dataset.skip(300).take(300)
training_part = total_dataset.skip(600)

# Batches of 15; only the training split is shuffled (buffer of 1000 elements).
test_dataset = holdout_part.batch(15)
validation_dataset = validation_part.batch(15)
train_dataset = training_part.shuffle(1000).batch(15)

### Keras model setup, training, and evaluation
The Keras models are described by the name of the section. For Structured models, simple feed-forward fully-connected layers are used, generally with Dropout and regularization.  For the Unstructured data, architectures range from those derived from the TextCNN model ([link to original paper](https://arxiv.org/abs/1408.5882)) to Transformers. Combination models also experiment with different time points at which the two separate models are merged. 

Because the classes are imbalanced, training reports not only accuracy but also true positives, true negatives, false positives, false negatives, recall, precision, and AUROC (using Tensorflow's native metrics capabilities). Training is also set to stop early when the validation loss stops declining (with a patience of 4 epochs), and the learning rate is reduced 10x if two consecutive epochs show no improvement. 

All model weights are saved so they can be revisited, and the files are manually renamed to include the validation set AUROC in the title. Note: these model weight files are distinct from those used  for the Pubmed embeddings and uses Tensorflow's ```save_weights``` approach which is more lightweight but requires additional files -- see [Tensorflow web site](https://www.tensorflow.org/tutorials/keras/save_and_load#manually_save_weights). 

All models are trained at least 3 times with the model with the best validation set performance saved for later evaluation (see "Loading Model Weights to Evaluate on Holdout" below). For simplicity, the results of the runs including the holdout dataset evaluation (which uses decision threshold tuning to maximize F1) are included here

In [None]:
# Model (1): structured features -> Dense(1024) -> Dropout(0.5) -> Dense(64)
# -> single sigmoid unit. Both hidden Dense layers use L2 kernel regularization
# with the regularizer's default factor.
input2 = tf.keras.Input(shape=(structuredArray.shape[1],))
sl = tf.keras.layers.Dense(1024, activation='relu', kernel_regularizer=tf.keras.regularizers.l2())(input2)
sl = tf.keras.layers.Dropout(0.50)(sl)
sl = tf.keras.layers.Dense(64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2())(sl)
output = tf.keras.layers.Dense(1, activation='sigmoid')(sl)

# Only the structured input (input2) feeds this model; the commented lines are
# the text-only and combined variants used by other models.
#model = tf.keras.Model(inputs = input1, outputs = output)
model = tf.keras.Model(inputs = input2, outputs = output)
#model = tf.keras.Model(inputs = (input1, input2), outputs = output)
model.summary()

# Confusion-matrix counts plus recall ('sen'), precision ('prc'), AUROC and
# accuracy, all computed by Keras during fit/evaluate.
metrics = [
    tf.keras.metrics.TruePositives(name='tp'), 
    tf.keras.metrics.FalsePositives(name='fp'), 
    tf.keras.metrics.TrueNegatives(name='tn'), 
    tf.keras.metrics.FalseNegatives(name='fn'), 
    tf.keras.metrics.Recall(name='sen'), 
    tf.keras.metrics.Precision(name='prc'), 
    tf.keras.metrics.AUC(name='auroc'), 
    tf.keras.metrics.BinaryAccuracy(name='acc')
]
# No `monitor` is given, so both callbacks watch their default quantity.
# Early stopping: patience 4, restores the best weights. LR schedule: x0.1
# after 2 epochs without improvement.
callbacks=[
    tf.keras.callbacks.EarlyStopping(patience=4, verbose=1, restore_best_weights=True, min_delta=0.0001), 
    tf.keras.callbacks.ReduceLROnPlateau(factor=0.1, patience=2, verbose=1)
]
model.compile(optimizer=tf.keras.optimizers.Adam(0.0001), loss='binary_crossentropy', metrics=metrics)
model.fit(train_dataset, epochs=80, validation_data=validation_dataset, verbose=1, callbacks=callbacks)
# Report the validation metrics of the final (restored) weights.
model.evaluate(validation_dataset)
# 'XXXX' is a placeholder: the saved files are renamed by hand to carry the
# validation AUROC.
model.save_weights('struct01_lowva_weights-XXXX')

Results:
*    (Adam = 0.0001) loss: 0.5863 - tp: 75.0000 - fp: 34.0000 - tn: 142.0000 - fn: 49.0000 - sen: 0.6048 - prc: 0.6881 - auroc: 0.8104 - acc: 0.7233 
    *    Holdout: AUROC: 78.53566408157349,
Sensitivity/Recall: 77.2357702255249, Precision: 55.55555820465088, F1: 64.6258513860928
*   (Adam = 0.0001) loss: 0.5925 - tp: 76.0000 - fp: 35.0000 - tn: 141.0000 - fn: 48.0000 - sen: 0.6129 - prc: 0.6847 - auroc: 0.8095 - acc: 0.7233
*    (Adam = 0.0001) loss: 0.5934 - tp: 76.0000 - fp: 34.0000 - tn: 142.0000 - fn: 48.0000 - sen: 0.6129 - prc: 0.6909 - auroc: 0.8076 - acc: 0.7267 

## (2) structured --> 64-D-64-1- ARVO abstract 2021

In [None]:
# Input for this model: structured fields only. The two commented lines are the
# alternative inputs (text only / text + structured) used by other models.
total_input = tf.data.Dataset.from_tensor_slices(structuredArray)
#total_input = tf.data.Dataset.from_tensor_slices(tokenArray)
#total_input = tf.data.Dataset.from_tensor_slices((tokenArray, structuredArray))
total_output = tf.data.Dataset.from_tensor_slices(outputArray)
total_dataset = tf.data.Dataset.zip((total_input, total_output))

# Positional split: items 0-299 holdout, 300-599 validation, the rest training.
holdout_part = total_dataset.take(300)
validation_part = total_dataset.skip(300).take(300)
training_part = total_dataset.skip(600)

# Batches of 15; only the training split is shuffled (buffer of 1000 elements).
test_dataset = holdout_part.batch(15)
validation_dataset = validation_part.batch(15)
train_dataset = training_part.shuffle(1000).batch(15)

In [None]:
# Model (2): structured features -> Dense(64) -> Dropout(0.5) -> Dense(64)
# -> single sigmoid unit. Both hidden Dense layers use L2 kernel regularization
# with the regularizer's default factor.
input2 = tf.keras.Input(shape=(structuredArray.shape[1],))
sl = tf.keras.layers.Dense(64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2())(input2)
sl = tf.keras.layers.Dropout(0.50)(sl)
sl = tf.keras.layers.Dense(64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2())(sl)
output = tf.keras.layers.Dense(1, activation='sigmoid')(sl)

# Only the structured input (input2) feeds this model; the commented lines are
# the text-only and combined variants used by other models.
#model = tf.keras.Model(inputs = input1, outputs = output)
model = tf.keras.Model(inputs = input2, outputs = output)
#model = tf.keras.Model(inputs = (input1, input2), outputs = output)
model.summary()

# Confusion-matrix counts plus recall ('sen'), precision ('prc'), AUROC and
# accuracy, all computed by Keras during fit/evaluate.
metrics = [
    tf.keras.metrics.TruePositives(name='tp'), 
    tf.keras.metrics.FalsePositives(name='fp'), 
    tf.keras.metrics.TrueNegatives(name='tn'), 
    tf.keras.metrics.FalseNegatives(name='fn'), 
    tf.keras.metrics.Recall(name='sen'), 
    tf.keras.metrics.Precision(name='prc'), 
    tf.keras.metrics.AUC(name='auroc'), 
    tf.keras.metrics.BinaryAccuracy(name='acc')
]
# No `monitor` is given, so both callbacks watch their default quantity.
# Early stopping: patience 4, restores the best weights. LR schedule: x0.1
# after 2 epochs without improvement.
callbacks=[
    tf.keras.callbacks.EarlyStopping(patience=4, verbose=1, restore_best_weights=True, min_delta=0.0001), 
    tf.keras.callbacks.ReduceLROnPlateau(factor=0.1, patience=2, verbose=1)
]
model.compile(optimizer=tf.keras.optimizers.Adam(0.0001), loss='binary_crossentropy', metrics=metrics)
model.fit(train_dataset, epochs=80, validation_data=validation_dataset, verbose=1, callbacks=callbacks)
# Report the validation metrics of the final (restored) weights.
model.evaluate(validation_dataset)
# 'XXXX' is a placeholder: the saved files are renamed by hand to carry the
# validation AUROC.
model.save_weights('struct02_lowva_weights-XXXX')

Results
* (Adam = 0.0001) loss: 0.5784 - tp: 73.0000 - fp: 31.0000 - tn: 145.0000 - fn: 51.0000 - sen: 0.5887 - prc: 0.7019 - auroc: 0.8104 - acc: 0.7267
* (Adam = 0.0001) loss: 0.5735 - tp: 75.0000 - fp: 34.0000 - tn: 142.0000 - fn: 49.0000 - sen: 0.6048 - prc: 0.6881 - auroc: 0.8143 - acc: 0.7233
    * Holdout: AUROC: 79.0661871433258, Sensitivity/Recall: 74.79674816131592, Precision: 58.598726987838745, F1: 65.71428633815779
* (Adam = 0.0001) loss: 0.5741 - tp: 73.0000 - fp: 30.0000 - tn: 146.0000 - fn: 51.0000 - sen: 0.5887 - prc: 0.7087 - auroc: 0.8141 - acc: 0.7300 
* (Adam = 0.0001) loss: 0.5786 - tp: 74.0000 - fp: 33.0000 - tn: 143.0000 - fn: 50.0000 - sen: 0.5968 - prc: 0.6916 - auroc: 0.8099 - acc: 0.7233 

## (3) structured --> elasticnet

In [None]:
# Input for this model: structured fields only. The two commented lines are the
# alternative inputs (text only / text + structured) used by other models.
total_input = tf.data.Dataset.from_tensor_slices(structuredArray)
#total_input = tf.data.Dataset.from_tensor_slices(tokenArray)
#total_input = tf.data.Dataset.from_tensor_slices((tokenArray, structuredArray))
total_output = tf.data.Dataset.from_tensor_slices(outputArray)
total_dataset = tf.data.Dataset.zip((total_input, total_output))

# Positional split: items 0-299 holdout, 300-599 validation, the rest training.
holdout_part = total_dataset.take(300)
validation_part = total_dataset.skip(300).take(300)
training_part = total_dataset.skip(600)

# Batches of 15; only the training split is shuffled (buffer of 1000 elements).
test_dataset = holdout_part.batch(15)
validation_dataset = validation_part.batch(15)
train_dataset = training_part.shuffle(1000).batch(15)

In [None]:
# Model (3): elastic-net style logistic regression — a single sigmoid unit on
# the structured features with combined L1 + L2 kernel regularization
# (regularizer default factors).
input2 = tf.keras.Input(shape=(structuredArray.shape[1],))
output = tf.keras.layers.Dense(1, activation='sigmoid', kernel_regularizer=tf.keras.regularizers.l1_l2())(input2)

# Only the structured input (input2) feeds this model; the commented lines are
# the text-only and combined variants used by other models.
#model = tf.keras.Model(inputs = input1, outputs = output)
model = tf.keras.Model(inputs = input2, outputs = output)
#model = tf.keras.Model(inputs = (input1, input2), outputs = output)
model.summary()

# Confusion-matrix counts plus recall ('sen'), precision ('prc'), AUROC and
# accuracy, all computed by Keras during fit/evaluate.
metrics = [
    tf.keras.metrics.TruePositives(name='tp'), 
    tf.keras.metrics.FalsePositives(name='fp'), 
    tf.keras.metrics.TrueNegatives(name='tn'), 
    tf.keras.metrics.FalseNegatives(name='fn'), 
    tf.keras.metrics.Recall(name='sen'), 
    tf.keras.metrics.Precision(name='prc'), 
    tf.keras.metrics.AUC(name='auroc'), 
    tf.keras.metrics.BinaryAccuracy(name='acc')
]
# No `monitor` is given, so both callbacks watch their default quantity.
# Early stopping: patience 4, restores the best weights. LR schedule: x0.1
# after 2 epochs without improvement.
callbacks=[
    tf.keras.callbacks.EarlyStopping(patience=4, verbose=1, restore_best_weights=True, min_delta=0.0001), 
    tf.keras.callbacks.ReduceLROnPlateau(factor=0.1, patience=2, verbose=1)
]
# NOTE(review): the results listed under this cell are labelled "Adam = 0.001"
# and say the model was not saved, but this code uses 0.0001 and does save the
# weights — confirm which is intended.
model.compile(optimizer=tf.keras.optimizers.Adam(0.0001), loss='binary_crossentropy', metrics=metrics)
model.fit(train_dataset, epochs=80, validation_data=validation_dataset, verbose=1, callbacks=callbacks)
# Report the validation metrics of the final (restored) weights.
model.evaluate(validation_dataset)
model.save_weights('struct03_lowva_weights-XXXX')

Results (b/c of poor performance did not run test set or save model):
* (Adam = 0.001) loss: 0.6061 - tp: 49.0000 - fp: 31.0000 - tn: 145.0000 - fn: 75.0000 - sen: 0.3952 - prc: 0.6125 - auroc: 0.7481 - acc: 0.6467
* (Adam = 0.001) loss: 0.6056 - tp: 48.0000 - fp: 30.0000 - tn: 146.0000 - fn: 76.0000 - sen: 0.3871 - prc: 0.6154 - auroc: 0.7473 - acc: 0.6467
* (Adam = 0.001) loss: 0.6058 - tp: 49.0000 - fp: 31.0000 - tn: 145.0000 - fn: 75.0000 - sen: 0.3952 - prc: 0.6125 - auroc: 0.7471 - acc: 0.6467
* (Adam = 0.001) loss: 0.6056 - tp: 48.0000 - fp: 29.0000 - tn: 147.0000 - fn: 76.0000 - sen: 0.3871 - prc: 0.6234 - auroc: 0.7479 - acc: 0.6500

## (4) Paper TextCNN

In [None]:
# Input for this model: tokenized text only. The two commented lines are the
# alternative inputs (structured only / text + structured) used by other models.
#total_input = tf.data.Dataset.from_tensor_slices(structuredArray)
total_input = tf.data.Dataset.from_tensor_slices(tokenArray)
#total_input = tf.data.Dataset.from_tensor_slices((tokenArray, structuredArray))
total_output = tf.data.Dataset.from_tensor_slices(outputArray)
total_dataset = tf.data.Dataset.zip((total_input, total_output))

# Positional split: items 0-299 holdout, 300-599 validation, the rest training.
holdout_part = total_dataset.take(300)
validation_part = total_dataset.skip(300).take(300)
training_part = total_dataset.skip(600)

# Batches of 15; only the training split is shuffled (buffer of 1000 elements).
test_dataset = holdout_part.batch(15)
validation_dataset = validation_part.batch(15)
train_dataset = training_part.shuffle(1000).batch(15)

In [None]:
# Model (4): TextCNN on the tokenized text.
# Token ids -> frozen embedding initialised from the PubMed matrix -> Dense(512)
# on the embedding axis -> Dropout.
input1 = tf.keras.Input(shape=(width,))
nl = tf.keras.layers.Embedding(vocabulary_size, embedding_dimension, embeddings_initializer = tf.keras.initializers.Constant(embedding_matrix), input_length = width, trainable=False)(input1)
nl = tf.keras.layers.Dense(512, activation='relu')(nl)
nl = tf.keras.layers.Dropout(0.50)(nl)
# One convolution branch per kernel size. The pool size width - kernel_size + 1
# equals the length of the un-padded convolution output, so each branch is
# pooled down to a single time step (max over the whole sequence).
kernels = [3, 5, 7, 10]
pooled = []
for kernel_size in kernels:
    mini_layer = tf.keras.layers.Conv1D(256, kernel_size, activation='relu')(nl)
    mini_pooled = tf.keras.layers.MaxPooling1D(width - kernel_size + 1)(mini_layer)
    pooled.append(mini_pooled)
# Stack the branch outputs, flatten them into one feature vector, then apply
# the Dense(1024) -> Dense(64) classification head with dropout and L2.
nl = tf.keras.layers.Concatenate(axis=1)(pooled)
nl = tf.keras.layers.Flatten()(nl)
nl = tf.keras.layers.Dropout(0.50)(nl)
nl = tf.keras.layers.Dense(1024, activation='relu', kernel_regularizer=tf.keras.regularizers.l2())(nl)
nl = tf.keras.layers.Dropout(0.5)(nl)
nl = tf.keras.layers.Dense(64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2())(nl)

output = tf.keras.layers.Dense(1, activation='sigmoid')(nl)

# Only the text input (input1) feeds this model; the commented lines are the
# structured-only and combined variants used by other models.
model = tf.keras.Model(inputs = input1, outputs = output)
#model = tf.keras.Model(inputs = input2, outputs = output)
#model = tf.keras.Model(inputs = (input1, input2), outputs = output)

model.summary()

# Confusion-matrix counts plus recall ('sen'), precision ('prc'), AUROC and
# accuracy, all computed by Keras during fit/evaluate.
metrics = [
    tf.keras.metrics.TruePositives(name='tp'), 
    tf.keras.metrics.FalsePositives(name='fp'), 
    tf.keras.metrics.TrueNegatives(name='tn'), 
    tf.keras.metrics.FalseNegatives(name='fn'), 
    tf.keras.metrics.Recall(name='sen'), 
    tf.keras.metrics.Precision(name='prc'), 
    tf.keras.metrics.AUC(name='auroc'), 
    tf.keras.metrics.BinaryAccuracy(name='acc')
]
# No `monitor` is given, so both callbacks watch their default quantity.
# Early stopping: patience 4, restores the best weights. LR schedule: x0.1
# after 2 epochs without improvement.
callbacks=[
    tf.keras.callbacks.EarlyStopping(patience=4, verbose=1, restore_best_weights=True, min_delta=0.0001), 
    tf.keras.callbacks.ReduceLROnPlateau(factor=0.1, patience=2, verbose=1)
]
model.compile(optimizer=tf.keras.optimizers.Adam(0.0001), loss='binary_crossentropy', metrics=metrics)
model.fit(train_dataset, epochs=80, validation_data=validation_dataset, verbose=1, callbacks=callbacks)
# Report the validation metrics of the final (restored) weights.
model.evaluate(validation_dataset)
# 'XXXX' is a placeholder: the saved files are renamed by hand to carry the
# validation AUROC.
model.save_weights('pubmed_unstruct04_lowva_weights-XXXX')

Results:
* (Adam = 0.0001) loss: 0.5486 - tp: 78.0000 - fp: 33.0000 - tn: 143.0000 - fn: 46.0000 - sen: 0.6290 - prc: 0.7027 - auroc: 0.8067 - acc: 0.7367
* (Adam = 0.0001) loss: 0.5358 - tp: 88.0000 - fp: 35.0000 - tn: 141.0000 - fn: 36.0000 - sen: 0.7097 - prc: 0.7154 - auroc: 0.8229 - acc: 0.7633
    * Holdout: AUROC: 80.83229660987854, Sensitivity/Recall: 69.10569071769714, Precision: 66.92913174629211, F1: 67.99999874572752
* (Adam = 0.0001) loss: 0.5532 - tp: 81.0000 - fp: 33.0000 - tn: 143.0000 - fn: 43.0000 - sen: 0.6532 - prc: 0.7105 - auroc: 0.8073 - acc: 0.7467 
* (Adam = 0.0001) loss: 0.5437 - tp: 81.0000 - fp: 34.0000 - tn: 142.0000 - fn: 43.0000 - sen: 0.6532 - prc: 0.7043 - auroc: 0.8144 - acc: 0.7433 

## (5) TextCNN + 64-D-64 + 64-1

In [None]:
# Input for this model: (tokenized text, structured fields) pairs. The two
# commented lines are the single-input alternatives used by other models.
#total_input = tf.data.Dataset.from_tensor_slices(structuredArray)
#total_input = tf.data.Dataset.from_tensor_slices(tokenArray)
total_input = tf.data.Dataset.from_tensor_slices((tokenArray, structuredArray))
total_output = tf.data.Dataset.from_tensor_slices(outputArray)
total_dataset = tf.data.Dataset.zip((total_input, total_output))

# Positional split: items 0-299 holdout, 300-599 validation, the rest training.
holdout_part = total_dataset.take(300)
validation_part = total_dataset.skip(300).take(300)
training_part = total_dataset.skip(600)

# Batches of 15; only the training split is shuffled (buffer of 1000 elements).
test_dataset = holdout_part.batch(15)
validation_dataset = validation_part.batch(15)
train_dataset = training_part.shuffle(1000).batch(15)

In [None]:
# Model (5): TextCNN branch and structured branch, each ending in a 64-unit
# layer, merged and passed through one more Dense(64) before the sigmoid output.

# --- Text branch: frozen PubMed embedding -> Dense(512) -> Dropout ---
input1 = tf.keras.Input(shape=(width,))
nl = tf.keras.layers.Embedding(vocabulary_size, embedding_dimension, embeddings_initializer = tf.keras.initializers.Constant(embedding_matrix), input_length = width, trainable=False)(input1)
nl = tf.keras.layers.Dense(512, activation='relu')(nl)
nl = tf.keras.layers.Dropout(0.50)(nl)
# One convolution branch per kernel size. The pool size width - kernel_size + 1
# equals the length of the un-padded convolution output, so each branch is
# pooled down to a single time step (max over the whole sequence).
kernels = [3, 5, 7, 10]
pooled = []
for kernel_size in kernels:
    mini_layer = tf.keras.layers.Conv1D(256, kernel_size, activation='relu')(nl)
    mini_pooled = tf.keras.layers.MaxPooling1D(width - kernel_size + 1)(mini_layer)
    pooled.append(mini_pooled)
nl = tf.keras.layers.Concatenate(axis=1)(pooled)
nl = tf.keras.layers.Flatten()(nl)
nl = tf.keras.layers.Dropout(0.50)(nl)
nl = tf.keras.layers.Dense(1024, activation='relu', kernel_regularizer=tf.keras.regularizers.l2())(nl)
nl = tf.keras.layers.Dropout(0.5)(nl)
nl = tf.keras.layers.Dense(64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2())(nl)

# --- Structured branch: Dense(64) -> Dropout -> Dense(64) ---
input2 = tf.keras.Input(shape=(structuredArray.shape[1],))
sl = tf.keras.layers.Dense(64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2())(input2)
sl = tf.keras.layers.Dropout(0.50)(sl)
sl = tf.keras.layers.Dense(64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2())(sl)

# --- Merge: concatenate both 64-unit outputs, then a joint Dense(64) ---
combo = tf.keras.layers.Concatenate()([nl, sl])
combo = tf.keras.layers.Dense(64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2())(combo)
nl = combo

output = tf.keras.layers.Dense(1, activation='sigmoid')(nl)

# Both inputs feed this model; the commented lines are the single-input
# variants used by other models.
#model = tf.keras.Model(inputs = input1, outputs = output)
#model = tf.keras.Model(inputs = input2, outputs = output)
model = tf.keras.Model(inputs = (input1, input2), outputs = output)

model.summary()

# Confusion-matrix counts plus recall ('sen'), precision ('prc'), AUROC and
# accuracy, all computed by Keras during fit/evaluate.
metrics = [
    tf.keras.metrics.TruePositives(name='tp'), 
    tf.keras.metrics.FalsePositives(name='fp'), 
    tf.keras.metrics.TrueNegatives(name='tn'), 
    tf.keras.metrics.FalseNegatives(name='fn'), 
    tf.keras.metrics.Recall(name='sen'), 
    tf.keras.metrics.Precision(name='prc'), 
    tf.keras.metrics.AUC(name='auroc'), 
    tf.keras.metrics.BinaryAccuracy(name='acc')
]
# No `monitor` is given, so both callbacks watch their default quantity.
# Early stopping: patience 4, restores the best weights. LR schedule: x0.1
# after 2 epochs without improvement.
callbacks=[
    tf.keras.callbacks.EarlyStopping(patience=4, verbose=1, restore_best_weights=True, min_delta=0.0001), 
    tf.keras.callbacks.ReduceLROnPlateau(factor=0.1, patience=2, verbose=1)
]
model.compile(optimizer=tf.keras.optimizers.Adam(0.0001), loss='binary_crossentropy', metrics=metrics)
model.fit(train_dataset, epochs=80, validation_data=validation_dataset, verbose=1, callbacks=callbacks)
# Report the validation metrics of the final (restored) weights.
model.evaluate(validation_dataset)
# 'XXXX' is a placeholder: the saved files are renamed by hand to carry the
# validation AUROC.
model.save_weights('pubmed_combo05_lowva_weights-XXXX')

Results:
* (Adam = 0.0001) loss: 0.5927 - tp: 86.0000 - fp: 39.0000 - tn: 137.0000 - fn: 38.0000 - sen: 0.6935 - prc: 0.6880 - auroc: 0.8204 - acc: 0.7433 
* (Adam = 0.0001) loss: 0.5773 - tp: 83.0000 - fp: 33.0000 - tn: 143.0000 - fn: 41.0000 - sen: 0.6694 - prc: 0.7155 - auroc: 0.8289 - acc: 0.7533
    * Holdout: AUROC: 80.15249967575073,
Sensitivity/Recall: 82.11382031440735, Precision: 56.111109256744385, F1: 66.66666508632457
* (Adam = 0.0001) loss: 0.5865 - tp: 82.0000 - fp: 36.0000 - tn: 140.0000 - fn: 42.0000 - sen: 0.6613 - prc: 0.6949 - auroc: 0.8197 - acc: 0.7400
* (Adam = 0.0001) loss: 0.5839 - tp: 86.0000 - fp: 34.0000 - tn: 142.0000 - fn: 38.0000 - sen: 0.6935 - prc: 0.7167 - auroc: 0.8262 - acc: 0.7600 

## (6) TextCNN-end + 64-D-64-end + 1

In [None]:
# Input for this model: (tokenized text, structured fields) pairs. The two
# commented lines are the single-input alternatives used by other models.
#total_input = tf.data.Dataset.from_tensor_slices(structuredArray)
#total_input = tf.data.Dataset.from_tensor_slices(tokenArray)
total_input = tf.data.Dataset.from_tensor_slices((tokenArray, structuredArray))
total_output = tf.data.Dataset.from_tensor_slices(outputArray)
total_dataset = tf.data.Dataset.zip((total_input, total_output))

# Positional split: items 0-299 holdout, 300-599 validation, the rest training.
holdout_part = total_dataset.take(300)
validation_part = total_dataset.skip(300).take(300)
training_part = total_dataset.skip(600)

# Batches of 15; only the training split is shuffled (buffer of 1000 elements).
test_dataset = holdout_part.batch(15)
validation_dataset = validation_part.batch(15)
train_dataset = training_part.shuffle(1000).batch(15)

In [None]:
# Model (6): late fusion. Each branch is reduced to a single ReLU unit; the two
# scalars are concatenated and fed to the final sigmoid unit.

# --- Text branch: frozen PubMed embedding -> Dense(512) -> Dropout ---
input1 = tf.keras.Input(shape=(width,))
nl = tf.keras.layers.Embedding(vocabulary_size, embedding_dimension, embeddings_initializer = tf.keras.initializers.Constant(embedding_matrix), input_length = width, trainable=False)(input1)
nl = tf.keras.layers.Dense(512, activation='relu')(nl)
nl = tf.keras.layers.Dropout(0.50)(nl)
# One convolution branch per kernel size. The pool size width - kernel_size + 1
# equals the length of the un-padded convolution output, so each branch is
# pooled down to a single time step (max over the whole sequence).
kernels = [3, 5, 7, 10]
pooled = []
for kernel_size in kernels:
    mini_layer = tf.keras.layers.Conv1D(256, kernel_size, activation='relu')(nl)
    mini_pooled = tf.keras.layers.MaxPooling1D(width - kernel_size + 1)(mini_layer)
    pooled.append(mini_pooled)
nl = tf.keras.layers.Concatenate(axis=1)(pooled)
nl = tf.keras.layers.Flatten()(nl)
nl = tf.keras.layers.Dropout(0.50)(nl)
nl = tf.keras.layers.Dense(1024, activation='relu', kernel_regularizer=tf.keras.regularizers.l2())(nl)
nl = tf.keras.layers.Dropout(0.5)(nl)
nl = tf.keras.layers.Dense(64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2())(nl)
# Collapse the text branch to one unit before merging.
nl = tf.keras.layers.Dense(1, activation='relu')(nl)

# --- Structured branch: Dense(64) -> Dropout -> Dense(64) -> one unit ---
input2 = tf.keras.Input(shape=(structuredArray.shape[1],))
sl = tf.keras.layers.Dense(64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2())(input2)
sl = tf.keras.layers.Dropout(0.50)(sl)
sl = tf.keras.layers.Dense(64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2())(sl)
sl = tf.keras.layers.Dense(1, activation='relu')(sl)

# --- Merge: the two single-unit outputs side by side ---
combo = tf.keras.layers.Concatenate()([nl, sl])
nl = combo

output = tf.keras.layers.Dense(1, activation='sigmoid')(nl)

# Both inputs feed this model; the commented lines are the single-input
# variants used by other models.
#model = tf.keras.Model(inputs = input1, outputs = output)
#model = tf.keras.Model(inputs = input2, outputs = output)
model = tf.keras.Model(inputs = (input1, input2), outputs = output)

model.summary()

# Confusion-matrix counts plus recall ('sen'), precision ('prc'), AUROC and
# accuracy, all computed by Keras during fit/evaluate.
metrics = [
    tf.keras.metrics.TruePositives(name='tp'), 
    tf.keras.metrics.FalsePositives(name='fp'), 
    tf.keras.metrics.TrueNegatives(name='tn'), 
    tf.keras.metrics.FalseNegatives(name='fn'), 
    tf.keras.metrics.Recall(name='sen'), 
    tf.keras.metrics.Precision(name='prc'), 
    tf.keras.metrics.AUC(name='auroc'), 
    tf.keras.metrics.BinaryAccuracy(name='acc')
]
# No `monitor` is given, so both callbacks watch their default quantity.
# Early stopping: patience 4, restores the best weights. LR schedule: x0.1
# after 2 epochs without improvement.
callbacks=[
    tf.keras.callbacks.EarlyStopping(patience=4, verbose=1, restore_best_weights=True, min_delta=0.0001), 
    tf.keras.callbacks.ReduceLROnPlateau(factor=0.1, patience=2, verbose=1)
]
model.compile(optimizer=tf.keras.optimizers.Adam(0.0001), loss='binary_crossentropy', metrics=metrics)
model.fit(train_dataset, epochs=80, validation_data=validation_dataset, verbose=1, callbacks=callbacks)
# Report the validation metrics of the final (restored) weights.
model.evaluate(validation_dataset)
# 'XXXX' is a placeholder: the saved files are renamed by hand to carry the
# validation AUROC.
model.save_weights('pubmed_combo06_lowva_weights-XXXX')


Results:
* (Adam = 0.0001) loss: 0.5948 - tp: 81.0000 - fp: 35.0000 - tn: 141.0000 - fn: 43.0000 - sen: 0.6532 - prc: 0.6983 - auroc: 0.7895 - acc: 0.7400
* (Adam = 0.0001) loss: 0.5998 - tp: 77.0000 - fp: 38.0000 - tn: 138.0000 - fn: 47.0000 - sen: 0.6210 - prc: 0.6696 - auroc: 0.7895 - acc: 0.7167 
* (Adam = 0.0001) loss: 0.5945 - tp: 70.0000 - fp: 31.0000 - tn: 145.0000 - fn: 54.0000 - sen: 0.5645 - prc: 0.6931 - auroc: 0.8091 - acc: 0.7167 

## (7) TextCNN-1024 + struct + 64-D-64-1

In [None]:
# Single-branch alternatives (swap one in for the tuple below to feed only one input):
#total_input = tf.data.Dataset.from_tensor_slices(structuredArray)
#total_input = tf.data.Dataset.from_tensor_slices(tokenArray)
total_input = tf.data.Dataset.from_tensor_slices(
    (tokenArray, structuredArray)
)
total_output = tf.data.Dataset.from_tensor_slices(outputArray)
# Each element becomes ((tokenArray slice, structuredArray slice), outputArray slice).
total_dataset = tf.data.Dataset.zip((total_input, total_output))

# Positional split, batches of 15: first 300 elements -> test, next 300 -> validation,
# everything from element 600 onward -> training (the only split that is shuffled).
test_dataset = total_dataset.take(300).batch(15)
validation_dataset = total_dataset.skip(300).take(300).batch(15)
train_dataset = total_dataset.skip(600).shuffle(1000).batch(15)

In [None]:
# Text branch: sequence of `width` indices -> frozen embeddings initialised from
# `embedding_matrix` -> per-token Dense(512) -> dropout.
input1 = tf.keras.Input(shape=(width,))
nl = tf.keras.layers.Embedding(
    vocabulary_size,
    embedding_dimension,
    embeddings_initializer=tf.keras.initializers.Constant(embedding_matrix),
    input_length=width,
    trainable=False,
)(input1)
nl = tf.keras.layers.Dense(512, activation='relu')(nl)
nl = tf.keras.layers.Dropout(0.50)(nl)

# One Conv1D per kernel width. The pool size equals the conv output length
# (width - kernel_size + 1), so every branch is max-pooled over the full sequence.
kernels = [3, 5, 7, 10]
pooled = [
    tf.keras.layers.MaxPooling1D(width - kernel_size + 1)(
        tf.keras.layers.Conv1D(256, kernel_size, activation='relu')(nl)
    )
    for kernel_size in kernels
]
nl = tf.keras.layers.Concatenate(axis=1)(pooled)
nl = tf.keras.layers.Flatten()(nl)
nl = tf.keras.layers.Dropout(0.50)(nl)
nl = tf.keras.layers.Dense(
    1024, activation='relu', kernel_regularizer=tf.keras.regularizers.l2()
)(nl)

# Structured branch: the raw structured vector is passed through untouched.
input2 = tf.keras.Input(shape=(structuredArray.shape[1],))
sl = input2

# Fusion head on [1024-d text features, structured vector]: 64 -> dropout -> 64.
nl = combo = tf.keras.layers.Concatenate()([nl, sl])
nl = tf.keras.layers.Dense(
    64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2()
)(nl)
nl = tf.keras.layers.Dropout(0.5)(nl)
nl = tf.keras.layers.Dense(
    64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2()
)(nl)

output = tf.keras.layers.Dense(1, activation='sigmoid')(nl)

# Single-input variants, left commented out for reference:
#model = tf.keras.Model(inputs = input1, outputs = output)
#model = tf.keras.Model(inputs = input2, outputs = output)
model = tf.keras.Model(inputs=(input1, input2), outputs=output)

model.summary()

# Confusion-matrix counts plus sensitivity (recall), precision, AUROC and accuracy.
metrics = [
    metric_cls(name=label)
    for metric_cls, label in (
        (tf.keras.metrics.TruePositives, 'tp'),
        (tf.keras.metrics.FalsePositives, 'fp'),
        (tf.keras.metrics.TrueNegatives, 'tn'),
        (tf.keras.metrics.FalseNegatives, 'fn'),
        (tf.keras.metrics.Recall, 'sen'),
        (tf.keras.metrics.Precision, 'prc'),
        (tf.keras.metrics.AUC, 'auroc'),
        (tf.keras.metrics.BinaryAccuracy, 'acc'),
    )
]
# Early stopping (patience 4, best weights restored) plus a 10x learning-rate cut
# after 2 epochs on a plateau.
callbacks = [
    tf.keras.callbacks.EarlyStopping(
        patience=4,
        verbose=1,
        restore_best_weights=True,
        min_delta=0.0001,
    ),
    tf.keras.callbacks.ReduceLROnPlateau(factor=0.1, patience=2, verbose=1),
]
model.compile(
    optimizer=tf.keras.optimizers.Adam(0.0001),
    loss='binary_crossentropy',
    metrics=metrics,
)
model.fit(
    train_dataset,
    epochs=80,
    validation_data=validation_dataset,
    verbose=1,
    callbacks=callbacks,
)
model.evaluate(validation_dataset)
model.save_weights('pubmed_combo07_lowva_weights-XXXX')

Results:
* (Adam = 0.0001) loss: 0.5859 - tp: 78.0000 - fp: 38.0000 - tn: 138.0000 - fn: 46.0000 - sen: 0.6290 - prc: 0.6724 - auroc: 0.7962 - acc: 0.7200 
* (Adam = 0.0001) loss: 0.5964 - tp: 93.0000 - fp: 59.0000 - tn: 117.0000 - fn: 31.0000 - sen: 0.7500 - prc: 0.6118 - auroc: 0.8019 - acc: 0.7000 
* (Adam = 0.0001) loss: 0.5627 - tp: 84.0000 - fp: 47.0000 - tn: 129.0000 - fn: 40.0000 - sen: 0.6774 - prc: 0.6412 - auroc: 0.8085 - acc: 0.7100 
* (Adam = 0.0001) loss: 0.5858 - tp: 92.0000 - fp: 55.0000 - tn: 121.0000 - fn: 32.0000 - sen: 0.7419 - prc: 0.6259 - auroc: 0.8091 - acc: 0.7100 
    * Holdout: AUROC: 80.07441759109497,
Sensitivity/Recall: 82.92682766914368, Precision: 60.00000238418579, F1: 69.62457442042287

## (8) (6) with sigmoids

In [None]:
# Single-branch alternatives (swap one in for the tuple below to feed only one input):
#total_input = tf.data.Dataset.from_tensor_slices(structuredArray)
#total_input = tf.data.Dataset.from_tensor_slices(tokenArray)
total_input = tf.data.Dataset.from_tensor_slices(
    (tokenArray, structuredArray)
)
total_output = tf.data.Dataset.from_tensor_slices(outputArray)
# Each element becomes ((tokenArray slice, structuredArray slice), outputArray slice).
total_dataset = tf.data.Dataset.zip((total_input, total_output))

# Positional split, batches of 15: first 300 elements -> test, next 300 -> validation,
# everything from element 600 onward -> training (the only split that is shuffled).
test_dataset = total_dataset.take(300).batch(15)
validation_dataset = total_dataset.skip(300).take(300).batch(15)
train_dataset = total_dataset.skip(600).shuffle(1000).batch(15)

In [None]:
# Text branch: sequence of `width` indices -> frozen embeddings initialised from
# `embedding_matrix` -> per-token Dense(512) -> dropout.
input1 = tf.keras.Input(shape=(width,))
nl = tf.keras.layers.Embedding(
    vocabulary_size,
    embedding_dimension,
    embeddings_initializer=tf.keras.initializers.Constant(embedding_matrix),
    input_length=width,
    trainable=False,
)(input1)
nl = tf.keras.layers.Dense(512, activation='relu')(nl)
nl = tf.keras.layers.Dropout(0.50)(nl)

# One Conv1D per kernel width. The pool size equals the conv output length
# (width - kernel_size + 1), so every branch is max-pooled over the full sequence.
kernels = [3, 5, 7, 10]
pooled = [
    tf.keras.layers.MaxPooling1D(width - kernel_size + 1)(
        tf.keras.layers.Conv1D(256, kernel_size, activation='relu')(nl)
    )
    for kernel_size in kernels
]
nl = tf.keras.layers.Concatenate(axis=1)(pooled)
nl = tf.keras.layers.Flatten()(nl)
nl = tf.keras.layers.Dropout(0.50)(nl)
# Text head: 1024 -> dropout -> 64 -> single sigmoid unit.
nl = tf.keras.layers.Dense(
    1024, activation='relu', kernel_regularizer=tf.keras.regularizers.l2()
)(nl)
nl = tf.keras.layers.Dropout(0.5)(nl)
nl = tf.keras.layers.Dense(
    64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2()
)(nl)
nl = tf.keras.layers.Dense(1, activation='sigmoid')(nl)

# Structured branch: 64 -> dropout -> 64 -> single sigmoid unit.
input2 = tf.keras.Input(shape=(structuredArray.shape[1],))
sl = tf.keras.layers.Dense(
    64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2()
)(input2)
sl = tf.keras.layers.Dropout(0.50)(sl)
sl = tf.keras.layers.Dense(
    64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2()
)(sl)
sl = tf.keras.layers.Dense(1, activation='sigmoid')(sl)

# Fuse the two one-unit branch outputs and map them to a probability.
nl = combo = tf.keras.layers.Concatenate()([nl, sl])

output = tf.keras.layers.Dense(1, activation='sigmoid')(nl)

# Single-input variants, left commented out for reference:
#model = tf.keras.Model(inputs = input1, outputs = output)
#model = tf.keras.Model(inputs = input2, outputs = output)
model = tf.keras.Model(inputs=(input1, input2), outputs=output)

model.summary()

# Confusion-matrix counts plus sensitivity (recall), precision, AUROC and accuracy.
metrics = [
    metric_cls(name=label)
    for metric_cls, label in (
        (tf.keras.metrics.TruePositives, 'tp'),
        (tf.keras.metrics.FalsePositives, 'fp'),
        (tf.keras.metrics.TrueNegatives, 'tn'),
        (tf.keras.metrics.FalseNegatives, 'fn'),
        (tf.keras.metrics.Recall, 'sen'),
        (tf.keras.metrics.Precision, 'prc'),
        (tf.keras.metrics.AUC, 'auroc'),
        (tf.keras.metrics.BinaryAccuracy, 'acc'),
    )
]
# Early stopping (patience 4, best weights restored) plus a 10x learning-rate cut
# after 2 epochs on a plateau.
callbacks = [
    tf.keras.callbacks.EarlyStopping(
        patience=4,
        verbose=1,
        restore_best_weights=True,
        min_delta=0.0001,
    ),
    tf.keras.callbacks.ReduceLROnPlateau(factor=0.1, patience=2, verbose=1),
]
model.compile(
    optimizer=tf.keras.optimizers.Adam(0.0001),
    loss='binary_crossentropy',
    metrics=metrics,
)
model.fit(
    train_dataset,
    epochs=80,
    validation_data=validation_dataset,
    verbose=1,
    callbacks=callbacks,
)
model.evaluate(validation_dataset)
model.save_weights('pubmed_combo08_lowva_weights-XXXX')

Results:
* (Adam = 0.0001) loss: 0.6270 - tp: 54.0000 - fp: 17.0000 - tn: 159.0000 - fn: 70.0000 - sen: 0.4355 - prc: 0.7606 - auroc: 0.7622 - acc: 0.7100 
* (Adam = 0.0001) loss: 0.6416 - tp: 81.0000 - fp: 63.0000 - tn: 113.0000 - fn: 43.0000 - sen: 0.6532 - prc: 0.5625 - auroc: 0.7014 - acc: 0.6467 

## (9) late concatenate TextCNN output with full structured vector

In [None]:
# Single-branch alternatives (swap one in for the tuple below to feed only one input):
#total_input = tf.data.Dataset.from_tensor_slices(structuredArray)
#total_input = tf.data.Dataset.from_tensor_slices(tokenArray)
total_input = tf.data.Dataset.from_tensor_slices(
    (tokenArray, structuredArray)
)
total_output = tf.data.Dataset.from_tensor_slices(outputArray)
# Each element becomes ((tokenArray slice, structuredArray slice), outputArray slice).
total_dataset = tf.data.Dataset.zip((total_input, total_output))

# Positional split, batches of 15: first 300 elements -> test, next 300 -> validation,
# everything from element 600 onward -> training (the only split that is shuffled).
test_dataset = total_dataset.take(300).batch(15)
validation_dataset = total_dataset.skip(300).take(300).batch(15)
train_dataset = total_dataset.skip(600).shuffle(1000).batch(15)

In [None]:
# Model (9): the TextCNN branch is reduced to a single ReLU unit, concatenated with
# the raw structured vector, and passed through one Dense(64) layer before the
# sigmoid output.

# Text branch: sequence of `width` indices -> frozen embeddings initialised from
# `embedding_matrix` -> per-token Dense(512) -> dropout.
input1 = tf.keras.Input(shape=(width,))
nl = tf.keras.layers.Embedding(vocabulary_size, embedding_dimension, embeddings_initializer = tf.keras.initializers.Constant(embedding_matrix), input_length = width, trainable=False)(input1)
nl = tf.keras.layers.Dense(512, activation='relu')(nl)
nl = tf.keras.layers.Dropout(0.50)(nl)
# One Conv1D per kernel width. The pool size equals the conv output length
# (width - kernel_size + 1), so every branch is max-pooled over the full sequence.
kernels = [3, 5, 7, 10]
pooled = []
for kernel_size in kernels:
    mini_layer = tf.keras.layers.Conv1D(256, kernel_size, activation='relu')(nl)
    mini_pooled = tf.keras.layers.MaxPooling1D(width - kernel_size + 1)(mini_layer)
    pooled.append(mini_pooled)
nl = tf.keras.layers.Concatenate(axis=1)(pooled)
nl = tf.keras.layers.Flatten()(nl)
nl = tf.keras.layers.Dropout(0.50)(nl)
# Text head: 1024 -> dropout -> 64 -> single ReLU unit.
nl = tf.keras.layers.Dense(1024, activation='relu', kernel_regularizer=tf.keras.regularizers.l2())(nl)
nl = tf.keras.layers.Dropout(0.5)(nl)
nl = tf.keras.layers.Dense(64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2())(nl)
nl = tf.keras.layers.Dense(1, activation='relu')(nl)

# Structured branch: the raw structured vector is passed through untouched.
input2 = tf.keras.Input(shape=(structuredArray.shape[1],))
sl = input2

combo = tf.keras.layers.Concatenate()([nl, sl])
nl = combo
# The fusion layer must consume the concatenated tensor. It was previously applied
# to `sl`, which left the entire text branch disconnected from `output`, so any
# metrics logged from such runs describe a structured-only model.
nl = tf.keras.layers.Dense(64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2())(nl)

output = tf.keras.layers.Dense(1, activation='sigmoid')(nl)

# Single-input variants, left commented out for reference:
#model = tf.keras.Model(inputs = input1, outputs = output)
#model = tf.keras.Model(inputs = input2, outputs = output)
model = tf.keras.Model(inputs = (input1, input2), outputs = output)

model.summary()

# Confusion-matrix counts plus sensitivity (recall), precision, AUROC and accuracy.
metrics = [
    tf.keras.metrics.TruePositives(name='tp'), 
    tf.keras.metrics.FalsePositives(name='fp'), 
    tf.keras.metrics.TrueNegatives(name='tn'), 
    tf.keras.metrics.FalseNegatives(name='fn'), 
    tf.keras.metrics.Recall(name='sen'), 
    tf.keras.metrics.Precision(name='prc'), 
    tf.keras.metrics.AUC(name='auroc'), 
    tf.keras.metrics.BinaryAccuracy(name='acc')
]
# Early stopping (patience 4, best weights restored) plus a 10x learning-rate cut
# after 2 epochs on a plateau.
callbacks=[
    tf.keras.callbacks.EarlyStopping(patience=4, verbose=1, restore_best_weights=True, min_delta=0.0001), 
    tf.keras.callbacks.ReduceLROnPlateau(factor=0.1, patience=2, verbose=1)
]
model.compile(optimizer=tf.keras.optimizers.Adam(0.0001), loss='binary_crossentropy', metrics=metrics)
model.fit(train_dataset, epochs=80, validation_data=validation_dataset, verbose=1, callbacks=callbacks)
model.evaluate(validation_dataset)
model.save_weights('pubmed_combo09_lowva_weights-XXXX')

Results:
* (Adam = 0.0001) loss: 0.5564 - tp: 83.0000 - fp: 39.0000 - tn: 137.0000 - fn: 41.0000 - sen: 0.6694 - prc: 0.6803 - auroc: 0.8125 - acc: 0.7333 
* (Adam = 0.0001) loss: 0.5542 - tp: 81.0000 - fp: 40.0000 - tn: 136.0000 - fn: 43.0000 - sen: 0.6532 - prc: 0.6694 - auroc: 0.8133 - acc: 0.7233
    * Holdout: AUROC: 78.93757224082947,
Sensitivity/Recall: 71.54471278190613, Precision: 64.23357725143433, F1: 67.69230683756294
* (Adam = 0.0001) loss: 0.5562 - tp: 81.0000 - fp: 40.0000 - tn: 136.0000 - fn: 43.0000 - sen: 0.6532 - prc: 0.6694 - auroc: 0.8088 - acc: 0.7233 
* (Adam = 0.0001) loss: 0.5576 - tp: 78.0000 - fp: 32.0000 - tn: 144.0000 - fn: 46.0000 - sen: 0.6290 - prc: 0.7091 - auroc: 0.8054 - acc: 0.7400

## (10) late concatenate structured output with first flat TextCNN layer

In [None]:
# Single-branch alternatives (swap one in for the tuple below to feed only one input):
#total_input = tf.data.Dataset.from_tensor_slices(structuredArray)
#total_input = tf.data.Dataset.from_tensor_slices(tokenArray)
total_input = tf.data.Dataset.from_tensor_slices(
    (tokenArray, structuredArray)
)
total_output = tf.data.Dataset.from_tensor_slices(outputArray)
# Each element becomes ((tokenArray slice, structuredArray slice), outputArray slice).
total_dataset = tf.data.Dataset.zip((total_input, total_output))

# Positional split, batches of 15: first 300 elements -> test, next 300 -> validation,
# everything from element 600 onward -> training (the only split that is shuffled).
test_dataset = total_dataset.take(300).batch(15)
validation_dataset = total_dataset.skip(300).take(300).batch(15)
train_dataset = total_dataset.skip(600).shuffle(1000).batch(15)

In [None]:
# Text branch: sequence of `width` indices -> frozen embeddings initialised from
# `embedding_matrix` -> per-token Dense(512) -> dropout.
input1 = tf.keras.Input(shape=(width,))
nl = tf.keras.layers.Embedding(
    vocabulary_size,
    embedding_dimension,
    embeddings_initializer=tf.keras.initializers.Constant(embedding_matrix),
    input_length=width,
    trainable=False,
)(input1)
nl = tf.keras.layers.Dense(512, activation='relu')(nl)
nl = tf.keras.layers.Dropout(0.50)(nl)

# One Conv1D per kernel width. The pool size equals the conv output length
# (width - kernel_size + 1), so every branch is max-pooled over the full sequence.
kernels = [3, 5, 7, 10]
pooled = [
    tf.keras.layers.MaxPooling1D(width - kernel_size + 1)(
        tf.keras.layers.Conv1D(256, kernel_size, activation='relu')(nl)
    )
    for kernel_size in kernels
]
nl = tf.keras.layers.Concatenate(axis=1)(pooled)
nl = tf.keras.layers.Flatten()(nl)

# Structured branch: 64 -> dropout -> 64 -> single ReLU unit.
input2 = tf.keras.Input(shape=(structuredArray.shape[1],))
sl = tf.keras.layers.Dense(
    64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2()
)(input2)
sl = tf.keras.layers.Dropout(0.50)(sl)
sl = tf.keras.layers.Dense(
    64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2()
)(sl)
sl = tf.keras.layers.Dense(1, activation='relu')(sl)

# Fusion head on [flattened TextCNN features, structured score]: 1024 -> dropout -> 64.
nl = combo = tf.keras.layers.Concatenate()([nl, sl])
nl = tf.keras.layers.Dense(
    1024, activation='relu', kernel_regularizer=tf.keras.regularizers.l2()
)(nl)
nl = tf.keras.layers.Dropout(0.5)(nl)
nl = tf.keras.layers.Dense(
    64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2()
)(nl)

output = tf.keras.layers.Dense(1, activation='sigmoid')(nl)

# Single-input variants, left commented out for reference:
#model = tf.keras.Model(inputs = input1, outputs = output)
#model = tf.keras.Model(inputs = input2, outputs = output)
model = tf.keras.Model(inputs=(input1, input2), outputs=output)

model.summary()

# Confusion-matrix counts plus sensitivity (recall), precision, AUROC and accuracy.
metrics = [
    metric_cls(name=label)
    for metric_cls, label in (
        (tf.keras.metrics.TruePositives, 'tp'),
        (tf.keras.metrics.FalsePositives, 'fp'),
        (tf.keras.metrics.TrueNegatives, 'tn'),
        (tf.keras.metrics.FalseNegatives, 'fn'),
        (tf.keras.metrics.Recall, 'sen'),
        (tf.keras.metrics.Precision, 'prc'),
        (tf.keras.metrics.AUC, 'auroc'),
        (tf.keras.metrics.BinaryAccuracy, 'acc'),
    )
]
# Early stopping (patience 4, best weights restored) plus a 10x learning-rate cut
# after 2 epochs on a plateau.
callbacks = [
    tf.keras.callbacks.EarlyStopping(
        patience=4,
        verbose=1,
        restore_best_weights=True,
        min_delta=0.0001,
    ),
    tf.keras.callbacks.ReduceLROnPlateau(factor=0.1, patience=2, verbose=1),
]
model.compile(
    optimizer=tf.keras.optimizers.Adam(0.0001),
    loss='binary_crossentropy',
    metrics=metrics,
)
model.fit(
    train_dataset,
    epochs=80,
    validation_data=validation_dataset,
    verbose=1,
    callbacks=callbacks,
)
model.evaluate(validation_dataset)
model.save_weights('pubmed_combo10_lowva_weights-XXXX')

Results:
* (Adam = 0.0001) loss: 0.5663 - tp: 74.0000 - fp: 36.0000 - tn: 140.0000 - fn: 50.0000 - sen: 0.5968 - prc: 0.6727 - auroc: 0.8100 - acc: 0.7133
    * Holdout: AUROC: 80.85755705833435,
Sensitivity/Recall: 79.67479825019836, Precision: 62.42038011550903, F1: 69.99999929067427
* (Adam = 0.0001) loss: 0.5688 - tp: 88.0000 - fp: 45.0000 - tn: 131.0000 - fn: 36.0000 - sen: 0.7097 - prc: 0.6617 - auroc: 0.8008 - acc: 0.7300 
* (Adam = 0.0001) loss: 0.5619 - tp: 83.0000 - fp: 43.0000 - tn: 133.0000 - fn: 41.0000 - sen: 0.6694 - prc: 0.6587 - auroc: 0.8087 - acc: 0.7200 
* (Adam = 0.0001) loss: 0.5562 - tp: 84.0000 - fp: 43.0000 - tn: 133.0000 - fn: 40.0000 - sen: 0.6774 - prc: 0.6614 - auroc: 0.8093 - acc: 0.7233 

## (11) TextCNN-1024 + struct-64 + 64-D-64-1

In [None]:
# Single-branch alternatives (swap one in for the tuple below to feed only one input):
#total_input = tf.data.Dataset.from_tensor_slices(structuredArray)
#total_input = tf.data.Dataset.from_tensor_slices(tokenArray)
total_input = tf.data.Dataset.from_tensor_slices(
    (tokenArray, structuredArray)
)
total_output = tf.data.Dataset.from_tensor_slices(outputArray)
# Each element becomes ((tokenArray slice, structuredArray slice), outputArray slice).
total_dataset = tf.data.Dataset.zip((total_input, total_output))

# Positional split, batches of 15: first 300 elements -> test, next 300 -> validation,
# everything from element 600 onward -> training (the only split that is shuffled).
test_dataset = total_dataset.take(300).batch(15)
validation_dataset = total_dataset.skip(300).take(300).batch(15)
train_dataset = total_dataset.skip(600).shuffle(1000).batch(15)

In [None]:
# Text branch: sequence of `width` indices -> frozen embeddings initialised from
# `embedding_matrix` -> per-token Dense(512) -> dropout.
input1 = tf.keras.Input(shape=(width,))
nl = tf.keras.layers.Embedding(
    vocabulary_size,
    embedding_dimension,
    embeddings_initializer=tf.keras.initializers.Constant(embedding_matrix),
    input_length=width,
    trainable=False,
)(input1)
nl = tf.keras.layers.Dense(512, activation='relu')(nl)
nl = tf.keras.layers.Dropout(0.50)(nl)

# One Conv1D per kernel width. The pool size equals the conv output length
# (width - kernel_size + 1), so every branch is max-pooled over the full sequence.
kernels = [3, 5, 7, 10]
pooled = [
    tf.keras.layers.MaxPooling1D(width - kernel_size + 1)(
        tf.keras.layers.Conv1D(256, kernel_size, activation='relu')(nl)
    )
    for kernel_size in kernels
]
nl = tf.keras.layers.Concatenate(axis=1)(pooled)
nl = tf.keras.layers.Flatten()(nl)
nl = tf.keras.layers.Dropout(0.50)(nl)
nl = tf.keras.layers.Dense(
    1024, activation='relu', kernel_regularizer=tf.keras.regularizers.l2()
)(nl)

# Structured branch: a single Dense(64) projection of the structured vector.
input2 = tf.keras.Input(shape=(structuredArray.shape[1],))
sl = tf.keras.layers.Dense(
    64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2()
)(input2)

# Fusion head on [1024-d text features, 64-d structured features]: 64 -> dropout -> 64.
nl = combo = tf.keras.layers.Concatenate()([nl, sl])
nl = tf.keras.layers.Dense(
    64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2()
)(nl)
nl = tf.keras.layers.Dropout(0.5)(nl)
nl = tf.keras.layers.Dense(
    64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2()
)(nl)

output = tf.keras.layers.Dense(1, activation='sigmoid')(nl)

# Single-input variants, left commented out for reference:
#model = tf.keras.Model(inputs = input1, outputs = output)
#model = tf.keras.Model(inputs = input2, outputs = output)
model = tf.keras.Model(inputs=(input1, input2), outputs=output)

model.summary()

# Confusion-matrix counts plus sensitivity (recall), precision, AUROC and accuracy.
metrics = [
    metric_cls(name=label)
    for metric_cls, label in (
        (tf.keras.metrics.TruePositives, 'tp'),
        (tf.keras.metrics.FalsePositives, 'fp'),
        (tf.keras.metrics.TrueNegatives, 'tn'),
        (tf.keras.metrics.FalseNegatives, 'fn'),
        (tf.keras.metrics.Recall, 'sen'),
        (tf.keras.metrics.Precision, 'prc'),
        (tf.keras.metrics.AUC, 'auroc'),
        (tf.keras.metrics.BinaryAccuracy, 'acc'),
    )
]
# Early stopping (patience 4, best weights restored) plus a 10x learning-rate cut
# after 2 epochs on a plateau.
callbacks = [
    tf.keras.callbacks.EarlyStopping(
        patience=4,
        verbose=1,
        restore_best_weights=True,
        min_delta=0.0001,
    ),
    tf.keras.callbacks.ReduceLROnPlateau(factor=0.1, patience=2, verbose=1),
]
model.compile(
    optimizer=tf.keras.optimizers.Adam(0.0001),
    loss='binary_crossentropy',
    metrics=metrics,
)
model.fit(
    train_dataset,
    epochs=80,
    validation_data=validation_dataset,
    verbose=1,
    callbacks=callbacks,
)
model.evaluate(validation_dataset)
model.save_weights('pubmed_combo11_lowva_weights-XXXX')

Results:
* (Adam = 0.0001) loss: 0.5823 - tp: 85.0000 - fp: 53.0000 - tn: 123.0000 - fn: 39.0000 - sen: 0.6855 - prc: 0.6159 - auroc: 0.7971 - acc: 0.6933 
* (Adam = 0.0001) loss: 0.5971 - tp: 94.0000 - fp: 64.0000 - tn: 112.0000 - fn: 30.0000 - sen: 0.7581 - prc: 0.5949 - auroc: 0.7895 - acc: 0.6867 
* (Adam = 0.0001) loss: 0.5730 - tp: 95.0000 - fp: 51.0000 - tn: 125.0000 - fn: 29.0000 - sen: 0.7661 - prc: 0.6507 - auroc: 0.8131 - acc: 0.7333 
    * Holdout: AUROC: 80.58196902275085,
Sensitivity/Recall: 77.2357702255249, Precision: 62.5, F1: 69.09090823780406
* (Adam = 0.0001) loss: 0.5747 - tp: 95.0000 - fp: 55.0000 - tn: 121.0000 - fn: 29.0000 - sen: 0.7661 - prc: 0.6333 - auroc: 0.8018 - acc: 0.7200 

## (12) (11) but with struct-256 + 256-D-64-1

In [None]:
# Single-branch alternatives (swap one in for the tuple below to feed only one input):
#total_input = tf.data.Dataset.from_tensor_slices(structuredArray)
#total_input = tf.data.Dataset.from_tensor_slices(tokenArray)
total_input = tf.data.Dataset.from_tensor_slices(
    (tokenArray, structuredArray)
)
total_output = tf.data.Dataset.from_tensor_slices(outputArray)
# Each element becomes ((tokenArray slice, structuredArray slice), outputArray slice).
total_dataset = tf.data.Dataset.zip((total_input, total_output))

# Positional split, batches of 15: first 300 elements -> test, next 300 -> validation,
# everything from element 600 onward -> training (the only split that is shuffled).
test_dataset = total_dataset.take(300).batch(15)
validation_dataset = total_dataset.skip(300).take(300).batch(15)
train_dataset = total_dataset.skip(600).shuffle(1000).batch(15)

In [None]:
# Text branch: sequence of `width` indices -> frozen embeddings initialised from
# `embedding_matrix` -> per-token Dense(512) -> dropout.
input1 = tf.keras.Input(shape=(width,))
nl = tf.keras.layers.Embedding(
    vocabulary_size,
    embedding_dimension,
    embeddings_initializer=tf.keras.initializers.Constant(embedding_matrix),
    input_length=width,
    trainable=False,
)(input1)
nl = tf.keras.layers.Dense(512, activation='relu')(nl)
nl = tf.keras.layers.Dropout(0.50)(nl)

# One Conv1D per kernel width. The pool size equals the conv output length
# (width - kernel_size + 1), so every branch is max-pooled over the full sequence.
kernels = [3, 5, 7, 10]
pooled = [
    tf.keras.layers.MaxPooling1D(width - kernel_size + 1)(
        tf.keras.layers.Conv1D(256, kernel_size, activation='relu')(nl)
    )
    for kernel_size in kernels
]
nl = tf.keras.layers.Concatenate(axis=1)(pooled)
nl = tf.keras.layers.Flatten()(nl)
nl = tf.keras.layers.Dropout(0.50)(nl)
nl = tf.keras.layers.Dense(
    1024, activation='relu', kernel_regularizer=tf.keras.regularizers.l2()
)(nl)

# Structured branch: a single Dense(256) projection of the structured vector.
input2 = tf.keras.Input(shape=(structuredArray.shape[1],))
sl = tf.keras.layers.Dense(
    256, activation='relu', kernel_regularizer=tf.keras.regularizers.l2()
)(input2)

# Fusion head on [1024-d text features, 256-d structured features]: 256 -> dropout -> 64.
nl = combo = tf.keras.layers.Concatenate()([nl, sl])
nl = tf.keras.layers.Dense(
    256, activation='relu', kernel_regularizer=tf.keras.regularizers.l2()
)(nl)
nl = tf.keras.layers.Dropout(0.5)(nl)
nl = tf.keras.layers.Dense(
    64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2()
)(nl)
output = tf.keras.layers.Dense(1, activation='sigmoid')(nl)

# Single-input variants, left commented out for reference:
#model = tf.keras.Model(inputs = input1, outputs = output)
#model = tf.keras.Model(inputs = input2, outputs = output)
model = tf.keras.Model(inputs=(input1, input2), outputs=output)

model.summary()

# Confusion-matrix counts plus sensitivity (recall), precision, AUROC and accuracy.
metrics = [
    metric_cls(name=label)
    for metric_cls, label in (
        (tf.keras.metrics.TruePositives, 'tp'),
        (tf.keras.metrics.FalsePositives, 'fp'),
        (tf.keras.metrics.TrueNegatives, 'tn'),
        (tf.keras.metrics.FalseNegatives, 'fn'),
        (tf.keras.metrics.Recall, 'sen'),
        (tf.keras.metrics.Precision, 'prc'),
        (tf.keras.metrics.AUC, 'auroc'),
        (tf.keras.metrics.BinaryAccuracy, 'acc'),
    )
]
# Early stopping (patience 4, best weights restored) plus a 10x learning-rate cut
# after 2 epochs on a plateau.
callbacks = [
    tf.keras.callbacks.EarlyStopping(
        patience=4,
        verbose=1,
        restore_best_weights=True,
        min_delta=0.0001,
    ),
    tf.keras.callbacks.ReduceLROnPlateau(factor=0.1, patience=2, verbose=1),
]
model.compile(
    optimizer=tf.keras.optimizers.Adam(0.0001),
    loss='binary_crossentropy',
    metrics=metrics,
)
model.fit(
    train_dataset,
    epochs=80,
    validation_data=validation_dataset,
    verbose=1,
    callbacks=callbacks,
)
model.evaluate(validation_dataset)
model.save_weights('pubmed_combo12_lowva_weights-XXXX')

Results:
* (Adam = 0.0001) loss: 0.5921 - tp: 91.0000 - fp: 45.0000 - tn: 131.0000 - fn: 33.0000 - sen: 0.7339 - prc: 0.6691 - auroc: 0.8329 - acc: 0.7400 
    * Holdout: AUROC: 81.61315321922302,
Sensitivity/Recall: 85.36585569381714, Precision: 57.065218687057495, F1: 68.40391037911867
* (Adam = 0.0001) loss: 0.5960 - tp: 100.0000 - fp: 53.0000 - tn: 123.0000 - fn: 24.0000 - sen: 0.8065 - prc: 0.6536 - auroc: 0.8324 - acc: 0.7433 
* (Adam = 0.0001) loss: 0.5780 - tp: 84.0000 - fp: 46.0000 - tn: 130.0000 - fn: 40.0000 - sen: 0.6774 - prc: 0.6462 - auroc: 0.8256 - acc: 0.7133 
* (Adam = 0.0001) loss: 0.5804 - tp: 97.0000 - fp: 54.0000 - tn: 122.0000 - fn: 27.0000 - sen: 0.7823 - prc: 0.6424 - auroc: 0.8328 - acc: 0.7300

## (13) (11) but with struct-256 + 256-D-256-D-64-1

In [None]:
# Single-branch alternatives (swap one in for the tuple below to feed only one input):
#total_input = tf.data.Dataset.from_tensor_slices(structuredArray)
#total_input = tf.data.Dataset.from_tensor_slices(tokenArray)
total_input = tf.data.Dataset.from_tensor_slices(
    (tokenArray, structuredArray)
)
total_output = tf.data.Dataset.from_tensor_slices(outputArray)
# Each element becomes ((tokenArray slice, structuredArray slice), outputArray slice).
total_dataset = tf.data.Dataset.zip((total_input, total_output))

# Positional split, batches of 15: first 300 elements -> test, next 300 -> validation,
# everything from element 600 onward -> training (the only split that is shuffled).
test_dataset = total_dataset.take(300).batch(15)
validation_dataset = total_dataset.skip(300).take(300).batch(15)
train_dataset = total_dataset.skip(600).shuffle(1000).batch(15)

In [None]:
# Text branch: sequence of `width` indices -> frozen embeddings initialised from
# `embedding_matrix` -> per-token Dense(512) -> dropout.
input1 = tf.keras.Input(shape=(width,))
nl = tf.keras.layers.Embedding(
    vocabulary_size,
    embedding_dimension,
    embeddings_initializer=tf.keras.initializers.Constant(embedding_matrix),
    input_length=width,
    trainable=False,
)(input1)
nl = tf.keras.layers.Dense(512, activation='relu')(nl)
nl = tf.keras.layers.Dropout(0.50)(nl)

# One Conv1D per kernel width. The pool size equals the conv output length
# (width - kernel_size + 1), so every branch is max-pooled over the full sequence.
kernels = [3, 5, 7, 10]
pooled = [
    tf.keras.layers.MaxPooling1D(width - kernel_size + 1)(
        tf.keras.layers.Conv1D(256, kernel_size, activation='relu')(nl)
    )
    for kernel_size in kernels
]
nl = tf.keras.layers.Concatenate(axis=1)(pooled)
nl = tf.keras.layers.Flatten()(nl)
nl = tf.keras.layers.Dropout(0.50)(nl)
nl = tf.keras.layers.Dense(
    1024, activation='relu', kernel_regularizer=tf.keras.regularizers.l2()
)(nl)

# Structured branch: a single Dense(256) projection of the structured vector.
input2 = tf.keras.Input(shape=(structuredArray.shape[1],))
sl = tf.keras.layers.Dense(
    256, activation='relu', kernel_regularizer=tf.keras.regularizers.l2()
)(input2)

# Fusion head: 256 -> dropout -> 256 -> dropout -> 64.
nl = combo = tf.keras.layers.Concatenate()([nl, sl])
nl = tf.keras.layers.Dense(
    256, activation='relu', kernel_regularizer=tf.keras.regularizers.l2()
)(nl)
nl = tf.keras.layers.Dropout(0.5)(nl)
nl = tf.keras.layers.Dense(
    256, activation='relu', kernel_regularizer=tf.keras.regularizers.l2()
)(nl)
nl = tf.keras.layers.Dropout(0.5)(nl)
nl = tf.keras.layers.Dense(
    64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2()
)(nl)
output = tf.keras.layers.Dense(1, activation='sigmoid')(nl)

# Single-input variants, left commented out for reference:
#model = tf.keras.Model(inputs = input1, outputs = output)
#model = tf.keras.Model(inputs = input2, outputs = output)
model = tf.keras.Model(inputs=(input1, input2), outputs=output)

model.summary()

# Confusion-matrix counts plus sensitivity (recall), precision, AUROC and accuracy.
metrics = [
    metric_cls(name=label)
    for metric_cls, label in (
        (tf.keras.metrics.TruePositives, 'tp'),
        (tf.keras.metrics.FalsePositives, 'fp'),
        (tf.keras.metrics.TrueNegatives, 'tn'),
        (tf.keras.metrics.FalseNegatives, 'fn'),
        (tf.keras.metrics.Recall, 'sen'),
        (tf.keras.metrics.Precision, 'prc'),
        (tf.keras.metrics.AUC, 'auroc'),
        (tf.keras.metrics.BinaryAccuracy, 'acc'),
    )
]
# Early stopping (patience 4, best weights restored) plus a 10x learning-rate cut
# after 2 epochs on a plateau.
callbacks = [
    tf.keras.callbacks.EarlyStopping(
        patience=4,
        verbose=1,
        restore_best_weights=True,
        min_delta=0.0001,
    ),
    tf.keras.callbacks.ReduceLROnPlateau(factor=0.1, patience=2, verbose=1),
]
model.compile(
    optimizer=tf.keras.optimizers.Adam(0.0001),
    loss='binary_crossentropy',
    metrics=metrics,
)
model.fit(
    train_dataset,
    epochs=80,
    validation_data=validation_dataset,
    verbose=1,
    callbacks=callbacks,
)
model.evaluate(validation_dataset)
model.save_weights('pubmed_combo13_lowva_weights-XXXX')

Results:
* (Adam = 0.0001) loss: 0.5718 - tp: 84.0000 - fp: 42.0000 - tn: 134.0000 - fn: 40.0000 - sen: 0.6774 - prc: 0.6667 - auroc: 0.8281 - acc: 0.7267
    * Holdout: AUROC: 81.60167336463928,
Sensitivity/Recall: 82.11382031440735, Precision: 60.8433723449707, F1: 69.89619271499818
* (Adam = 0.0001) loss: 0.6198 - tp: 95.0000 - fp: 62.0000 - tn: 114.0000 - fn: 29.0000 - sen: 0.7661 - prc: 0.6051 - auroc: 0.8012 - acc: 0.6967
* (Adam = 0.0001) loss: 0.5982 - tp: 101.0000 - fp: 61.0000 - tn: 115.0000 - fn: 23.0000 - sen: 0.8145 - prc: 0.6235 - auroc: 0.8150 - acc: 0.7200 
* (Adam = 0.0001) loss: 0.5852 - tp: 85.0000 - fp: 47.0000 - tn: 129.0000 - fn: 39.0000 - sen: 0.6855 - prc: 0.6439 - auroc: 0.8126 - acc: 0.7133 

## (14) (13) but with 256-D-256-64-1

In [None]:
# Single-branch alternatives (swap one in for the tuple below to feed only one input):
#total_input = tf.data.Dataset.from_tensor_slices(structuredArray)
#total_input = tf.data.Dataset.from_tensor_slices(tokenArray)
total_input = tf.data.Dataset.from_tensor_slices(
    (tokenArray, structuredArray)
)
total_output = tf.data.Dataset.from_tensor_slices(outputArray)
# Each element becomes ((tokenArray slice, structuredArray slice), outputArray slice).
total_dataset = tf.data.Dataset.zip((total_input, total_output))

# Positional split, batches of 15: first 300 elements -> test, next 300 -> validation,
# everything from element 600 onward -> training (the only split that is shuffled).
test_dataset = total_dataset.take(300).batch(15)
validation_dataset = total_dataset.skip(300).take(300).batch(15)
train_dataset = total_dataset.skip(600).shuffle(1000).batch(15)

In [None]:
# Text branch: sequence of `width` indices -> frozen embeddings initialised from
# `embedding_matrix` -> per-token Dense(512) -> dropout.
input1 = tf.keras.Input(shape=(width,))
nl = tf.keras.layers.Embedding(
    vocabulary_size,
    embedding_dimension,
    embeddings_initializer=tf.keras.initializers.Constant(embedding_matrix),
    input_length=width,
    trainable=False,
)(input1)
nl = tf.keras.layers.Dense(512, activation='relu')(nl)
nl = tf.keras.layers.Dropout(0.50)(nl)

# One Conv1D per kernel width. The pool size equals the conv output length
# (width - kernel_size + 1), so every branch is max-pooled over the full sequence.
kernels = [3, 5, 7, 10]
pooled = [
    tf.keras.layers.MaxPooling1D(width - kernel_size + 1)(
        tf.keras.layers.Conv1D(256, kernel_size, activation='relu')(nl)
    )
    for kernel_size in kernels
]
nl = tf.keras.layers.Concatenate(axis=1)(pooled)
nl = tf.keras.layers.Flatten()(nl)
nl = tf.keras.layers.Dropout(0.50)(nl)
nl = tf.keras.layers.Dense(
    1024, activation='relu', kernel_regularizer=tf.keras.regularizers.l2()
)(nl)

# Structured branch: a single Dense(256) projection of the structured vector.
input2 = tf.keras.Input(shape=(structuredArray.shape[1],))
sl = tf.keras.layers.Dense(
    256, activation='relu', kernel_regularizer=tf.keras.regularizers.l2()
)(input2)

# Fusion head: 256 -> dropout -> 256 -> 64 (no dropout before the 64-unit layer).
nl = combo = tf.keras.layers.Concatenate()([nl, sl])
nl = tf.keras.layers.Dense(
    256, activation='relu', kernel_regularizer=tf.keras.regularizers.l2()
)(nl)
nl = tf.keras.layers.Dropout(0.5)(nl)
nl = tf.keras.layers.Dense(
    256, activation='relu', kernel_regularizer=tf.keras.regularizers.l2()
)(nl)
nl = tf.keras.layers.Dense(
    64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2()
)(nl)
output = tf.keras.layers.Dense(1, activation='sigmoid')(nl)

# Single-input variants, left commented out for reference:
#model = tf.keras.Model(inputs = input1, outputs = output)
#model = tf.keras.Model(inputs = input2, outputs = output)
model = tf.keras.Model(inputs=(input1, input2), outputs=output)

model.summary()

# Confusion-matrix counts plus sensitivity (recall), precision, AUROC and accuracy.
metrics = [
    metric_cls(name=label)
    for metric_cls, label in (
        (tf.keras.metrics.TruePositives, 'tp'),
        (tf.keras.metrics.FalsePositives, 'fp'),
        (tf.keras.metrics.TrueNegatives, 'tn'),
        (tf.keras.metrics.FalseNegatives, 'fn'),
        (tf.keras.metrics.Recall, 'sen'),
        (tf.keras.metrics.Precision, 'prc'),
        (tf.keras.metrics.AUC, 'auroc'),
        (tf.keras.metrics.BinaryAccuracy, 'acc'),
    )
]
# Early stopping (patience 4, best weights restored) plus a 10x learning-rate cut
# after 2 epochs on a plateau.
callbacks = [
    tf.keras.callbacks.EarlyStopping(
        patience=4,
        verbose=1,
        restore_best_weights=True,
        min_delta=0.0001,
    ),
    tf.keras.callbacks.ReduceLROnPlateau(factor=0.1, patience=2, verbose=1),
]
model.compile(
    optimizer=tf.keras.optimizers.Adam(0.0001),
    loss='binary_crossentropy',
    metrics=metrics,
)
model.fit(
    train_dataset,
    epochs=80,
    validation_data=validation_dataset,
    verbose=1,
    callbacks=callbacks,
)
model.evaluate(validation_dataset)
model.save_weights('pubmed_combo14_lowva_weights-XXXX')

Results:
* (Adam = 0.0001) loss: 0.5924 - tp: 96.0000 - fp: 50.0000 - tn: 126.0000 - fn: 28.0000 - sen: 0.7742 - prc: 0.6575 - auroc: 0.8249 - acc: 0.7400 
* (Adam = 0.0001) loss: 0.5926 - tp: 85.0000 - fp: 41.0000 - tn: 135.0000 - fn: 39.0000 - sen: 0.6855 - prc: 0.6746 - auroc: 0.8238 - acc: 0.7333 
* (Adam = 0.0001) loss: 0.5862 - tp: 88.0000 - fp: 44.0000 - tn: 132.0000 - fn: 36.0000 - sen: 0.7097 - prc: 0.6667 - auroc: 0.8232 - acc: 0.7333 
* (Adam = 0.0001) loss: 0.6070 - tp: 102.0000 - fp: 61.0000 - tn: 115.0000 - fn: 22.0000 - sen: 0.8226 - prc: 0.6258 - auroc: 0.8252 - acc: 0.7233
    * Holdout: AUROC: 80.96779584884644,
Sensitivity/Recall: 79.67479825019836, Precision: 62.82051205635071, F1: 70.25089616347687

## (15) (12) but with struct-1024 - ARVO abstract 2021

In [None]:
# Single-branch alternatives (swap one in for the tuple below to feed only one input):
#total_input = tf.data.Dataset.from_tensor_slices(structuredArray)
#total_input = tf.data.Dataset.from_tensor_slices(tokenArray)
total_input = tf.data.Dataset.from_tensor_slices(
    (tokenArray, structuredArray)
)
total_output = tf.data.Dataset.from_tensor_slices(outputArray)
# Each element becomes ((tokenArray slice, structuredArray slice), outputArray slice).
total_dataset = tf.data.Dataset.zip((total_input, total_output))

# Positional split, batches of 15: first 300 elements -> test, next 300 -> validation,
# everything from element 600 onward -> training (the only split that is shuffled).
test_dataset = total_dataset.take(300).batch(15)
validation_dataset = total_dataset.skip(300).take(300).batch(15)
train_dataset = total_dataset.skip(600).shuffle(1000).batch(15)

In [None]:
# --- Text branch: pre-trained embedding (trainable=False) -> Dense -> multi-width TextCNN ---
input1 = tf.keras.Input(shape=(width,))
nl = tf.keras.layers.Embedding(
    vocabulary_size,
    embedding_dimension,
    embeddings_initializer=tf.keras.initializers.Constant(embedding_matrix),
    input_length=width,
    trainable=False,
)(input1)
nl = tf.keras.layers.Dense(512, activation='relu')(nl)
nl = tf.keras.layers.Dropout(0.50)(nl)

# One convolution per kernel width. The pool size equals the convolution output
# length (width - kernel + 1), so each branch is reduced to a single time step.
kernels = [3, 5, 7, 10]
pooled = []
for kernel in kernels:
    conv_out = tf.keras.layers.Conv1D(256, kernel, activation='relu')(nl)
    pooled.append(tf.keras.layers.MaxPooling1D(width - kernel + 1)(conv_out))
nl = tf.keras.layers.Concatenate(axis=1)(pooled)
nl = tf.keras.layers.Flatten()(nl)
nl = tf.keras.layers.Dropout(0.50)(nl)
nl = tf.keras.layers.Dense(
    1024, activation='relu', kernel_regularizer=tf.keras.regularizers.l2()
)(nl)

# --- Structured branch: a single 1024-unit dense layer over the structured features ---
input2 = tf.keras.Input(shape=(structuredArray.shape[1],))
sl = tf.keras.layers.Dense(
    1024, activation='relu', kernel_regularizer=tf.keras.regularizers.l2()
)(input2)

# --- Fusion head: concatenate both branches, then 256 -> 64 -> sigmoid ---
combo = tf.keras.layers.Concatenate()([nl, sl])
nl = tf.keras.layers.Dense(
    256, activation='relu', kernel_regularizer=tf.keras.regularizers.l2()
)(combo)
nl = tf.keras.layers.Dropout(0.5)(nl)
nl = tf.keras.layers.Dense(
    64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2()
)(nl)
output = tf.keras.layers.Dense(1, activation='sigmoid')(nl)

model = tf.keras.Model(inputs=(input1, input2), outputs=output)

model.summary()

# Confusion-matrix counts plus the summary metrics, all at Keras' default threshold.
metrics = [
    metric_class(name=short_name)
    for metric_class, short_name in (
        (tf.keras.metrics.TruePositives, 'tp'),
        (tf.keras.metrics.FalsePositives, 'fp'),
        (tf.keras.metrics.TrueNegatives, 'tn'),
        (tf.keras.metrics.FalseNegatives, 'fn'),
        (tf.keras.metrics.Recall, 'sen'),
        (tf.keras.metrics.Precision, 'prc'),
        (tf.keras.metrics.AUC, 'auroc'),
        (tf.keras.metrics.BinaryAccuracy, 'acc'),
    )
]
callbacks = [
    tf.keras.callbacks.EarlyStopping(
        min_delta=0.0001, patience=4, restore_best_weights=True, verbose=1
    ),
    tf.keras.callbacks.ReduceLROnPlateau(factor=0.1, patience=2, verbose=1),
]
model.compile(
    optimizer=tf.keras.optimizers.Adam(0.0001),
    loss='binary_crossentropy',
    metrics=metrics,
)
model.fit(train_dataset, epochs=80, validation_data=validation_dataset, verbose=1, callbacks=callbacks)
model.evaluate(validation_dataset)
# 'XXXX' is a placeholder to edit by hand before saving
# (presumably the validation AUROC digits -- confirm).
model.save_weights('pubmed_combo15_lowva_weights-XXXX')

Results:
* (Adam = 0.0001) loss: 0.5789 - tp: 96.0000 - fp: 53.0000 - tn: 123.0000 - fn: 28.0000 - sen: 0.7742 - prc: 0.6443 - auroc: 0.8336 - acc: 0.7300
    * Holdout: AUROC: 81.07114434242249, Sensitivity/Recall: 81.30081295967102, Precision: 59.8802387714386, F1: 68.96551672683742
* (Adam = 0.0001) loss: 0.5654 - tp: 82.0000 - fp: 40.0000 - tn: 136.0000 - fn: 42.0000 - sen: 0.6613 - prc: 0.6721 - auroc: 0.8285 - acc: 0.7267 
* (Adam = 0.0001) loss: 0.5766 - tp: 91.0000 - fp: 51.0000 - tn: 125.0000 - fn: 33.0000 - sen: 0.7339 - prc: 0.6408 - auroc: 0.8284 - acc: 0.7200 
* (Adam = 0.0001) loss: 0.5749 - tp: 88.0000 - fp: 44.0000 - tn: 132.0000 - fn: 36.0000 - sen: 0.7097 - prc: 0.6667 - auroc: 0.8279 - acc: 0.7333 
* (Adam = 0.0001) loss: 0.5751 - tp: 92.0000 - fp: 46.0000 - tn: 130.0000 - fn: 32.0000 - sen: 0.7419 - prc: 0.6667 - auroc: 0.8255 - acc: 0.7400 

## (16) simple Transformer model

In [None]:
# Text-only model: every example is just the token-id sequence.
# (Pass structuredArray, or the (tokenArray, structuredArray) pair, for the other model types.)
total_input = tf.data.Dataset.from_tensor_slices(tokenArray)
total_output = tf.data.Dataset.from_tensor_slices(outputArray)
total_dataset = tf.data.Dataset.zip((total_input, total_output))

# Fixed, order-based split, all in batches of 15:
#   examples [0, 300)   -> holdout / test
#   examples [300, 600) -> validation
#   examples [600, ...) -> training (shuffled with a 1000-element buffer)
test_dataset = total_dataset.take(300).batch(15)
validation_dataset = total_dataset.skip(300).take(300).batch(15)
train_dataset = total_dataset.skip(600).shuffle(1000).batch(15)

In [None]:
# Text-only classifier: pre-trained token+position embedding (trainable=False)
# -> one TransformerBlock -> global average pooling -> regularised dense head.
input1 = tf.keras.Input(shape=(width,))
nl = TokenAndPositionPreTrainedEmbedding(
    width, vocabulary_size, embedding_dimension, embedding_matrix, trainable=False
)(input1)
# NOTE(review): arguments are presumably (embed_dim, num_heads, ff_dim) -- confirm
# against the TransformerBlock definition elsewhere in this notebook.
nl = TransformerBlock(embedding_dimension, 10, embedding_dimension)(nl)
nl = tf.keras.layers.GlobalAveragePooling1D()(nl)
nl = tf.keras.layers.Dropout(0.25)(nl)
nl = tf.keras.layers.Dense(
    1024, activation='relu', kernel_regularizer=tf.keras.regularizers.l2()
)(nl)
nl = tf.keras.layers.Dropout(0.50)(nl)
nl = tf.keras.layers.Dense(
    64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2()
)(nl)
output = tf.keras.layers.Dense(1, activation='sigmoid')(nl)

model = tf.keras.Model(inputs=input1, outputs=output)

model.summary()

# Confusion-matrix counts plus the summary metrics, all at Keras' default threshold.
metrics = [
    metric_class(name=short_name)
    for metric_class, short_name in (
        (tf.keras.metrics.TruePositives, 'tp'),
        (tf.keras.metrics.FalsePositives, 'fp'),
        (tf.keras.metrics.TrueNegatives, 'tn'),
        (tf.keras.metrics.FalseNegatives, 'fn'),
        (tf.keras.metrics.Recall, 'sen'),
        (tf.keras.metrics.Precision, 'prc'),
        (tf.keras.metrics.AUC, 'auroc'),
        (tf.keras.metrics.BinaryAccuracy, 'acc'),
    )
]
callbacks = [
    tf.keras.callbacks.EarlyStopping(
        min_delta=0.0001, patience=4, restore_best_weights=True, verbose=1
    ),
    tf.keras.callbacks.ReduceLROnPlateau(factor=0.1, patience=2, verbose=1),
]
model.compile(
    optimizer=tf.keras.optimizers.Adam(0.0001),
    loss='binary_crossentropy',
    metrics=metrics,
)
model.fit(train_dataset, epochs=80, validation_data=validation_dataset, verbose=1, callbacks=callbacks)
model.evaluate(validation_dataset)
# 'XXXX' is a placeholder to edit by hand before saving
# (presumably the validation AUROC digits -- confirm).
model.save_weights('pubmed_unstruct16_lowva_weights-XXXX')

Results:
* (Adam = 0.0001) loss: 0.6336 - tp: 65.0000 - fp: 35.0000 - tn: 141.0000 - fn: 59.0000 - sen: 0.5242 - prc: 0.6500 - auroc: 0.7433 - acc: 0.6867
    * Holdout: AUROC: 79.74828481674194, Sensitivity/Recall: 86.9918704032898, Precision: 53.76884341239929, F1: 66.45962685285666
* (Adam = 0.0001) loss: 0.6397 - tp: 75.0000 - fp: 48.0000 - tn: 128.0000 - fn: 49.0000 - sen: 0.6048 - prc: 0.6098 - auroc: 0.7303 - acc: 0.6767
* (Adam = 0.0001) loss: 0.6241 - tp: 67.0000 - fp: 39.0000 - tn: 137.0000 - fn: 57.0000 - sen: 0.5403 - prc: 0.6321 - auroc: 0.7422 - acc: 0.6800 

## (17) Transformer head + TextCNN

In [None]:
# Text-only model: every example is just the token-id sequence.
# (Pass structuredArray, or the (tokenArray, structuredArray) pair, for the other model types.)
total_input = tf.data.Dataset.from_tensor_slices(tokenArray)
total_output = tf.data.Dataset.from_tensor_slices(outputArray)
total_dataset = tf.data.Dataset.zip((total_input, total_output))

# Fixed, order-based split, all in batches of 15:
#   examples [0, 300)   -> holdout / test
#   examples [300, 600) -> validation
#   examples [600, ...) -> training (shuffled with a 1000-element buffer)
test_dataset = total_dataset.take(300).batch(15)
validation_dataset = total_dataset.skip(300).take(300).batch(15)
train_dataset = total_dataset.skip(600).shuffle(1000).batch(15)

In [None]:
# Text-only classifier: pre-trained token+position embedding (trainable=False)
# -> one TransformerBlock -> multi-width TextCNN -> regularised dense head.
input1 = tf.keras.Input(shape=(width,))
nl = TokenAndPositionPreTrainedEmbedding(
    width, vocabulary_size, embedding_dimension, embedding_matrix, trainable=False
)(input1)
# NOTE(review): arguments are presumably (embed_dim, num_heads, ff_dim) -- confirm
# against the TransformerBlock definition elsewhere in this notebook.
nl = TransformerBlock(embedding_dimension, 10, embedding_dimension)(nl)

# One convolution per kernel width. The pool size equals the convolution output
# length (width - kernel + 1), so each branch is reduced to a single time step.
kernels = [3, 5, 7, 10]
pooled = []
for kernel in kernels:
    conv_out = tf.keras.layers.Conv1D(256, kernel, activation='relu')(nl)
    pooled.append(tf.keras.layers.MaxPooling1D(width - kernel + 1)(conv_out))
nl = tf.keras.layers.Concatenate(axis=1)(pooled)
nl = tf.keras.layers.Flatten()(nl)
nl = tf.keras.layers.Dropout(0.25)(nl)
nl = tf.keras.layers.Dense(
    1024, activation='relu', kernel_regularizer=tf.keras.regularizers.l2()
)(nl)
nl = tf.keras.layers.Dropout(0.50)(nl)
nl = tf.keras.layers.Dense(
    64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2()
)(nl)
output = tf.keras.layers.Dense(1, activation='sigmoid')(nl)

model = tf.keras.Model(inputs=input1, outputs=output)

model.summary()

# Confusion-matrix counts plus the summary metrics, all at Keras' default threshold.
metrics = [
    metric_class(name=short_name)
    for metric_class, short_name in (
        (tf.keras.metrics.TruePositives, 'tp'),
        (tf.keras.metrics.FalsePositives, 'fp'),
        (tf.keras.metrics.TrueNegatives, 'tn'),
        (tf.keras.metrics.FalseNegatives, 'fn'),
        (tf.keras.metrics.Recall, 'sen'),
        (tf.keras.metrics.Precision, 'prc'),
        (tf.keras.metrics.AUC, 'auroc'),
        (tf.keras.metrics.BinaryAccuracy, 'acc'),
    )
]
callbacks = [
    tf.keras.callbacks.EarlyStopping(
        min_delta=0.0001, patience=4, restore_best_weights=True, verbose=1
    ),
    tf.keras.callbacks.ReduceLROnPlateau(factor=0.1, patience=2, verbose=1),
]
model.compile(
    optimizer=tf.keras.optimizers.Adam(0.0001),
    loss='binary_crossentropy',
    metrics=metrics,
)
model.fit(train_dataset, epochs=80, validation_data=validation_dataset, verbose=1, callbacks=callbacks)
model.evaluate(validation_dataset)
# 'XXXX' is a placeholder to edit by hand before saving
# (presumably the validation AUROC digits -- confirm).
model.save_weights('pubmed_unstruct17_lowva_weights-XXXX')

Results:
* (Adam = 0.0001) loss: 0.8753 - tp: 82.0000 - fp: 32.0000 - tn: 144.0000 - fn: 42.0000 - sen: 0.6613 - prc: 0.7193 - auroc: 0.8130 - acc: 0.7533 
* (Adam = 0.0001) loss: 0.8487 - tp: 94.0000 - fp: 55.0000 - tn: 121.0000 - fn: 30.0000 - sen: 0.7581 - prc: 0.6309 - auroc: 0.8160 - acc: 0.7167 
* (Adam = 0.0001) loss: 0.8558 - tp: 75.0000 - fp: 28.0000 - tn: 148.0000 - fn: 49.0000 - sen: 0.6048 - prc: 0.7282 - auroc: 0.8084 - acc: 0.7433 
* (Adam = 0.0001) loss: 0.7989 - tp: 84.0000 - fp: 37.0000 - tn: 139.0000 - fn: 40.0000 - sen: 0.6774 - prc: 0.6942 - auroc: 0.8250 - acc: 0.7433 
    * Holdout: AUROC: 80.765700340271, Sensitivity/Recall: 85.36585569381714, Precision: 56.14973306655884, F1: 67.74193644945382

## (18) Simple 2x Transformer

In [None]:
# Text-only model: every example is just the token-id sequence.
# (Pass structuredArray, or the (tokenArray, structuredArray) pair, for the other model types.)
total_input = tf.data.Dataset.from_tensor_slices(tokenArray)
total_output = tf.data.Dataset.from_tensor_slices(outputArray)
total_dataset = tf.data.Dataset.zip((total_input, total_output))

# Fixed, order-based split, all in batches of 15:
#   examples [0, 300)   -> holdout / test
#   examples [300, 600) -> validation
#   examples [600, ...) -> training (shuffled with a 1000-element buffer)
test_dataset = total_dataset.take(300).batch(15)
validation_dataset = total_dataset.skip(300).take(300).batch(15)
train_dataset = total_dataset.skip(600).shuffle(1000).batch(15)

In [None]:
# Text-only classifier: pre-trained token+position embedding (trainable=False)
# -> two stacked TransformerBlocks -> global average pooling -> regularised dense head.
input1 = tf.keras.Input(shape=(width,))
nl = TokenAndPositionPreTrainedEmbedding(
    width, vocabulary_size, embedding_dimension, embedding_matrix, trainable=False
)(input1)
# NOTE(review): arguments are presumably (embed_dim, num_heads, ff_dim) -- confirm
# against the TransformerBlock definition elsewhere in this notebook.
nl = TransformerBlock(embedding_dimension, 10, embedding_dimension)(nl)
nl = TransformerBlock(embedding_dimension, 10, embedding_dimension)(nl)
nl = tf.keras.layers.GlobalAveragePooling1D()(nl)
nl = tf.keras.layers.Dropout(0.25)(nl)
nl = tf.keras.layers.Dense(
    1024, activation='relu', kernel_regularizer=tf.keras.regularizers.l2()
)(nl)
nl = tf.keras.layers.Dropout(0.50)(nl)
nl = tf.keras.layers.Dense(
    64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2()
)(nl)
output = tf.keras.layers.Dense(1, activation='sigmoid')(nl)

model = tf.keras.Model(inputs=input1, outputs=output)

model.summary()

# Confusion-matrix counts plus the summary metrics, all at Keras' default threshold.
metrics = [
    metric_class(name=short_name)
    for metric_class, short_name in (
        (tf.keras.metrics.TruePositives, 'tp'),
        (tf.keras.metrics.FalsePositives, 'fp'),
        (tf.keras.metrics.TrueNegatives, 'tn'),
        (tf.keras.metrics.FalseNegatives, 'fn'),
        (tf.keras.metrics.Recall, 'sen'),
        (tf.keras.metrics.Precision, 'prc'),
        (tf.keras.metrics.AUC, 'auroc'),
        (tf.keras.metrics.BinaryAccuracy, 'acc'),
    )
]
callbacks = [
    tf.keras.callbacks.EarlyStopping(
        min_delta=0.0001, patience=4, restore_best_weights=True, verbose=1
    ),
    tf.keras.callbacks.ReduceLROnPlateau(factor=0.1, patience=2, verbose=1),
]
model.compile(
    optimizer=tf.keras.optimizers.Adam(0.0001),
    loss='binary_crossentropy',
    metrics=metrics,
)
model.fit(train_dataset, epochs=80, validation_data=validation_dataset, verbose=1, callbacks=callbacks)
model.evaluate(validation_dataset)
# 'XXXX' is a placeholder to edit by hand before saving
# (presumably the validation AUROC digits -- confirm).
model.save_weights('pubmed_unstruct18_lowva_weights-XXXX')

Results:
* (Adam = 0.0001) loss: 0.6378 - tp: 78.0000 - fp: 35.0000 - tn: 141.0000 - fn: 46.0000 - sen: 0.6290 - prc: 0.6903 - auroc: 0.7722 - acc: 0.7300 
* (Adam = 0.0001) loss: 0.6502 - tp: 75.0000 - fp: 40.0000 - tn: 136.0000 - fn: 49.0000 - sen: 0.6048 - prc: 0.6522 - auroc: 0.7752 - acc: 0.7033
    * Holdout: AUROC: 78.16132307052612, Sensitivity/Recall: 81.30081295967102, Precision: 57.47126340866089, F1: 67.34006666502239
* (Adam = 0.0001) loss: 0.6796 - tp: 78.0000 - fp: 49.0000 - tn: 127.0000 - fn: 46.0000 - sen: 0.6290 - prc: 0.6142 - auroc: 0.7558 - acc: 0.6833 

## (19) (15) but with Simple Transformer

In [None]:
# Combined model: every example is a (token ids, structured features) pair.
# (Pass structuredArray or tokenArray alone instead for the single-input models.)
total_input = tf.data.Dataset.from_tensor_slices((tokenArray, structuredArray))
total_output = tf.data.Dataset.from_tensor_slices(outputArray)
total_dataset = tf.data.Dataset.zip((total_input, total_output))

# Fixed, order-based split, all in batches of 15:
#   examples [0, 300)   -> holdout / test
#   examples [300, 600) -> validation
#   examples [600, ...) -> training (shuffled with a 1000-element buffer)
test_dataset = total_dataset.take(300).batch(15)
validation_dataset = total_dataset.skip(300).take(300).batch(15)
train_dataset = total_dataset.skip(600).shuffle(1000).batch(15)

In [None]:
# --- Text branch: pre-trained token+position embedding (trainable=False)
#     -> one TransformerBlock -> global average pooling -> 1024-unit dense ---
input1 = tf.keras.Input(shape=(width,))
nl = TokenAndPositionPreTrainedEmbedding(
    width, vocabulary_size, embedding_dimension, embedding_matrix, trainable=False
)(input1)
# NOTE(review): arguments are presumably (embed_dim, num_heads, ff_dim) -- confirm
# against the TransformerBlock definition elsewhere in this notebook.
nl = TransformerBlock(embedding_dimension, 10, embedding_dimension)(nl)
nl = tf.keras.layers.GlobalAveragePooling1D()(nl)
nl = tf.keras.layers.Dropout(0.50)(nl)
nl = tf.keras.layers.Dense(
    1024, activation='relu', kernel_regularizer=tf.keras.regularizers.l2()
)(nl)

# --- Structured branch: a single 1024-unit dense layer over the structured features ---
input2 = tf.keras.Input(shape=(structuredArray.shape[1],))
sl = tf.keras.layers.Dense(
    1024, activation='relu', kernel_regularizer=tf.keras.regularizers.l2()
)(input2)

# --- Fusion head: concatenate both branches, then 256 -> 64 -> sigmoid ---
combo = tf.keras.layers.Concatenate()([nl, sl])
nl = tf.keras.layers.Dense(
    256, activation='relu', kernel_regularizer=tf.keras.regularizers.l2()
)(combo)
nl = tf.keras.layers.Dropout(0.5)(nl)
nl = tf.keras.layers.Dense(
    64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2()
)(nl)
output = tf.keras.layers.Dense(1, activation='sigmoid')(nl)

model = tf.keras.Model(inputs=(input1, input2), outputs=output)

model.summary()

# Confusion-matrix counts plus the summary metrics, all at Keras' default threshold.
metrics = [
    metric_class(name=short_name)
    for metric_class, short_name in (
        (tf.keras.metrics.TruePositives, 'tp'),
        (tf.keras.metrics.FalsePositives, 'fp'),
        (tf.keras.metrics.TrueNegatives, 'tn'),
        (tf.keras.metrics.FalseNegatives, 'fn'),
        (tf.keras.metrics.Recall, 'sen'),
        (tf.keras.metrics.Precision, 'prc'),
        (tf.keras.metrics.AUC, 'auroc'),
        (tf.keras.metrics.BinaryAccuracy, 'acc'),
    )
]
callbacks = [
    tf.keras.callbacks.EarlyStopping(
        min_delta=0.0001, patience=4, restore_best_weights=True, verbose=1
    ),
    tf.keras.callbacks.ReduceLROnPlateau(factor=0.1, patience=2, verbose=1),
]
model.compile(
    optimizer=tf.keras.optimizers.Adam(0.0001),
    loss='binary_crossentropy',
    metrics=metrics,
)
model.fit(train_dataset, epochs=80, validation_data=validation_dataset, verbose=1, callbacks=callbacks)
model.evaluate(validation_dataset)
# 'XXXX' is a placeholder to edit by hand before saving
# (presumably the validation AUROC digits -- confirm).
model.save_weights('pubmed_combo19_lowva_weights-XXXX')

Results:
* (Adam = 0.0001) loss: 0.5855 - tp: 76.0000 - fp: 37.0000 - tn: 139.0000 - fn: 48.0000 - sen: 0.6129 - prc: 0.6726 - auroc: 0.8179 - acc: 0.7167 
* (Adam = 0.0001) loss: 0.5823 - tp: 82.0000 - fp: 36.0000 - tn: 140.0000 - fn: 42.0000 - sen: 0.6613 - prc: 0.6949 - auroc: 0.8215 - acc: 0.7400
    * Holdout: AUROC: 82.22635984420776, Sensitivity/Recall: 82.11382031440735, Precision: 60.8433723449707, F1: 69.89619271499818
* (Adam = 0.0001) loss: 0.5917 - tp: 85.0000 - fp: 47.0000 - tn: 129.0000 - fn: 39.0000 - sen: 0.6855 - prc: 0.6439 - auroc: 0.8072 - acc: 0.7133
* (Adam = 0.0001) loss: 0.5804 - tp: 75.0000 - fp: 28.0000 - tn: 148.0000 - fn: 49.0000 - sen: 0.6048 - prc: 0.7282 - auroc: 0.8176 - acc: 0.7433 

## (20) (15) but with Transformer-front TextCNN

In [None]:
# Combined model: every example is a (token ids, structured features) pair.
# (Pass structuredArray or tokenArray alone instead for the single-input models.)
total_input = tf.data.Dataset.from_tensor_slices((tokenArray, structuredArray))
total_output = tf.data.Dataset.from_tensor_slices(outputArray)
total_dataset = tf.data.Dataset.zip((total_input, total_output))

# Fixed, order-based split, all in batches of 15:
#   examples [0, 300)   -> holdout / test
#   examples [300, 600) -> validation
#   examples [600, ...) -> training (shuffled with a 1000-element buffer)
test_dataset = total_dataset.take(300).batch(15)
validation_dataset = total_dataset.skip(300).take(300).batch(15)
train_dataset = total_dataset.skip(600).shuffle(1000).batch(15)

In [None]:
# --- Text branch: pre-trained token+position embedding (trainable=False)
#     -> one TransformerBlock -> dropout -> multi-width TextCNN -> 1024-unit dense ---
input1 = tf.keras.Input(shape=(width,))
nl = TokenAndPositionPreTrainedEmbedding(
    width, vocabulary_size, embedding_dimension, embedding_matrix, trainable=False
)(input1)
# NOTE(review): arguments are presumably (embed_dim, num_heads, ff_dim) -- confirm
# against the TransformerBlock definition elsewhere in this notebook.
nl = TransformerBlock(embedding_dimension, 10, embedding_dimension)(nl)
nl = tf.keras.layers.Dropout(0.50)(nl)

# One convolution per kernel width. The pool size equals the convolution output
# length (width - kernel + 1), so each branch is reduced to a single time step.
kernels = [3, 5, 7, 10]
pooled = []
for kernel in kernels:
    conv_out = tf.keras.layers.Conv1D(256, kernel, activation='relu')(nl)
    pooled.append(tf.keras.layers.MaxPooling1D(width - kernel + 1)(conv_out))
nl = tf.keras.layers.Concatenate(axis=1)(pooled)
nl = tf.keras.layers.Flatten()(nl)
nl = tf.keras.layers.Dropout(0.50)(nl)
nl = tf.keras.layers.Dense(
    1024, activation='relu', kernel_regularizer=tf.keras.regularizers.l2()
)(nl)

# --- Structured branch: a single 1024-unit dense layer over the structured features ---
input2 = tf.keras.Input(shape=(structuredArray.shape[1],))
sl = tf.keras.layers.Dense(
    1024, activation='relu', kernel_regularizer=tf.keras.regularizers.l2()
)(input2)

# --- Fusion head: concatenate both branches, then 256 -> 64 -> sigmoid ---
combo = tf.keras.layers.Concatenate()([nl, sl])
nl = tf.keras.layers.Dense(
    256, activation='relu', kernel_regularizer=tf.keras.regularizers.l2()
)(combo)
nl = tf.keras.layers.Dropout(0.5)(nl)
nl = tf.keras.layers.Dense(
    64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2()
)(nl)
output = tf.keras.layers.Dense(1, activation='sigmoid')(nl)

model = tf.keras.Model(inputs=(input1, input2), outputs=output)

model.summary()

# Confusion-matrix counts plus the summary metrics, all at Keras' default threshold.
metrics = [
    metric_class(name=short_name)
    for metric_class, short_name in (
        (tf.keras.metrics.TruePositives, 'tp'),
        (tf.keras.metrics.FalsePositives, 'fp'),
        (tf.keras.metrics.TrueNegatives, 'tn'),
        (tf.keras.metrics.FalseNegatives, 'fn'),
        (tf.keras.metrics.Recall, 'sen'),
        (tf.keras.metrics.Precision, 'prc'),
        (tf.keras.metrics.AUC, 'auroc'),
        (tf.keras.metrics.BinaryAccuracy, 'acc'),
    )
]
callbacks = [
    tf.keras.callbacks.EarlyStopping(
        min_delta=0.0001, patience=4, restore_best_weights=True, verbose=1
    ),
    tf.keras.callbacks.ReduceLROnPlateau(factor=0.1, patience=2, verbose=1),
]
model.compile(
    optimizer=tf.keras.optimizers.Adam(0.0001),
    loss='binary_crossentropy',
    metrics=metrics,
)
model.fit(train_dataset, epochs=80, validation_data=validation_dataset, verbose=1, callbacks=callbacks)
model.evaluate(validation_dataset)
# 'XXXX' is a placeholder to edit by hand before saving
# (presumably the validation AUROC digits -- confirm).
model.save_weights('pubmed_combo20_lowva_weights-XXXX')

Results:
* (Adam = 0.0001) loss: 0.5756 - tp: 81.0000 - fp: 35.0000 - tn: 141.0000 - fn: 43.0000 - sen: 0.6532 - prc: 0.6983 - auroc: 0.8010 - acc: 0.7400 
* (Adam = 0.0001) loss: 0.5715 - tp: 82.0000 - fp: 41.0000 - tn: 135.0000 - fn: 42.0000 - sen: 0.6613 - prc: 0.6667 - auroc: 0.8172 - acc: 0.7233 
    * Holdout: AUROC: 79.8768937587738, Sensitivity/Recall: 78.86179089546204, Precision: 57.73809552192688, F1: 66.66666766968527
* (Adam = 0.0001) loss: 0.6085 - tp: 76.0000 - fp: 34.0000 - tn: 142.0000 - fn: 48.0000 - sen: 0.6129 - prc: 0.6909 - auroc: 0.7843 - acc: 0.7267 

# Loading Model Weights to Evaluate on Holdout
To re-run a saved model (assuming you saved it with ```save_weights``` using the path convention shown above), first re-create the model architecture, replacing ```TokenAndPositionPreTrainedEmbedding``` with ```TokenAndPositionEmbedding```. Then call ```load_weights``` on the resulting model object with the checkpoint prefix (omit the ```.index``` and ```.data-00000-of-00001``` extensions).

In [None]:
# Restore a trained model for holdout evaluation: copy its checkpoint files from the
# mounted shared drive, rebuild the datasets and the matching architecture, then load
# the weights.

# model path
# Checkpoint prefix (no extension); the two checkpoint files copied below share it.
model_path = 'pubmed_combo20_lowva_weights-8172'
shutil.copy("/content/drive/Shared drives/clinicalmodels/tf models/"+model_path+".data-00000-of-00001", model_path + ".data-00000-of-00001")
shutil.copy("/content/drive/Shared drives/clinicalmodels/tf models/"+model_path+".index", model_path+".index")

# load appropriate tf.data dataset
# Keep exactly one total_input line active -- the one matching the inputs of the
# model being restored (structured only / text only / combined).
#total_input = tf.data.Dataset.from_tensor_slices(structuredArray)
#total_input = tf.data.Dataset.from_tensor_slices(tokenArray)
total_input = tf.data.Dataset.from_tensor_slices((tokenArray, structuredArray))
total_output = tf.data.Dataset.from_tensor_slices(outputArray)
total_dataset = tf.data.Dataset.zip((total_input, total_output))
# Order-based split: examples [0, 300) holdout, [300, 600) validation, the rest training.
train_dataset = total_dataset.skip(600).shuffle(1000).batch(15)
validation_dataset = total_dataset.skip(300).take(300).batch(15)
test_dataset = total_dataset.take(300).batch(15)

# set up model -- replace code with original model
# The architecture below must be the one that was used when the checkpoint was saved.
# (20) (15) but with Transformer-front TextCNN
input1 = tf.keras.Input(shape=(width,)) 
#nl = TokenAndPositionPreTrainedEmbedding(width, vocabulary_size, embedding_dimension, embedding_matrix, trainable = False)(input1) 
#replace above line 
# This variant takes no embedding_matrix argument; the values are expected to come
# from load_weights at the end of this cell.
nl = TokenAndPositionEmbedding(width, vocabulary_size, embedding_dimension, trainable = False)(input1) 
# NOTE(review): arguments are presumably (embed_dim, num_heads, ff_dim) -- confirm
# against the TransformerBlock definition.
nl = TransformerBlock(embedding_dimension, 10, embedding_dimension)(nl)
nl = tf.keras.layers.Dropout(0.50)(nl) 
# TextCNN: one Conv1D per kernel width, each pooled with pool size
# width - kernel_size + 1.
kernels = [3, 5, 7, 10] 
pooled = [] 
for kernel_size in kernels: 
    mini_layer = tf.keras.layers.Conv1D(256, kernel_size, activation='relu')(nl) 
    mini_pooled = tf.keras.layers.MaxPooling1D(width - kernel_size + 1)(mini_layer) 
    pooled.append(mini_pooled) 
nl = tf.keras.layers.Concatenate(axis=1)(pooled) 
nl = tf.keras.layers.Flatten()(nl) 
nl = tf.keras.layers.Dropout(0.50)(nl) 
nl = tf.keras.layers.Dense(1024, activation='relu', kernel_regularizer=tf.keras.regularizers.l2())(nl) 

# Structured-data branch: one 1024-unit dense layer.
input2 = tf.keras.Input(shape=(structuredArray.shape[1],))
sl = tf.keras.layers.Dense(1024, activation='relu', kernel_regularizer=tf.keras.regularizers.l2())(input2)

# Fusion head: concatenate the text and structured branches, then 256 -> 64 -> sigmoid.
combo = tf.keras.layers.Concatenate()([nl, sl])
nl = combo
nl = tf.keras.layers.Dense(256, activation='relu', kernel_regularizer=tf.keras.regularizers.l2())(nl) 
nl = tf.keras.layers.Dropout(0.5)(nl) 
nl = tf.keras.layers.Dense(64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2())(nl)
output = tf.keras.layers.Dense(1, activation='sigmoid')(nl)

# Keep exactly one Model(...) line active, matching the total_input choice above.
#model = tf.keras.Model(inputs = input1, outputs = output)
#model = tf.keras.Model(inputs = input2, outputs = output)
model = tf.keras.Model(inputs = (input1, input2), outputs = output)

# load the weights
# model_path is the checkpoint prefix; the .index / .data files copied at the top of
# this cell sit next to it in the working directory.
model.load_weights(model_path)

The model should then be evaluated at decision thresholds from 0.10 to 0.90 in steps of 0.05 to identify the threshold that achieves the best F1 score on the validation dataset. 

In [None]:
# Sweep decision thresholds from 0.10 to 0.90 in steps of 0.05 and keep the one that
# gives the highest F1 on the validation set. The threshold is fixed when each Keras
# metric object is constructed, so the metrics are rebuilt and the model re-compiled
# for every candidate (compiling does not touch the loaded weights).
thresholds = [0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9]
maxf1threshold = 0.0
maxf1 = 0.0
maxResults = []
for threshold in thresholds:
    print('\nThreshold:', threshold)
    metrics = [tf.keras.metrics.TruePositives(name='tp', thresholds=threshold), 
                tf.keras.metrics.FalsePositives(name='fp', thresholds=threshold), 
                tf.keras.metrics.TrueNegatives(name='tn', thresholds=threshold), 
                tf.keras.metrics.FalseNegatives(name='fn', thresholds=threshold), 
                tf.keras.metrics.Recall(name='sen', thresholds=threshold), 
                tf.keras.metrics.Precision(name='prc', thresholds=threshold), 
                tf.keras.metrics.AUC(name='auroc', curve='ROC'), 
                tf.keras.metrics.AUC(name='auprc', curve='PR'), ]
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=metrics)

    loss, tp, fp, tn, fn, sens, prec, auroc, auprc = model.evaluate(validation_dataset)
    # F1 is the harmonic mean of sensitivity and precision; treat it as 0 when either
    # is 0 so the reciprocals below never divide by zero.
    if sens * prec == 0.0:
        f1 = 0.0
    else:
        f1 = 2/(1/sens + 1/prec)

    # Strict '>' keeps the lowest threshold when several tie for the best F1.
    if f1 > maxf1:
        maxf1threshold = threshold
        maxf1 = f1
        maxResults = [tp, fp, tn, fn, sens, prec]

# The sweep ran on validation_dataset, so report it as such (this used to say "Test Set").
if maxResults:
    print('\nBest threshold on Validation Set:', maxf1threshold)
    print('Best F1:', maxf1*100)
    print('True Positives:', maxResults[0])
    print('False Positives:', maxResults[1])
    print('True Negatives:', maxResults[2])
    print('False Negatives:', maxResults[3])
    print('Sensitivity/Recall:', maxResults[4]*100)
    print('Precision:', maxResults[5]*100)
else:
    # No threshold produced a positive F1, so there is no best result to index into.
    print('\nNo threshold gave a non-zero F1 on the Validation Set; maxf1threshold left at', maxf1threshold)

Using the decision threshold from the last step, the model is then evaluated against the holdout set. The predictions from the evaluation are also saved (for use in AUROC / AUPRC curve construction later).

In [None]:
# Score the holdout set at the decision threshold chosen on the validation set.
# The count/recall/precision metrics use maxf1threshold; the two AUC metrics do not
# take a decision threshold.
metrics = [
    metric_class(name=short_name, thresholds=maxf1threshold)
    for metric_class, short_name in (
        (tf.keras.metrics.TruePositives, 'tp'),
        (tf.keras.metrics.FalsePositives, 'fp'),
        (tf.keras.metrics.TrueNegatives, 'tn'),
        (tf.keras.metrics.FalseNegatives, 'fn'),
        (tf.keras.metrics.Recall, 'sens'),
        (tf.keras.metrics.Precision, 'prec'),
    )
]
metrics.append(tf.keras.metrics.AUC(name='auroc', curve='ROC'))
metrics.append(tf.keras.metrics.AUC(name='auprc', curve='PR'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=metrics)
loss, tp, fp, tn, fn, sens, prec, auroc, auprc = model.evaluate(test_dataset)

print('AUROC:', auroc*100)
print('Sensitivity/Recall:', sens*100)
print('Precision:', prec*100)
# F1 is the harmonic mean of sensitivity and precision; it is undefined when either is 0.
if sens * prec != 0.0:
    f1 = 2/(1/sens + 1/prec)
    print('F1:', f1*100)
else:
    f1 = 0.0
    print('F1 not applicable')
print('True Positives', tp)
print('False Positives', fp)
print('True Negatives', tn)
print('False Negatives', fn)

This model can then be used to generate and save predictions on the test set which can then be used to produce ROC and PRC curves. Note: be sure to remember to only use the line that corresponds to the input expected by your model

In [None]:
# Predict on the holdout examples (rows 0-299, the same rows as test_dataset) and write
# one "prediction,label" row per example to '<model_path>.csv'.
#
# Keep exactly one predict line active -- the one matching the inputs of the loaded
# model. The checkpoint restored earlier ('pubmed_combo20_...') is a two-input combo
# model, so the combo line is the active one; the single-input call would be rejected
# by that model.
#ypred = model.predict(structuredArray[0:300], batch_size=15) # structured model
#ypred = model.predict(tokenArray[0:300], batch_size=15) # unstructured model
ypred = model.predict([tokenArray[0:300], structuredArray[0:300]], batch_size=15) # combo model
yreal = outputArray[0:300]
print(ypred.shape, yreal.shape)
# newline='' lets the csv module control line endings itself, as its documentation
# requires for files handed to csv.writer.
with open(model_path+'.csv', 'w', newline='') as f:
    w = csv.writer(f)
    # Each prediction / label is indexed with [0] to write the scalar rather than
    # the enclosing row.
    w.writerows([pred[0], real[0]] for pred, real in zip(ypred, yreal))

In [None]:
# Copy the predictions CSV to the shared-drive folder that holds the saved models.
predictions_csv = model_path + ".csv"
shutil.copy(predictions_csv, "/content/drive/Shared drives/clinicalmodels/tf models/" + predictions_csv)