In [None]:
import os 
import pickle
import numpy as np
from os.path import join  

from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend as K
from tensorflow.distribute import MirroredStrategy, OneDeviceStrategy

# Turn on memory growth for every visible GPU before any model is built.
gpu_devices = tf.config.list_physical_devices("GPU")
for gpu in gpu_devices:
    tf.config.experimental.set_memory_growth(gpu, True)

from utils import *

# Data directories, resolved relative to the current working directory.
datasets = join(os.getcwd(), "datasets")
preprocessed_datasets = join(datasets, "preprocessed")

# Load the preprocessed (X_all, y_all, X_test) triple. The context manager
# guarantees the file handle is closed even if unpickling fails.
# NOTE: pickle.load can execute arbitrary code -- only load files you trust.
with open(join(preprocessed_datasets, "cleaned_separated.pickle"), 'rb') as pickle_file:
    X_all, y_all, X_test = pickle.load(pickle_file)

# Distribution strategy used by the model-building and training cells.
strategy = OneDeviceStrategy("CPU")

In [None]:
def train_test_tuple_split(X, y, train_size, seed=None):
    """Shuffle a multi-input dataset and split it into train and test parts.

    The same shuffled sample indices are applied to every array in ``X`` and
    to ``y``, so row ``i`` of each returned feature array always corresponds
    to row ``i`` of the returned targets.

    Parameters
    ----------
    X : sequence of array-like
        One array per model input, each indexed by sample along axis 0 and
        holding exactly ``len(y)`` samples.
    y : numpy.ndarray
        Targets, indexed by sample along axis 0.
    train_size : float
        Fraction of samples placed in the training part; the training part
        receives ``int(len(y) * train_size)`` samples and the test part the
        remainder.
    seed : int or None, optional
        Value passed to ``np.random.seed`` before shuffling (this reseeds
        NumPy's global generator).

    Returns
    -------
    X_train, X_test : tuple of numpy.ndarray
        Train / test rows of every array in ``X``, in the order of ``X``.
    y_train, y_test : numpy.ndarray
        Train / test rows of ``y``.

    Raises
    ------
    ValueError
        If any array in ``X`` does not have ``len(y)`` samples.
    """
    n_samples = len(y)

    def apply_tuple_split(arrays, train_idx, test_idx):
        # Select the same shuffled rows from every input array so that the
        # features stay aligned with the targets.
        train, test = [], []
        for position, x in enumerate(arrays):
            x = np.asarray(x)
            if len(x) != n_samples:
                raise ValueError(
                    "X[%d] has %d samples but y has %d"
                    % (position, len(x), n_samples)
                )
            train.append(x[train_idx])
            test.append(x[test_idx])
        return tuple(train), tuple(test)

    np.random.seed(seed)

    indices_ = np.arange(n_samples)
    np.random.shuffle(indices_)

    # The first `splitter_indice` shuffled indices form the training part.
    splitter_indice = int(n_samples * train_size)
    train_indices, test_indices = np.split(indices_, [splitter_indice])

    X_train, X_test = apply_tuple_split(X, train_indices, test_indices)
    y_train, y_test = y[train_indices], y[test_indices]

    return X_train, X_test, y_train, y_test
    

# Keep 75% of the labelled samples for training and the rest for validation;
# the fixed seed makes the shuffle repeatable.
X_train, X_val, y_train, y_val = train_test_tuple_split(
    X_all,
    y_all,
    train_size=.75,
    seed=42,
)

In [None]:
with strategy.scope():

    # Input widths are taken from the trailing axis of each training array.
    conv_len = X_train[0].shape[-1]
    vp_len = X_train[1].shape[-1]
    # Size the embedding table from every split that is fed through the model.
    # X_all contains both the training and the validation rows, so token ids
    # that only occur in the validation split stay inside the Embedding range.
    vocab_dim = int(max(np.max(X_all[1]), np.max(X_test[1]))) + 1

    # Branch 1: treat the first input as a 1-channel sequence and convolve it.
    conv_input = tf.keras.layers.Input(shape=(conv_len,), name="conv_layer_input")
    conv_reshaped = tf.keras.layers.Reshape((-1, 1), name="conv_reshaped_2D")(conv_input)
    conv_output = tf.keras.layers.Conv1D(2, kernel_size=5, activation='relu', name="conv_layer")(conv_reshaped)
    flatten_conv = tf.keras.layers.Flatten()(conv_output)

    # Branch 2: embed the integer-coded second input, then collapse each
    # 128-d embedding to a single value with a Dense(1) over the last axis.
    vp_input = tf.keras.layers.Input(shape=(vp_len,))
    embedded = tf.keras.layers.Embedding(vocab_dim, 128, input_length=vp_len)(vp_input)
    dense_emb = tf.keras.layers.Dense(1, activation='relu')(embedded)
    flatten_emb = tf.keras.layers.Flatten()(dense_emb)

    # Merge both branches and map them to a single sigmoid output.
    concat = tf.keras.layers.Concatenate()([flatten_conv, flatten_emb])

    output = tf.keras.layers.Dense(
        1,
        kernel_initializer='glorot_uniform',
        kernel_regularizer=tf.keras.regularizers.L1(1e-2),
        activation='sigmoid',
    )(concat)

    model = tf.keras.models.Model(inputs=[conv_input, vp_input], outputs=output)

    # bcr, p1, p2, m1 and m2 are custom metrics provided by `utils`.
    model.compile(
        loss=tf.keras.losses.BinaryCrossentropy(),
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
        metrics=[bcr, p1, p2, m1, m2, "accuracy"]
    )

# NOTE(review): ReduceLROnPlateau is left at its default mode='auto'. Keras
# documents that 'auto' infers the direction from the monitored name, and
# 'val_bcr' may be treated as lower-is-better. If bcr is a higher-is-better
# score (presumably, given BCREarlyStopping -- confirm in utils), pass
# mode='max' explicitly.
callbacks = [
    tf.keras.callbacks.ReduceLROnPlateau(monitor='val_bcr', factor=.5, patience=10, verbose=2),
    BCREarlyStopping(patience=30, restore_best_weights=True)
]

model.summary()

In [None]:
# Train under the same distribution strategy the model was built with,
# evaluating on the validation split after every epoch.
with strategy.scope():
    history = model.fit(
        x=X_train,
        y=y_train,
        batch_size=120,
        epochs=200,
        callbacks=callbacks,
        validation_data=(X_val, y_val),
    )

In [None]:
# Build the report over the train and validation splits, then print it.
# `report` receives whatever to_stdout() returns.
report_builder = Report(model, X_train, y_train, X_val, y_val)
report = report_builder.to_stdout()