In [1]:
from tensorflow import keras
import tensorflow as tf
from keras import Input, Model, Sequential
from keras.layers import Flatten, Dense, Embedding, Concatenate, Conv1D, Lambda, Conv2D
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Embedding, LSTM, Input, Concatenate, Flatten, Dot, Dropout
import keras.backend as K
from tensorflow.keras.metrics import AUC
import numpy as np
import glob
from tensorflow.keras.models import load_model

In [2]:
from tensorflow.keras import layers
def conv_vit(num_layers, num_heads, dropout_rate):
    size = 4 * 4 * 64
    
    inputs = keras.Input(shape=(20, 8, 8, 34))

    # Convolutional patch embedding
    x = Conv2D(filters=16, kernel_size=(5, 5), activation='relu', padding='same')(inputs)
    x = Conv2D(filters=16, kernel_size=(4, 4), activation='relu', padding='same')(x)
    x = Conv2D(filters=32, kernel_size=(3, 3), activation='relu', padding='same')(x)
    x = Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(x)
    x = Conv2D(filters=64, kernel_size=(2, 2), activation='relu')(x)
    x = Conv2D(filters=64, kernel_size=(2, 2), activation='relu')(x)    
    x = layers.Reshape((20, 4 * 4 * 64))(x)

    # Positional embedding
    pos_emb = layers.Embedding(input_dim=20, output_dim=size)(K.constant(np.asarray(range(0, 20))))
    pos_emb = tf.expand_dims(pos_emb, axis=0)
    pos_emb = tf.tile(pos_emb, [tf.shape(inputs)[0], 1, 1])  # Tile the tensor to match the desired batch size    
    x = layers.Add()([x, pos_emb])
    
    

    # Transformer Encoder
    for _ in range(num_layers):
        # Multi-Head Self-Attention
        y = layers.LayerNormalization(epsilon=1e-6)(x)
        y = layers.MultiHeadAttention(num_heads=num_heads, key_dim=size, dropout=dropout_rate)(y, y)
        y = layers.Add()([x, y])

        # Feed Forward Neural Network
        x = layers.LayerNormalization(epsilon=1e-6)(y)
        x = layers.Dense(1024, activation='relu')(x)
        x = layers.Dense(1024, activation='relu')(x)
        x = layers.Add()([y, x])

    # Classifier head
    x = layers.Dropout(dropout_rate)(x)
    
    x = Flatten()(x)
    outputs = Dense(1, activation='sigmoid')(x)

    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(
        loss=keras.losses.BinaryCrossentropy(),
        optimizer=keras.optimizers.Adam(learning_rate=0.0001),
        metrics=['accuracy', AUC(curve='ROC')],
    )
    return model

In [3]:
model = conv_vit(3, 16, 0.05)

2023-06-25 17:35:43.740482: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-06-25 17:35:53.313537: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 10405 MB memory:  -> device: 0, name: NVIDIA GeForce GTX 1080 Ti, pci bus id: 0000:65:00.0, compute capability: 6.1


In [3]:
def replace_last(string, word_to_replace, replacement):
    split_string = string.rsplit(word_to_replace, 1)
    return replacement.join(split_string)

In [4]:
import gc
#Function to clean keras cache to allow calling the 'fit' function multiple times
def reset_keras(model):
    sess = K.get_session()
    K.clear_session()
    sess.close()
    sess = K.get_session()

    try:
        del model
    except:
        pass

    gc.collect()

In [5]:
files = glob.glob('/sise/liorrk-group/DataSets/Datamining/xs_data/xs*')
#First data file used for test set
Xs_test = np.load(files[0], allow_pickle=True) #Use first file for testing
ys_test = np.load(replace_last(files[0], 'xs', 'ys'), allow_pickle=True)

print(Xs_test.shape)
print(ys_test.shape)

def model_train_epoch():
    #Train set too big to load all at once, load each file separately and train, skip first file as it is for testing
    global model
    for file in files[1:]:
        Xs_train = np.load(file, allow_pickle=True)
        ys_train = np.load(replace_last(file, 'xs', 'ys'), allow_pickle=True)
        print(Xs_train.shape)
        print(ys_train.shape)
        #Reload model before each 'fit' call due to keras memory leak
        model.save("my_model")
        reset_keras(model)    
        model = load_model("my_model")
        
        model.fit(Xs_train, ys_train, epochs=1, batch_size=64)
        del Xs_train
        del ys_train
        gc.collect()    
    score = model.evaluate(Xs_test, ys_test)
    print(f'Test set evaluation: {score}')

(195572, 20, 8, 8, 34)
(195572,)


In [None]:
epoch_count = 10
for i in range(epoch_count):
    model_train_epoch()

In [6]:
model = load_model("my_model")

2023-06-25 18:10:35.233952: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-06-25 18:10:35.692460: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 46503 MB memory:  -> device: 0, name: NVIDIA RTX 6000 Ada Generation, pci bus id: 0000:61:00.0, compute capability: 8.9


In [7]:
from sklearn.metrics import roc_auc_score, average_precision_score

In [8]:
y_pred = model.predict(Xs_test)

auc = roc_auc_score(ys_test, y_pred)
print("AUC:", auc)

auc_pr = average_precision_score(ys_test, y_pred)
print("AUC-PR:", auc_pr)

2023-06-25 18:11:52.791571: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)
2023-06-25 18:12:03.188228: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8401
2023-06-25 18:12:04.637999: I tensorflow/stream_executor/cuda/cuda_blas.cc:1760] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


AUC: 0.5972911886234157
AUC-PR: 0.16229304492045582
