In [1]:
import sys
sys.path.append('..')

from scripts.global_funcs import load_data_config, load_model_config, get_num_of_classes
from glob import glob
import os



import nvtabular as nvt
import dask_cudf

from nvtabular.utils import device_mem_size

from dask_cuda import LocalCUDACluster
from dask.distributed import Client

import shutil

# import rmm
import pathlib
import numpy as np

In [2]:
# Load the model/training configuration dict; the bare name on the last line
# echoes it as the cell output so the values used for this run are recorded.
model_config = load_model_config()
model_config

{'preblock_filters': 32,
 'preblock_kernel_sizes': [7],
 'preblock_pool_size': 3,
 'idblock_kernel_sizes': [5],
 'idblock_filters': [32, 64, 128],
 'idblock_activation': 'gelu',
 'idblock_avg_pool_sizes': [11],
 'last_activation': 'softmax',
 'batch_size': 4096,
 'learn_rate': 0.0005,
 'epochs': 300,
 'patience': 299,
 'num_warmup_epochs': 2,
 'warmup_lr_multiplier': 0.001,
 'from_logits': False,
 'TF_MEMORY_ALLOCATION': '0.8',
 'TF_VISIBLE_DEVICE': '0',
 'TF_FORCE_GPU_ALLOW_GROWTH': 'true',
 'device_spill_frac': 0.15,
 'protocol': 'tcp',
 'visible_devices': '0',
 'enable_tcp_over_ucx': False,
 'enable_nvlink': False,
 'enable_infiniband': False,
 'rmm_pool_size': '1GB',
 'clear_models_dirs': True,
 'clear_tensorboard': True}

In [3]:
# Pull the individual settings out of `model_config` into notebook-level names,
# grouped by what they control.

# -- network architecture --
preblock_filters = model_config["preblock_filters"]
preblock_kernel_sizes = model_config["preblock_kernel_sizes"]
preblock_pool_size = model_config["preblock_pool_size"]
idblock_filters = model_config["idblock_filters"]
idblock_kernel_sizes = model_config["idblock_kernel_sizes"]
idblock_avg_pool_sizes = model_config["idblock_avg_pool_sizes"]
idblock_activation = model_config["idblock_activation"]
last_activation = model_config["last_activation"]
from_logits = model_config["from_logits"]

# -- optimisation and schedule --
batch_size = model_config["batch_size"]
epochs = model_config["epochs"]
patience = model_config["patience"]
learn_rate = model_config["learn_rate"]
num_warmup_epochs = model_config["num_warmup_epochs"]
warmup_lr_multiplier = model_config["warmup_lr_multiplier"]

# -- housekeeping switches --
clear_models_dirs = model_config["clear_models_dirs"]
clear_tensorboard = model_config["clear_tensorboard"]

In [4]:
# Export the GPU-related settings as environment variables. This cell runs before
# the cell that imports tensorflow / the NVTabular TensorFlow loader.
# NOTE(review): presumably these must be set before that import to take effect — confirm.
os.environ["TF_MEMORY_ALLOCATION"] = model_config['TF_MEMORY_ALLOCATION']  # fraction of free memory
os.environ["TF_VISIBLE_DEVICE"] = model_config['TF_VISIBLE_DEVICE'] 
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = model_config['TF_FORCE_GPU_ALLOW_GROWTH']

In [5]:
import tensorflow as tf
from scripts.cosme_model import COSMELayer
from nvtabular.loader.tensorflow import KerasSequenceLoader, KerasSequenceValidater

2022-05-26 01:31:25.124006: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-26 01:31:25.124291: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-26 01:31:25.124402: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero


In [6]:
# Load the data/project configuration dict (paths, column names, sequence settings)
# and echo it as the cell output.
config = load_data_config()
config

{'clean_fasta_file': '/media/jcosme/Data/MarRef_parquet_10_cats',
 'output_dir': '/media/jcosme/Data',
 'project_name': 'small_mer_1',
 'base_col_names': ['seq', 'label'],
 'label_col_name': 'label',
 'input_col_name': 'seq',
 'label_regex': '(?:[^a-zA-Z0-9]+)([a-zA-Z]+[0-9]+)(?:[^a-zA-Z0-9]+)',
 'k_mer': 1,
 'possible_gene_values': ['A', 'C', 'G', 'T'],
 'max_seq_len': 150,
 'data_splits': {'train': 0.9, 'val': 0.05, 'test': 0.05},
 'random_seed': 42,
 'fasta_sep': '>',
 'unq_labs_dir': '/media/jcosme/Data/small_mer_1/data/unq_labels',
 'unq_labs_dir_csv': '/media/jcosme/Data/small_mer_1/data/unq_labels.csv',
 'data_dir': '/media/jcosme/Data/small_mer_1/data/small_mer_1',
 'nvtab_dir': '/media/jcosme/Data/small_mer_1/nvtab',
 'dask_dir': '/media/jcosme/Data/small_mer_1/dask',
 'tensorboard_dir': '/media/jcosme/Data/small_mer_1/tensorboard',
 'model_checkpoints_dir': '/media/jcosme/Data/small_mer_1/checkpoints/model_checkpoints',
 'model_checkpoints_parent_dir': '/media/jcosme/Data/sma

In [7]:
# Pull the individual settings out of `config` into notebook-level names,
# grouped by what they describe.

# -- column names --
input_col_name = config["input_col_name"]
label_col_name = config["label_col_name"]

# -- sequence / split settings --
max_seq_len = config["max_seq_len"]
possible_gene_values = config["possible_gene_values"]
val_split = config["data_splits"]["val"]

# -- directories --
nvtab_dir = config["nvtab_dir"]
dask_dir = config["dask_dir"]
tensorboard_dir = config["tensorboard_dir"]
model_weights_dir = config["model_weights_dir"]
model_checkpoints_dir = config["model_checkpoints_dir"]
model_checkpoints_parent_dir = config["model_checkpoints_parent_dir"]

In [8]:
# Optionally wipe artefacts left by earlier runs before training starts.
# `ignore_errors=True` keeps the clean-up best-effort (a directory that does not
# exist yet is fine) without a bare `except:`, which would also swallow
# KeyboardInterrupt and genuine programming errors such as a misspelled name.
if clear_models_dirs:
    for stale_dir in (model_checkpoints_dir, model_weights_dir, model_checkpoints_parent_dir):
        shutil.rmtree(stale_dir, ignore_errors=True)

if clear_tensorboard:
    shutil.rmtree(tensorboard_dir, ignore_errors=True)


In [9]:
# Number of target classes; used below as the one-hot depth in preprocess_data
# and as the width of the final Dense layer.
n_classes = get_num_of_classes()

In [10]:
# Scratch locations for Dask, both rooted under the NVTabular directory.
dask_workdir = pathlib.Path(nvtab_dir, "dask", "workdir")
stats_path = pathlib.Path(nvtab_dir, "dask", "stats")

# Recreate each scratch directory from empty so nothing from a previous run is reused.
for scratch_dir in (dask_workdir, stats_path):
    if scratch_dir.is_dir():
        shutil.rmtree(scratch_dir)
    scratch_dir.mkdir(parents=True)

# Value handed to `device_memory_limit`: total device memory scaled by the
# configured spill fraction.
device_limit = device_mem_size(kind="total") * model_config['device_spill_frac']

cluster = LocalCUDACluster(
    protocol=model_config['protocol'],
    CUDA_VISIBLE_DEVICES=model_config['visible_devices'],
    device_memory_limit=device_limit,
    enable_tcp_over_ucx=model_config['enable_tcp_over_ucx'],
    enable_nvlink=model_config['enable_nvlink'],
    enable_infiniband=model_config['enable_infiniband'],
    # rmm_pool_size=model_config['rmm_pool_size'],
    local_directory=dask_workdir,
)
client = Client(cluster)

# RMM pooling is currently switched off; the earlier experiment is kept for reference.
# def _rmm_pool():
#     rmm.reinitialize(
#         pool_allocator=True,
#         initial_pool_size=None,  # Use default size
#     )
#
# client.run(_rmm_pool)

# Echo the client so the cell output shows the cluster summary.
client

  f"A CUDA context for device {ctx} already exists on process ID "
distributed.preloading - INFO - Import preload module: dask_cuda.initialize


0,1
Connection method: Cluster object,Cluster type: dask_cuda.LocalCUDACluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 1
Total threads: 1,Total memory: 31.21 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:34321,Workers: 1
Dashboard: http://127.0.0.1:8787/status,Total threads: 1
Started: Just now,Total memory: 31.21 GiB

0,1
Comm: tcp://192.168.0.190:44141,Total threads: 1
Dashboard: http://192.168.0.190:44085/status,Memory: 31.21 GiB
Nanny: tcp://127.0.0.1:32869,
Local directory: /media/jcosme/Data/small_mer_1/nvtab/dask/workdir/dask-worker-space/worker-61zj79_y,Local directory: /media/jcosme/Data/small_mer_1/nvtab/dask/workdir/dask-worker-space/worker-61zj79_y
GPU: NVIDIA GeForce RTX 3080 Laptop GPU,GPU memory: 16.00 GiB


In [11]:
# Collect the parquet files of the train and validation splits under `nvtab_dir`;
# sorting makes the file order independent of the order glob happens to return.
TRAIN_PATHS = sorted(glob(f"{nvtab_dir}/train/*.parquet"))
VAL_PATHS = sorted(glob(f"{nvtab_dir}/val/*.parquet"))

In [12]:
# Training loader: wrap the training parquet files in an NVTabular dataset and
# hand it to the Keras sequence loader (shuffled).
train_parquet = nvt.Dataset(TRAIN_PATHS, part_size="10MB")  # a glob pattern would also work here
train_dataset = KerasSequenceLoader(
    train_parquet,
    engine="parquet",
    device=0,
    batch_size=batch_size,
    cat_names=[input_col_name],
    label_names=[label_col_name],
    shuffle=True,
    parts_per_chunk=1,
    buffer_size=0.001,  # amount of data, as a fraction of GPU memory, to load at once
)


  f"Row group memory size ({rg_byte_size_0}) (bytes) of parquet file is bigger"


In [13]:
# Validation loader: same construction as the training loader but unshuffled,
# with the batch size scaled down by the validation split fraction.
val_parquet = nvt.Dataset(VAL_PATHS, part_size="10MB")  # a glob pattern would also work here
valid_dataset = KerasSequenceLoader(
    val_parquet,
    engine="parquet",
    device=0,
    batch_size=int(batch_size*val_split),
    cat_names=[input_col_name],
    label_names=[label_col_name],
    shuffle=False,
    parts_per_chunk=1,
    buffer_size=0.001,  # amount of data, as a fraction of GPU memory, to load at once
)

In [14]:
# batch = next(iter(train_dataset))

In [15]:
# tf.squeeze(batch[0]['seq'][1])

In [16]:
# dir(train_dataset)

In [17]:
@tf.function
def preprocess_data(inputs, targets):
    """Convert one raw loader batch into dense model inputs and one-hot targets.

    Args:
        inputs: mapping from column name to a ``(values, row_lengths)`` pair;
            only ``inputs[input_col_name]`` is read.
        targets: integer class ids. Assumed to be shaped (batch, 1): the size-1
            axis is squeezed away after one-hot encoding — TODO confirm.

    Returns:
        A ``(features, labels)`` tuple. ``features`` has shape
        (batch, 1, max_seq_len, 1); ``labels`` is float32 with shape
        (batch, n_classes).
    """
    # Flatten to rank 1 explicitly. A bare tf.squeeze would also remove the batch
    # axis when a batch holds a single row, which from_row_lengths rejects.
    values = tf.reshape(inputs[input_col_name][0], [-1])
    row_lengths = tf.reshape(inputs[input_col_name][1], [-1])

    # The first positional parameter of RaggedTensor.to_tensor is `default_value`,
    # so `.to_tensor(max_seq_len)` padded short rows *with the value* max_seq_len
    # and left the width at the longest row of the batch. Request a fixed width
    # through `shape` instead and pad with 0.
    # NOTE(review): 0 is assumed to be a safe padding id for this encoding — confirm.
    out = tf.RaggedTensor.from_row_lengths(values, row_lengths).to_tensor(
        default_value=0,
        shape=[None, max_seq_len],
    )
    # out = tf.cast(out, tf.float32)

    # Add the singleton axes expected by the model input: (batch, 1, max_seq_len, 1).
    out = tf.expand_dims(out, 1)
    out = tf.expand_dims(out, -1)

    # One-hot encode the labels and drop the size-1 axis carried over from `targets`.
    tars = tf.one_hot(targets, n_classes)
    tars = tf.cast(tars, tf.float32)
    tars = tf.squeeze(tars, axis=1)

    return out, tars


    

In [18]:
# inputs = batch[0]
# targets = batch[1]

In [19]:
# tf.squeeze(inputs[input_col_name][0])

In [20]:
# make_batches(*batch)

In [21]:
# Register preprocess_data on both loaders and rebind the names to the results.
# NOTE(review): presumably the function is applied to every batch the loader
# yields — confirm against the KerasSequenceLoader.map API.
train_dataset = train_dataset.map(preprocess_data)
valid_dataset = valid_dataset.map(preprocess_data)

In [22]:
# batch = next(iter(train_dataset))

In [23]:
# train_dataset_reshaped = tf.RaggedTensor.from_row_lengths(train_dataset[0][input_col_name][0], train_dataset[0][input_col_name][1]).to_tensor(max_seq_len)

In [24]:
# Symbolic model input named after the sequence column; one sample has shape
# (1, max_seq_len, 1).
inputs = tf.keras.Input(name=f"{input_col_name}", dtype=tf.float32, shape=(1, max_seq_len, 1,))

# The Input above is already declared float32, so this cast does not change the dtype.
inputs = tf.cast(inputs, tf.float32)

# Scale the values by 1 / (number of possible gene values).
# NOTE(review): `inputs` is rebound to the derived tensors here, and the later
# `tf.keras.Model(inputs=inputs, ...)` call receives this scaled tensor rather
# than the tf.keras.Input placeholder. Depending on the Keras version the model
# may then start *at* this tensor, leaving the cast/scale outside the model —
# confirm, and keep the placeholder under its own name if the scaling should be
# part of the model.
inputs = tf.math.multiply(inputs, 1/len(possible_gene_values))

2022-05-26 01:31:28.458927: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-05-26 01:31:28.459483: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-26 01:31:28.459663: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-26 01:31:28.459796: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so retur

In [25]:
# first_filter_size = 64
# block_reps = [3, 4, 6, 3]
# activation = 'relu'
# gen_filter_size = 12
# bn_momen = 0.5

# outputs = tf.keras.layers.Conv2D(first_filter_size, (1, 7), padding='same', strides=1)(inputs)
# outputs = tf.keras.layers.BatchNormalization(momentum=bn_momen)(outputs)
# outputs = tf.keras.layers.Activation(activation)(outputs)
# outputs = tf.keras.layers.MaxPool2D(pool_size=(1,3), padding='same', strides=1)(outputs)

# cur_filter_size = first_filter_size
# for br_i, block_rep in enumerate(block_reps):
#     # print(block_rep)
#     for r_i in np.arange(block_rep):
#         # print(f"\t{r_i}")
        
#         if r_i == 0:
#             # print(f"\t\t{cur_filter_size * 4}")
#             skip_layer = outputs
#             skip_layer = tf.keras.layers.Conv2D(cur_filter_size*4, (1, 1), padding='same', strides=1)(skip_layer)
#             skip_layer = tf.keras.layers.BatchNormalization(momentum=bn_momen)(skip_layer)
#         else:
#             skip_layer = outputs
        
#         outputs = tf.keras.layers.Conv2D(cur_filter_size, (1, 1), padding='same', strides=1)(outputs)
#         outputs = tf.keras.layers.BatchNormalization(momentum=bn_momen)(outputs)
#         outputs = tf.keras.layers.Activation(activation)(outputs)
        
#         outputs = tf.keras.layers.Conv2D(cur_filter_size, (1, gen_filter_size), padding='same', strides=1)(outputs)
#         outputs = tf.keras.layers.BatchNormalization(momentum=bn_momen)(outputs)
        
#         outputs = tf.keras.layers.Conv2D(cur_filter_size * 4, (1, 1), padding='same', strides=1)(outputs)
#         outputs = tf.keras.layers.BatchNormalization(momentum=bn_momen)(outputs)
        
#         outputs = tf.keras.layers.Add()([outputs, skip_layer])
#         outputs = tf.keras.layers.Activation(activation)(outputs)
        
#     cur_filter_size *= 2

# outputs = tf.keras.layers.GlobalAveragePooling2D()(outputs)
# outputs = tf.keras.layers.Dense(n_classes)(outputs)
# outputs = tf.keras.layers.Activation('sigmoid')(outputs)


In [26]:
# cosme_model = tf.keras.Model(inputs=inputs, outputs=outputs)

In [27]:
# cosme_model(batch[0])

In [28]:
# Hyper-parameters of the multi-branch residual network built in this cell.
resblock_filters = [16, 32, 64]   # filters of the three stacked blocks in every branch
resblock_kernels = [3, 5, 7]      # one parallel branch per kernel size
bn_momen = 0.1
activation = 'relu'


def _conv_bn(tensor, filters, kernel_size):
    """Apply Conv1D ('same' padding, stride 1) followed by BatchNormalization."""
    tensor = tf.keras.layers.Conv1D(filters, kernel_size, padding='same', strides=1)(tensor)
    return tf.keras.layers.BatchNormalization(momentum=bn_momen)(tensor)


def _residual_block(tensor, filters, kernel_size):
    """Build one residual block and return its output tensor.

    A 1x1 convolution first projects `tensor` to `filters` channels. That
    projection feeds the conv-BN-activation-conv-BN main path and is also the
    shortcut added back before the final activation.
    """
    skip = tf.keras.layers.Conv1D(filters, 1, padding='same', strides=1)(tensor)
    out = _conv_bn(skip, filters, kernel_size)
    out = tf.keras.layers.Activation(activation)(out)
    out = _conv_bn(out, filters, kernel_size)
    out = tf.keras.layers.Add()([out, skip])
    return tf.keras.layers.Activation(activation)(out)


# Shared stem: conv -> BN -> activation -> max-pool along the sequence axis.
preblock_a = _conv_bn(inputs, 16, 3)
preblock_a = tf.keras.layers.Activation(activation)(preblock_a)
preblock_a = tf.keras.layers.MaxPool2D(pool_size=(1,3), padding='same', strides=1)(preblock_a)

# Parallel branches: every branch starts from the stem and stacks one residual
# block per entry of `resblock_filters`, using that branch's kernel size.
# Layers are created branch by branch, in the same order as the unrolled version.
branch_outputs = []
for kernel_size in resblock_kernels:
    branch = preblock_a
    for filters in resblock_filters:
        branch = _residual_block(branch, filters, kernel_size)
    branch_outputs.append(branch)

# Head: concatenate the branches, pool globally and emit one sigmoid score per class.
outputs = tf.keras.layers.Concatenate()(branch_outputs)
outputs = tf.keras.layers.GlobalAveragePooling2D()(outputs)
# outputs = tf.keras.layers.Flatten()(outputs)
outputs = tf.keras.layers.Dense(n_classes, activation='sigmoid')(outputs)


In [29]:
# Wrap the functional graph into a Keras model.
# NOTE(review): at this point `inputs` refers to the scaled tensor produced in the
# input cell, not to the tf.keras.Input placeholder itself — confirm the resulting
# model still contains the cast/scale step.
cosme_model = tf.keras.Model(inputs=inputs, outputs=outputs)

In [30]:
# # preproc = tf.keras.layers.Lambda(lambda x: tf.RaggedTensor.from_row_lengths(x[input_col_name][0], x[input_col_name][1]).to_tensor(max_seq_len))(inputs)
# preproc = tf.RaggedTensor.from_row_lengths(inputs[input_col_name][0], inputs[input_col_name][1]).to_tensor(max_seq_len)
# preproc = tf.reshape(preproc, [max_seq_len,])
# preproc = tf.expand_dims(preproc, 1)
# preproc = tf.expand_dims(preproc, 3)
# preproc = tf.math.multiply(preproc, 1/len(possible_gene_values))

In [31]:
# cosme_model = tf.keras.Model(inputs=inputs, outputs=preproc)

In [32]:
# cosme_model(batch[0])

In [33]:
# preproc = tf.keras.layers.Lambda(lambda x: x['seq'][0])(inputs)
# throw_way = tf.keras.layers.Lambda(lambda x: x['seq'][1])(inputs)
# shape = [tf.shape(throw_way)[k] for k in range(2)]
# preproc = tf.reshape(preproc, [shape[0], max_seq_len])
# preproc = tf.cast(preproc, tf.float32)
# preproc = tf.expand_dims(preproc, 0)
# preproc = tf.reshape(preproc, [shape[0], 1, max_seq_len])
# preproc = tf.math.multiply(preproc, 1/len(possible_gene_values))

In [34]:
# cosme_layer = COSMELayer(
#                 preblock_filters=preblock_filters,
#                 preblock_kernel_sizes=preblock_kernel_sizes,
#                 preblock_pool_size=preblock_pool_size,
#                 idblock_kernel_sizes=idblock_kernel_sizes,
#                 idblock_filters = idblock_filters,
#                 idblock_activation=idblock_activation,
#                 idblock_avg_pool_sizes=idblock_avg_pool_sizes,
#                 last_activation=last_activation,
#                 n_classes=n_classes,
#                 )
# output_layer = cosme_layer(inputs)

In [35]:
# cosme_model = tf.keras.Model(inputs=inputs, outputs=output_layer)

In [36]:
# cosme_model.layers[-1]._name = 'COSMELayer'

In [37]:
# Adam with the AMSGrad variant enabled. `learn_rate` is the initial rate; the
# LearningRateScheduler callback defined further down returns the rate per epoch.
optimizer = tf.keras.optimizers.Adam(learning_rate=learn_rate, amsgrad=True)

In [38]:
# step = tf.Variable(0, trainable=False)
# schedule = tf.optimizers.schedules.PiecewiseConstantDecay(
#     [10000, 15000], [1e-0, 1e-1, 1e-2])
# # lr and wd can be a function or a tensor
# lr = 1e-1 * schedule(step)
# wd = lambda: 1e-4 * schedule(step)


# optimizer = tf.optimizers.AdamW(learning_rate=lr, weight_decay=wd)
# optimizer = tf.keras.optimizers.experimental.AdamW()

In [39]:
# Compile with binary focal cross-entropy over the one-hot targets / sigmoid
# outputs, tracking categorical accuracy. The commented lines are the loss and
# metric alternatives tried earlier.
cosme_model.compile(optimizer, 
              # loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=from_logits),#, reduction=tf.keras.losses.Reduction.NONE), 
                    # loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),#, reduction=tf.keras.losses.Reduction.NONE), 
                    loss=tf.keras.losses.BinaryFocalCrossentropy(),
              # metrics=['sparse_categorical_accuracy'],
                    metrics=['categorical_accuracy'],
             )

In [40]:
# NVTabular validation callback built around the validation loader.
# NOTE(review): this object is not included in the `callbacks` list passed to
# fit() further down (validation_data=valid_dataset is used instead) — confirm
# whether it is still needed.
validation_callback = KerasSequenceValidater(valid_dataset)

In [41]:
# Stop once val_loss has not improved for `patience` epochs and roll the model
# back to the best weights seen.
earlystopping_cb = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=patience,
    restore_best_weights=True,
)

# Weight-only checkpoints. The compiled metric is 'categorical_accuracy', so the
# validation quantity Keras logs is 'val_categorical_accuracy' ('val_accuracy'
# is never produced by this model).
checkpoints_cb = tf.keras.callbacks.ModelCheckpoint(
    filepath=model_checkpoints_dir,
    save_weights_only=True,
    monitor='val_categorical_accuracy',
)

# TensorBoard logging, with weight histograms written every epoch.
tensorboard_cb = tf.keras.callbacks.TensorBoard(
    log_dir=tensorboard_dir,
    histogram_freq=1,
)

# Fault-tolerance backup. Keras requires `backup_dir` not to be reused by another
# callback such as ModelCheckpoint, so it gets its own sub-directory instead of
# sharing `model_checkpoints_dir`.
# NOTE(review): assumes `model_checkpoints_parent_dir` is a writable directory — confirm.
restore_cb = tf.keras.callbacks.BackupAndRestore(
    backup_dir=os.path.join(model_checkpoints_parent_dir, 'backup_and_restore'),
)

def lr_scheduler(epoch, lr):
    """Return the learning rate to use for `epoch`.

    During the first `num_warmup_epochs` epochs the base rate `learn_rate` is
    scaled by `warmup_lr_multiplier`; afterwards the plain base rate is used.
    The `lr` argument (the current rate supplied by Keras) is not consulted.
    """
    in_warmup = epoch < num_warmup_epochs
    # A step decay, `* (0.9**int(epoch / 100))`, was tried on the post-warm-up
    # rate and is currently disabled.
    return learn_rate * warmup_lr_multiplier if in_warmup else learn_rate
    
# Keras callback that queries lr_scheduler for the learning rate.
scheduler_cb = tf.keras.callbacks.LearningRateScheduler(lr_scheduler)

In [42]:
# cosme_model.summary()

In [43]:
# Train for up to `epochs` epochs on the training loader, validating on the
# validation loader, with early stopping, checkpointing, TensorBoard logging,
# backup/restore and the warm-up learning-rate schedule attached.
history = cosme_model.fit(
    train_dataset,
    epochs=epochs,
    validation_data=valid_dataset,
    callbacks=[ earlystopping_cb, checkpoints_cb, tensorboard_cb, restore_cb, scheduler_cb]
)

Epoch 1/300


2022-05-26 01:31:34.967485: I tensorflow/stream_executor/cuda/cuda_dnn.cc:368] Loaded cuDNN version 8201
2022-05-26 01:31:35.641513: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2022-05-26 01:31:36.500614: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78/300
Epoch 7

In [44]:
# Persist the final weights to the configured weights location.
cosme_model.save_weights(model_weights_dir)

In [45]:
# Loss and categorical accuracy on the validation loader.
cosme_model.evaluate(valid_dataset)



[0.0726083368062973, 0.39719802141189575]

In [46]:
# Loss and categorical accuracy on the training loader, for comparison with validation.
cosme_model.evaluate(train_dataset)



[0.07215527445077896, 0.4098244607448578]

In [47]:
# NOTE(review): plain attribute assignment on the model. The evaluate() outputs
# recorded after this cell are identical to the ones recorded before it, so it
# does not appear to affect evaluation — confirm whether it is needed.
cosme_model.training = False

In [48]:
# Validation metrics again, after the `training` attribute was set to False.
cosme_model.evaluate(valid_dataset)



[0.0726083368062973, 0.39719802141189575]

In [49]:
# Training-set metrics again, after the `training` attribute was set to False.
cosme_model.evaluate(train_dataset)



[0.07215527445077896, 0.4098244607448578]

In [50]:
# cosme_model.predict(valid_dataset).argmax(1)