In [1]:
import sys
sys.path.append('..')

from scripts.global_funcs import load_data_config, load_model_config, get_num_of_classes
from glob import glob
import os



import nvtabular as nvt
import dask_cudf

from nvtabular.utils import device_mem_size

from dask_cuda import LocalCUDACluster
from dask.distributed import Client

In [2]:
# Load the model/training settings through the project helper and show them
# as the cell output so the values used by this run are visible.
model_config = load_model_config()
model_config

{'preblock_filters': 64,
 'preblock_kernel_sizes': [3, 5, 7, 9],
 'preblock_pool_size': 2,
 'idblock_kernel_sizes': [3, 5, 7, 9],
 'idblock_filters': [64, 128, 256, 512],
 'idblock_activation': 'gelu',
 'idblock_avg_pool_size': 2,
 'last_activation': 'softmax',
 'batch_size': 8192,
 'learn_rate': 0.0001,
 'epochs': 28,
 'patience': 10,
 'num_warmup_epochs': 3,
 'warmup_lr_multiplier': 0.01,
 'TF_MEMORY_ALLOCATION': '0.7',
 'TF_VISIBLE_DEVICE': '0',
 'TF_FORCE_GPU_ALLOW_GROWTH': 'true',
 'device_spill_frac': 0.2,
 'protocol': 'ucx',
 'visible_devices': '0',
 'enable_tcp_over_ucx': True,
 'enable_nvlink': False,
 'enable_infiniband': False,
 'rmm_pool_size': '1GB',
 'clear_models_dirs': True,
 'clear_tensorboard': True}

In [3]:
# Pull the settings this notebook needs out of `model_config` into plain
# module-level names, grouped by the config-key prefix they share.

# `preblock_*` settings.
preblock_filters, preblock_kernel_sizes, preblock_pool_size = (
    model_config[key]
    for key in ("preblock_filters", "preblock_kernel_sizes", "preblock_pool_size")
)

# `idblock_*` settings.
idblock_kernel_sizes, idblock_filters, idblock_activation, idblock_avg_pool_size = (
    model_config[key]
    for key in (
        "idblock_kernel_sizes",
        "idblock_filters",
        "idblock_activation",
        "idblock_avg_pool_size",
    )
)

# Output activation plus batch/optimizer/training-length settings.
last_activation, batch_size, learn_rate, epochs, patience = (
    model_config[key]
    for key in ("last_activation", "batch_size", "learn_rate", "epochs", "patience")
)

In [4]:
# Export the TensorFlow-related settings as environment variables. This cell
# runs before the cell that imports tensorflow and the NVTabular TensorFlow
# loader (presumably so they are picked up at import time - confirm if the
# cell order is ever changed).
os.environ.update(
    {
        "TF_MEMORY_ALLOCATION": model_config["TF_MEMORY_ALLOCATION"],  # fraction of free memory
        "TF_VISIBLE_DEVICE": model_config["TF_VISIBLE_DEVICE"],
        "TF_FORCE_GPU_ALLOW_GROWTH": model_config["TF_FORCE_GPU_ALLOW_GROWTH"],
    }
)

In [5]:
import tensorflow as tf
from scripts.cosme_model import COSMELayer
from nvtabular.loader.tensorflow import KerasSequenceLoader, KerasSequenceValidater

2022-05-23 15:22:44.402391: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-23 15:22:44.402669: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-23 15:22:44.402774: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero


In [6]:
# Load the data/project configuration (paths, column names, sequence settings)
# through the project helper and show it as the cell output.
config = load_data_config()
config

{'clean_fasta_file': '/media/jcosme/Data/MarRef_parquet_10_cats',
 'output_dir': '/media/jcosme/Data',
 'project_name': 'MarRef_parquet_10_cats',
 'base_col_names': ['seq', 'label'],
 'label_col_name': 'label',
 'input_col_name': 'seq',
 'label_regex': '(?:[^a-zA-Z0-9]+)([a-zA-Z]+[0-9]+)(?:[^a-zA-Z0-9]+)',
 'k_mer': 1,
 'possible_gene_values': ['A', 'C', 'G', 'T'],
 'max_seq_len': 150,
 'data_splits': {'train': 0.9, 'val': 0.05, 'test': 0.05},
 'random_seed': 42,
 'fasta_sep': '>',
 'unq_labs_dir': '/media/jcosme/Data/MarRef_parquet_10_cats/data/unq_labels',
 'unq_labs_dir_csv': '/media/jcosme/Data/MarRef_parquet_10_cats/data/unq_labels.csv',
 'data_dir': '/media/jcosme/Data/MarRef_parquet_10_cats/data/MarRef_parquet_10_cats',
 'nvtab_dir': '/media/jcosme/Data/MarRef_parquet_10_cats/nvtab',
 'dask_dir': '/media/jcosme/Data/MarRef_parquet_10_cats/dask',
 'tensorboard_dir': '/media/jcosme/Data/MarRef_parquet_10_cats/tensorboard',
 'model_checkpoints_dir': '/media/jcosme/Data/MarRef_parqu

In [7]:
# Pull the values this notebook needs out of `config` into module-level names.

# Directories / artifact locations.
nvtab_dir, dask_dir, tensorboard_dir, model_checkpoints_dir, model_weights_dir = (
    config[key]
    for key in (
        "nvtab_dir",
        "dask_dir",
        "tensorboard_dir",
        "model_checkpoints_dir",
        "model_weights_dir",
    )
)

# Names of the label and input columns in the dataset.
label_col_name, input_col_name = config["label_col_name"], config["input_col_name"]

# Sequence length and the symbols a sequence may contain.
max_seq_len, possible_gene_values = config["max_seq_len"], config["possible_gene_values"]

In [8]:
# Number of label classes, obtained from the project helper; passed to the
# model as `n_classes` further down.
n_classes = get_num_of_classes()

In [9]:
# Per-worker device memory limit: a configured fraction of the GPU's total
# memory (`device_spill_frac` in the model config).
worker_memory_limit = device_mem_size(kind="total") * model_config["device_spill_frac"]

# Local Dask-CUDA cluster driven entirely by the model config; worker scratch
# files go to `dask_dir`.
# Note: no RMM pool is configured here - `rmm_pool_size` from the config is
# intentionally not passed.
cluster = LocalCUDACluster(
    protocol=model_config["protocol"],
    CUDA_VISIBLE_DEVICES=model_config["visible_devices"],
    device_memory_limit=worker_memory_limit,
    enable_tcp_over_ucx=model_config["enable_tcp_over_ucx"],
    enable_nvlink=model_config["enable_nvlink"],
    enable_infiniband=model_config["enable_infiniband"],
    local_directory=dask_dir,
)

# Connect a client to the cluster and display its summary as the cell output.
client = Client(cluster)
client

0,1
Client  Scheduler: ucx://127.0.0.1:56309  Dashboard: http://127.0.0.1:8787/status,Cluster  Workers: 1  Cores: 1  Memory: 31.21 GiB


In [10]:
# Collect the parquet files of the held-out splits under `nvtab_dir`.
# Sorting makes the file order independent of the filesystem listing order.
VAL_PATHS = glob(f"{nvtab_dir}/val/*.parquet")
VAL_PATHS.sort()

TEST_PATHS = glob(f"{nvtab_dir}/test/*.parquet")
TEST_PATHS.sort()

In [11]:
def _build_eval_loader(parquet_paths):
    """Build an unshuffled ``KerasSequenceLoader`` over a list of parquet files.

    Both evaluation splits must be read with exactly the same loader settings,
    so the configuration lives in one place instead of being copy-pasted.

    Parameters
    ----------
    parquet_paths : list of str
        Paths of the parquet files that make up one split.

    Returns
    -------
    KerasSequenceLoader
        Loader yielding batches of ``batch_size`` rows with ``label_col_name``
        as the label column (both read from the notebook globals at call time).
    """
    return KerasSequenceLoader(
        nvt.Dataset(parquet_paths, part_size="10MB"),
        batch_size=batch_size,
        label_names=[label_col_name],
        shuffle=False,  # evaluation only: keep a fixed row order
        buffer_size=0.001,  # amount of data, as a fraction of GPU memory, to load at once
        device=0,
        parts_per_chunk=1,
        engine="parquet",
    )


valid_dataset = _build_eval_loader(VAL_PATHS)
test_dataset = _build_eval_loader(TEST_PATHS)

In [12]:
# Keras inputs for the sequence column. The loader is expected to deliver the
# column as a (values, nnzs) pair of tensors, hence the `__values` / `__nnzs`
# input names (assumption based on the names - confirm against the loader).
inputs = {}
inputs[input_col_name] = (
    tf.keras.Input(name=f"{input_col_name}__values", dtype=tf.int64, shape=(1,)),
    tf.keras.Input(name=f"{input_col_name}__nnzs", dtype=tf.int32, shape=(1,)),
)

# Split the pair. The column is addressed through `input_col_name` (not a
# literal 'seq') so these lambdas stay in sync with the dict key used above.
inputs2 = tf.keras.layers.Lambda(lambda x: x[input_col_name][0])(inputs)
throw_way = tf.keras.layers.Lambda(lambda x: x[input_col_name][1])(inputs)

# `throw_way` (the nnzs input) is only used for its dynamic shape; shape[0] is
# its leading (batch) dimension.
shape = [tf.shape(throw_way)[k] for k in range(2)]

# Reshape the values to (batch, max_seq_len). The length comes from the config
# (not a literal 150) so it always agrees with the final reshape below.
inputs2 = tf.reshape(inputs2, [shape[0], max_seq_len])
inputs2 = tf.cast(inputs2, tf.float32)

# The expand_dims + reshape pair ends at (batch, 1, max_seq_len). It is kept
# as two ops, although a single reshape would give the same tensor, so the
# model's layer graph stays exactly as before (in case restoring the saved
# weights is sensitive to the layer structure).
inputs2 = tf.expand_dims(inputs2, 0)
inputs2 = tf.reshape(inputs2, [shape[0], 1, max_seq_len])

# Scale the encoded values by 1 / (number of possible gene symbols).
inputs2 = tf.math.multiply(inputs2, 1/len(possible_gene_values))

2022-05-23 15:22:47.294886: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-05-23 15:22:47.295529: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-23 15:22:47.295796: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-23 15:22:47.295891: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zer

In [13]:
# Instantiate the project's COSMELayer from the unpacked hyper-parameters and
# the class count, then apply it to the preprocessed input tensor.
cosme_layer = COSMELayer(
    preblock_filters=preblock_filters,
    preblock_kernel_sizes=preblock_kernel_sizes,
    preblock_pool_size=preblock_pool_size,
    idblock_kernel_sizes=idblock_kernel_sizes,
    idblock_filters=idblock_filters,
    idblock_activation=idblock_activation,
    idblock_avg_pool_size=idblock_avg_pool_size,
    last_activation=last_activation,
    n_classes=n_classes,
)

output_layer = cosme_layer(inputs2)

In [14]:
# Wrap the input dict and the layer output in a functional Keras model.
cosme_model = tf.keras.Model(inputs=inputs, outputs=output_layer)

# Adam (AMSGrad variant) at the configured learning rate.
optimizer = tf.keras.optimizers.Adam(learning_rate=learn_rate, amsgrad=True)

# The loss is configured with from_logits=False, i.e. it is given the model
# output as probabilities rather than raw logits.
cosme_model.compile(
    optimizer=optimizer,
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=["sparse_categorical_accuracy"],
)

In [15]:
# Restore previously saved weights from the location given by
# config['model_weights_dir'].
cosme_model.load_weights(model_weights_dir)

In [16]:
# Mark the model as non-trainable; the remaining cells only evaluate it.
cosme_model.trainable = False

In [17]:
# Evaluate on the test split. The displayed list is
# [loss, sparse_categorical_accuracy], matching the compile() configuration.
cosme_model.evaluate(test_dataset)

2022-05-23 15:22:55.022838: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8201
2022-05-23 15:22:55.630606: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory




2022-05-23 15:22:57.062114: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


[2.166696071624756, 0.23971128463745117]

In [18]:
# Evaluate on the validation split. The displayed list is
# [loss, sparse_categorical_accuracy], matching the compile() configuration.
cosme_model.evaluate(valid_dataset)



[2.1800763607025146, 0.24940048158168793]