In [1]:
import re
import json
import numpy as np
import pandas as pd
import tensorflow as tf
from baskerville import seqnn
from baskerville import layers
from baskerville import transfer_helper

2024-06-17 16:32:27.975305: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# test if trainable param match expectation
def test_add_ia3(model_final):

    # expected trainable
    params_added = 0
    for l in model_final.layers:
        if re.match('multihead_attention', l.name): # kv layers
            params_added += transfer_helper.param_count(l._k_layer._ia3_layer)
            params_added += transfer_helper.param_count(l._v_layer._ia3_layer)
        elif re.match('dense', l.name) and l.input_shape[-1]==1536: # ff layers
            params_added += transfer_helper.param_count(l._ia3_layer)
    
    params_head = transfer_helper.param_count(model_final.layers[-2])
    print('expect params (ia3): %d'%params_added)
    print('expect params (head): %d' % params_head)
    print('expect params (total): %d' % (params_head + params_added))

    # observed trainable
    c1 = transfer_helper.param_count(model_final, 'trainable')
    print('trainable count: %d' %c1)
    
    assert c1==(params_head+params_added)
    print("assert passed. trainable params match expectation.")    

# test at initialization, output is the same
def test_add_ia3_2(model_final):
    random_input = np.random.rand(1, model_final.input_shape[-2], model_final.input_shape[-1])
    output_original = seqnn_model.model(random_input).numpy()
    output_ia3 = model_final(random_input).numpy()
    
    assert np.allclose(output_original, output_ia3)
    print("assert passed. at initialization, ia3 output same as pre-train.")

In [3]:
# Directory holding the transfer test fixtures.
# NOTE(review): this is a machine-specific absolute path; adjust it when running elsewhere.
test_data_dir = '/home/yuanh/programs/source/python_packages/baskerville/tests/data/transfer'

# Fixture files read by the model-construction cell below.
params_file = f'{test_data_dir}/params.json'
targets_file = f'{test_data_dir}/targets.txt'

In [4]:
###################
# pre-train model #
###################
# Targets table: tab-separated, first column used as the index.
targets_df = pd.read_csv(targets_file, index_col=0, sep="\t")
with open(params_file) as params_open:
    params = json.load(params_open)
params_model = params["model"]
params_train = params["train"]
params_model['verbose'] = False

# set strand pairs
# The add_ia3 call below always needs the strand_pair array, so check for the
# column once, up front, instead of guarding one use and not the other.
if "strand_pair" not in targets_df.columns:
    raise KeyError(
        "targets file %s has no 'strand_pair' column, which is needed to build the ia3 model"
        % targets_file
    )
strand_pair = np.array(targets_df.strand_pair)
params_model["strand_pair"] = [strand_pair]

seqnn_model = seqnn.SeqNN(params_model)

#############
# ia3 model #
#############
model_final = transfer_helper.add_ia3(seqnn_model.model, strand_pair)

2024-06-17 16:32:52.777625: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22807 MB memory:  -> device: 0, name: NVIDIA TITAN RTX, pci bus id: 0000:1a:00.0, compute capability: 7.5


params added/unfrozen by ia3: 20480


In [5]:
# Run both IA3 checks on the model, in order:
# trainable-parameter count first, then output equality at initialization.
for ia3_check in (test_add_ia3, test_add_ia3_2):
    ia3_check(model_final)

expect params (ia3): 20480
expect params (head): 52292
expect params (total): 72772
trainable count: 72772
assert passed. trainable params match expectation.


2024-06-17 16:33:00.103821: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8600


assert passed. at initialization, ia3 same as pre-train.
