# Tutorial to perform TFModisco and create a report #

In [1]:
%cd ../..

/data/projects/c02/sander.thierens


In [2]:
%load_ext autoreload
%autoreload 2

import sys
import os

from Bio import SeqIO
from evoaug_tf import evoaug, augment
from modiscolite import tfmodisco, report
from DeepDifE.src.deepExplain_tool import getDeepExplainerBackground, deepExplain, plotResults
from DeepDifE.src.diff_expression_model import load_model, get_siamese_model
import numpy as np
import importlib
import pickle
import modiscolite


2025-01-31 10:58:11.664822: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-01-31 10:58:11.789694: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2025-01-31 10:58:11.789727: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2025-01-31 10:58:14.538271: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2025-

Retrieve the siamese model based on the weight of Helder

In [3]:

model_path = "DeepDifE/data/model_single.json"
weights_path = "DeepDifE/data/weights_single.h5"

augment_list = [
	augment.RandomRC(rc_prob=0.5),
	augment.RandomInsertionBatch(insert_min=0, insert_max=20),
	augment.RandomDeletion(delete_min=0, delete_max=30),
	augment.RandomTranslocationBatch(shift_min=0, shift_max=20),
	augment.RandomMutation(mutate_frac=0.05),
	augment.RandomNoise()
]

model = load_model(model_path, weights_path)
siamese_model = get_siamese_model(model)


2025-01-31 10:58:27.116509: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2025-01-31 10:58:27.116541: W tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: UNKNOWN ERROR (303)
2025-01-31 10:58:27.116576: I tensorflow/compiler/xla/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (p-vibcompute-1.vib.local): /proc/driver/nvidia/version does not exist
2025-01-31 10:58:27.117002: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.



Loaded model from disk


Generate a background based on N random samples. In our case we start from a pickle containing 100 one-hot-encoded sequences with both the forward and reverse strand.

The numpy array passed to getDeepExplainerBackground should have the following shape (2, #samples, #sequence length, 4)

In [4]:
bg_path = "DeepDifE/data/background_100samples.pkl"
with open(bg_path, 'rb') as f:
    background_data = pickle.load(f)

# Conver to numpy
np_bg = np.array(background_data)

# Remove axis of dimension 1
np_bg = np.squeeze(np_bg)

# Convert to float 64, to be compatible with deepExplain
np_bg = np.float64(np_bg)

background = getDeepExplainerBackground(background_samples=np_bg, shuffle=False, post_hoc_conjoining=True)

Retrieve the sequences, to speed up the SeqIO parsing you could chunk larger fasta files

In [6]:
sequence_list = []
gene_id_list = []

for chunk in range(1,3):
	fasta_path = f"DeepDifE/data/fasta/example.fasta_c{chunk}"
	fasta_sequences = SeqIO.parse(open(fasta_path),'fasta')
	for index, fasta in enumerate(fasta_sequences):
		if index > 100: break
		name, sequence = fasta.id, str(fasta.seq)
		gene_id_list.append(name)
		sequence_list.append(sequence)

One hot encode the sequences and creat ethe reverse compliment

In [7]:
ohe_list = []
ohe_rc_list = []
for sequence in sequence_list:
	ohe_seq = []
	ohe_rc_seq = []
	for i in sequence:
		if i == 'A':
			ohe_seq.append([1., 0., 0., 0.])
			ohe_rc_seq.append([0., 0., 0., 1.])
		elif i == 'C':
			ohe_seq.append([0., 1., 0., 0.])
			ohe_rc_seq.append([0., 0., 1., 0.])
		elif i == 'G':
			ohe_seq.append([0., 0., 1., 0.])
			ohe_rc_seq.append([0., 1., 0., 0.])
		elif i == 'T':
			ohe_seq.append([0., 0., 0., 1.])
			ohe_rc_seq.append([1., 0., 0., 0.])
		else:
			ohe_seq.append([0., 0., 0., 0.])
			ohe_rc_seq.append([0., 0., 0., 0.])
	ohe_list.append(ohe_seq)
	ohe_rc_list.append(list(reversed(ohe_rc_seq)))
ohe_np = np.array(ohe_list)
ohe_rc_np = np.array(ohe_rc_list)

np_deepexplain_samples = np.stack((ohe_np, ohe_rc_np))

In [8]:
shap_values = deepExplain(samples=np_deepexplain_samples,
                          loaded_model=siamese_model, 
                          bg=background, 
                          post_hoc_conjoining=True, 
                          augment_list=augment_list, 
                          pad_background=True, 
                          pad_samples=False,
                          evo_aug=True)

keras is no longer supported, please use tf.keras instead.
Your TensorFlow version is newer than 2.4.0 and so graph support has been removed in eager mode and some static graphs may not be supported. See PR #1483 for discussion.
`tf.keras.backend.set_learning_phase` is deprecated and will be removed after 2020-10-11. To update it, simply pass a True/False value to the `training` argument of the `__call__` method of your layer or model.


Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


In [9]:
np.array(shap_values).shape

(202, 2, 620, 4)

In [10]:
shap_values_forward = np.array(shap_values)[:,0,:,:]
pos_patterns, neg_patterns = tfmodisco.TFMoDISco(
                                    min_metacluster_size = 2,
                                    hypothetical_contribs=shap_values_forward, 
                                    one_hot=np_deepexplain_samples[0],
                                    verbose=True
                                )
pos_patterns


Using 630 positive seqlets
Extracted 720 negative seqlets


[<modiscolite.core.SeqletSet at 0x7efca5638e20>,
 <modiscolite.core.SeqletSet at 0x7efca55c3d90>,
 <modiscolite.core.SeqletSet at 0x7efca7f98a00>,
 <modiscolite.core.SeqletSet at 0x7efca5cf2f70>,
 <modiscolite.core.SeqletSet at 0x7efca55c3ee0>,
 <modiscolite.core.SeqletSet at 0x7efca593bf40>,
 <modiscolite.core.SeqletSet at 0x7efca477a310>,
 <modiscolite.core.SeqletSet at 0x7efca477a5e0>,
 <modiscolite.core.SeqletSet at 0x7efca477a280>,
 <modiscolite.core.SeqletSet at 0x7efca58e7760>,
 <modiscolite.core.SeqletSet at 0x7efca55ae730>]

In [11]:
neg_patterns

[<modiscolite.core.SeqletSet at 0x7efca55d68e0>,
 <modiscolite.core.SeqletSet at 0x7efca4786c10>,
 <modiscolite.core.SeqletSet at 0x7efca56389d0>,
 <modiscolite.core.SeqletSet at 0x7efca4786880>,
 <modiscolite.core.SeqletSet at 0x7efcc0096be0>,
 <modiscolite.core.SeqletSet at 0x7efca582f520>,
 <modiscolite.core.SeqletSet at 0x7efca582f4f0>,
 <modiscolite.core.SeqletSet at 0x7efca56e7700>]

In [12]:
h5_result_path = "DeepDifE/results/modisco_output.h5"

modiscolite.io.save_hdf5(h5_result_path, pos_patterns, neg_patterns, 3)

In [22]:
! module load MEME

In [24]:
output_dir = "DeepDifE/results/TFModisco"
if not os.path.isdir(output_dir):
    os.mkdir(output_dir)

report.report_motifs(modisco_h5py = h5_result_path,
                    output_dir = output_dir,
                    img_path_suffix = h5_result_path,
                    meme_motif_db = "DeepDifE/data/jaspar_cisbp_other_source_no_high_similarity_tfbs_annot.meme",
                    is_writing_tomtom_matrix = True
                    )

ValueError: `tomtom` executable could not be called globally or locally. Please install it and try again. You may install it using conda with `conda install -c bioconda meme`