# Interaction Map

## Load RNA Data

In [None]:
import tensorflow as tf
import os

# The project's `dataloader` module is imported by temporarily switching into
# ../../src/ (this relies on the current directory being searched for imports,
# as it is in an interactive session).
cwd = os.getcwd()
os.chdir('../../src/')
try:
    from dataloader import OneHotEncoder, load_fasta_data, RNASeqDataGenerator
finally:
    # Always return to the directory we started from, even if the import fails,
    # so the relative data/model paths used later keep resolving.
    os.chdir(cwd)

In [None]:
# GPU / logging environment for TensorFlow, set in one place:
#   CUDA_DEVICE_ORDER / CUDA_VISIBLE_DEVICES -> expose only device "0", numbered by PCI bus id
#   TF_CPP_MIN_LOG_LEVEL                     -> reduce TensorFlow's native log output
# NOTE(review): tensorflow is imported in an earlier cell; confirm these
# variables still take effect when set after the import.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "0",
    'TF_CPP_MIN_LOG_LEVEL': '2',
})

# NOTE(review): tf.ConfigProto / tf.Session look like the TensorFlow 1.x API —
# confirm the installed version provides them at the top level.
config = tf.ConfigProto(log_device_placement=True)
# Let GPU memory be allocated on demand rather than reserved up front.
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)

In [None]:
from tqdm import tqdm
be_type='ABE'
sample='156B'
be_class='ABEmax'

data_dir = "../../data/raw/{0}/{0}-sequence/".format(be_type)

# Labels "1".."19" are used for training; "20".."22" plus "X" are held out.
train_chr = [str(x) for x in range(1, 20)]
test_chr = [str(x) for x in range(20, 23)] + ['X']


def _load_chromosomes(sample, chromosomes, data_dir):
    """Load and concatenate the data of every label in *chromosomes*.

    Calls ``load_fasta_data(sample, label, data_dir)`` once per label and
    joins the per-label results into two flat lists.

    The chunks are joined in reverse loading order (last label first), which
    is the ordering the previous prepend-style accumulation produced; later
    cells pick test sequences by position, so the order is kept on purpose.
    Collecting chunks and flattening once avoids rebuilding the whole list on
    every iteration.

    Returns:
        (seqs, ers): two lists — presumably sequences and their paired
        values; confirm against ``load_fasta_data``.
    """
    seq_chunks, er_chunks = [], []
    for label in tqdm(chromosomes):
        seq, er = load_fasta_data(sample, label, data_dir)
        seq_chunks.append(seq)
        er_chunks.append(er)
    seqs = [item for chunk in reversed(seq_chunks) for item in chunk]
    ers = [item for chunk in reversed(er_chunks) for item in chunk]
    return seqs, ers


train_seqs, train_ers = _load_chromosomes(sample, train_chr, data_dir)
test_seqs, test_ers = _load_chromosomes(sample, test_chr, data_dir)

## Load Model

In [None]:
import tensorflow as tf

# Load the saved Keras model for inference only: compile=False skips
# compiling it after loading.
model_path = '../CrossValidation/ABEmax/156BFold-1.h5'
model = tf.keras.models.load_model(model_path, compile=False)

## Build Background Set

In [None]:
import shap
# Wrap the training sequences and their paired values in the project's data
# generator; the background-set cell indexes it one item at a time
# (presumably one batch per index — confirm against RNASeqDataGenerator).
train_dat_gen = RNASeqDataGenerator(train_seqs, train_ers)

In [None]:
from tqdm import tqdm
import numpy as np

# Build the background set for the SHAP explainer: walk the generator's
# batches in random order and keep every sample whose model prediction is
# below `th`, stopping once more than 4096 samples have been gathered.
# NOTE(review): the shuffle is unseeded, so the set differs between runs.
X_bg = list()
y_bg = list()
th = 0.01
batch_order = np.arange(len(train_dat_gen))
np.random.shuffle(batch_order)
for x in tqdm(batch_order):
    # Fetch the batch once: indexing the generator twice repeats its work and
    # could pair inputs with targets from a different draw if the generator
    # is not deterministic.
    batch = train_dat_gen[x]
    inp_data, gt = batch[0], batch[1]
    preds = model.predict(inp_data)
    for sample_x, sample_y, pred in zip(inp_data, gt, preds):
        if pred < th:
            X_bg.append(sample_x)
            y_bg.append(sample_y)
    # Checked once per batch, so the final size can exceed 4096 by up to one
    # batch worth of samples.
    if len(y_bg) > 4096:
        break
X_bg_set = np.array(X_bg)

In [None]:
# Create the SHAP DeepExplainer for the model, with X_bg_set as its background data.
shap_explainer = shap.DeepExplainer(model, X_bg_set)

## Build Testing Set

In [None]:
from tqdm import trange
import numpy as np

# Choose the test sequences to explain: keep the index of every sequence whose
# measured value is at least 0.10 and whose prediction is within 5% (relative
# error) of that value.
ohe = OneHotEncoder(101, {'A':0, 'C':1, 'G':2, 'T':3})
vals = []

for x in trange(len(test_seqs)):
    target = test_ers[x]
    if target < 0.10:
        continue
    # The model is called on a batch of one, hence the added leading axis.
    encoded = np.expand_dims(ohe.encode(test_seqs[x]), axis=0)
    pred = model.predict(encoded)
    rel_err = np.abs(pred - target) / target
    if rel_err < 0.05:
        vals.append(x)
    # Evaluated after the append, so the scan ends once 201 indices are held.
    if len(vals) > 200:
        break

In [None]:
# Number of test-sequence indices that passed the selection above.
len(vals)

In [None]:
# Sanity check: display the model's prediction for the first selected test sequence.
model.predict(np.expand_dims(ohe.encode(test_seqs[vals[0]]), axis=0))

In [None]:
def mutate(seq, center=50, amt=50, bases=('A', 'C', 'G', 'T')):
    """Return *seq* followed by every single-position substitution around *center*.

    For each position in ``center - amt .. center + amt`` (inclusive) except
    *center* itself, one variant is produced per entry of *bases*, in order,
    with that position replaced. The substitution that equals the existing
    character is kept as well, so every position contributes exactly
    ``len(bases)`` variants.

    With the defaults and a 101-character sequence this yields
    ``1 + 100 * 4 = 401`` strings, position 50 being left untouched.

    Args:
        seq: The sequence to mutate (a string).
        center: Index that is never substituted.
        amt: Number of positions substituted on each side of *center*.
        bases: Replacement characters, in output order.

    Returns:
        A list whose first element is *seq* itself, followed by the variants
        ordered by position and then by base.

    Note:
        The window is not validated against ``len(seq)``; callers must pass a
        sequence that covers ``center - amt .. center + amt``.
    """
    variants = [seq]
    for pos in range(center - amt, center + amt + 1):
        if pos == center:
            continue
        prefix, suffix = seq[:pos], seq[pos + 1:]
        variants.extend(prefix + base + suffix for base in bases)
    return variants

In [None]:
from tqdm import tqdm

# For every selected test sequence: explain the sequence and all of its
# single-position variants with SHAP, then record how far each variant's
# importance scores move away from those of the unmodified sequence.
# Assumes the encoded batch is (401, 101, 4) — 401 strings from mutate(),
# length 101, four bases — TODO confirm against OneHotEncoder.encode.
ohe = OneHotEncoder(101, {'A':0, 'C':1, 'G':2, 'T':3})
result = []
ct = 0
for i in tqdm(vals):
    variants = mutate(test_seqs[i])
    encoded = np.stack([ohe.encode(variant) for variant in variants])
    shap_vals = shap_explainer.shap_values(encoded)
    # Keep only the attribution of the base actually present at each position.
    imp_scores = shap_vals[0] * encoded
    # Row 0 is the unmodified sequence; every other row is compared with it.
    delta = np.abs(imp_scores[1:] - imp_scores[0])
    # Drop position 50, which mutate() never substitutes.
    left, right = delta[:, 0:50, :], delta[:, 51:101, :]
    delta = np.concatenate([left, right], axis=1)
    # One flat row per variant.
    result.append(delta.reshape(delta.shape[0], -1))
    ct += 1

In [None]:
# Stack the per-sequence delta matrices along a new leading axis.
comb = np.stack(result)

In [None]:
# Average the delta matrices over the leading (per-sequence) axis.
mean = np.mean(comb, axis=0)

In [None]:
# Display the shape of the averaged matrix.
mean.shape

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure

# Heat map of the averaged delta matrix, colour scale clipped to [0.0, 0.10].
fig = figure(figsize=(8, 6), dpi=1000)
plt.imshow(mean)
plt.colorbar()
plt.clim(0.0, 0.10)
# Write the file before showing: once show() has handed the figure over it may
# be released, and saving afterwards can produce an empty page.
fig.savefig('line_plot.pdf')
plt.show()

In [None]:
from matplotlib.backends.backend_pdf import PdfPages


def _save_heatmap_page(pdf, data):
    """Draw *data* as a heat map on a new page of *pdf*, then release the figure.

    The colour scale is clipped to [0.0, 0.10]. The figure is closed once the
    page is written, so the large (8x6 in, 1000 dpi) figures do not pile up in
    memory; as a consequence they are not left open for inline display.
    """
    fig = plt.figure(figsize=(8, 6), dpi=1000)
    plt.imshow(data)
    plt.colorbar()
    plt.clim(0.0, 0.10)
    pdf.savefig(fig)
    plt.close(fig)


# One page per matrix in `comb`, followed by a final page for `mean`. The
# context manager finalises the PDF even if plotting fails part-way.
with PdfPages('156BImportanceMap.pdf') as pdf:
    for x in comb:
        _save_heatmap_page(pdf, x)
    _save_heatmap_page(pdf, mean)