In [None]:
import tensorflow as tf

In [None]:
# Print the installed TensorFlow version so it can be checked by eye.
# Make sure TensorFlow is BELOW 2.16! Otherwise the model won't be able to load.
# NOTE(review): no model is loaded in the cells visible here — confirm this constraint still applies.
print(tf.__version__)

In [None]:
# GPU info: run `nvidia-smi` through the IPython shell escape and show its report
# (handy for picking the GPU id that is assigned in the following cell).
!nvidia-smi

In [None]:
import os

# Assign GPU to use: expose only the device with this id to the current process.
# NOTE(review): TensorFlow is imported in an earlier cell; this presumably still
# takes effect because no GPU has been queried before this point — confirm.
GPU_id = '7'
os.environ["CUDA_VISIBLE_DEVICES"] = GPU_id

# check GPU: ask TensorFlow which GPU devices it can see now.
gpu_devices = tf.config.list_physical_devices('GPU')
print(f"Num GPUs Available: {len(gpu_devices)}")

if not gpu_devices:
    print("GPU not working")
else:
    print("GPU working")
    for device in gpu_devices:
        print(f"Device name: {device.name}")
        print(f"Device type: {device.device_type}")

In [None]:
import pandas as pd

# Load the tab-separated k-mer table. Later cells read its 'kmer' column together
# with the 'delta.log2expression' / 'delta.log2export' columns.
L5_8mer_df = pd.read_csv('/rd4/users/liangn/mywork/L5_2-8mer.tsv', sep='\t')
L5_8mer_df

In [None]:
# 8-mer mutagenesis:

# Function to generate a list of all possible mutations for a seq:
def all_possible_mutations(dna_seq):
    """Return every single-nucleotide substitution of ``dna_seq``.

    Positions are visited from left to right; at each position the
    replacement letters are tried in the order A, T, C, G and the letter
    already present at that position is skipped. The original sequence
    itself is never included in the result.
    """
    alphabet = ("A", "T", "C", "G")
    return [
        dna_seq[:pos] + base + dna_seq[pos + 1:]
        for pos, current in enumerate(dna_seq)
        for base in alphabet
        if base != current
    ]


# functions to get a df of the regulatory relevance of each nucleotide of 8-mer:
import pandas as pd
import numpy as np
def kmer_saliency_df(kmer = 'GGTAAGTA', target = 'delta.log2expression', data = None):
    """Build a logomaker-ready saliency matrix for one k-mer.

    Every single-nucleotide mutant of ``kmer`` is looked up in the k-mer
    table, the mutant's ``target`` value is subtracted from the value of the
    original k-mer, and the median of the three deltas at each position is
    taken as the saliency of the original nucleotide at that position.

    Parameters
    ----------
    kmer : str
        Non-empty sequence made only of the upper-case letters A, C, G, T.
    target : str
        Name of the table column holding the value to compare.
    data : pandas.DataFrame, optional
        Table with a 'kmer' column and a ``target`` column. When omitted,
        the notebook-level ``L5_8mer_df`` is used.

    Returns
    -------
    pandas.DataFrame
        float64 frame with columns A, C, G, T and a 1-based position index.
        Each row holds the median delta in the column of the original
        nucleotide and 0 everywhere else.

    Raises
    ------
    ValueError
        If ``kmer`` is empty or contains a letter other than A, C, G, T
        (such a letter would yield four mutants at its position and
        misalign the per-position groups of three).
    KeyError
        If ``kmer`` or any of its mutants is absent from the table.
    """
    if data is None:
        data = L5_8mer_df
    letters = ['A', 'C', 'G', 'T']
    if not kmer or any(base not in letters for base in kmer):
        raise ValueError(
            f"kmer must be a non-empty string of A/C/G/T, got {kmer!r}")
    # original sequence first, then its mutants (three per position, in position order):
    sequences = [kmer] + all_possible_mutations(kmer)
    # fetch all needed rows in one pass; when a k-mer occurs several times the
    # first occurrence wins:
    wanted = data.loc[data['kmer'].isin(sequences), ['kmer', target]]
    lookup = wanted.drop_duplicates(subset='kmer', keep='first').set_index('kmer')[target]
    missing = [seq for seq in sequences if seq not in lookup.index]
    if missing:
        raise KeyError(f"k-mer(s) not found in the table: {missing}")
    values = lookup.reindex(sequences).to_numpy(dtype='float64')
    # calculate delta (original minus each mutant):
    deltas = values[0] - values[1:]
    # median delta of each original nucleotide/position; one row of three mutants per position:
    delta_medians = np.median(deltas.reshape(len(kmer), 3), axis=1)
    # form the final data.frame suitable for logomaker, row index 1,2,3...:
    final_df = pd.DataFrame(0.0, index=range(1, len(kmer) + 1), columns=letters)
    for position, base in enumerate(kmer, start=1):
        final_df.at[position, base] = delta_medians[position - 1]
    # a NaN median (NaN value in the table) is reported as 0:
    return final_df.fillna(0.0)


# function to plot nucleotide saliencies:
import logomaker
import matplotlib.pyplot as plt
def plot_saliency(df, negative=False,
                  start=None, end=None, figsize=[8,2],
                  xticks=False, yticks=False,
                  spines=False, ylim=None):
    """Draw a logomaker sequence logo of a saliency matrix.

    df         : matrix handed to ``logomaker.Logo`` (after the optional
                 row slice and sign flip below).
    negative   : when True, ``-df`` is plotted instead of ``df``.
    start, end : optional row-slice bounds; the slice stops at ``end+1``,
                 so ``end`` itself is included.
                 NOTE(review): for an integer index pandas appears to treat
                 this slice as positional, not label-based — confirm that
                 this is intended for the 1-based saliency frames.
    figsize    : forwarded to ``plt.subplots``.
    xticks, yticks : when False, the corresponding ticks are removed.
    spines     : when False, the axes spines are hidden.
    ylim       : forwarded to ``set_ylim`` when given.

    Returns whatever ``Figure.tight_layout()`` returns.
    """
    # make Figure and Axes objects:
    figure, axes = plt.subplots(1, 1, figsize=figsize)

    # limit x range, if defined (a missing bound leaves that side open):
    lower = start
    upper = end + 1 if end is not None else None
    if lower is not None or upper is not None:
        df = df[lower:upper]

    # flip saliencies if defined:
    if negative == True:
        df = -df

    logo = logomaker.Logo(df, ax=axes)

    # optional cosmetics:
    if ylim is not None:
        logo.ax.set_ylim(ylim)
    if spines == False:
        logo.style_spines(visible=False)
    if xticks == False:
        axes.set_xticks([])
    if yticks == False:
        axes.set_yticks([])

    return logo.fig.tight_layout()

In [None]:
# Settings for the expression readout:
kmer = 'AA'                       # the k-mer to analyse
target = 'delta.log2expression'   # table column to compare
negative = False                  # True flips the sign of the saliencies

# run, then flip the sign if requested:
the_kmer_saliency_df = kmer_saliency_df(kmer=kmer, target=target)
if negative:
    the_kmer_saliency_df = -the_kmer_saliency_df
#
the_kmer_saliency_df

In [None]:
# Draw the logo of the most recently computed saliency matrix
# (plot_saliency removes ticks and spines by default).
plot_saliency(df=the_kmer_saliency_df, figsize=[3,2])

In [None]:
# save: name the file after the k-mer and target that were actually analysed,
# so the CSV cannot end up labelled with a stale, hard-coded sequence.
the_kmer_saliency_df.to_csv(f"/rd4/users/liangn/mywork/{kmer}_{target}.csv", index=True)

In [None]:
# Switch to the export readout; its saliencies are sign-flipped.
target = 'delta.log2export'
negative = True

# run (re-uses the k-mer defined in the expression cell), then flip the sign if requested:
the_kmer_saliency_df = kmer_saliency_df(kmer=kmer, target=target)
if negative:
    the_kmer_saliency_df = -the_kmer_saliency_df
#
the_kmer_saliency_df

In [None]:
# Draw the logo of the export saliencies. The sign flip was already applied when
# the matrix was computed, so plot_saliency's own `negative` flag stays at its default.
plot_saliency(df=the_kmer_saliency_df, figsize=[3,2])

In [None]:
# save: name the file after the k-mer and target that were actually analysed,
# so the CSV cannot end up labelled with a stale, hard-coded sequence.
the_kmer_saliency_df.to_csv(f"/rd4/users/liangn/mywork/{kmer}_{target}.csv", index=True)