In [1]:
import ast
import os

import numpy as np
import pandas as pd
import seaborn as sns
from pyvis.network import Network

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Working directory that holds the SCENIC output files.
# The SCENIC_WDIR environment variable overrides the default, so the notebook
# can run on another machine without editing this cell.
wdir = os.environ.get("SCENIC_WDIR", "/Users/samibening/Projects/Bachelor/")
os.chdir(wdir)

# Adjacency table (TF -> target importances), relative to wdir.
f_adj = "SCENICfiles/adj.csv"

In [3]:
# Interactive pyvis network: dark canvas, node selection menu, resources
# embedded in the generated HTML.
net = Network(
    notebook=True,
    select_menu=True,
    cdn_resources='in_line',
    height='750px',
    width='100%',
    bgcolor="#222222",
    font_color="white",
)
# Switch the layout to pyvis' Barnes-Hut physics configuration.
net.barnes_hut()

In [4]:
def clean_target_genes(row):
    """Parse the ``TargetGenes`` cell of one regulon row.

    The cell holds the textual form of a Python literal (in this notebook a
    list of ``(gene, importance)`` tuples). ``ast.literal_eval`` is used
    instead of ``eval`` so that only literals are accepted and nothing from
    the CSV can be executed as code.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide a ``'TargetGenes'`` entry containing the literal string.

    Returns
    -------
    object
        The parsed Python literal.

    Raises
    ------
    ValueError, SyntaxError
        If the cell is not a valid Python literal.
    """
    return ast.literal_eval(row['TargetGenes'])

In [5]:
# Read the adjacency table and the regulon table from the working directory.
adj_path = os.path.join(wdir, f_adj)
reg_path = os.path.join(wdir, "SCENICfiles/reg.csv")
adjacencies = pd.read_csv(adj_path)
regulon = pd.read_csv(reg_path)
# Parse every TargetGenes cell once. The parsed result is not stored, so
# `regulon['TargetGenes']` stays a string column; later cells search and
# parse those strings themselves.
regulon.apply(clean_target_genes, axis=1)
regulon.head()

Unnamed: 0,TF,MotifID,AUC,NES,MotifSimilarityQvalue,OrthologousIdentity,Annotation,Context,TargetGenes,RankAtMax
0,ARNT,taipale_tf_pairs__GCM1_MAX_NNCACGTGNNNNNNNNNNR...,0.08678,3.028261,7.55372e-06,1.0,gene is annotated for similar motif transfac_p...,"frozenset({'activating', 'hg38_10kbp_up_10kbp_...","[('SNAPC1', 0.3106342842966851), ('MCOLN1', 0....",287
1,ARNT,cisbp__M01719,0.098043,3.626383,0.0,0.918885,gene is orthologous to ENSMUSG00000015522 in M...,"frozenset({'activating', 'hg38_10kbp_up_10kbp_...","[('KIAA1328', 0.3970861519089653), ('FANCL', 0...",939
2,ATF3,jaspar__MA1951.1,0.056881,3.044702,4.40322e-09,1.0,gene is annotated for similar motif taipale_cy...,"frozenset({'activating', 'hg38_10kbp_up_10kbp_...","[('C17orf97', 0.3100105285841538), ('DACT2', 0...",4771
3,ATOH1,tfdimers__MD00001,0.102387,3.660276,7.46225e-06,0.888889,motif similar to jaspar__MA1467.2 ('Atoh1'; q-...,"frozenset({'activating', 'hg38_10kbp_up_10kbp_...","[('SENP8', 0.7902819247600439), ('SPDEF', 0.36...",1173
4,BCLAF1,taipale_tf_pairs__E2F1_ELK1_SGCGCNNNNNNNNNNCGG...,0.086991,3.065279,7.2019e-06,1.0,motif similar to transfac_pro__M04704 ('V$BCLA...,"frozenset({'activating', 'hg38_10kbp_up_10kbp_...","[('KTN1', 0.4799956278331701), ('PHF3', 0.5868...",4808


In [6]:
# find all regulons that have GOI (CASP8) in their target genes
def find_regulons(df, GOI):
    """Return the TFs of all regulon rows whose target list contains ``GOI``.

    The ``TargetGenes`` column is searched as text for the gene name wrapped
    in single quotes on both sides, so a gene that merely ends with ``GOI``
    (e.g. ``PCASP8`` when looking for ``CASP8``) is not matched. The search is
    a literal substring match, not a regular expression.

    Parameters
    ----------
    df : pandas.DataFrame
        Regulon table with string columns ``'TF'`` and ``'TargetGenes'``.
    GOI : str
        Gene of interest.

    Returns
    -------
    numpy.ndarray
        ``'TF'`` values of the matching rows, one entry per matching row
        (a TF may therefore appear more than once).
    """
    quoted_goi = "'" + str(GOI) + "'"
    # na=False: rows with a missing TargetGenes cell simply do not match.
    has_goi = df['TargetGenes'].str.contains(quoted_goi, regex=False, na=False)
    return df.loc[has_goi, 'TF'].values

def make_regulon_dataframe(TF, top_n=50, regulon_df=None):
    """Build the edge table (TF -> target) for one transcription factor.

    All regulon rows of ``TF`` are collected, their ``TargetGenes`` strings
    are parsed into ``(target, importance)`` pairs, exact duplicate pairs are
    removed, and the ``top_n`` pairs with the highest importance are kept.

    Parameters
    ----------
    TF : str
        Transcription factor whose regulon rows are used.
    top_n : int, default 50
        Number of highest-importance targets to keep.
    regulon_df : pandas.DataFrame, optional
        Regulon table with ``'TF'`` and ``'TargetGenes'`` columns. Defaults
        to the notebook-level ``regulon`` frame.

    Returns
    -------
    pandas.DataFrame
        Columns ``target``, ``importance``, ``TF`` and ``group`` (the group
        is ``"<TF>_regulon"``), sorted by descending importance. Empty, with
        the same columns, when ``TF`` has no rows or no targets.
    """
    source = regulon if regulon_df is None else regulon_df
    # literal_eval only accepts Python literals, so CSV content is never
    # executed. Collecting all pairs first avoids concat inside the loop.
    pairs = [
        pair
        for raw in source.loc[source['TF'] == TF, 'TargetGenes']
        for pair in ast.literal_eval(raw)
    ]
    if not pairs:
        return pd.DataFrame(columns=['target', 'importance', 'TF', 'group'])
    return (
        pd.DataFrame(pairs, columns=['target', 'importance'])
        .drop_duplicates()
        .assign(TF=TF)
        .sort_values(by='importance', ascending=False)
        .head(top_n)
        # assign() returns a new frame instead of writing into a slice.
        .assign(group=str(TF + "_regulon"))
    )

def make_adj_df(GOI, top_n=15, adjacencies_df=None):
    """Return the strongest adjacency rows that point at ``GOI``.

    Parameters
    ----------
    GOI : str
        Gene of interest; rows are selected where ``target == GOI``.
    top_n : int, default 15
        Number of highest-importance rows to keep.
    adjacencies_df : pandas.DataFrame, optional
        Adjacency table with at least ``'target'`` and ``'importance'``
        columns. Defaults to the notebook-level ``adjacencies`` frame.

    Returns
    -------
    pandas.DataFrame
        The selected rows sorted by descending importance, with an extra
        ``group`` column set to ``'adjacencies'``. The input is not modified.
    """
    source = adjacencies if adjacencies_df is None else adjacencies_df
    return (
        source[source['target'] == GOI]
        .sort_values(by='importance', ascending=False)
        .head(top_n)
        # assign() returns a new frame instead of writing into a slice.
        .assign(group='adjacencies')
    )

In [7]:
def make_goi_grn(GOI):
    """Assemble the edge table of the network around ``GOI``.

    The table stacks the regulon edge table of every TF whose regulon
    contains ``GOI``, followed by the adjacency rows that target ``GOI``.

    Parameters
    ----------
    GOI : str
        Gene of interest.

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of the per-TF regulon frames and the
        adjacency frame.
    """
    # find_regulons yields one entry per matching regulon row, so a TF with
    # several matching rows appears repeatedly. make_regulon_dataframe already
    # aggregates all rows of a TF, so each TF is built once (first-seen order).
    tfs = list(dict.fromkeys(find_regulons(regulon, GOI)))
    frames = [make_regulon_dataframe(tf) for tf in tfs]
    frames.append(make_adj_df(GOI))
    # Single concat instead of growing a frame inside the loop.
    return pd.concat(frames, axis=0)

In [8]:
# Edge table for the gene of interest.
goi_grn = make_goi_grn('CASP8')
# Remove repeated edges. 'target' is part of the key so that two different
# targets of the same TF with an identical importance are both kept.
goi_grn = goi_grn.drop_duplicates(subset=['target', 'importance', 'TF', 'group'], keep="first")
# Alternative view: keep only edges that point at the gene of interest itself
# by filtering on goi_grn['target'] == 'CASP8'.
# Keep only targets that occur in more than one row (drop singletons).
goi_grn = goi_grn[goi_grn.duplicated(subset=['target'], keep=False)]
goi_grn

Unnamed: 0,target,importance,TF,group
1957,STK17A,1.944499,IKZF1,IKZF1_regulon
893,CYTIP,1.895823,IKZF1,IKZF1_regulon
38,FAM107B,1.71777,IKZF1,IKZF1_regulon
1547,FCMR,1.592036,IKZF1,IKZF1_regulon
124,RAC2,1.434774,IKZF1,IKZF1_regulon
2,PIK3R5,1.340695,IKZF1,IKZF1_regulon
187,BCL2L11,1.282193,IKZF1,IKZF1_regulon
94,AQP3,1.431182,IRF1,IRF1_regulon
871,PDE7A,0.908579,IRF1,IRF1_regulon
6,CCDC88C,0.998542,ATF4,ATF4_regulon


In [9]:
# Map every edge group to one hex colour from seaborn's "Set2" palette,
# in order of first appearance in goi_grn.
group_names = goi_grn['group'].unique()
colors = sns.color_palette("Set2", len(group_names)).as_hex()
groups = dict(zip(group_names, colors))

groups

{'IKZF1_regulon': '#66c2a5',
 'IRF1_regulon': '#fc8d62',
 'ATF4_regulon': '#8da0cb',
 'MSC_regulon': '#e78ac3',
 'IKZF3_regulon': '#a6d854',
 'TBP_regulon': '#ffd92f',
 'TFDP1_regulon': '#e5c494',
 'adjacencies': '#b3b3b3'}

In [10]:
# Column views of the edge table used to build the network.
sources = goi_grn['TF']
targets = goi_grn['target']
weights = goi_grn['importance']
group = goi_grn['group']

# Materialise the (source, target, weight, group) tuples as a list. A bare
# zip() is exhausted after one pass, so re-running the cell that consumes
# edge_data would otherwise iterate over nothing.
edge_data = list(zip(sources, targets, weights, group))

In [11]:
# Add both endpoints and the connecting edge for every row of the edge
# table; nodes and edge are coloured by the row's group.
for src, dst, w, grp in edge_data:
    edge_color = groups[grp]
    net.add_node(src, src, title=src, color=edge_color)
    net.add_node(dst, dst, title=dst, color=edge_color)
    net.add_edge(src, dst, value=w, color=edge_color)

# Adjacency list of the finished network, keyed by node id.
neighbor_map = net.get_adj_list()

In [12]:
# Use the node id as hover title and label, and set each node's value to
# its number of neighbours.
for node in net.nodes:
    node_id = node['id']
    node["title"] = node_id
    node["value"] = len(neighbor_map[node_id])
    node['label'] = node_id

In [13]:
# Show only the physics controls in the UI, then write and display the
# network as an HTML file.
ui_sections = ['physics']
output_html = "example.html"
net.show_buttons(filter_=ui_sections)
net.show(output_html)