In [1]:
import ast
import os

import numpy as np
import pandas as pd
from pyvis.network import Network
import seaborn as sns

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Gene of interest (GOI): the gene whose regulators are looked up in the cells below.

GOI = 'CASP8'

In [3]:
# set a working directory; relative paths used later are resolved against it
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere
wdir = "/Users/samibening/Projects/Bachelor/"
os.chdir( wdir )

# adjacency table, given relative to wdir (loaded into `adjacencies` further down)
f_adj = "SCENICfiles/adj.csv"

In [4]:
# Interactive pyvis canvas, configured for inline notebook rendering with a dark theme.
net = Network(
    notebook=True,
    select_menu=True,
    cdn_resources='remote',
    height='750px',
    width='100%',
    bgcolor="#222222",
    font_color="white",
)
# Barnes-Hut layout settings for the physics simulation.
net.barnes_hut(gravity=-4000, central_gravity=1)

In [5]:
def clean_target_genes(row):
    """Parse the stringified ``TargetGenes`` entry of one regulon row.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide a ``'TargetGenes'`` entry holding the textual form of a
        Python literal (here: a list of ``(gene, importance)`` tuples).

    Returns
    -------
    object
        The parsed Python literal.

    Raises
    ------
    ValueError, SyntaxError
        If the text is not a plain Python literal.
    """
    # literal_eval only accepts literals, so content read from a CSV file can
    # never execute code the way a bare eval() would.
    return ast.literal_eval(row['TargetGenes'])

In [6]:
# Read the adjacency and regulon tables from the working directory.
adj_path = os.path.join(wdir, f_adj)
reg_path = os.path.join(wdir, "SCENICfiles/reg.csv")
adjacencies = pd.read_csv(adj_path)
regulon = pd.read_csv(reg_path)
# Preview only: the parsed lists are displayed as the cell output, while
# `regulon['TargetGenes']` itself keeps the raw strings used by later cells.
regulon.apply(clean_target_genes, axis=1)

0       [(SNAPC1, 0.3106342842966851), (MCOLN1, 0.5420...
1       [(KIAA1328, 0.3970861519089653), (FANCL, 0.461...
2       [(C17orf97, 0.3100105285841538), (DACT2, 0.425...
3       [(SENP8, 0.7902819247600439), (SPDEF, 0.367617...
4       [(KTN1, 0.4799956278331701), (PHF3, 0.58684373...
                              ...                        
3367    [(TRIM26, 0.1201918626925179), (PGM5, 0.232152...
3368    [(RNF146, 0.2550295040430109), (COL16A1, 0.382...
3369    [(SLCO2B1, 0.3591370907224389), (NRXN2, 0.1067...
3370    [(ETV3, 0.103677774272525), (FAAP100, 0.473556...
3371    [(ZFYVE27, 0.1691535174831882), (OSER1, 0.0855...
Length: 3372, dtype: object

In [7]:
# find all regulons that have the GOI (e.g. CASP8) in their target genes
def find_regulons(df, GOI):
    """Return the TFs of every row in ``df`` whose target genes include ``GOI``.

    Parameters
    ----------
    df : pandas.DataFrame
        Regulon table with a ``'TF'`` column and a string ``'TargetGenes'``
        column in which gene names appear as single-quoted tokens.
    GOI : str
        Gene of interest.

    Returns
    -------
    numpy.ndarray
        ``'TF'`` values of the matching rows (may contain repeats when a TF
        occupies several rows).
    """
    # Match the gene including BOTH quotes and as a literal (not a regex):
    # a closing quote alone would also hit genes that merely end in the GOI
    # name, and regex metacharacters in a gene name would change the pattern.
    quoted_goi = "'" + str(GOI) + "'"
    # na=False keeps the mask boolean when a TargetGenes cell is missing.
    has_goi = df['TargetGenes'].str.contains(quoted_goi, regex=False, na=False)
    return df.loc[has_goi, 'TF'].values

def make_regulon_dataframe(TF, top_n=100, regulon_df=None):
    """Build a table of the strongest target genes of one transcription factor.

    All ``TargetGenes`` entries of the rows belonging to ``TF`` are parsed,
    pooled, de-duplicated and ranked by importance.

    Parameters
    ----------
    TF : str
        Transcription factor whose rows are collected.
    top_n : int, default 100
        Number of highest-importance targets to keep.
    regulon_df : pandas.DataFrame, optional
        Regulon table with ``'TF'`` and stringified ``'TargetGenes'`` columns.
        Defaults to the notebook-level ``regulon`` table.

    Returns
    -------
    pandas.DataFrame
        Columns ``target``, ``importance``, ``TF`` and ``group`` (the latter is
        ``"<TF>_regulon"``), sorted by descending importance. Empty, but with
        the same columns, when ``TF`` has no rows.
    """
    if regulon_df is None:
        regulon_df = regulon

    # Pool all (target, importance) pairs first and build the frame once,
    # instead of concatenating a growing frame inside the loop.
    pairs = []
    for raw in regulon_df.loc[regulon_df['TF'] == TF, 'TargetGenes']:
        # literal_eval parses the stored list without executing file content.
        pairs.extend(ast.literal_eval(raw))

    # Naming the columns up front keeps the empty case well-formed, so the
    # sort below cannot fail on a missing 'importance' column.
    reg_df = pd.DataFrame(pairs, columns=['target', 'importance'])
    return (
        reg_df
        .drop_duplicates()
        .assign(TF=TF)
        .sort_values(by='importance', ascending=False)
        .head(top_n)
        .assign(group=f"{TF}_regulon")
    )

def make_adj_df(GOI, top_n=15, adj_df=None):
    """Return the highest-importance adjacency rows that target ``GOI``.

    Parameters
    ----------
    GOI : str
        Gene of interest; rows are kept where ``'target'`` equals it.
    top_n : int, default 15
        Number of rows to keep after ranking by importance.
    adj_df : pandas.DataFrame, optional
        Adjacency table with ``'target'`` and ``'importance'`` columns.
        Defaults to the notebook-level ``adjacencies`` table.

    Returns
    -------
    pandas.DataFrame
        The selected rows, sorted by descending importance, with an added
        ``'group'`` column set to ``'adjacencies'``.
    """
    if adj_df is None:
        adj_df = adjacencies
    return (
        adj_df[adj_df['target'] == GOI]
        .sort_values(by='importance', ascending=False)
        .head(top_n)  # threshold can be adjusted through `top_n`
        # assign() returns a new frame, so no column is set on a slice
        .assign(group='adjacencies')
    )

In [8]:
def make_goi_grn(GOI):
    """Assemble the regulon tables of every TF that lists ``GOI`` as a target.

    Parameters
    ----------
    GOI : str
        Gene of interest.

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of ``make_regulon_dataframe(tf)`` for each
        matching TF. When no TF matches, an empty frame carrying the
        ``target``/``importance``/``TF``/``group`` columns is returned so that
        column-based steps downstream still work.
    """
    goi_regulons = find_regulons(regulon, GOI)
    # A TF can be returned once per matching row; its table is built from all
    # of its rows anyway, so build it once per TF (first-seen order is kept).
    unique_tfs = list(dict.fromkeys(goi_regulons))
    frames = [make_regulon_dataframe(tf) for tf in unique_tfs]
    # Optional: also include the top raw adjacencies of the GOI
    # frames.append(make_adj_df(GOI))
    if not frames:
        # pd.concat raises on an empty list
        return pd.DataFrame(columns=['target', 'importance', 'TF', 'group'])
    # A single concat instead of growing a frame inside the loop.
    return pd.concat(frames, axis=0)

In [9]:
# Build the GOI network table, drop repeated (importance, TF, group) rows and
# rank the remaining edges by importance.
goi_grn = (
    make_goi_grn(GOI)
    .drop_duplicates(subset=['importance', 'TF', 'group'], keep="first")
    .sort_values(by='importance', ascending=False)
)
# Keep only targets that occur in more than one row (drops singletons).
shared_target = goi_grn.duplicated(subset=['target'], keep=False)
goi_grn = goi_grn[shared_target]
len(goi_grn)

80

In [None]:
# TODO: visualize distribution of importance scores

In [10]:
# Rows in which the gene of interest itself is the target.
goi_grn.loc[goi_grn['target'] == GOI]

Unnamed: 0,target,importance,TF,group
23,CASP8,0.534655,MSC,MSC_regulon
131,CASP8,0.374171,IKZF3,IKZF3_regulon
60,CASP8,0.230572,TBP,TBP_regulon


In [11]:
# One "Set2" colour (hex string) per regulon group, in order of first appearance.
group_names = goi_grn['group'].unique()
colors = sns.color_palette("Set2", len(group_names)).as_hex()
groups = dict(zip(group_names, colors))

groups

{'MSC_regulon': '#66c2a5',
 'IKZF3_regulon': '#fc8d62',
 'IKZF1_regulon': '#8da0cb',
 'IRF1_regulon': '#e78ac3',
 'TFDP1_regulon': '#a6d854',
 'ATF4_regulon': '#ffd92f',
 'TBP_regulon': '#e5c494'}

In [12]:
# Columns that describe one edge each: TF -> target, weighted by importance,
# coloured by regulon group.
sources = goi_grn['TF']
targets = goi_grn['target']
weights = goi_grn['importance']
group = goi_grn['group']

# Materialise the edges as a list: a bare zip() is a one-shot iterator, so
# re-running the cell that consumes it would silently add no edges.
edge_data = list(zip(sources, targets, weights, group))

In [13]:
# Register both endpoints and the connecting edge for every
# (TF, target, importance, group) record, coloured by the record's group.
for src, dst, w, grp in edge_data:
    edge_color = groups[grp]
    net.add_node(src, src, title=src, color=edge_color)
    net.add_node(dst, dst, title=dst, color=edge_color)
    net.add_edge(src, dst, value=w, color=edge_color)

# Adjacency list of the finished graph, used for the node annotations.
neighbor_map = net.get_adj_list()

In [14]:
# Use the node id as hover title and label, and store the number of
# neighbours as the node's "value".
for node in net.nodes:
    node_id = node['id']
    node.update(
        title=node_id,
        value=len(neighbor_map[node_id]),
        label=node_id,
    )

In [15]:
net.show_buttons(filter_=['physics'])
# Name the export after the configured gene so that changing GOI neither
# overwrites nor mislabels another gene's network (CASP8 -> casp8_network.html).
os.makedirs("src", exist_ok=True)  # the relative output folder may not exist yet
net.show(f"src/{GOI.lower()}_network.html")