## Automate cytoscape network visualization with Python

We first create and activate a conda environment, and install `py4cytoscape` (plus `ipykernel`, so the environment can be used as a notebook kernel) in it:

```
conda create --name cytoscape
conda activate cytoscape
conda install pip
pip install py4cytoscape
conda install ipykernel
```

Then, start Cytoscape and run the code below.

In [3]:
import numpy as np
import pandas as pd

import py4cytoscape as p4c
# Check that a running Cytoscape instance is reachable before doing anything
# else (start the Cytoscape desktop application first).
p4c.cytoscape_ping()
# Last expression of the cell: display the version information reported by
# py4cytoscape for this connection.
p4c.cytoscape_version_info()

You are connected to Cytoscape!


{'apiVersion': 'v1',
 'cytoscapeVersion': '3.10.3',
 'automationAPIVersion': '1.11.0',
 'py4cytoscapeVersion': '1.11.0'}

Load TFs annotation

In [4]:
# Tab-separated transcription factor annotation table; its 'gene' column is
# used later to decide which genes are TFs.
tfs_path = 'annotation/Nematostella_DToL_TFs_FINAL.tsv'
tfs_dat = pd.read_csv(tfs_path, sep='\t')
tfs_dat

Unnamed: 0,gene,gene_name,common_name,og,pfam
0,Nvec_vc1.1_XM_048733866.1,Aff2/Aff4/Aff3/Aff1,,AF-4.HG1.0:AFF1/AFF2/AFF3/AFF4,AF-4/AF-4_C
1,Nvec_vc1.1_XM_001633894.3,Tfap2d/Tfap2c/Tfap2e/Tfap2b/Tfap2a,,AP-2.HG1.0:TFAP2A/TFAP2B/TFAP2C/TFAP2D/TFAP2E,TF_AP-2
2,Nvec_vc1.1_XM_048731339.1,,,AP-2.HG1.3:like:TFAP2A/TFAP2B/TFAP2C/TFAP2D/TF...,TF_AP-2
3,Nvec_vc1.1_XM_001630131.3,Arid4a/Arid4b,,ARID_BRIGHT.HG1.2:ARID4A/ARID4B,RBB1NT/ARID/Tudor-knot
4,Nvec_vc1.1_XM_048722594.1,Arid2,,ARID_BRIGHT.HG1.3:ARID2,ARID/RFX_DNA_binding
...,...,...,...,...,...
649,Nvec_vc1.1_XM_032372078.2,,,zf-C4_Nuclear_receptors.HG1.34:like:NR2E1/NR2E3,zf-C4/Hormone_recep
650,Nvec_vc1.1_XM_032371461.2,Nr6a1,,zf-C4_Nuclear_receptors.HG1.4:AR/NR3C1/NR3C2/N...,zf-C4/Hormone_recep
651,Nvec_vc1.1_XM_001630336.3,,,zf-C4_Nuclear_receptors.HG1.40:NR2E1,zf-C4/Hormone_recep
652,Nvec_vc1.1_XM_032363271.2,Hnf4g/Hnf4a,,zf-C4_Nuclear_receptors.HG1.8:HNF4A/HNF4G,zf-C4/Hormone_recep


Load GRN table

In [5]:
# Directory holding the GRN tables; also reused when saving the Cytoscape session.
grn_dir = 'results/GRN/networks/'
grn_file = grn_dir + 'grn_genes_expression_fc_0.4_accessibility_access_0.4_chromvar_5.tsv.gz'

# Read the gzipped, tab-separated GRN table.
grn_dat = pd.read_csv(grn_file, sep='\t')

# Keep only the rows that have a target gene assigned.
grn_dat = grn_dat[grn_dat['target_gene'].notna()]

Cell types

In [6]:
# Distinct values of the 'cell_type' column, in order of first appearance;
# the network-building loop below iterates over this array.
cell_types = grn_dat['cell_type'].unique()
cell_types

array(['cnidocyte', 'cnidocyte_gastrula', 'ecto_pharynx', 'ectoderm',
       'ecto_aboral', 'EMS', 'EMS_ecto_boundary',
       'gastro_circular_muscle_1', 'gastro_circular_muscle_2',
       'gastro_parietal_muscle', 'gastro_IRF1_2', 'gastro_somatic_gonad',
       'muscle_tentacle_retractor', 'muscle_mesentery_retractor',
       'digestive_filaments_1', 'digestive_filaments_2',
       'digestive_filaments_3', 'epidermis_1', 'epidermis_2',
       'precursors_PGC', 'precursors_endoNPC', 'precursors_NPC', 'NPC_1',
       'NPC_2', 'neuron_GATA_Islet_1', 'neuron_GATA_Islet_2',
       'neuron_Pou4_FoxL2_1', 'neuron_Pou4_FoxL2_2',
       'neuron_Pou4_FoxL2_3', 'neuronal', 'gland', 'gland_mucin'],
      dtype=object)

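For each selected cell type, we filter the GRN by expression (both the TF and its target gene must exceed a fold-change threshold), build node and edge tables, and send them to Cytoscape as a styled network.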

In [15]:
# Preview the GRN rows of a single cell type.
# `ct` is set explicitly here so this cell runs on a fresh kernel; previously
# it relied on the loop variable left over from running the cell below first.
ct = cell_types[0]
grn_cts = grn_dat[grn_dat['cell_type'] == ct]
grn_cts

Unnamed: 0,gene,cell_type,stage,gene_name,common_name,og,pfam,expression_fc,expression_umifrac,gene_score,...,target_accessibility,id,motif_score,peak_tf_correlation_score,in_silico_chip_score,exp_thrs,acc_thrs,target_self,target_TF,target_active_TF
0,Nvec_vc1.1_XM_032387167.2,cnidocyte,adult,,FoxA/D?,Forkhead.HG1.1:FOXA1/FOXA2/FOXA3/FOXB1/FOXB2/F...,Forkhead,1.327223,0.030066,4.971002,...,0.598411,Nvec_vc1.1_XM_032387167.2__ARCH1102_Foxa3/Foxa...,1.00,0.17,0.12,0.983757,0.065008,True,False,False
1,Nvec_vc1.1_XM_032387167.2,cnidocyte,adult,,FoxA/D?,Forkhead.HG1.1:FOXA1/FOXA2/FOXA3/FOXB1/FOXB2/F...,Forkhead,1.327223,0.030066,4.971002,...,1.068735,Nvec_vc1.1_XM_032387167.2__ARCH1102_Foxa3/Foxa...,1.00,0.26,0.18,0.983757,0.065008,True,False,False
2,Nvec_vc1.1_XM_032387167.2,cnidocyte,adult,,FoxA/D?,Forkhead.HG1.1:FOXA1/FOXA2/FOXA3/FOXB1/FOXB2/F...,Forkhead,1.327223,0.030066,4.971002,...,1.769140,Nvec_vc1.1_XM_032387167.2__ARCH1102_Foxa3/Foxa...,1.00,0.23,0.17,0.983757,0.065008,True,False,False
3,Nvec_vc1.1_XM_032387167.2,cnidocyte,adult,,FoxA/D?,Forkhead.HG1.1:FOXA1/FOXA2/FOXA3/FOXB1/FOXB2/F...,Forkhead,1.327223,0.030066,4.971002,...,0.338176,Nvec_vc1.1_XM_032387167.2__ARCH1102_Foxa3/Foxa...,1.00,0.18,0.12,0.983757,0.065008,True,True,False
4,Nvec_vc1.1_XM_032387167.2,cnidocyte,adult,,FoxA/D?,Forkhead.HG1.1:FOXA1/FOXA2/FOXA3/FOXB1/FOXB2/F...,Forkhead,1.327223,0.030066,4.971002,...,0.406546,Nvec_vc1.1_XM_032387167.2__ARCH1102_Foxa3/Foxa...,1.00,0.21,0.14,0.983757,0.065008,True,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5452,Nvec_vc1.1_XM_001631093.3,cnidocyte,adult,Atf3/Jdp2,,bZIP.HG1.17:ATF3/JDP2,bZIP_1,2.686165,0.263078,3.937506,...,4.036893,Nvec_vc1.1_XM_001631093.3__ARCH947_Creb3l2/Cre...,0.46,0.67,0.25,0.983757,0.065008,False,False,False
5453,Nvec_vc1.1_XM_001631093.3,cnidocyte,adult,Atf3/Jdp2,,bZIP.HG1.17:ATF3/JDP2,bZIP_1,2.686165,0.263078,3.937506,...,5.771508,Nvec_vc1.1_XM_001631093.3__ARCH947_Creb3l2/Cre...,0.63,0.66,0.34,0.983757,0.065008,False,False,False
5454,Nvec_vc1.1_XM_001631093.3,cnidocyte,adult,Atf3/Jdp2,,bZIP.HG1.17:ATF3/JDP2,bZIP_1,2.686165,0.263078,3.937506,...,0.673730,Nvec_vc1.1_XM_001631093.3__ARCH947_Creb3l2/Cre...,0.51,0.51,0.17,0.983757,0.065008,False,False,False
5455,Nvec_vc1.1_XM_001631093.3,cnidocyte,adult,Atf3/Jdp2,,bZIP.HG1.17:ATF3/JDP2,bZIP_1,2.686165,0.263078,3.937506,...,1.163025,Nvec_vc1.1_XM_001631093.3__ARCH947_Creb3l2/Cre...,0.54,0.56,0.21,0.983757,0.065008,False,False,False


In [None]:
# Build one Cytoscape network per cell type from the GRN table.
#
# Reads:  grn_dat (GRN edges), tfs_dat (TF annotation), cell_types.
# Effect: for every processed cell type a network named after the cell type is
#         created in the 'GRN' collection of the running Cytoscape instance,
#         styled, laid out, and its image is shown in the notebook.

# expression threshold: both the source gene and the target gene must have an
# expression fold change above this value to be kept
fc_thr = 1.4

# upper limit applied to expression fold changes before they are mapped to color
fc_max = 6

# visual style shared by all networks (loop-invariant, so defined once)
style_name = "grnStyle"
style_defaults = {
    'NODE_SIZE': 1,
    'NODE_FILL_COLOR': '#D3D3D3',
    'NODE_BORDER_COLOR': '#D3D3D3',
    'EDGE_TRANSPARENCY': 200,
    'EDGE_COLOR': '#D3D3D3',
    'NODE_LABEL_POSITION': "W,E,c,0.00,0.00",
    'NODE_LABEL_FONT_SIZE': 28,
    'ARROW_SHAPE': "ARROW_SHORT",
    'ARROW_COLOR': '#D3D3D3'
}

# NOTE(review): only the first cell type is processed here; iterate over
# `cell_types` to build a network for every cell type.
for ct in cell_types[0:1]:

    print(ct)

    # subset by cell type and filter genes by expression; take an explicit copy
    # so the column assignments below act on an independent frame instead of a
    # slice of grn_dat (avoids pandas' SettingWithCopyWarning)
    keep_rows = (
        (grn_dat['cell_type'] == ct)
        & (grn_dat['expression_fc'] > fc_thr)
        & (grn_dat['target_expression_fc'] > fc_thr)
    )
    grn_cts = grn_dat.loc[keep_rows, :].copy()

    # set limits for expression
    grn_cts['expression_fc'] = grn_cts['expression_fc'].clip(upper=fc_max)
    grn_cts['target_expression_fc'] = grn_cts['target_expression_fc'].clip(upper=fc_max)

    # use common_name for gene name when available
    has_common = grn_cts['common_name'].notna()
    grn_cts.loc[has_common, 'gene_name'] = grn_cts.loc[has_common, 'common_name']
    has_target_common = grn_cts['target_common_name'].notna()
    grn_cts.loc[has_target_common, 'target_gene_name'] = grn_cts.loc[has_target_common, 'target_common_name']

    # still unnamed: fall back to the part of the orthogroup id before the first '.'
    no_name = grn_cts['gene_name'].isna()
    grn_cts.loc[no_name, 'gene_name'] = grn_cts.loc[no_name, 'og'].str.split('.').str[0]
    no_target_name = grn_cts['target_gene_name'].isna()
    grn_cts.loc[no_target_name, 'target_gene_name'] = grn_cts.loc[no_target_name, 'target_og'].str.split('.').str[0]

    # if the source gene is not in the TF annotation, set its name to empty string
    grn_cts.loc[~grn_cts['gene'].isin(tfs_dat['gene']), 'gene_name'] = ''
    # replace the remaining missing values by empty strings
    grn_cts = grn_cts.replace(np.nan, '', regex=True)

    # NODES

    # get unique source gene values in the network
    nodes_source = (
        grn_cts.loc[:, ['gene', 'gene_name', 'expression_fc', 'zscore']]
        .rename(columns={'gene': 'id', 'gene_name': 'name'})
        .drop_duplicates(ignore_index=True)
    )
    nodes_source['type'] = 'source'

    # get unique target gene values in the network
    nodes_target = (
        grn_cts.loc[:, ['target_gene', 'target_gene_name', 'target_expression_fc']]
        .rename(columns={'target_gene': 'id', 'target_gene_name': 'name', 'target_expression_fc': 'expression_fc'})
        .drop_duplicates(ignore_index=True)
    )
    # targets get fixed placeholder values for the columns that drive the
    # node size (zscore) and node color (expression_fc) mappings
    nodes_target['zscore'] = 1e-5
    nodes_target['expression_fc'] = 1
    nodes_target['type'] = 'target'

    # get nodes_target that are not in nodes_source, so a gene that is both a
    # source and a target keeps its source attributes
    nodes_target = nodes_target.loc[~nodes_target['id'].isin(nodes_source['id'])]

    # combine nodes
    nodes = pd.concat([nodes_source, nodes_target], ignore_index=True)

    # change to str
    nodes['id'] = nodes['id'].astype(str)
    nodes['name'] = nodes['name'].astype(str)

    # EDGES

    # get edges
    edges = (
        grn_cts.loc[:, ['gene', 'target_gene', 'in_silico_chip_score']]
        .rename(columns={'gene': 'source', 'target_gene': 'target'})
        .drop_duplicates(ignore_index=True)
    )

    # change to str
    edges['source'] = edges['source'].astype(str)
    edges['target'] = edges['target'].astype(str)

    # check that all edge sources and targets are in the node data.frame
    source_nodes = edges['source'].isin(nodes['id'])
    target_nodes = edges['target'].isin(nodes['id'])

    if source_nodes.all():
        print('All source nodes are in the node data.frame')
    else:
        # fixed: this message used to say "target nodes" for missing sources
        print(f'{(~source_nodes).sum()} source nodes are not in the node data.frame')

    if target_nodes.all():
        print('All target nodes are in the node data.frame')
    else:
        print(f'{(~target_nodes).sum()} target nodes are not in the node data.frame')

    # NETWORK

    p4c.create_network_from_data_frames(nodes, edges, title=ct, collection='GRN')

    # style: node labels and edge widths are passed through from data columns
    # (an edge-transparency mapping used to be built here but was never added
    # to the style, so it has been dropped)
    nodeLabels = p4c.map_visual_property('node label', 'name', 'p')
    edgeWidth = p4c.map_visual_property('edge width', 'in_silico_chip_score', 'p')
    p4c.create_visual_style(style_name, style_defaults, [nodeLabels, edgeWidth])
    p4c.set_visual_style(style_name)

    # source nodes: color by expression fold change, size by zscore
    p4c.set_node_color_mapping(**p4c.gen_node_color_map('expression_fc', p4c.palette_color_brewer_s_YlOrRd(), style_name=style_name))
    p4c.set_node_size_mapping(**p4c.gen_node_size_map('zscore', style_name=style_name))
    p4c.set_node_font_size_default(32, style_name=style_name)

    # target nodes: small, grey and unlabelled
    # NOTE(review): the nodes are selected through the 'name' column although
    # the variable is called *_ids; confirm this is the column Cytoscape uses
    # to look the nodes up.
    target_nodes_ids = nodes.loc[nodes['type'] == 'target', 'name'].to_list()
    p4c.set_node_color_bypass(target_nodes_ids, '#D3D3D3')
    p4c.set_node_size_bypass(target_nodes_ids, 5)
    p4c.set_node_label_bypass(target_nodes_ids, '')

    # edges
    p4c.set_edge_target_arrow_shape_mapping(**p4c.gen_edge_arrow_map('interaction', style_name=style_name))
    net_cols = p4c.get_table_columns('edge')

    # loops: edges whose source equals their target are highlighted in green
    loops = net_cols.loc[net_cols['source'] == net_cols['target'], 'SUID'].to_list()
    p4c.set_edge_color_bypass(loops, '#00A300')

    # TF-TF nodes
    #tftfs = net_cols.loc[net_cols['target'].isin(tfs_dat['gene']), 'SUID'].to_list()
    #p4c.set_edge_color_bypass(tftfs, '#00A300')

    # layout
    p4c.layout_network('kamada-kawai')
    p4c.bundle_edges()
    #p4c.clear_edge_bends()

    # show image
    p4c.notebook_export_show_image()

cnidocyte
All source nodes are in the node data.frame
All target nodes are in the node data.frame
Applying default style...
Applying preferred layout


Save session

In [11]:
# Save the Cytoscape session next to the GRN tables; the file name records
# the expression threshold used to build the networks.
session_file = f'{grn_dir}cytoscape_session_expression_fc_{fc_thr}.cys'
p4c.session.save_session(session_file)

This file has been overwritten.


{}