### Import dependencies

In [1]:
import pandas as pd
import os 

import py4cytoscape as p4c
# Confirm that a running Cytoscape instance is reachable before sending commands.
p4c.cytoscape_ping()
# Show the Cytoscape, automation API and py4cytoscape versions used for this run.
p4c.cytoscape_version_info()

You are connected to Cytoscape!


{'apiVersion': 'v1',
 'cytoscapeVersion': '3.10.2',
 'automationAPIVersion': '1.9.0',
 'py4cytoscapeVersion': '1.9.0'}

### Read variants list 

In [2]:
# Plain-text file holding the variant identifiers, one per line.
variant_list = "../data/variant_list.txt"

# Strip surrounding whitespace from every line and skip lines that end up
# empty, so a blank or trailing line never becomes an empty filter value
# in the BioMart query.
with open(variant_list, 'r') as handle:
    stripped_lines = (line.strip() for line in handle)
    rsids = [rsid for rsid in stripped_lines if rsid]

### Initialize BioMart server and dataset
- host: http://www.ensembl.org (Ensembl BioMart)
- dataset: hsapiens_snp

In [3]:
from pybiomart import Dataset  

# Attributes to retrieve for each variant; reused below as the column labels.
attributes = ['refsnp_id', 'associated_gene']

# Open the `hsapiens_snp` dataset on the Ensembl BioMart host.
dataset = Dataset(host='http://www.ensembl.org', name='hsapiens_snp')

# Query the requested attributes, filtered to the variants read from the list.
snp_df = dataset.query(filters={'snp_filter': rsids}, attributes=attributes)

# Relabel the result columns with the attribute names requested above.
snp_df.columns = attributes

### Manipulate the data
- explode the data frame so that each row contains a single gene
- drop duplicate rows
- drop rows with a missing gene when the same variant also appears in another row

In [4]:
# Split each comma-separated gene string into a list, give every gene its own
# row, and discard rows that are exact duplicates afterwards.
snp_df = (
    snp_df
    .assign(associated_gene=snp_df['associated_gene'].str.split(','))
    .explode('associated_gene')
    .drop_duplicates()
)

# A row is dropped only when it has no gene AND its variant also appears in
# another row; every other row is kept.
shared_variant = snp_df['refsnp_id'].duplicated(keep=False)
missing_gene = snp_df['associated_gene'].isna()
snp_df = snp_df[~shared_variant | ~missing_gene].reset_index(drop=True)

In [5]:
# Preview only the first rows instead of rendering the entire frame.
snp_df.head(10)

Unnamed: 0,refsnp_id,associated_gene
0,rs11136000,CLU
1,rs139237860,
2,rs141088742,FOXG1
3,rs143223844,FOXG1
4,rs147154860,FOXG1
5,rs148157138,FOXG1
6,rs150277632,
7,rs157580,TOMM40
8,rs157580,APOcluster
9,rs157580,APOE


### Create base for network

In [6]:
# Node identifiers: every variant, plus every gene associated with a variant.
variants = snp_df['refsnp_id'].dropna().tolist()
genes = snp_df['associated_gene'].dropna().tolist()

all_ids = set(variants + genes)

# Build the variant lookup set once (not once per node) and fix ONE sorted
# node sequence. Both the 'id' and the 'group' column are derived from that
# same sequence, so each label is guaranteed to sit next to its own id
# regardless of set iteration order.
variant_ids = set(variants)
node_ids = sorted(all_ids)

nodes = pd.DataFrame({'id': node_ids,
                      'group': ['SNP' if node in variant_ids else 'Gene' for node in node_ids]})

# One edge per variant-gene pair; rows without a gene produce no edge.
edges = (
    snp_df
    .dropna()
    .rename(columns={'refsnp_id': 'source', 'associated_gene': 'target'})
    .astype({'source': str, 'target': str})
)

# Send the node and edge tables to Cytoscape as a new network.
p4c.create_network_from_data_frames(nodes, edges, title="Variant-Gene Network", collection="network-task")

Applying default style...
Applying preferred layout


3858

### Import WikiPathways

In [7]:
# Location of the WikiPathways link set, relative to the current working directory.
wp_path = os.path.join(os.getcwd(), "../wikipathways_set", "wikipathways_hsa_20240410.xgmml")

# Assemble the CyTargetLinker `extend` command from its individual arguments;
# the 'shared name' column is passed as the identifier attribute.
ctl_extend_cmd = " ".join([
    "cytargetlinker extend",
    'idAttribute="shared name"',
    f'linkSetFiles="{wp_path}"',
    "network=current",
    "direction=SOURCES",
])

# Run the extension on the current network, then lay the network out again.
p4c.commands_run(ctl_extend_cmd)
p4c.layout_network('force-directed')

{}

### Import style

In [8]:
# Style definition file, resolved relative to the current working directory.
custom_style = os.path.join(os.getcwd(), "../data", "custom_style.xml")
load_style_cmd = 'vizmap load file file="{}"'.format(custom_style)

# Load the style file first, then apply the style named "custom_style".
for vizmap_cmd in (load_style_cmd, 'vizmap apply styles="custom_style"'):
    p4c.commands_run(vizmap_cmd)

# Re-run the force-directed layout after the style has been applied.
p4c.layout_network('force-directed')

{}

### Save the network and session

In [9]:
# Output directory, resolved relative to the current working directory.
# Create it up front so the export does not fail on a fresh checkout where
# the results directory does not exist yet.
results_dir = os.path.join(os.getcwd(), "../results")
os.makedirs(results_dir, exist_ok=True)

image_path = os.path.join(results_dir, "variant_gene_network.pdf")
session_path = os.path.join(results_dir, "variant_gene_network.cys")

# Export the network view as a PDF and save the Cytoscape session,
# overwriting the files left by a previous run.
p4c.export_image(image_path, type="PDF", overwrite_file=True)
p4c.save_session(session_path, overwrite_file=True)

{}

In [10]:
# List the layout names available in the connected Cytoscape instance
# (the notebook uses 'force-directed' from this list for its layouts).
p4c.get_layout_names()

['attribute-circle',
 'stacked-node-layout',
 'attribute-grid',
 'degree-circle',
 'circular',
 'attributes-layout',
 'kamada-kawai',
 'force-directed',
 'cose',
 'grid',
 'hierarchical',
 'fruchterman-rheingold',
 'isom',
 'force-directed-cl']