In [1]:
import requests
import json
from tqdm import tqdm

In [2]:
genes = ['TSC22D1',
 'KLF1',
 'MAP2K6',
 'CEBPE',
 'RUNX1T1',
 'MAML2',
 'CBL',
 'PTPN9',
 'TGFBR2',
 'ETS2',
 'SGK1',
 'TBX3',
 'DUSP9',
 'SPI1',
 'ELMSAN1',
 'UBASH3B',
 'PTPN12',
 'FOXA1',
 'FOXA3',
 'IGDCC3',
 'BCORL1',
 'MEIS1',
 'GLB1L2',
 'IKZF3',
 'BAK1',
 'FEV',
 'MAP2K3',
 'SLC38A2',
 'SET',
 'LHX1',
 'TBX2',
 'SLC4A1',
 'RREB1',
 'ZNF318',
 'MAPK1',
 'COL2A1',
 'ZBTB25',
 'MAP4K5',
 'SLC6A9',
 'MIDN',
 'OSR2',
 'DLX2',
 'CBFA2T3',
 'HES7',
 'FOXL2',
 'AHR',
 'FOXO4',
 'RHOXF2BB',
 'S1PR2',
 'POU3F2',
 'LYL1',
 'IER5L',
 'CNN1',
 'CELF2',
 'JUN',
 'CEBPA',
 'MAP4K3',
 'ZC3HAV1',
 'CDKN1A',
 'UBASH3A',
 'PRTG',
 'PTPN1',
 'TP73',
 'MAP7D1',
 'FOSB',
 'C19orf26',
 'IRF1',
 'TMSB4X',
 'BPGM',
 'SAMD1',
 'HOXB9',
 'HOXC13',
 'CKS1B',
 'CLDN6',
 'KIF18B',
 'KIF2C',
 'BCL2L11',
 'COL1A1',
 'CEBPB',
 'FOXF1',
 'ZBTB1',
 'PLK4',
 'ARRDC3',
 'C3orf72',
 'KIAA1804',
 'HNF4A',
 'SNAI1',
 'KMT2A',
 'ISL2',
 'CSRNP1',
 'ARID1A',
 'CNNM4',
 'NCL',
 'ZBTB10',
 'STIL',
 'ATL1',
 'NIT1',
 'CDKN1B',
 'PTPN13',
 'HOXA13',
 'CITED1',
 'PRDM1',
 'HK2',
 'CDKN1C',
 'EGR1']

In [3]:
result = {}
for gene in tqdm(genes):
    gene_code = gene

    headers = {
        'Accept': 'application/json',
    }
    
    params = {
        'query': gene_code,
        'format': 'json',
    }
    
    response = requests.get('https://webservice.wikipathways.org/findInteractions', params=params, headers=headers)
    if response.status_code != 200:
        print(response.status_code)
    else:
        r = json.loads(response.content.decode())
        result[gene] = r    
    

100%|██████████████████████████████████████████████████████████████████| 105/105 [02:45<00:00,  1.57s/it]


In [4]:
with open('wikipaths_dump.json', 'w') as json_file:
    json.dump(result, json_file, indent=4)

## Building the similarity/distance matrix

In [5]:
import json
import pandas as pd

In [6]:
with open('./wikipaths_dump.json', 'r') as f:
    data = json.load(f)

In [38]:
all_genes = []
for g in list(data.keys()):
    for path in data[g]['result']:
        if path['species'] == 'Homo sapiens':
            all_genes.extend(path['fields']['left']['values'])
            #all_genes.extend(path['fields']['right']['values'])
            if "mediator" in path['fields']:
                all_genes.extend(path['fields']['mediator']['values'])

In [39]:
inter_mtx = pd.DataFrame(columns = list(set(all_genes)), index = list(data.keys()))
inter_mtx.loc[:,:] = 0

In [40]:
for g in list(data.keys()):
    for path in data[g]['result']:
        if path['species'] == 'Homo sapiens':
            for i in path['fields']['left']['values']:
                inter_mtx.loc[g,i] +=1

In [41]:
inter_mtx

Unnamed: 0,Unnamed: 1,INHBA,APJ,PPARG,MAP2K6,RPS6KA4,PMAIP1,IFITM2,RAC2,FOXO3,...,cbLA,DMRT1,MAP3K14,MSX1,MAP3K10,MAPK14,NCOR2,ZAP70,Hsp90,LTBP1
TSC22D1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
KLF1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
MAP2K6,24,0,0,0,30,0,0,0,1,0,...,0,0,4,0,5,4,0,0,0,0
CEBPE,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
RUNX1T1,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CITED1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
PRDM1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
HK2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
CDKN1C,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [42]:
sim_mtx = pd.DataFrame(index = list(data.keys()), columns = list(data.keys()))

In [43]:
for gene1 in sim_mtx.index:
    for gene2 in sim_mtx.columns:
        t1 = inter_mtx.loc[gene1]
        t1 = t1[t1>0].index.tolist()
        t2 = inter_mtx.loc[gene2]
        t2 = t2[t2>0].index.tolist()
        sim_mtx.loc[gene1, gene2] = len(set(t1).intersection(set(t2)))
sim_mtx.fillna(0, inplace=True)

In [44]:
sim_mtx

Unnamed: 0,TSC22D1,KLF1,MAP2K6,CEBPE,RUNX1T1,MAML2,CBL,PTPN9,TGFBR2,ETS2,...,ATL1,NIT1,CDKN1B,PTPN13,HOXA13,CITED1,PRDM1,HK2,CDKN1C,EGR1
TSC22D1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
KLF1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
MAP2K6,0,0,77,1,1,1,2,0,1,2,...,0,0,1,0,0,0,1,0,1,2
CEBPE,0,0,1,4,1,1,1,0,1,1,...,0,0,1,0,0,0,1,0,1,1
RUNX1T1,0,0,1,1,5,1,1,0,1,1,...,0,0,1,0,0,0,1,0,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CITED1,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,1,0,0,0,0
PRDM1,0,0,1,1,1,1,1,0,1,1,...,0,0,1,0,0,0,6,0,1,1
HK2,0,0,0,0,0,0,1,0,0,0,...,0,0,1,0,0,0,0,8,0,1
CDKN1C,0,0,1,1,1,1,1,0,1,1,...,0,0,14,0,0,0,1,0,16,1


In [45]:
sim_mtx.to_csv('wikipaths_distance_mtx.csv')