In [1]:
import numpy as np
import pandas as pd
import json
import collections

### **Generate pathway input from the NCATS BioPlanet CSV**

In [2]:
#read the BioPlanet pathway table from CSV into a DataFrame
# NOTE(review): the displayed frame contains an 'Unnamed: 0' column, which suggests the
# CSV was saved with its index; passing index_col=0 would avoid it — confirm against the file.
df_pathway = pd.read_csv('/data/scPAFA_paper/pathway/bioplanet_pathway.csv')

In [3]:
#show the BioPlanet table (a bare last-line expression is rendered by the notebook)
df_pathway

Unnamed: 0,PATHWAY_ID,PATHWAY_NAME,GENE_ID,GENE_SYMBOL
0,bioplanet_1,Inhibition of matrix metalloproteinases,3265,HRAS
1,bioplanet_1,Inhibition of matrix metalloproteinases,4313,MMP2
2,bioplanet_1,Inhibition of matrix metalloproteinases,4318,MMP9
3,bioplanet_1,Inhibition of matrix metalloproteinases,4323,MMP14
4,bioplanet_1,Inhibition of matrix metalloproteinases,7076,TIMP1
...,...,...,...,...
74143,bioplanet_999,Nectin adhesion pathway,9076,CLDN1
74144,bioplanet_999,Nectin adhesion pathway,9855,FARP2
74145,bioplanet_999,Nectin adhesion pathway,23396,PIP5K1C
74146,bioplanet_999,Nectin adhesion pathway,25945,PVRL3


In [4]:
#generate pathway input: map '<PATHWAY_ID>_<PATHWAY_NAME>' -> list of gene symbols
# NOTE: this adds a 'pathway_full_name' column to df_pathway in place.
df_pathway['pathway_full_name'] = df_pathway['PATHWAY_ID']+'_'+df_pathway['PATHWAY_NAME']
# One groupby pass replaces re-filtering the whole frame once per pathway.
# sort=False keeps pathways in order of first appearance and keeps the rows of each
# pathway in file order, so the keys and gene lists come out in the same order as a
# per-key boolean filter would produce.
# Rows whose pathway key is missing (NaN) are left out by groupby's default dropna=True.
pathwaydict_bioplanet = (
    df_pathway.groupby('pathway_full_name', sort=False)['GENE_SYMBOL']
    .agg(list)
    .to_dict()
)

In [6]:
#show pathway input dict
# NOTE(review): this displays the whole dictionary, which yields a very long output;
# consider displaying only a few entries instead.
pathwaydict_bioplanet

{'bioplanet_1_Inhibition of matrix metalloproteinases': ['HRAS',
  'MMP2',
  'MMP9',
  'MMP14',
  'TIMP1',
  'TIMP2',
  'TIMP3',
  'TIMP4',
  'RECK'],
 'bioplanet_10_Adhesion and diapedesis of granulocytes': ['C5',
  'CSF3',
  'ICAM1',
  'ICAM2',
  'IFNG',
  'IL1A',
  'CXCL8',
  'ITGAL',
  'ITGAM',
  'ITGB2',
  'PECAM1',
  'SELL',
  'SELP',
  'SELPLG',
  'TNF'],
 'bioplanet_100_Phospholipase C epsilon pathway': ['ADCY1',
  'ADRB2',
  'PRKACB',
  'PRKACG',
  'PRKAR1A',
  'PRKAR1B',
  'PRKAR2A',
  'PRKAR2B',
  'PTGER1',
  'RAP2B',
  'RAPGEF3',
  'PLCE1'],
 'bioplanet_1000_Platelet adhesion to exposed collagen': ['COL1A1',
  'COL1A2',
  'FCER1G',
  'FYN',
  'GP1BA',
  'GP1BB',
  'GP5',
  'GP9',
  'ITGA2',
  'ITGB1',
  'LYN',
  'VWF',
  'GP6'],
 'bioplanet_1002_Integrin-mediated cell adhesion': ['AKT1',
  'AKT2',
  'ARAF',
  'BRAF',
  'CAPN5',
  'CAPN1',
  'CAPN2',
  'CAPN3',
  'CAPNS1',
  'CAPN6',
  'CAV1',
  'CAV2',
  'CAV3',
  'CDC42',
  'CRK',
  'CSK',
  'DOCK1',
  'FYN',
  'GRB2',
  '

In [7]:
#check unique
# Dict keys are unique by construction, so comparing len(dict) with len(set(keys)) can
# never fail. Compare against the source table instead: each distinct PATHWAY_ID should
# have produced exactly one dictionary entry (False would mean an ID maps to several
# names, or several IDs collapsed into one key).
print(df_pathway['PATHWAY_ID'].nunique() == len(pathwaydict_bioplanet))
# number of pathways in the dictionary
print(len(pathwaydict_bioplanet))

True
1658


In [8]:
#write the BioPlanet pathway dictionary to disk as JSON
with open('../example_file/pathwaydict_bioplanet.json', mode="w") as out_handle:
    json.dump(pathwaydict_bioplanet, fp=out_handle)

### **Generate pathway input from MSigDB JSON**

In [9]:
#load the MSigDB JSON (hallmark 50 pathways) and keep only each gene set's symbols
with open('/data/scPAFA_paper/pathway/h.all.v2023.2.Hs.json', 'r') as hallmark_fh:
    hallmark_json = json.load(hallmark_fh)
# every top-level key is used as the pathway name; its 'geneSymbols' value becomes the gene list
pathwaydict_hallmark = {
    set_name: entry['geneSymbols'] for set_name, entry in hallmark_json.items()
}

In [10]:
#show pathway input dict
# NOTE(review): this displays the whole dictionary, which yields a very long output;
# consider displaying only a few entries instead.
pathwaydict_hallmark

{'HALLMARK_TNFA_SIGNALING_VIA_NFKB': ['ABCA1',
  'ACKR3',
  'AREG',
  'ATF3',
  'ATP2B1',
  'B4GALT1',
  'B4GALT5',
  'BCL2A1',
  'BCL3',
  'BCL6',
  'BHLHE40',
  'BIRC2',
  'BIRC3',
  'BMP2',
  'BTG1',
  'BTG2',
  'BTG3',
  'CCL2',
  'CCL20',
  'CCL4',
  'CCL5',
  'CCN1',
  'CCND1',
  'CCNL1',
  'CCRL2',
  'CD44',
  'CD69',
  'CD80',
  'CD83',
  'CDKN1A',
  'CEBPB',
  'CEBPD',
  'CFLAR',
  'CLCF1',
  'CSF1',
  'CSF2',
  'CXCL1',
  'CXCL10',
  'CXCL11',
  'CXCL2',
  'CXCL3',
  'CXCL6',
  'DENND5A',
  'DNAJB4',
  'DRAM1',
  'DUSP1',
  'DUSP2',
  'DUSP4',
  'DUSP5',
  'EDN1',
  'EFNA1',
  'EGR1',
  'EGR2',
  'EGR3',
  'EHD1',
  'EIF1',
  'ETS2',
  'F2RL1',
  'F3',
  'FJX1',
  'FOS',
  'FOSB',
  'FOSL1',
  'FOSL2',
  'FUT4',
  'G0S2',
  'GADD45A',
  'GADD45B',
  'GCH1',
  'GEM',
  'GFPT2',
  'GPR183',
  'HBEGF',
  'HES1',
  'ICAM1',
  'ICOSLG',
  'ID2',
  'IER2',
  'IER3',
  'IER5',
  'IFIH1',
  'IFIT2',
  'IFNGR2',
  'IL12B',
  'IL15RA',
  'IL18',
  'IL1A',
  'IL1B',
  'IL23A',
  'IL6',


In [None]:
#check unique
# NOTE(review): dict keys are always unique, so the first line prints True regardless of
# the data; it does not detect duplicate gene-set names in the source JSON.
print(len(pathwaydict_hallmark) == len(set(pathwaydict_hallmark.keys())))
# number of gene sets in the dictionary
print(len(pathwaydict_hallmark))