# Pathway to Pathway (Reactome)

In [1]:
import pandas as pd 
import numpy as np
import csv
import json
import biomedkg_utils
from biomedkg_utils import *
import matplotlib.pyplot as plt
from matplotlib_venn import venn3, venn2

### Human Reactome Pathways


In [4]:
# Build the human Reactome pathway hierarchy edge list
# (parent pathway -pathway_is_parent_of-> child pathway) and write it to CSV.
import os  # imported here so the cell does not depend on a star-import leaking `os`

pw_prefix = 'Reactome_Pathway:'
headers = ['Pathway (Reactome)','Pathway (Reactome)', 'Relationship', 'Score']

# DataFrame.to_csv does not create missing directories, so make sure every
# directory this cell reads from or writes to exists.
for directory in ('input', 'output/pathway2pathway', 'output/edges_to_use'):
    os.makedirs(directory, exist_ok=True)

# `wget -N` only re-downloads when the remote file is newer than the local copy.
download_status = os.system('wget -N -P input/ https://reactome.org/download/current/ReactomePathwaysRelation.txt')
if download_status != 0:
    # Best effort: keep going so that a previously downloaded copy can still be used.
    print('WARNING: wget exited with status %d; falling back to any existing copy of '
          'input/ReactomePathwaysRelation.txt' % download_status)

# The file has no header row; it is read as two columns named Parent and Child.
pw_tree_df = pd.read_table('input/ReactomePathwaysRelation.txt', header=None)
pw_tree_df.columns = ['Parent', 'Child']

# Keep only rows whose parent identifier contains '-HSA-' (literal match).
human_pw_tree_df = pw_tree_df[pw_tree_df['Parent'].str.contains('-HSA-', regex=False)].copy()

# Prefix both identifiers (vectorised string concatenation).
human_pw_tree_df['Parent'] = pw_prefix + human_pw_tree_df['Parent']
human_pw_tree_df['Child'] = pw_prefix + human_pw_tree_df['Child']

# Every edge gets the same relationship label and a constant score of 1.0.
relations = ['-pathway_is_parent_of->']*len(human_pw_tree_df)
scores = [1.0]*len(human_pw_tree_df)
human_pw_tree_df.insert(2, 'relation', relations)
human_pw_tree_df.insert(3, 'scores', scores)
human_pw_tree_df.columns = headers

human_pw_tree_df.to_csv('output/pathway2pathway/edges_reactomePathwayHierarchy.csv', index = False)
human_pw_tree_df.to_csv('output/edges_to_use/Pathway_(Reactome)_2_Pathway_(Reactome).csv', index = False)

# Read the file back as a sanity check; the two identically named columns come
# back as 'Pathway (Reactome)' and 'Pathway (Reactome).1'.
df = pd.read_csv('output/pathway2pathway/edges_reactomePathwayHierarchy.csv')
df.tail()

--2023-06-18 23:38:45--  https://reactome.org/download/current/ReactomePathwaysRelation.txt
Resolving reactome.org (reactome.org)... 100.25.71.177
Connecting to reactome.org (reactome.org)|100.25.71.177|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 593673 (580K) [text/plain]
Saving to: ‘input/ReactomePathwaysRelation.txt’

     0K .......... .......... .......... .......... ..........  8%  394K 1s
    50K .......... .......... .......... .......... .......... 17%  787K 1s
   100K .......... .......... .......... .......... .......... 25%  786K 1s
   150K .......... .......... .......... .......... .......... 34%  789K 1s
   200K .......... .......... .......... .......... .......... 43%  241M 0s
   250K .......... .......... .......... .......... .......... 51%  205M 0s
   300K .......... .......... .......... .......... .......... 60%  793K 0s
   350K .......... .......... .......... .......... .......... 68%  278M 0s
   400K .......... .......... .........

Unnamed: 0,Pathway (Reactome),Pathway (Reactome).1,Relationship,Score
2642,Reactome_Pathway:R-HSA-983705,Reactome_Pathway:R-HSA-983695,-pathway_is_parent_of->,1.0
2643,Reactome_Pathway:R-HSA-983712,Reactome_Pathway:R-HSA-2672351,-pathway_is_parent_of->,1.0
2644,Reactome_Pathway:R-HSA-983712,Reactome_Pathway:R-HSA-936837,-pathway_is_parent_of->,1.0
2645,Reactome_Pathway:R-HSA-991365,Reactome_Pathway:R-HSA-170670,-pathway_is_parent_of->,1.0
2646,Reactome_Pathway:R-HSA-991365,Reactome_Pathway:R-HSA-997272,-pathway_is_parent_of->,1.0


# Pathway to Pathway (KEGG)

In [2]:
# Download the KEGG pathway list for organism code `hsa` from the KEGG REST API
# and save the response to input/kegg_human_pathways.tsv (the file read by the
# mapping cell below). The shell redirect needs the input/ directory to exist.
! curl https://rest.kegg.jp/list/pathway/hsa > input/kegg_human_pathways.tsv

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 20907    0 20907    0     0  21381      0 --:--:-- --:--:-- --:--:-- 21377


In [7]:
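# INSTRUCTIONS: Obtain the KEGG pathway hierarchy, either by manually formatting it from the website
# https://www.kegg.jp/kegg/pathway.html or by downloading kegg_pathway_hierarchy.csv from the GitHub repository,
# and save it as input/kegg_pathway_hierarchy.csv (the path the next cell reads).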

In [8]:
import networkx as nx

# Path of the KEGG pathway hierarchy file (see the instructions in the cell above).
kegg_hierarchy_file_name  = "input/kegg_pathway_hierarchy.csv"
# Parse the hierarchy with the biomedkg_utils helper. Later cells call .nodes()
# and .edges() on the result, so it is presumably a networkx graph -- confirm
# against parse_kegg_hierarchy.
kegg_pathway_G = parse_kegg_hierarchy(kegg_hierarchy_file_name)

In [18]:
# Dump the KEGG pathway hierarchy as a tab-separated edge list, one edge per
# line and without a trailing newline.
# NOTE: the file name is spelled "hierchy"; a later cell reads exactly this
# path, so the spelling must stay in sync.
with open("input/kegg_pathway_hierchy.tsv","w") as out_file:
    edge_rows = []
    for source_node, target_node in kegg_pathway_G.edges():
        edge_rows.append("\t".join([source_node, target_node]))
    out_file.write("\n".join(edge_rows))

In [10]:
# Compare the human KEGG pathway list with the KEGG pathway hierarchy and map
# each human pathway onto its entry in the hierarchy.
import re

kegg_pathway_mapping_file = "input/kegg_human_pathways.tsv"
kegg_pathway_mapping = parse_mapping_table(kegg_pathway_mapping_file)


def _kegg_human_pathway_number(pathway_key):
    """Return the numeric part of a human KEGG pathway key as an int.

    Accepts keys of the form ``path:hsa00010`` or ``hsa00010``.

    ``str.strip("path:hsa")`` is deliberately not used: it strips any of the
    characters p, a, t, h, ':' and s from both ends rather than removing the
    prefix, and only gives the right answer because the remainder is digits.

    Raises:
        ValueError: if the key does not have the expected form.
    """
    match = re.fullmatch(r"(?:path:)?hsa(\d+)", pathway_key.strip())
    if match is None:
        raise ValueError("Unexpected KEGG human pathway key: %r" % (pathway_key,))
    return int(match.group(1))


# Pathway numbers present in the human pathway list.
from_human_pathway_mapping = {_kegg_human_pathway_number(k) for k in kegg_pathway_mapping}
# Pathway numbers present in the hierarchy: nodes without a "." are taken to be
# pathway entries whose text starts with the number followed by a space.
from_kegg_pathway_hierarchy = {int(k.split(" ")[0]) for k in kegg_pathway_G.nodes() if "." not in k}
intersection = from_human_pathway_mapping & from_kegg_pathway_hierarchy

print(f"{len(from_human_pathway_mapping)} human_pathways.tsv; {len(intersection)} intersection; {len(from_kegg_pathway_hierarchy)} kegg_pathway_hierarchy.csv")


human_pathways =  kegg_pathway_mapping.keys()
kegg_pathway_ids = [k for k in kegg_pathway_G.nodes() if "." not in k]
human_pathway_to_kegg_pathway_id = map_human_pathways_to_kegg_pathway_id(human_pathways, kegg_pathway_ids)

353 human_pathways.tsv; 347 intersection; 551 kegg_pathway_hierarchy.csv


In [11]:
# Persist the human-pathway -> KEGG-hierarchy-entry mapping as a two-column,
# tab-separated file (one mapping per line, no trailing newline).
with open("input/kegg_human_pathway_to_pathway_id.tsv","w") as out_file:
    mapping_rows = []
    for left_field, right_field in human_pathway_to_kegg_pathway_id.items():
        mapping_rows.append("\t".join([left_field, right_field]))
    out_file.write("\n".join(mapping_rows))

In [13]:
# Align Pathway ID -is- Pathway Name
# Builds two lookup tables from input/kegg_human_pathway_to_pathway_id.tsv:
#   pathway_id2name: pathway ID (with 'path:' rewritten to 'path_') -> processed name
#   pathway_name2id: processed name -> pathway ID
pathway_id2name = dict()
pathway_name2id = dict()

# Use a context manager so the file handle is closed deterministically.
with open('input/kegg_human_pathway_to_pathway_id.tsv') as mapping_file:
    for raw_line in mapping_file:
        # Skip blank lines (e.g. a trailing newline) instead of raising IndexError.
        if not raw_line.strip():
            continue
        line = raw_line.strip().split('\t')

        # Pathway ID, Pathway Name
        pathway_id = line[0].replace('path:','path_')
        pathway_name = process_pathway_name(line[1])

        # Pathway Name -is- Pathway ID
        pathway_id2name[pathway_id] = pathway_name
        pathway_name2id[pathway_name] = pathway_id

In [19]:
# Pathway -parent of-> Pathway
# Reads the KEGG hierarchy edge list, keeps the edges whose two endpoints are
# each either a known human pathway or a pathway category, and writes the
# resulting parent -> child edges to CSV.
import os  # imported here so the cell does not depend on a star-import leaking `os`


def _resolve_kegg_pathway_id(pathway_name):
    """Return the identifier to use for *pathway_name*, or None to drop it.

    A name whose processed form is a key of ``pathway_name2id`` resolves to the
    mapped pathway ID. Otherwise, a name accepted by
    ``check_if_pathway_category`` is used unchanged as its own identifier.
    Anything else yields None.
    """
    cleaned_name = process_pathway_name(pathway_name)
    if cleaned_name in pathway_name2id:
        return pathway_name2id[cleaned_name]
    if check_if_pathway_category(pathway_name):
        return pathway_name
    return None


parent2child_kegg_pathway = dict()

# Use a context manager so the file handle is closed deterministically.
with open('input/kegg_pathway_hierchy.tsv') as hierarchy_file:
    for raw_line in hierarchy_file:
        # Skip blank lines (e.g. a trailing newline) instead of raising IndexError.
        if not raw_line.strip():
            continue
        line = raw_line.strip().split('\t')

        # Pathways' Names
        parent_pathway_name = line[0].replace('path:','path_')
        child_pathway_name  = line[1].strip().replace('path:','path_')

        # Is the parent pathway good (human or a category)?
        parent_pathway_id = _resolve_kegg_pathway_id(parent_pathway_name)
        if parent_pathway_id is None:
            continue

        # Is the child pathway good (human or a category)?
        child_pathway_id = _resolve_kegg_pathway_id(child_pathway_name)
        if child_pathway_id is None:
            continue

        # Parent Pathway - Child Pathway
        parent2child_kegg_pathway.setdefault(parent_pathway_id, set()).add(child_pathway_id)


# Output edges
file = 'Pathway_(KEGG)_2_Pathway_(KEGG).csv'

# Make sure every output directory exists before anything is written to it.
for directory in ('output/pathway2pathway/', 'output/edges', 'output/edges_to_use/'):
    os.makedirs(directory, exist_ok=True)

outpath = os.path.join('output/pathway2pathway/', file)
output_edgefile_onerel_noweight(
    outpath = outpath,
    columns = ['Pathway (KEGG)','Pathway (KEGG)','Relationship'],
    dictionary = parent2child_kegg_pathway,
    rel = '-parent_of->',
    prefix_col1='KEGG_Pathway:',
    prefix_col2='KEGG_Pathway:'
)

# Re-read the edge file just written and copy it into the other output folders.
df = pd.read_csv(outpath)
df.to_csv(os.path.join('output/edges', file), index=False)
df.to_csv(os.path.join('output/edges_to_use/', file), index=False)