In [61]:
class ReactomePathway:
    """One node of a Reactome pathway hierarchy.

    The node only stores identifiers: ``children`` and ``parents`` are lists
    that callers fill with the IDs of neighbouring pathways. ``level`` and
    ``root_id`` start out unset (``-1`` / ``''``) and are meant to be filled
    in once the whole hierarchy is known.
    """

    def __init__(self, reactome_sID):
        """Create an unconnected node.

        Args:
            reactome_sID: identifier stored on the node.
        """
        self.is_Root = False
        self.is_Leaf = False
        self.name = ''
        self.reactome_sID = reactome_sID
        self.children = []
        self.parents = []
        self.level = -1  # -1 means "not computed yet"
        self.root_id = ''

    def asdict(self):
        """Return the node's fields as a plain dictionary.

        The ``children`` and ``parents`` values are the node's own lists,
        not copies.
        """
        return {
            'reactome_sID': self.reactome_sID,
            'children': self.children,
            'parents': self.parents,
            'is_leaf': self.is_Leaf,
            'is_root': self.is_Root,
            'level': self.level,
            'root_id': self.root_id,
            # Previously omitted although it is a regular field of the node.
            'name': self.name,
        }

    def assert_leaf_root_state(self):
        """Recompute ``is_Leaf`` / ``is_Root`` from the current edges.

        A node without children is a leaf; a node without parents is a root.
        Both flags are assigned unconditionally, so calling this again after
        edges were added resets a stale ``True`` back to ``False``.
        """
        self.is_Leaf = len(self.children) == 0
        self.is_Root = len(self.parents) == 0



In [63]:
class PathwayHierarchy(dict):
    """Dictionary of pathway ID -> pathway node with hierarchy helpers.

    Values are expected to expose ``parents`` (list of pathway IDs that are
    also keys of this dictionary), ``is_Leaf`` and ``is_Root``.
    """

    def __init__(self, *arg, **kw):
        """Accept the same arguments as ``dict``."""
        super(PathwayHierarchy, self).__init__(*arg, **kw)

    def add_hierarchy_levels(self):
        """Assign ``level`` and ``root_id`` to every node.

        For each node the chain of *first* parents is followed until a node
        without parents is reached. The number of steps becomes ``level``
        (a root has level 0) and the ID of the node reached becomes
        ``root_id``. Additional parents are ignored.

        ``root_name`` is set to the same value as ``root_id`` because that is
        the attribute earlier versions of this method wrote.

        Raises:
            ValueError: if the first-parent chain of a node loops back on
                itself (previously this looped forever).
            KeyError: if a parent ID is not a key of this hierarchy.
        """
        for key, node in self.items():
            level = 0
            current = key
            visited = {key}
            while len(self[current].parents) > 0:
                # Only the first parent is followed; multi-parent nodes are
                # therefore placed on the branch of their first parent.
                current = self[current].parents[0]
                if current in visited:
                    raise ValueError(
                        'cyclic parent chain detected at {!r}'.format(current))
                visited.add(current)
                level += 1
            node.level = level
            node.root_id = current
            node.root_name = current  # backward-compatible alias

    def hierarchyInfo(self):
        """Return entry, leaf and root counts as a dictionary."""
        leafs = sum(1 for node in self.values() if node.is_Leaf)
        roots = sum(1 for node in self.values() if node.is_Root)
        return {'size': len(self), 'leafs': leafs, 'roots': roots}

    def getLevel(self, key):
        """Return the ``level`` stored on the node registered under ``key``.

        Raises:
            KeyError: if ``key`` is not part of the hierarchy.
        """
        return self[key].level


In [65]:
# Build the pathway hierarchy for one organism from the Reactome relation
# file. Each line is read as "<left>\t<right>"; the left entry is registered
# as parent of the right entry.
pathways = PathwayHierarchy()

# Substring that a left-hand pathway ID must contain to be kept.
organism = "MMU"

with open('ReactomePathwaysRelation.txt') as fh:
    for line in fh:
        line_list = line.strip().split('\t')
        if len(line_list) < 2:
            # Blank or truncated line (e.g. a trailing newline at the end of
            # the file) - nothing to register.
            continue
        left_entry = line_list[0]
        right_entry = line_list[1]
        if organism not in left_entry:
            continue
        # Create the nodes on first sight, then record the edge in both
        # directions.
        if left_entry not in pathways:
            pathways[left_entry] = ReactomePathway(left_entry)
        pathways[left_entry].children.append(right_entry)
        if right_entry not in pathways:
            pathways[right_entry] = ReactomePathway(right_entry)
        pathways[right_entry].parents.append(left_entry)

# Leaf/root flags can only be decided once all edges are known.
for v in pathways.values():
    v.assert_leaf_root_state()

pathways.add_hierarchy_levels()

In [79]:
# Nested mapping built from the UniProt -> Reactome export:
#   organism -> UniProt ID -> physical entity ID ->
#       {'reactome_id', 'name', 'pathways': [(pathway ID, pathway name), ...]}
uniprot_2_reactome = {}

with open('UniProt2Reactome_PE_Pathway.txt') as fh:
    for line in fh:
        line_split = line.strip().split('\t')
        if len(line_split) < 8:
            # Blank or truncated line: the columns read below (up to
            # index 7) are not all present.
            continue
        uniprot_ID = line_split[0]
        reactome_entity_ID = line_split[1]
        entity_name = line_split[2]
        reactome_pathway_ID = line_split[3]
        reactome_pathway_Name = line_split[5]
        # Named `species` so the global `organism` filter used when building
        # the pathway hierarchy is not overwritten by this loop.
        species = line_split[7].replace(' ', '_')

        entities = uniprot_2_reactome.setdefault(species, {}).setdefault(uniprot_ID, {})
        # The first occurrence of an entity fixes its name; later lines only
        # contribute additional pathways.
        entity = entities.setdefault(
            reactome_entity_ID,
            {'reactome_id': reactome_entity_ID, 'name': entity_name, 'pathways': []},
        )
        entity['pathways'].append((reactome_pathway_ID, reactome_pathway_Name))



In [80]:
# Top-level keys are the organism names (spaces replaced by underscores).
uniprot_2_reactome.keys()

dict_keys(['Drosophila_melanogaster', 'Rattus_norvegicus', 'Danio_rerio', 'Caenorhabditis_elegans', 'Canis_familiaris', 'Mus_musculus', 'Homo_sapiens', 'Sus_scrofa', 'Bos_taurus', 'Gallus_gallus', 'Plasmodium_falciparum', 'Xenopus_tropicalis', 'Schizosaccharomyces_pombe', 'Dictyostelium_discoideum', 'Saccharomyces_cerevisiae', 'Mycobacterium_tuberculosis'])

In [39]:
import pickle

In [81]:
# Persist the mapping of every organism in its own pickle file named
# "<organism>_uniprot2reactome.pickle" in the working directory.
for key, organism_mapping in uniprot_2_reactome.items():
    pickle_path = '{}_uniprot2reactome.pickle'.format(key)
    with open(pickle_path, 'wb') as handle:
        pickle.dump(organism_mapping, handle, protocol=pickle.HIGHEST_PROTOCOL)



In [82]:
# Inspect the mouse mapping: UniProt ID -> physical entity ID -> entity record.
# NOTE(review): this displays the complete dictionary; consider showing a
# small slice instead to keep the notebook output short.
uniprot_2_reactome["Mus_musculus"]

{'A0A075B5J3': {'R-MMU-198188': {'reactome_id': 'R-MMU-198188',
   'name': 'Trbc1 [plasma membrane]',
   'pathways': [('R-MMU-198933',
     'Immunoregulatory interactions between a Lymphoid and a non-Lymphoid cell'),
    ('R-MMU-202424', 'Downstream TCR signaling'),
    ('R-MMU-202427', 'Phosphorylation of CD3 and TCR zeta chains'),
    ('R-MMU-202430', 'Translocation of ZAP-70 to Immunological synapse'),
    ('R-MMU-202433', 'Generation of second messenger molecules'),
    ('R-MMU-389948', 'PD-1 signaling')]}},
 'A0A075B5J4': {'R-MMU-198188-2': {'reactome_id': 'R-MMU-198188-2',
   'name': 'Trbc2 [plasma membrane]',
   'pathways': [('R-MMU-198933',
     'Immunoregulatory interactions between a Lymphoid and a non-Lymphoid cell'),
    ('R-MMU-202424', 'Downstream TCR signaling'),
    ('R-MMU-202427', 'Phosphorylation of CD3 and TCR zeta chains'),
    ('R-MMU-202430', 'Translocation of ZAP-70 to Immunological synapse'),
    ('R-MMU-202433', 'Generation of second messenger molecules'),
   

In [181]:
class ReactomeQuery:
    """Look up measured identifiers in a pickled ID -> Reactome mapping.

    Attributes:
        query_data: the ``(identifier, measurement)`` pairs passed in.
        query_results: identifier -> {entity ID -> entity record}; every
            entity record additionally carries the ``'measurement'`` of the
            identifier it was found for.
        all_contained_pathways: distinct ``(pathway ID, pathway name)``
            tuples of all hits, in order of first occurrence.
        missing_ids: queried identifiers absent from the mapping.
    """

    def __init__(self, query_data, target_organism, id_database):
        """Load the mapping and run the query immediately.

        Args:
            query_data: iterable of ``(identifier, measurement)`` pairs.
            target_organism: first part of the pickle file name.
            id_database: second part of the pickle file name.
        """
        self.query_data = query_data
        self.query_results = {}
        self.all_contained_pathways = []
        self.missing_ids = []
        self.get_query_results(target_organism, id_database)
        self.calc_all_pathways()

    def asdict(self):
        """Return the query as a dictionary.

        Placeholder: currently always returns an empty dictionary.
        """
        return {}

    def get_query_results(self, tar, db):
        """Fill ``query_results`` from ``'<tar>_<db>2reactome.pickle'``.

        Identifiers that are not present in the mapping are collected in
        ``missing_ids`` instead of aborting the whole query with a KeyError.

        Args:
            tar: first part of the pickle file name.
            db: second part of the pickle file name.
        """
        # SECURITY: pickle.load can execute arbitrary code. Only load files
        # that were produced locally by this notebook.
        with open('{}_{}2reactome.pickle'.format(tar, db), 'rb') as handle:
            reactome_data = pickle.load(handle)

        self.missing_ids = []
        for elem in self.query_data:
            query_id, measurement = elem[0], elem[1]
            reactome_elem = reactome_data.get(query_id)
            if reactome_elem is None:
                self.missing_ids.append(query_id)
                continue
            # Attach the measurement to every physical entity of the hit.
            for entity_entry in reactome_elem.values():
                entity_entry['measurement'] = measurement
            self.query_results[query_id] = reactome_elem

    def calc_all_pathways(self):
        """Collect the distinct pathways of all hits.

        A dict is used for de-duplication so that the result keeps the order
        of first occurrence and is identical from run to run (a set of string
        tuples has no stable order across interpreter sessions).
        """
        seen = {}
        for query_result in self.query_results.values():
            for physical_entity in query_result.values():
                for pathway in physical_entity['pathways']:
                    seen[pathway] = None
        self.all_contained_pathways = list(seen)

    def get_levels_of_query(self, hierarchy, level):
        """Return the contained pathways whose hierarchy level is ``level``.

        Args:
            hierarchy: mapping pathway ID -> node exposing ``level``.
            level: level to select.
        """
        return [elem for elem in self.all_contained_pathways
                if hierarchy[elem[0]].level == level]

    def get_target_level_for_query_pathways(self, hierarchy, tar_level):
        """Group the query hits by their ancestor pathway at ``tar_level``.

        Every pathway of every hit is lifted along its first parent until its
        level is no longer greater than ``tar_level``. Pathways that already
        sit at or above the target level are reported under their own ID.

        Args:
            hierarchy: mapping pathway ID -> node exposing ``level``,
                ``parents`` and ``reactome_sID``.
            tar_level: hierarchy level to summarise at.

        Returns:
            ``{pathway ID: {identifier: {'entity_IDs': [...],
            'entity_names': [...], 'measurement': value}}}``
        """
        out_pathways = {}
        for uniprot_ID, query_result in self.query_results.items():
            for entity_ID, entity_entry in query_result.items():
                for pathway in entity_entry['pathways']:
                    current_id = pathway[0]
                    # Stop at the target level, or at a node without parents
                    # should the stored levels be inconsistent.
                    while (hierarchy[current_id].level > tar_level
                           and hierarchy[current_id].parents):
                        current_id = hierarchy[current_id].parents[0]

                    # Use the same key for the membership test and the
                    # assignment.
                    pathway_key = hierarchy[current_id].reactome_sID
                    proteins = out_pathways.setdefault(pathway_key, {})
                    record = proteins.get(uniprot_ID)
                    if record is None:
                        proteins[uniprot_ID] = {
                            'entity_IDs': [entity_ID],
                            'entity_names': [entity_entry['name']],
                            'measurement': entity_entry['measurement'],
                        }
                    elif entity_ID not in record['entity_IDs']:
                        record['entity_IDs'].append(entity_ID)
                        record['entity_names'].append(entity_entry['name'])

        return out_pathways
        
        

In [172]:
# Example query: (UniProt accession, measurement) pairs. The first element is
# looked up in the pickled mapping, the second is attached to each hit as its
# 'measurement'. The measurement values here are dummy numbers 1.0 .. 10.0.
testUniprot = [("Q8BWN8", 1.0),
("P47740", 2.0),
("Q9R0H0", 3.0),
("Q9DBK0", 4.0),
("O88844", 5.0),
("Q9D379", 6.0),
("P34914", 7.0),
("Q9QZD8", 8.0),
("O08756", 9.0),
("P45952", 10.0)
]

In [182]:
# Run the example query against the mouse mapping. The constructor opens
# 'Mus_musculus_uniprot2reactome.pickle' relative to the working directory,
# so the per-organism pickle files must have been written beforehand.
tar_organism = 'Mus_musculus'
test_query = ReactomeQuery(testUniprot, tar_organism, 'uniprot')

In [158]:
# Raw hits: UniProt ID -> physical entity ID -> entity record with 'measurement'.
test_query.query_results

{'Q8BWN8': {'R-MMU-5690054': {'reactome_id': 'R-MMU-5690054',
   'name': 'Acot4 [peroxisomal matrix]',
   'pathways': [('R-MMU-390247',
     'Beta-oxidation of very long chain fatty acids'),
    ('R-MMU-9033241', 'Peroxisomal protein import')],
   'measurement': 1.0},
  'R-MMU-9033145': {'reactome_id': 'R-MMU-9033145',
   'name': 'Acot4 [cytosol]',
   'pathways': [('R-MMU-9033241', 'Peroxisomal protein import')],
   'measurement': 1.0}},
 'P47740': {'R-MMU-6811616': {'reactome_id': 'R-MMU-6811616',
   'name': 'Aldh3a2 [endoplasmic reticulum membrane]',
   'pathways': [('R-MMU-1660661', 'Sphingolipid de novo biosynthesis')],
   'measurement': 2.0},
  'R-MMU-6811617': {'reactome_id': 'R-MMU-6811617',
   'name': 'Aldh3a2 [peroxisomal membrane]',
   'pathways': [('R-MMU-389599', 'Alpha-oxidation of phytanate'),
    ('R-MMU-9603798', 'Class I peroxisomal membrane protein import')],
   'measurement': 2.0},
  'R-MMU-9603785': {'reactome_id': 'R-MMU-9603785',
   'name': 'Aldh3a2 [cytosol]',
  

In [129]:
# Number of distinct (pathway ID, pathway name) tuples hit by the query.
len(test_query.all_contained_pathways)

22

In [183]:
# Summarise the hits at hierarchy level 1: each pathway is lifted along its
# first parent until it reaches that level, then proteins are grouped per pathway.
test_query.get_target_level_for_query_pathways(pathways, 1)

{'R-MMU-556833': {'Q8BWN8': {'entity_IDs': ['R-MMU-5690054'],
   'entity_names': ['Acot4 [peroxisomal matrix]'],
   'measurement': 1.0},
  'P47740': {'entity_IDs': ['R-MMU-6811616', 'R-MMU-6811617'],
   'entity_names': ['Aldh3a2 [endoplasmic reticulum membrane]',
    'Aldh3a2 [peroxisomal membrane]'],
   'measurement': 2.0},
  'Q9R0H0': {'entity_IDs': ['R-MMU-390263'],
   'entity_names': ['Acox1 [peroxisomal matrix]'],
   'measurement': 3.0},
  'Q9DBK0': {'entity_IDs': ['R-MMU-5690072'],
   'entity_names': ['Acot12 [cytosol]'],
   'measurement': 4.0},
  'P34914': {'entity_IDs': ['R-MMU-2142819'],
   'entity_names': ['Ephx2 [cytosol]'],
   'measurement': 7.0},
  'P45952': {'entity_IDs': ['R-MMU-49491'],
   'entity_names': ['Acadm [mitochondrial matrix]'],
   'measurement': 10.0}},
 'R-MMU-9033241': {'Q8BWN8': {'entity_IDs': ['R-MMU-5690054', 'R-MMU-9033145'],
   'entity_names': ['Acot4 [peroxisomal matrix]', 'Acot4 [cytosol]'],
   'measurement': 1.0},
  'Q9R0H0': {'entity_IDs': ['R-MMU-