# Hypergraph processing and filtering

In [1]:
import pickle
from copy import copy
import numpy as np
import networkx as nx
import os
import matplotlib.pyplot as plt
from itertools import product, permutations
from collections import defaultdict

from useful_functions import *

In [2]:
def filter_NF(dictionary, B, verbose=False, verbose_err=True):
    
    prev_dictionary = copy(dictionary)
    
    # Filtering: strongly connected component
    G = standard_network(prev_dictionary)
    if verbose:
        print(len(G.nodes), len(G.edges))
    
    largest = max(nx.strongly_connected_components(G), key=len)
    not_in_largest = set(G.nodes).difference(largest)
    
    if verbose:
        print("Nodes originally in strongly connected projection", len(largest))
    
    strong_conn = nx.is_strongly_connected(G)
    while not strong_conn:

        # Keep only LCC
        next_dictionary = {}
        for key, val in prev_dictionary.items():

            include = True
            for node in val[0] + val[1]:
                if node in not_in_largest:
                    include = False
                    break

            if len(val[0]) == B and include:
                next_dictionary[key] = val
                
        prev_dictionary = copy(next_dictionary)
        
        # Filtering with the new subhypergraph
        G = standard_network(prev_dictionary)
        if verbose:
            print(len(G.nodes), len(G.edges))
        AttributeError
        try: 
            largest = max(nx.strongly_connected_components(G), key=len)
            not_in_largest = set(G.nodes).difference(largest)
        except ValueError:
            if verbose_err:
                print(f"Error: A strongly connected hypergraph can't be constructed from this dataset with a restriction to {B} input nodes. Try with a different amount.")
            return None
        
        strong_conn = nx.is_strongly_connected(G)
        
    return next_dictionary

## Chemical reactions

### Astrochem

In [3]:
with open('ParsedHyperedges/Chemical/astrochem.pkl', 'rb') as f:
    astrochem_data = pickle.load(f)

#### 2-F hypergraph

In [4]:
hyper2F = filter_NF(astrochem_data, 2)
len(standard_network(hyper2F).nodes), len(hyper2F)

(439, 5123)

In [5]:
with open('ProcessedHypergraphs/Chemical/astrochem2F.pkl', 'wb') as f:
    pickle.dump(hyper2F, f)

### Pathways

#### PID

In [6]:
with open('ParsedHyperedges/Chemical/pid.pkl', 'rb') as f:
    pid_data = pickle.load(f)

##### 2-F hypergraph

In [7]:
hyper2F = filter_NF(pid_data, 2)
len(standard_network(hyper2F).nodes), len(hyper2F)

(4, 2)

In [8]:
hyper2F

{'e1008': (['SmallMolecule_183b1f102d3dea4f168192a09d3d97e3',
   'SmallMolecule_e9bcb452fc7ee059dc684e53e596e6e6'],
  ['SmallMolecule_632b165007a1a2082e2b24e2a4ff0719',
   'SmallMolecule_21d1714b69eec47d13f1ee5f51c33ad7']),
 'e992': (['SmallMolecule_632b165007a1a2082e2b24e2a4ff0719',
   'SmallMolecule_21d1714b69eec47d13f1ee5f51c33ad7'],
  ['SmallMolecule_183b1f102d3dea4f168192a09d3d97e3',
   'SmallMolecule_e9bcb452fc7ee059dc684e53e596e6e6'])}

In [9]:
with open('ProcessedHypergraphs/Chemical/pid2F.pkl', 'wb') as f:
    pickle.dump(hyper2F, f)

##### 3-F hypergraph

In [10]:
hyper3F = filter_NF(pid_data, 3, verbose=True)
len(standard_network(hyper3F).nodes), len(hyper3F)

9009 15858
Nodes originally in strongly connected projection 904
126 220
5 6
0 0
Error: A strongly connected hypergraph can't be constructed from this dataset with a restriction to 3 input nodes. Try with a different amount.


AttributeError: 'NoneType' object has no attribute 'values'

#### Reactome

In [11]:
with open('ParsedHyperedges/Chemical/reactome.pkl', 'rb') as f:
    reactome_data = pickle.load(f)

##### 2-F hypergraph

In [12]:
hyper2F = filter_NF(reactome_data, 2)
len(standard_network(hyper2F).nodes), len(hyper2F)

Error: A strongly connected hypergraph can't be constructed from this dataset with a restriction to 2 input nodes. Try with a different amount.


AttributeError: 'NoneType' object has no attribute 'values'

##### 3-F hypergraph

In [13]:
hyper3F = filter_NF(reactome_data, 3)
len(standard_network(hyper3F).nodes), len(hyper3F)

Error: A strongly connected hypergraph can't be constructed from this dataset with a restriction to 3 input nodes. Try with a different amount.


AttributeError: 'NoneType' object has no attribute 'values'

### KIDA

In [14]:
with open('ParsedHyperedges/Chemical/KIDA_termolecular.pkl', 'rb') as f:
    KIDA_dataT = pickle.load(f)
    
with open('ParsedHyperedges/Chemical/KIDA_unibimolecular.pkl', 'rb') as f:
    KIDA_dataU = pickle.load(f)
    
with open('ParsedHyperedges/Chemical/KIDA_surface.pkl', 'rb') as f:
    KIDA_dataS = pickle.load(f)
    
with open('ParsedHyperedges/Chemical/KIDA_uva.pkl', 'rb') as f:
    KIDA_dataV = pickle.load(f)

#### 2-F hypergraph

In [15]:
hyper2F = filter_NF(KIDA_dataT, 2)
len(standard_network(hyper2F).nodes), len(hyper2F)

Error: A strongly connected hypergraph can't be constructed from this dataset with a restriction to 2 input nodes. Try with a different amount.


AttributeError: 'NoneType' object has no attribute 'values'

In [16]:
hyper2F = filter_NF(KIDA_dataU, 2)
len(standard_network(hyper2F).nodes), len(hyper2F)

(467, 7329)

In [17]:
with open('ProcessedHypergraphs/Chemical/KIDA_unibimolecular2F.pkl', 'wb') as f:
    pickle.dump(hyper2F, f)

In [18]:
hyper2F = filter_NF(KIDA_dataS, 2)
len(standard_network(hyper2F).nodes), len(hyper2F)

(20, 37)

In [19]:
with open('ProcessedHypergraphs/Chemical/KIDA_surface2F.pkl', 'wb') as f:
    pickle.dump(hyper2F, f)

In [20]:
hyper2F = filter_NF(KIDA_dataV, 2)
len(standard_network(hyper2F).nodes), len(hyper2F)

(489, 7275)

In [21]:
with open('ProcessedHypergraphs/Chemical/KIDA_uva2F.pkl', 'wb') as f:
    pickle.dump(hyper2F, f)

### Reaction Template Generation

In [22]:
with open('ParsedHyperedges/Chemical/ReactionTemplate.pkl', 'rb') as f:
    RT_data = pickle.load(f)

#### 2-F hypergraph

In [23]:
hyper2F = filter_NF(RT_data, 2)
len(standard_network(hyper2F).nodes), len(hyper2F)

(55, 275)

In [24]:
with open('ProcessedHypergraphs/Chemical/ReactionTemplate2F.pkl', 'wb') as f:
    pickle.dump(hyper2F, f)

#### 3-F hypergraph

In [25]:
hyper3F = filter_NF(RT_data, 3)
len(standard_network(hyper3F).nodes), len(hyper3F)

Error: A strongly connected hypergraph can't be constructed from this dataset with a restriction to 3 input nodes. Try with a different amount.


AttributeError: 'NoneType' object has no attribute 'values'

## Metabolical reactions

### BiGG

In [27]:
folder = 'ParsedHyperedges/Metabolical-BiGG/'
for file in os.listdir(folder):
    
    print(file)
    
    with open(folder + file, 'rb') as f:
        BiGG_data = pickle.load(f)
    
    ## 3F
    
    hyper2F = filter_NF(BiGG_data, 2, verbose_err=False)
    
    if hyper2F:
        twoF = 'OK'
        with open(f'ProcessedHypergraphs/Metabolical-BiGG/{file.split(".pkl")[0]}-2F.pkl', 'wb') as f:
            pickle.dump(hyper2F, f)
    else:
        twoF = 'Fail'
    
            
    ## 3F
    
    hyper3F = filter_NF(BiGG_data, 3, verbose_err=False)            
    
    if hyper3F:
        threeF = 'OK'
        with open(f'ProcessedHypergraphs/Metabolical-BiGG/{file.split(".pkl")[0]}-3F.pkl', 'wb') as f:
            pickle.dump(hyper3F, f)
    else:
        threeF = 'Fail'
        
    print('--- 2:', twoF, '--- 3:', threeF)
        

iECBD_1354.pkl
--- 2: OK --- 3: Fail
iYL1228.pkl
--- 2: OK --- 3: Fail
iNF517.pkl
--- 2: Fail --- 3: Fail
iETEC_1333.pkl
--- 2: OK --- 3: Fail
iEC1368_DH5a.pkl
--- 2: OK --- 3: Fail
iECO26_1355.pkl
--- 2: OK --- 3: Fail
iAB_RBC_283.pkl
--- 2: OK --- 3: Fail
iAF1260b.pkl
--- 2: OK --- 3: Fail
iPC815.pkl
--- 2: Fail --- 3: Fail
iEcolC_1368.pkl
--- 2: OK --- 3: Fail
iECO103_1326.pkl
--- 2: OK --- 3: Fail
iSSON_1240.pkl
--- 2: OK --- 3: Fail
iRC1080.pkl
--- 2: OK --- 3: Fail
iJB785.pkl
--- 2: Fail --- 3: Fail
iCHOv1_DG44.pkl
--- 2: Fail --- 3: Fail
iECUMN_1333.pkl
--- 2: OK --- 3: Fail
iECDH1ME8569_1439.pkl
--- 2: OK --- 3: Fail
iECABU_c1320.pkl
--- 2: OK --- 3: Fail
iAPECO1_1312.pkl
--- 2: OK --- 3: Fail
iG2583_1286.pkl
--- 2: OK --- 3: Fail
iSynCJ816.pkl
--- 2: Fail --- 3: Fail
iECOK1_1307.pkl
--- 2: OK --- 3: Fail
iJN678.pkl
--- 2: OK --- 3: Fail
iNJ661.pkl
--- 2: OK --- 3: OK
iECSF_1327.pkl
--- 2: OK --- 3: Fail
iECs_1301.pkl
--- 2: OK --- 3: Fail
iAT_PLT_636.pkl
--- 2: Fail --- 3: Fai