# Hypergraph processing and filtering

In [1]:
import pickle
from copy import copy
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from itertools import product, permutations
from collections import defaultdict

from useful_functions import *

In [2]:
def filter_NF(dictionary, B, verbose=False):
    
    prev_dictionary = copy(dictionary)
    
    # Filtering: strongly connected component
    G = standard_network(prev_dictionary)
    if verbose:
        print(len(G.nodes), len(G.edges))
    
    largest = max(nx.strongly_connected_components(G), key=len)
    not_in_largest = set(G.nodes).difference(largest)
    
    if verbose:
        print("Nodes originally in strongly connected projection", len(largest))
    
    strong_conn = nx.is_strongly_connected(G)
    while not strong_conn:

        # Keep only LCC
        next_dictionary = {}
        for key, val in prev_dictionary.items():

            include = True
            for node in val[0] + val[1]:
                if node in not_in_largest:
                    include = False
                    break

            if len(val[0]) == B and include:
                next_dictionary[key] = val
                
        prev_dictionary = copy(next_dictionary)
        
        # Filtering with the new subhypergraph
        G = standard_network(prev_dictionary)
        if verbose:
            print(len(G.nodes), len(G.edges))
        
        try: 
            largest = max(nx.strongly_connected_components(G), key=len)
            not_in_largest = set(G.nodes).difference(largest)
        except ValueError:
            print(f"Error: A strongly connected hypergraph can't be constructed from this dataset with a restriction to {B} input nodes. Try with a different amount.")
            return None
        
        strong_conn = nx.is_strongly_connected(G)
        
    return next_dictionary

## Astrochem

In [4]:
with open('ParsedHyperedges/astrochem.pkl', 'rb') as f:
    astrochem_data = pickle.load(f)

### 2-F hypergraph

In [5]:
hyper2F = filter_NF(astrochem_data, 2)
len(standard_network(hyper2F).nodes), len(hyper2F)

(439, 5123)

In [6]:
with open('ProcessedHypergraphs/astrochem2F.pkl', 'wb') as f:
    pickle.dump(hyper2F, f)

## Pathways

### PID

In [14]:
with open('ParsedHyperedges/pid.pkl', 'rb') as f:
    pid_data = pickle.load(f)

#### 2-F hypergraph

In [15]:
hyper2F = filter_NF(pid_data, 2)
len(standard_network(hyper2F).nodes), len(hyper2F)

(4, 2)

In [19]:
hyper2F

{'e992': (['SmallMolecule_21d1714b69eec47d13f1ee5f51c33ad7',
   'SmallMolecule_632b165007a1a2082e2b24e2a4ff0719'],
  ['SmallMolecule_e9bcb452fc7ee059dc684e53e596e6e6',
   'SmallMolecule_183b1f102d3dea4f168192a09d3d97e3']),
 'e1008': (['SmallMolecule_e9bcb452fc7ee059dc684e53e596e6e6',
   'SmallMolecule_183b1f102d3dea4f168192a09d3d97e3'],
  ['SmallMolecule_21d1714b69eec47d13f1ee5f51c33ad7',
   'SmallMolecule_632b165007a1a2082e2b24e2a4ff0719'])}

In [11]:
with open('ProcessedHypergraphs/pid2F.pkl', 'wb') as f:
    pickle.dump(hyper2F, f)

#### 3-F hypergraph

In [30]:
hyper3F = filter_NF(pid_data, 3, verbose=True)
len(standard_network(hyper3F).nodes), len(hyper3F)

9009 15858
Nodes originally in strongly connected projection 904
126 220
5 6
0 0
Error: A strongly connected hypergraph can't be constructed from this dataset with a restriction to 3 input nodes. Try with a different amount.


AttributeError: 'NoneType' object has no attribute 'values'

### Reactome

In [6]:
with open('ParsedHyperedges/reactome.pkl', 'rb') as f:
    reactome_data = pickle.load(f)

#### 2-F hypergraph

In [7]:
hyper2F = filter_NF(reactome_data, 2)
len(standard_network(hyper2F).nodes), len(hyper2F)

Error: A strongly connected hypergraph can't be constructed from this dataset with a restriction to 2 input nodes. Try with a different amount.


AttributeError: 'NoneType' object has no attribute 'values'

## KIDA

In [42]:
with open('ParsedHyperedges/KIDA_termolecular.pkl', 'rb') as f:
    KIDA_dataT = pickle.load(f)
    
with open('ParsedHyperedges/KIDA_unibimolecular.pkl', 'rb') as f:
    KIDA_dataU = pickle.load(f)
    
with open('ParsedHyperedges/KIDA_surface.pkl', 'rb') as f:
    KIDA_dataS = pickle.load(f)
    
with open('ParsedHyperedges/KIDA_uva.pkl', 'rb') as f:
    KIDA_dataV = pickle.load(f)

### 2-F hypergraph

In [37]:
hyper2F = filter_NF(KIDA_dataT, 2)
len(standard_network(hyper2F).nodes), len(hyper2F)

Error: A strongly connected hypergraph can't be constructed from this dataset with a restriction to 2 input nodes. Try with a different amount.


AttributeError: 'NoneType' object has no attribute 'values'

In [40]:
hyper2F = filter_NF(KIDA_dataU, 2)
len(standard_network(hyper2F).nodes), len(hyper2F)

(467, 7329)

In [41]:
with open('ProcessedHypergraphs/KIDA_unibimolecular2F.pkl', 'wb') as f:
    pickle.dump(hyper2F, f)

In [38]:
hyper2F = filter_NF(KIDA_dataS, 2)
len(standard_network(hyper2F).nodes), len(hyper2F)

(20, 37)

In [39]:
with open('ProcessedHypergraphs/KIDA_surface2F.pkl', 'wb') as f:
    pickle.dump(hyper2F, f)

In [43]:
hyper2F = filter_NF(KIDA_dataV, 2)
len(standard_network(hyper2F).nodes), len(hyper2F)

(489, 7275)

In [44]:
with open('ProcessedHypergraphs/KIDA_uva2F.pkl', 'wb') as f:
    pickle.dump(hyper2F, f)

## Reaction Template Generation

In [3]:
with open('ParsedHyperedges/ReactionTemplate.pkl', 'rb') as f:
    RT_data = pickle.load(f)

### 2-F hypergraph

In [4]:
hyper2F = filter_NF(RT_data, 2)
len(standard_network(hyper2F).nodes), len(hyper2F)

(55, 275)

In [5]:
with open('ProcessedHypergraphs/ReactionTemplate2F.pkl', 'wb') as f:
    pickle.dump(hyper2F, f)