In [1]:
import numpy as np
import networkx as nx
import xgi
import json
from tqdm import tqdm
from os import listdir
from scipy.spatial import distance
from multiprocess import Pool

In [2]:
from hypergraphs_distances import *
from hypergraphs_null_models import *

# Projection-preserving null models

The aim is to measure the higher-order (h-or) dissimilarity between the original hypergraph (H) and a randomized
version of it (H_null). The randomization preserves the underlying projected network, so that a standard
distance between two graphs (e.g. NetSimile) would return a distance of 0 between H and H_null.
Here I choose one dataset and consider two types of projection-preserving null models. I try this approach with both measures: Hyper NetSimile (HNS) and Hyperedge Portrait Divergence (HPD).

In [6]:
# Read the hyperedge list of the aggregated SocioPatterns dataset from disk.
dataset_path = '../data/SocioPatterns/aggr_15min_cliques_thr1_LH10.json'
with open(dataset_path) as file:
    data = json.load(file)

# Build the hypergraph and tidy it up in place: relabel and remove possible
# multiple edges (cleanup defaults, per the xgi docs -- TODO confirm against the
# installed xgi version). The explicit flags presumably keep isolated nodes and
# singleton edges and do not restrict H to a single connected component.
H = xgi.from_hyperedge_list(data)
H.cleanup(isolates=True, singletons=True, connected=False)

# Label identifying this dataset in the names of the saved result files.
tag = 'SocioPatterns_LH10'

## 1 - Random hyperedge projection (RHP)

Starting from the original hypergraph, I choose at random a fraction $f$ of hyperedges and project them down to a clique. Then I measure the distance between the original hypergraph and the progressively projected ones, as a function of $f$. I generate 10 realizations of the curve $d(H,H_{null}(f))$ as a function of $f$, and try with both measures: Hyper NetSimile and Hyperedge Portrait Divergence.

In [10]:
# Random hyperedge projection (RHP) experiment.
# For each of `n_curves` realizations, build one null hypergraph per value of f
# with `project_hedges`, and measure its distance from the empirical hypergraph H
# with both measures (HNS: normalized Canberra distance between feature vectors;
# HPD: `hyper_portrait_divergence` between edge portraits).

# Descriptors of the empirical hypergraph, computed once and reused for every
# comparison (also reused by the later RCP cell).
fv_data = feature_vec(H)
hp_data = edge_portrait(H)
curves_HNS = []
curves_HPD = []
n_curves = 10

# Full grid of fractions, including f=0 and excluding f=1 (endpoint=False).
# Only the non-zero fractions are actually simulated: at f=0 the distance is 0.
f_space = np.linspace(0., 1., 30, endpoint=False)
f_nonzero = f_space[1:]

# A single worker pool is shared by all realizations. The context manager
# guarantees the worker processes are released when the loop ends (or fails),
# instead of leaking a fresh, never-closed pool at every iteration.
with Pool(processes=5) as pool:
    for i in tqdm(range(n_curves)):
        # The same seed i is used for every f within one curve.
        Hs_null = [project_hedges(H, f, seed=i) for f in f_nonzero]

        # Parallelize the expensive descriptor computations.
        FVs_null = pool.map(feature_vec, Hs_null)
        HPs_null = pool.map(edge_portrait, Hs_null)

        # Prepend the known value d=0 at f=0 so that curves align with f_space.
        curve_i_HNS = [0.] + [
            distance.canberra(fv_data, f_n) / len(fv_data) for f_n in FVs_null
        ]
        curve_i_HPD = [0.] + [
            hyper_portrait_divergence(hp_data, hp_n) for hp_n in HPs_null
        ]
        curves_HNS.append(curve_i_HNS)
        curves_HPD.append(curve_i_HPD)

# f_space includes f=0, consistently with the curves above.
results = (list(f_space), curves_HNS, curves_HPD)

# save results
with open(f"../results/null_rhp_HNS_HPD_{tag}.json", "w") as fp:
    json.dump(results, fp)

100%|██████████████████████████████████████████| 10/10 [19:01<00:00, 114.16s/it]


## 2 - Random clique promotion (RCP) of the projected graph

Randomization: first, I project the empirical hypergraph onto the corresponding pairwise network.
Then, I randomly promote a fraction $f$ of cliques to hyperedges, at every order that is present in the original hypergraph. $f$ is thus the number of promoted $d$-cliques over the number of $d$-hyperedges in the original hypergraph, and I take it to be constant across orders.
Note that hyperedges might overlap, because each $d$-clique always contains all the $(d-1)$-, $(d-2)$-, ..., $3$-cliques.

In [11]:
# Random clique promotion (RCP) experiment.
# Project H onto its pairwise graph G, then for each fraction f build a null
# hypergraph with `flag_hypergraph`, asking for round(f * count) hyperedges of
# each size, and measure its distance from the empirical hypergraph.
# Reuses `fv_data` and `hp_data` (descriptors of H) computed in the RHP cell.
G = xgi.to_graph(H)
curves_HNS = []
curves_HPD = []
n_curves = 10

# Number of hyperedges of each size, for sizes 3 .. largest size in H.
# The size list is extracted once (it does not depend on the size being
# counted), and the largest size is taken with max() so the result does not
# rely on the ordering of the list of unique sizes.
edge_sizes = H.edges.size.aslist()
s_count = [edge_sizes.count(size) for size in range(3, 1 + max(edge_sizes))]

# f=0 is skipped; f ranges up to 1.5.
f_space = np.linspace(0., 1.5, 41)[1:]

# A single worker pool is shared by all realizations. The context manager
# guarantees the worker processes are released when the loop ends (or fails),
# instead of leaking a fresh, never-closed pool at every iteration.
with Pool(processes=5) as pool:
    for i in tqdm(range(n_curves)):
        # The same seed i is used for every f within one curve.
        Hs_null = []
        for f in f_space:
            ns = [int(np.round(f * count)) for count in s_count]
            Hs_null.append(flag_hypergraph(G, ns, seed=i))

        # Parallelize the expensive descriptor computations.
        FVs_null = pool.map(feature_vec, Hs_null)
        HPs_null = pool.map(edge_portrait, Hs_null)

        curve_i_HNS = [distance.canberra(fv_data, f_n) / len(fv_data) for f_n in FVs_null]
        curve_i_HPD = [hyper_portrait_divergence(hp_data, hp_n) for hp_n in HPs_null]
        curves_HNS.append(curve_i_HNS)
        curves_HPD.append(curve_i_HPD)

results = (list(f_space), curves_HNS, curves_HPD)

# save results
with open(f"../results/null_rcp_HNS_HPD_{tag}.json", "w") as fp:
    json.dump(results, fp)

100%|████████████████████████████████████████| 10/10 [1:22:03<00:00, 492.40s/it]
