In [1]:
import pandas as pd
import numpy as np
import igraph as ig
import hypernetx as hnx
import hypernetx.algorithms.hypergraph_modularity as hmod 
import xgi ## pip install xgi is required
import pickle
from collections import Counter
from functools import reduce
import itertools
from scipy.special import comb
import warnings
import random
from itertools import combinations
from itertools import combinations as combs

## Functions to compute various simpliciality measures are found in the included ```simpliciality.py``` file:
import simpliciality as spl 
import sr ## functions taken from Jordan's notebook

## Set this to the data directory
## NOTE: keep the trailing slash -- file paths are built by plain string
## concatenation (datadir+'Contacts/...'), not with os.path.join / pathlib
datadir='../Datasets/'


# XGI's hypergraphs



In [2]:
## read the edges and build H
## each line of the file is one hyperedge: a comma-separated list of node labels
## (labels are kept as strings)
with open(datadir+'Contacts/hyperedges-contact-primary.txt', 'r') as fp:
    ## the context manager closes the file as soon as the lines are read
    Lines = fp.readlines()

## skip blank lines (e.g. a trailing newline at the end of the file), which
## would otherwise become a spurious hyperedge {''} on a node named ''
E = [set(line.strip().split(',')) for line in Lines if line.strip()]
H = hnx.Hypergraph(dict(enumerate(E)))
V = [x for x in H.nodes]

## the two 'FES' values below differ only in the exclude_self flag:
## first with exclude_self=False, then with exclude_self=True
print('SF:',spl.get_simplicial_fraction(V,E),
      '\nES:',spl.get_edit_simpliciality(V,E),
      '\nFES:',spl.get_face_edit_simpliciality(V,E,exclude_self=False),
      '\nFES:',spl.get_face_edit_simpliciality(V,E,exclude_self=True))
print('simplicial ratio:',spl.get_simplicial_ratio(V,E,samples=100, multisets=False))


SF: 0.8470540758676351 
ES: 0.9175213057922866 
FES: 0.95064455704635 
FES: 0.9372632240823691
simplicial ratio: 2.702143666167907


### Get the data directly from XGI

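* number of nodes (`n`), number of edges (`m`), average edge size (`<s>`)
* FES: reported twice -- first including the maximal face itself, then excluding it
* Simplicial ratio: reported twice -- first via sampling from the CL model, then with the new approximation code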

SR (sampling from CL):

* disgenenet: 28.0821
* diseasome: 6.6147
* contact-high-school: 6.5836
* email-eu: 5.1355
* email-enron: 4.8800
* congress-bills: 4.4154
* ndc-substances: 4.0749
* contact-primary-school: 2.7043
* hospital-lyon: 0.9464
* tags-ask-ubuntu: 0.6652


In [3]:
def compute_all(fn="", min_size=2, max_size=11, samples=1000):
    """Load an XGI dataset and print its size statistics and simpliciality measures.

    Parameters
    ----------
    fn : str
        Dataset name passed to ``xgi.load_xgi_data``.
    min_size, max_size : int
        Only hyperedges whose size lies in ``[min_size, max_size]`` are kept
        (defaults 2 and 11, the values previously hard-coded here).
    samples : int
        Number of samples passed to ``spl.get_simplicial_ratio`` (default 1000,
        the value previously hard-coded here).

    Returns
    -------
    None
        All results are printed.
    """
    xgi_hypergraph = xgi.load_xgi_data(fn)

    ## drop duplicate hyperedges (compared as sorted tuples) and keep only
    ## those whose size is within the requested bounds
    unique_edges = {tuple(sorted(e)) for e in xgi_hypergraph.edges.members()
                    if min_size <= len(e) <= max_size}
    E = [set(e) for e in unique_edges]

    ## rebuild as an hnx hypergraph; its node list is what the spl/sr
    ## functions below receive as V
    hnx_hypergraph = hnx.Hypergraph(dict(enumerate(E)))
    V = [x for x in hnx_hypergraph.nodes]

    print('\nResults for',fn,':')
    print('n =',len(V),'\nm =',len(E),'\n<s> =',np.mean([len(e) for e in E]))
    print('SF:',spl.get_simplicial_fraction(V,E),
          '\nES:',spl.get_edit_simpliciality(V,E),
          '\nFES with max face:',spl.get_face_edit_simpliciality(V,E,exclude_self=False),
          '\nFES excluding max face:',spl.get_face_edit_simpliciality(V,E,exclude_self=True))
    print('simplicial ratio with CL model:',spl.get_simplicial_ratio(V, E, samples=samples, multisets=False))

    ## sr.simplicial_ratio returns five values; only the first (the ratio) is reported
    ratio_estimate, _, _, _, _ = sr.simplicial_ratio(V,E)
    print('simplicial ratio with estimation:',ratio_estimate)
    

In [4]:
## Dataset names handed to compute_all, one per line, in the order
## their results are printed
Datasets = [
    "contact-primary-school",
    "contact-high-school",
    "hospital-lyon",
    "email-enron",
    "email-eu",
    "diseasome",
    "disgenenet",
    "ndc-substances",
    "congress-bills",
    "tags-ask-ubuntu",
]

## run the full set of measures on every dataset
for fn in Datasets:
    compute_all(fn)


Results for contact-primary-school :
n = 242 
m = 12704 
<s> = 2.4188444584382873
SF: 0.8470540758676351 
ES: 0.9175213057922866 
FES with max face: 0.9506445570463496 
FES excluding max face: 0.9372632240823682
simplicial ratio with CL model: 2.701514125371669
simplicial ratio with estimation: 2.6949838943727484

Results for contact-high-school :
n = 327 
m = 7818 
<s> = 2.3269378357636223
SF: 0.8064655172413793 
ES: 0.9270722162931341 
FES with max face: 0.9393321276864207 
FES excluding max face: 0.9230435574995463
simplicial ratio with CL model: 6.563978124480862
simplicial ratio with estimation: 6.639574180926302

Results for hospital-lyon :
n = 75 
m = 1824 
<s> = 2.4270833333333335
SF: 0.9121338912133892 
ES: 0.9544740973312402 
FES with max face: 0.9774536866642135 
FES excluding max face: 0.9718947368421053
simplicial ratio with CL model: 0.9450000188472282
simplicial ratio with estimation: 0.9518087976050635

Results for email-enron :
n = 143 
m = 1442 
<s> = 2.9743411927877

In [5]:
## Sanity check -- naive FES (as this is a tiny graph)
## For every edge returned by spl.max_subsets, enumerate all of its proper
## sub-faces of size >= 2 and measure the fraction that are themselves edges;
## the result is the mean of these fractions.

H = xgi.load_xgi_data("diseasome")
E = list(set([tuple(sorted(e)) for e in H.edges.members() if len(e)<=11 and len(e)>=2]))
E = [set(e) for e in E]

max_E = spl.max_subsets(E)

## hash the edges once so that each face lookup is O(1) instead of a scan of the list E
edge_lookup = {frozenset(e) for e in E}

fes = 0
n_scored = 0  ## number of edges that contribute to the average
for e in max_E:
    num=den=0
    for k in range(2,len(e)): ## excluding self
        for f in combs(e,k):
            den +=1
            if frozenset(f) in edge_lookup:
                num += 1
    if den == 0:
        ## an edge with fewer than 3 nodes has no proper face of size >= 2;
        ## it is left out of the average rather than dividing by zero
        ## NOTE(review): confirm this convention matches spl.get_face_edit_simpliciality
        continue
    fes += num/den
    n_scored += 1

## nan (instead of a ZeroDivisionError) when no edge could be scored
fes/n_scored if n_scored else float('nan')


0.04065214786039885