In [1]:
import pandas as pd
import numpy as np
import igraph as ig
import hypernetx as hnx
import hypernetx.algorithms.hypergraph_modularity as hmod 
import xgi ## pip install xgi is required
import pickle
from collections import Counter
from functools import reduce
import itertools
from scipy.special import comb
import warnings
import random
from itertools import combinations
from itertools import combinations as combs

## Functions to compute various simpliciality measures are found in the included ```simpliciality.py``` file:
import simpliciality as spl

## Set this to the data directory.
## File paths below are built by plain string concatenation (datadir+'Contacts/...'),
## so the value must end with a path separator.
datadir='../Datasets/'


# XGI's hypergraphs



In [2]:
## read the edges and build H
## Each line of the file is one hyperedge written as comma-separated node labels.
## The context manager closes the file as soon as the edges are parsed
## (a bare open() would leave the handle open for the life of the kernel).
with open(datadir+'Contacts/hyperedges-contact-primary.txt', 'r') as fp:
    E = [set(line.strip().split(',')) for line in fp]
## HyperNetX hypergraph keyed by edge index; used here to obtain the node list
H = hnx.Hypergraph(dict(enumerate(E)))
V = [x for x in H.nodes]
## The two FES lines are computed with exclude_self=False, then exclude_self=True
print('SF:',spl.get_simplicial_fraction(V,E),
      '\nES:',spl.get_edit_simpliciality(V,E),
      '\nFES:',spl.get_face_edit_simpliciality(V,E,exclude_self=False),
      '\nFES:',spl.get_face_edit_simpliciality(V,E,exclude_self=True))
print('simplicial ratio:',spl.get_simplicial_ratio(V,E,samples=100, multisets=False))


SF: 0.8470540758676351 
ES: 0.9175213057922866 
FES: 0.95064455704635 
FES: 0.9372632240823691
simplicial ratio: 2.6995678758255437


### get the data directly from XGI

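Each cell below prints, in order:

* number of nodes, number of edges, average edge size
* SF and ES, then FES twice: first including the edge itself (`exclude_self=False`), then excluding it (`exclude_self=True`)
* Simplicial ratio twice: first via sampling from the CL model (`spl.get_simplicial_ratio`), second with the new approximation code (`sr.simplicial_ratio`)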

SR (sampling from CL):

* disgenenet: 28.0821
* diseasome: 6.6147
* contact-high-school: 6.5836
* email-eu: 5.1355
* email-enron: 4.8800
* congress-bills: 4.4154
* ndc-substances: 4.0749
* contact-primary-school: 2.7043
* hospital-lyon: 0.9464
* tags-ask-ubuntu: 0.6652


In [14]:
import sr


12704

In [18]:
## T1: SR = 1.89
## Load "contact-primary-school" from XGI and keep each distinct hyperedge with 2 to 11 nodes once.
H = xgi.load_xgi_data("contact-primary-school")
## dict.fromkeys de-duplicates in first-seen order. list(set(...)) gave an arbitrary
## order that can change between kernel sessions (hash randomization applies to
## string labels), which reorders the edge list handed to the measures below.
E = [set(e) for e in dict.fromkeys(tuple(sorted(m)) for m in H.edges.members() if 2 <= len(m) <= 11)]
## Rebuild as a HyperNetX hypergraph to obtain the node list
H = hnx.Hypergraph(dict(enumerate(E)))
V = [x for x in H.nodes]
## nodes, edges, average edge size
print(len(V),len(E),np.mean([len(e) for e in E]))
## The two FES lines are computed with exclude_self=False, then exclude_self=True
print('SF:',spl.get_simplicial_fraction(V,E),
      '\nES:',spl.get_edit_simpliciality(V,E),
      '\nFES:',spl.get_face_edit_simpliciality(V,E,exclude_self=False),
      '\nFES:',spl.get_face_edit_simpliciality(V,E,exclude_self=True))
print('simplicial ratio:',spl.get_simplicial_ratio(V, E, samples=100, multisets=False))
## Second estimate from sr.simplicial_ratio; only the first returned value is printed
r, _a, _b, _c, _d = sr.simplicial_ratio(V,E)
print('simplicial ratio:',r)


242 12704 2.4188444584382873
SF: 0.8470540758676351 
ES: 0.9175213057922866 
FES: 0.9506445570463492 
FES: 0.9372632240823682
simplicial ratio: 2.6970867999420594
simplicial ratio: 2.6758499268882394


In [19]:
## T1: SR = 6.16
## Load "contact-high-school" from XGI and keep each distinct hyperedge with 2 to 11 nodes once.
H = xgi.load_xgi_data("contact-high-school")
## dict.fromkeys de-duplicates in first-seen order. list(set(...)) gave an arbitrary
## order that can change between kernel sessions (hash randomization applies to
## string labels), which reorders the edge list handed to the measures below.
E = [set(e) for e in dict.fromkeys(tuple(sorted(m)) for m in H.edges.members() if 2 <= len(m) <= 11)]
## Rebuild as a HyperNetX hypergraph to obtain the node list
H = hnx.Hypergraph(dict(enumerate(E)))
V = [x for x in H.nodes]
## nodes, edges, average edge size
print(len(V),len(E),np.mean([len(e) for e in E]))
## The two FES lines are computed with exclude_self=False, then exclude_self=True
print('SF:',spl.get_simplicial_fraction(V,E),
      '\nES:',spl.get_edit_simpliciality(V,E),
      '\nFES:',spl.get_face_edit_simpliciality(V,E,exclude_self=False),
      '\nFES:',spl.get_face_edit_simpliciality(V,E,exclude_self=True))
print('simplicial ratio:',spl.get_simplicial_ratio(V, E, samples=100, multisets=False))
## Second estimate from sr.simplicial_ratio; only the first returned value is printed
r, _a, _b, _c, _d = sr.simplicial_ratio(V,E)
print('simplicial ratio:',r)


327 7818 2.3269378357636223
SF: 0.8064655172413793 
ES: 0.9270722162931341 
FES: 0.9393321276864206 
FES: 0.9230435574995465
simplicial ratio: 6.520150730253598
simplicial ratio: 6.7325350528869805


In [57]:
## T1: SR = 0.97
## Load "hospital-lyon" from XGI and keep each distinct hyperedge with 2 to 11 nodes once.
H = xgi.load_xgi_data("hospital-lyon")
## dict.fromkeys de-duplicates in first-seen order. list(set(...)) gave an arbitrary
## order that can change between kernel sessions (hash randomization applies to
## string labels), which reorders the edge list handed to the measures below.
E = [set(e) for e in dict.fromkeys(tuple(sorted(m)) for m in H.edges.members() if 2 <= len(m) <= 11)]
## Rebuild as a HyperNetX hypergraph to obtain the node list
H = hnx.Hypergraph(dict(enumerate(E)))
V = [x for x in H.nodes]
## nodes, edges, average edge size
print(len(V),len(E),np.mean([len(e) for e in E]))
## The two FES lines are computed with exclude_self=False, then exclude_self=True
print('SF:',spl.get_simplicial_fraction(V,E),
      '\nES:',spl.get_edit_simpliciality(V,E),
      '\nFES:',spl.get_face_edit_simpliciality(V,E,exclude_self=False),
      '\nFES:',spl.get_face_edit_simpliciality(V,E,exclude_self=True))
print('simplicial ratio:',spl.get_simplicial_ratio(V, E, samples=100, multisets=False))
## Second estimate from sr.simplicial_ratio; only the first returned value is printed
r, _a, _b, _c, _d = sr.simplicial_ratio(V,E)
print('simplicial ratio:',r)


75 1824 2.4270833333333335
SF: 0.9121338912133892 
ES: 0.9544740973312402 
FES: 0.9774536866642132 
FES: 0.9718947368421053
simplicial ratio: 0.9452173585190211
simplicial ratio: 0.9558316153217449


In [91]:
## Load "hospital-lyon" and build E: one copy of every distinct edge, in order of
## first appearance, restricted to edges with 2 to 11 nodes.
H = xgi.load_xgi_data("hospital-lyon")
sorted_members = (tuple(sorted(members)) for members in H.edges.members())
## dict.fromkeys keeps the first occurrence of each tuple and preserves that order
E = [set(edge) for edge in dict.fromkeys(sorted_members) if 2 <= len(edge) <= 11]


In [92]:
## Build the HyperNetX hypergraph from the current edge list E and collect its nodes
H = hnx.Hypergraph({idx: edge for idx, edge in enumerate(E)})
V = list(H.nodes)

## nodes, edges, average edge size
edge_sizes = [len(edge) for edge in E]
print(len(V), len(E), np.mean(edge_sizes))

## Simpliciality measures; FES is computed with the edge itself included, then excluded
sf = spl.get_simplicial_fraction(V, E)
es = spl.get_edit_simpliciality(V, E)
fes_with_self = spl.get_face_edit_simpliciality(V, E, exclude_self=False)
fes_without_self = spl.get_face_edit_simpliciality(V, E, exclude_self=True)
print('SF:', sf, '\nES:', es, '\nFES:', fes_with_self, '\nFES:', fes_without_self)

## Simplicial ratio: sampled estimate first, then the value from sr.simplicial_ratio
sampled_ratio = spl.get_simplicial_ratio(V, E, samples=100, multisets=False)
print('simplicial ratio:', sampled_ratio)
r, _a, _b, _c, _d = sr.simplicial_ratio(V, E)
print('simplicial ratio:', r)


75 1824 2.4270833333333335
SF: 0.9121338912133892 
ES: 0.9544740973312402 
FES: 0.9774536866642127 
FES: 0.9718947368421051
simplicial ratio: 0.9493371301769546
simplicial ratio: 0.9389181525560918


In [93]:
## Display _a, the second value unpacked from the most recent sr.simplicial_ratio(V,E) call
## (its meaning is defined in sr.py -- TODO confirm and describe here)
_a

1.710801005534993

In [94]:
## Display _b, the third value unpacked from the most recent sr.simplicial_ratio(V,E) call
## (its meaning is defined in sr.py -- TODO confirm and describe here)
_b

0.16703529957719065

In [23]:
## T1: SR = 5.20
## Load "email-enron" from XGI and keep each distinct hyperedge with 2 to 11 nodes once.
H = xgi.load_xgi_data("email-enron")
## dict.fromkeys de-duplicates in first-seen order. list(set(...)) gave an arbitrary
## order that can change between kernel sessions (hash randomization applies to
## string labels), which reorders the edge list handed to the measures below.
E = [set(e) for e in dict.fromkeys(tuple(sorted(m)) for m in H.edges.members() if 2 <= len(m) <= 11)]
## Rebuild as a HyperNetX hypergraph to obtain the node list
H = hnx.Hypergraph(dict(enumerate(E)))
V = [x for x in H.nodes]
## nodes, edges, average edge size
print(len(V),len(E),np.mean([len(e) for e in E]))
## The two FES lines are computed with exclude_self=False, then exclude_self=True
print('SF:',spl.get_simplicial_fraction(V,E),
      '\nES:',spl.get_edit_simpliciality(V,E),
      '\nFES:',spl.get_face_edit_simpliciality(V,E,exclude_self=False),
      '\nFES:',spl.get_face_edit_simpliciality(V,E,exclude_self=True))
print('simplicial ratio:',spl.get_simplicial_ratio(V, E, samples=100, multisets=False))
## Second estimate from sr.simplicial_ratio; only the first returned value is printed
r, _a, _b, _c, _d = sr.simplicial_ratio(V,E)
print('simplicial ratio:',r)


143 1442 2.9743411927877945
SF: 0.3127962085308057 
ES: 0.052244483895511035 
FES: 0.5338142787783793 
FES: 0.4982287869950806
simplicial ratio: 4.830359005480855
simplicial ratio: 4.932440807313028


In [95]:
## Load "email-enron" and build E: one copy of every distinct edge, in order of
## first appearance, restricted to edges with 2 to 11 nodes.
H = xgi.load_xgi_data("email-enron")
sorted_members = (tuple(sorted(members)) for members in H.edges.members())
## dict.fromkeys keeps the first occurrence of each tuple and preserves that order
E = [set(edge) for edge in dict.fromkeys(sorted_members) if 2 <= len(edge) <= 11]


In [96]:
## Build the HyperNetX hypergraph from the current edge list E and collect its nodes
H = hnx.Hypergraph({idx: edge for idx, edge in enumerate(E)})
V = list(H.nodes)

## nodes, edges, average edge size
edge_sizes = [len(edge) for edge in E]
print(len(V), len(E), np.mean(edge_sizes))

## Simpliciality measures; FES is computed with the edge itself included, then excluded
sf = spl.get_simplicial_fraction(V, E)
es = spl.get_edit_simpliciality(V, E)
fes_with_self = spl.get_face_edit_simpliciality(V, E, exclude_self=False)
fes_without_self = spl.get_face_edit_simpliciality(V, E, exclude_self=True)
print('SF:', sf, '\nES:', es, '\nFES:', fes_with_self, '\nFES:', fes_without_self)

## Simplicial ratio: sampled estimate first, then the value from sr.simplicial_ratio
sampled_ratio = spl.get_simplicial_ratio(V, E, samples=100, multisets=False)
print('simplicial ratio:', sampled_ratio)
r, _a, _b, _c, _d = sr.simplicial_ratio(V, E)
print('simplicial ratio:', r)


143 1442 2.9743411927877945
SF: 0.3127962085308057 
ES: 0.052244483895511035 
FES: 0.5338142787783796 
FES: 0.49822878699508083
simplicial ratio: 4.804663183112939
simplicial ratio: 4.939131954763358


In [97]:
## Display _a, the second value unpacked from the most recent sr.simplicial_ratio(V,E) call
## (its meaning is defined in sr.py -- TODO confirm and describe here)
_a

5.959148997639497

In [98]:
## Display _b, the third value unpacked from the most recent sr.simplicial_ratio(V,E) call
## (its meaning is defined in sr.py -- TODO confirm and describe here)
_b

3.919114911887219

In [24]:
## T1: SR = 4.74
## Load "email-eu" from XGI and keep each distinct hyperedge with 2 to 11 nodes once.
H = xgi.load_xgi_data("email-eu")
## dict.fromkeys de-duplicates in first-seen order. list(set(...)) gave an arbitrary
## order that can change between kernel sessions (hash randomization applies to
## string labels), which reorders the edge list handed to the measures below.
E = [set(e) for e in dict.fromkeys(tuple(sorted(m)) for m in H.edges.members() if 2 <= len(m) <= 11)]
## Rebuild as a HyperNetX hypergraph to obtain the node list
H = hnx.Hypergraph(dict(enumerate(E)))
V = [x for x in H.nodes]
## nodes, edges, average edge size
print(len(V),len(E),np.mean([len(e) for e in E]))
## The two FES lines are computed with exclude_self=False, then exclude_self=True
print('SF:',spl.get_simplicial_fraction(V,E),
      '\nES:',spl.get_edit_simpliciality(V,E),
      '\nFES:',spl.get_face_edit_simpliciality(V,E,exclude_self=False),
      '\nFES:',spl.get_face_edit_simpliciality(V,E,exclude_self=True))
print('simplicial ratio:',spl.get_simplicial_ratio(V, E, samples=100, multisets=False))
## Second estimate from sr.simplicial_ratio; only the first returned value is printed
r, _a, _b, _c, _d = sr.simplicial_ratio(V,E)
print('simplicial ratio:',r)


967 23729 3.1225083231488897
SF: 0.3170553935860058 
ES: 0.04924735333611438 
FES: 0.5543567609816558 
FES: 0.5217400302065671
simplicial ratio: 5.129422939919259
simplicial ratio: 5.176825059870849


In [31]:
## T1: SR = 6.46
## Load "diseasome" from XGI and keep each distinct hyperedge with 2 to 11 nodes once.
H = xgi.load_xgi_data("diseasome")
## dict.fromkeys de-duplicates in first-seen order. list(set(...)) gave an arbitrary
## order that can change between kernel sessions (hash randomization applies to
## string labels), which reorders the edge list handed to the measures below.
E = [set(e) for e in dict.fromkeys(tuple(sorted(m)) for m in H.edges.members() if 2 <= len(m) <= 11)]
## Rebuild as a HyperNetX hypergraph to obtain the node list
H = hnx.Hypergraph(dict(enumerate(E)))
V = [x for x in H.nodes]
## nodes, edges, average edge size
print(len(V),len(E),np.mean([len(e) for e in E]))
## The two FES lines are computed with exclude_self=False, then exclude_self=True
print('SF:',spl.get_simplicial_fraction(V,E),
      '\nES:',spl.get_edit_simpliciality(V,E),
      '\nFES:',spl.get_face_edit_simpliciality(V,E,exclude_self=False),
      '\nFES:',spl.get_face_edit_simpliciality(V,E,exclude_self=True))
print('simplicial ratio:',spl.get_simplicial_ratio(V, E, samples=100, multisets=False))
## Second estimate from sr.simplicial_ratio; only the first returned value is printed
r, _a, _b, _c, _d = sr.simplicial_ratio(V,E)
print('simplicial ratio:',r)


516 314 3.0
SF: 0.0 
ES: 0.04538228067639832 
FES: 0.19501913364541054 
FES: 0.04065214786039885
simplicial ratio: 6.181818181818182
simplicial ratio: 6.481145743429135


In [32]:
## Sanity check -- naive FES (as this is a tiny graph)
## For every maximal edge, enumerate its faces of size 2 .. len(e)-1 and take the
## fraction that are themselves edges of E; the cell displays the mean of these
## fractions over the maximal edges.
max_E = spl.max_subsets(E)
## Hash-based lookup of the edges: testing `set(f) in E` against the list E
## rescans the whole list for every face. frozenset compares equal to the
## matching set, so the counts are unchanged.
edge_lookup = {frozenset(e) for e in E}
fes = 0
for e in max_E:
    num=den=0
    for k in range(2,len(e)): ## excluding self
        for f in combs(e,k):
            den +=1
            if frozenset(f) in edge_lookup:
                num += 1
    ## NOTE: den stays 0 for a maximal edge with fewer than 3 nodes, so this
    ## division raises ZeroDivisionError on such input.
    fes += num/den
fes/len(max_E)

0.04065214786039885

In [26]:
## T1: SR = 15.99
## Load "disgenenet" from XGI and keep each distinct hyperedge with 2 to 11 nodes once.
H = xgi.load_xgi_data("disgenenet")
## dict.fromkeys de-duplicates in first-seen order. list(set(...)) gave an arbitrary
## order that can change between kernel sessions (hash randomization applies to
## string labels), which reorders the edge list handed to the measures below.
E = [set(e) for e in dict.fromkeys(tuple(sorted(m)) for m in H.edges.members() if 2 <= len(m) <= 11)]
## Rebuild as a HyperNetX hypergraph to obtain the node list
H = hnx.Hypergraph(dict(enumerate(E)))
V = [x for x in H.nodes]
## nodes, edges, average edge size
print(len(V),len(E),np.mean([len(e) for e in E]))
## The two FES lines are computed with exclude_self=False, then exclude_self=True
print('SF:',spl.get_simplicial_fraction(V,E),
      '\nES:',spl.get_edit_simpliciality(V,E),
      '\nFES:',spl.get_face_edit_simpliciality(V,E,exclude_self=False),
      '\nFES:',spl.get_face_edit_simpliciality(V,E,exclude_self=True))
print('simplicial ratio:',spl.get_simplicial_ratio(V, E, samples=100, multisets=False))
## Second estimate from sr.simplicial_ratio; only the first returned value is printed
r, _a, _b, _c, _d = sr.simplicial_ratio(V,E)
print('simplicial ratio:',r)


1982 760 5.1381578947368425
SF: 0.0 
ES: 0.004779934338796714 
FES: 0.08263170659682112 
FES: 0.007864665990617562
simplicial ratio: 29.496402877697843
simplicial ratio: 28.484757297380565


In [27]:
## T1: SR = 10.30
## Load "ndc-substances" from XGI and keep each distinct hyperedge with 2 to 11 nodes once.
H = xgi.load_xgi_data("ndc-substances")
## dict.fromkeys de-duplicates in first-seen order. list(set(...)) gave an arbitrary
## order that can change between kernel sessions (hash randomization applies to
## string labels), which reorders the edge list handed to the measures below.
E = [set(e) for e in dict.fromkeys(tuple(sorted(m)) for m in H.edges.members() if 2 <= len(m) <= 11)]
## Rebuild as a HyperNetX hypergraph to obtain the node list
H = hnx.Hypergraph(dict(enumerate(E)))
V = [x for x in H.nodes]
## nodes, edges, average edge size
print(len(V),len(E),np.mean([len(e) for e in E]))
## The two FES lines are computed with exclude_self=False, then exclude_self=True
print('SF:',spl.get_simplicial_fraction(V,E),
      '\nES:',spl.get_edit_simpliciality(V,E),
      '\nFES:',spl.get_face_edit_simpliciality(V,E,exclude_self=False),
      '\nFES:',spl.get_face_edit_simpliciality(V,E,exclude_self=True))
print('simplicial ratio:',spl.get_simplicial_ratio(V, E, samples=100, multisets=False))
## Second estimate from sr.simplicial_ratio; only the first returned value is printed
r, _a, _b, _c, _d = sr.simplicial_ratio(V,E)
print('simplicial ratio:',r)


2740 4754 5.155658392932268
SF: 0.020143487858719646 
ES: 0.005277033139634914 
FES: 0.11846931111425671 
FES: 0.07047994455353011
simplicial ratio: 4.08664205812363
simplicial ratio: 4.181305339903698


In [28]:
## T1: SR = 4.88
## Load "congress-bills" from XGI and keep each distinct hyperedge with 2 to 11 nodes once.
H = xgi.load_xgi_data("congress-bills")
## dict.fromkeys de-duplicates in first-seen order. list(set(...)) gave an arbitrary
## order that can change between kernel sessions (hash randomization applies to
## string labels), which reorders the edge list handed to the measures below.
E = [set(e) for e in dict.fromkeys(tuple(sorted(m)) for m in H.edges.members() if 2 <= len(m) <= 11)]
## Rebuild as a HyperNetX hypergraph to obtain the node list
H = hnx.Hypergraph(dict(enumerate(E)))
V = [x for x in H.nodes]
## nodes, edges, average edge size
print(len(V),len(E),np.mean([len(e) for e in E]))
## The two FES lines are computed with exclude_self=False, then exclude_self=True
print('SF:',spl.get_simplicial_fraction(V,E),
      '\nES:',spl.get_edit_simpliciality(V,E),
      '\nFES:',spl.get_face_edit_simpliciality(V,E,exclude_self=False),
      '\nFES:',spl.get_face_edit_simpliciality(V,E,exclude_self=True))
print('simplicial ratio:',spl.get_simplicial_ratio(V, E, samples=100, multisets=False))
## Second estimate from sr.simplicial_ratio; only the first returned value is printed
r, _a, _b, _c, _d = sr.simplicial_ratio(V,E)
print('simplicial ratio:',r)


1715 58788 4.947659386269307
SF: 0.02909811429970835 
ES: 0.005814213391956787 
FES: 0.14448924148307088 
FES: 0.10059626292667515
simplicial ratio: 4.405767626065907
simplicial ratio: 4.424199827071782


In [13]:
## T1: SR = .69
## Load "tags-ask-ubuntu" from XGI and keep each distinct hyperedge with 2 to 11 nodes once.
H = xgi.load_xgi_data("tags-ask-ubuntu")
## dict.fromkeys de-duplicates in first-seen order. list(set(...)) gave an arbitrary
## order that can change between kernel sessions (hash randomization applies to
## string labels), which reorders the edge list handed to the measures below.
E = [set(e) for e in dict.fromkeys(tuple(sorted(m)) for m in H.edges.members() if 2 <= len(m) <= 11)]
## Rebuild as a HyperNetX hypergraph to obtain the node list
H = hnx.Hypergraph(dict(enumerate(E)))
V = [x for x in H.nodes]
## nodes, edges, average edge size
print(len(V),len(E),np.mean([len(e) for e in E]))
## The two FES lines are computed with exclude_self=False, then exclude_self=True
print('SF:',spl.get_simplicial_fraction(V,E),
      '\nES:',spl.get_edit_simpliciality(V,E),
      '\nFES:',spl.get_face_edit_simpliciality(V,E,exclude_self=False),
      '\nFES:',spl.get_face_edit_simpliciality(V,E,exclude_self=True))
## Only the sampled simplicial ratio is computed in this cell
print('simplicial ratio:',spl.get_simplicial_ratio(V, E, samples=100, multisets=False))

3021 145053 3.42722315291652
SF: 0.15149467561903948 
ES: 0.24870932730705064 
FES: 0.5251377594076132 
FES: 0.4576852328034244
simplicial ratio: 0.6652540821821216


In [30]:
## T1: SR = .69
## Load "tags-ask-ubuntu" from XGI and keep each distinct hyperedge with 2 to 11 nodes once.
H = xgi.load_xgi_data("tags-ask-ubuntu")
## dict.fromkeys de-duplicates in first-seen order. list(set(...)) gave an arbitrary
## order that can change between kernel sessions (hash randomization applies to
## string labels), which reorders the edge list handed to sr.simplicial_ratio.
E = [set(e) for e in dict.fromkeys(tuple(sorted(m)) for m in H.edges.members() if 2 <= len(m) <= 11)]
## Rebuild as a HyperNetX hypergraph to obtain the node list
H = hnx.Hypergraph(dict(enumerate(E)))
V = [x for x in H.nodes]
## nodes, edges, average edge size
print(len(V),len(E),np.mean([len(e) for e in E]))
## Only the sr.simplicial_ratio estimate is computed in this cell; its first returned value is printed
r, _a, _b, _c, _d = sr.simplicial_ratio(V,E)
print('simplicial ratio:',r)


3021 145053 3.42722315291652
simplicial ratio: 0.6952309635163351
