In [1]:
import pandas as pd
import numpy as np
import igraph as ig
import hypernetx as hnx
import hypernetx.algorithms.hypergraph_modularity as hmod 
import xgi ## pip install xgi is required
import pickle
from collections import Counter
from functools import reduce
import itertools
from scipy.special import comb
import warnings
import random
from itertools import combinations
from itertools import combinations as combs

## Functions to compute various simpliciality measures are in the included `simpliciality.py` file:
import simpliciality as spl 
import sr ## functions taken from Jordan's notebook

## Path to the data directory; edit this to match your setup
datadir = '../Datasets/'


# XGI's hypergraphs



In [2]:
def compute_all(fn="", min_size=2, max_size=11, samples=1000):
    """Print size statistics and simpliciality measures for one XGI dataset.

    The dataset is loaded with ``xgi.load_xgi_data``; duplicate edges are
    collapsed and only edges whose size lies in ``[min_size, max_size]`` are
    kept before the measures are computed.

    Parameters
    ----------
    fn : str
        Dataset name handed to ``xgi.load_xgi_data``.
    min_size, max_size : int, optional
        Inclusive bounds on the size of the edges that are kept. The defaults
        (2 and 11) are the values that used to be hard-coded.
    samples : int, optional
        Forwarded as ``samples`` to ``spl.get_simplicial_ratio``
        (default 1000, the value that used to be hard-coded).

    Returns
    -------
    None
        Every result is printed rather than returned.
    """
    dataset = xgi.load_xgi_data(fn)

    ## Canonicalise each edge as a sorted tuple and drop duplicates while
    ## keeping first-seen order, so the edge list does not depend on set
    ## iteration order.
    distinct = dict.fromkeys(
        tuple(sorted(e))
        for e in dataset.edges.members()
        if min_size <= len(e) <= max_size
    )
    E = [set(e) for e in distinct]

    ## hypernetx is used here only to list the nodes covered by the kept edges
    H = hnx.Hypergraph(dict(enumerate(E)))
    V = list(H.nodes)

    print('\nResults for',fn,':')
    print('n =',len(V),'\nm =',len(E),'\n<s> =',np.mean([len(e) for e in E]))
    print('SF:',spl.get_simplicial_fraction(V,E),
          '\nES:',spl.get_edit_simpliciality(V,E),
          '\nFES with max face:',spl.get_face_edit_simpliciality(V,E,exclude_self=False),
          '\nFES excluding max face:',spl.get_face_edit_simpliciality(V,E,exclude_self=True))
    print('simplicial ratio with CL model:',spl.get_simplicial_ratio(V, E, samples=samples, multisets=False))

    ## only the first value returned by sr.simplicial_ratio is reported here
    r, *_unused = sr.simplicial_ratio(V,E)
    print('simplicial ratio with estimation:',r)
    

In [3]:
def compute_all_time(fn, min_size=2, max_size=11, samples=1000):
    """Print simplicial ratios for a timestamped XGI dataset.

    Edges are ordered by their ``'timestamp'`` attribute, only the first
    occurrence of each distinct edge is kept, and edges whose size falls
    outside ``[min_size, max_size]`` are dropped before the ratios are
    computed.

    Parameters
    ----------
    fn : str
        Dataset name handed to ``xgi.load_xgi_data``. Every edge must carry a
        ``'timestamp'`` attribute, otherwise the lookup below raises.
    min_size, max_size : int, optional
        Inclusive bounds on the size of the edges that are kept. The defaults
        (2 and 11) are the values that used to be hard-coded.
    samples : int, optional
        Forwarded as ``samples`` to ``spl.get_simplicial_ratio``
        (default 1000, the value that used to be hard-coded).

    Returns
    -------
    None
        Every result is printed rather than returned.
    """
    ## load dataset
    dataset = xgi.load_xgi_data(fn)

    ## sort the edges by timestamp; the stable sort keeps the dataset's own
    ## order among edges that share a timestamp
    timestamps = [dataset.edges[e]['timestamp'] for e in dataset.edges]
    members = dataset.edges.members()
    chronological = [
        tuple(sorted(members[i])) for i in np.argsort(timestamps, kind='stable')
    ]

    ## keep only the first instance of each edge (dict keys preserve insertion
    ## order), then apply the size filter
    E = [
        set(e)
        for e in dict.fromkeys(chronological)
        if min_size <= len(e) <= max_size
    ]

    ## compute SR's (hypernetx is used only to list the covered nodes)
    H = hnx.Hypergraph(dict(enumerate(E)))
    V = list(H.nodes)
    print(fn)
    print('simplicial ratio (CL):',spl.get_simplicial_ratio(V, E, samples=samples, multisets=False))

    ## the first three values returned by sr.simplicial_ratio are reported
    r, _a, _b, *_unused = sr.simplicial_ratio(V,E)
    print('simplicial ratios (estimate):',r,_a,_b)


In [4]:
## Datasets to process; compute_all_time reads a 'timestamp' attribute on every edge
Datasets = [
    "contact-primary-school",
    "contact-high-school",
    "hospital-lyon",
    "email-enron",
    "email-eu",
    "congress-bills",
    "tags-ask-ubuntu",
]

## Print the simplicial ratios of each dataset in turn
for fn in Datasets:
    compute_all_time(fn)

contact-primary-school
simplicial ratio (CL): 2.70086866632209
simplicial ratios (estimate): 2.7030980148686212 4.751071466099104 0.6551245636381386
contact-high-school
simplicial ratio (CL): 6.567289078967062
simplicial ratios (estimate): 6.45820197874982 10.821676143692093 2.0947278138075465
hospital-lyon
simplicial ratio (CL): 0.9464899080088828
simplicial ratios (estimate): 0.9418881726145176 1.7162126735154035 0.1675636717136318
email-enron
simplicial ratio (CL): 4.83482899188975
simplicial ratios (estimate): 4.927383667009127 6.93161196658567 2.923155367432584
email-eu
simplicial ratio (CL): 5.1291175372134274
simplicial ratios (estimate): 5.2560158151659815 7.871857019473126 2.640174610858838
congress-bills
simplicial ratio (CL): 4.408670406196367
simplicial ratios (estimate): 4.439908949310961 5.2076346409214676 3.672183257700455
tags-ask-ubuntu
simplicial ratio (CL): 0.6659422713576917
simplicial ratios (estimate): 0.7215811481609853 1.1375823416893094 0.305579954632661
