Automatically reload imported libraries when their source changes, so the kernel does not have to be restarted.

In [None]:
# Load IPython's autoreload extension (or reload it if it is already loaded).
%reload_ext autoreload
# Mode 2: reload all imported modules before each cell is executed, so edits
# to imported code are picked up without restarting the kernel.
%autoreload 2

In [None]:
import pandas as pd
import pickle as pk
import networkx as nx
import numpy as np
import hypergraphx as hgx
from tqdm import tqdm
from utils_preprocessing import *

import os

In [None]:
# Path template for the per-dataset folders. The `{ds}` placeholder is filled
# in with a dataset name via ROOT.format(ds=...); replace the leading part
# with your own data directory.
ROOT = 'put_your_personal_root_here/{ds}'

## Creating Temporal Higher-Order Networks

### Baboons

In [None]:
# Resolve the baboons dataset folder from the ROOT template and hand it to
# the dataset-specific helper from utils_preprocessing.
# NOTE: the folder name is spelled 'babboons', matching the helper's name.
ds = 'babboons'
root = ROOT.format(ds=ds)

create_babboons_TN(root=root)

### Congress Bills

In [None]:
# Resolve the congress-bills dataset folder from the ROOT template and pass
# it to the dataset-specific helper.
ds = "congress-bills"
root = ROOT.format(ds=ds)
create_congress_bills_TN(root=root)

### Copenhagen

In [None]:
# Copenhagen dataset: the formatted folder path is passed straight to the
# helper; unlike the other cells, `root` is not reassigned here.
ds = "Copenhagen"
create_Copenhagen_TN(root=ROOT.format(ds=ds))

### Friends & Family

In [None]:
# Friends & Family dataset: resolve its folder from the ROOT template and
# pass it to the dataset-specific helper.
ds = 'FnF'
root = ROOT.format(ds=ds)
create_FnF_TN(root=root)

### High Schools (HS11, HS12)

In [None]:
# Both high-school datasets go through the same helper with the same
# bin_size, so process them in one loop (HS11 first, then HS12).
for ds in ('HS11', 'HS12'):
    root = ROOT.format(ds=ds)
    create_HS1X_TN(ds=ds, root=root, bin_size=200)


### Malawi

In [None]:
# Malawi dataset: resolve its folder from the ROOT template.
ds = 'Malawi'
root = ROOT.format(ds=ds)
# Forwarded to the helper as `bin_size`; presumably the width of the time
# bins used to aggregate contacts (units defined in utils_preprocessing) —
# TODO confirm.
bin_size = 1000

create_malawi_TN(root=root, bin_size=bin_size)

### Kenyan

In [None]:
# Kenyan dataset: resolve its folder from the ROOT template and pass both the
# folder and the dataset name to the helper.
ds = "Kenyan"
# ROOT uses the *named* field `{ds}`, so the value must be passed by keyword;
# a positional argument raises KeyError: 'ds'.
root = ROOT.format(ds=ds)
create_Kenyan_TN(root=root, ds=ds)


### InVS13 & InVS15 (Workplaces, Sociopatterns)

In [None]:
# Both workplace datasets are handled by the same helper, so process them in
# one loop (InVS13 first, then InVS15).
for ds in ("InVS13", "InVS15"):
    # ROOT uses the *named* field `{ds}`, so the value must be passed by
    # keyword; a positional argument raises KeyError: 'ds'.
    root = ROOT.format(ds=ds)
    create_InVS_TN(ds=ds, root=root)

### Sociopatterns (SFHH & LyonSchool & Thiers13)

In [None]:
# Run the shared Sociopatterns helper on each dataset in turn, printing the
# dataset name first so the output of each run can be told apart.
for ds in ['SFHH', 'LyonSchool', 'Thiers13']:
    print(f"Dataset = {ds}")
    root = ROOT.format(ds=ds)
    create_Sociopatterns_TN(root=root, ds=ds)


### Utah Schools (Elem1 & Mid1)

In [None]:
# Utah 'Mid1' dataset: resolve its folder from the ROOT template and pass the
# folder and dataset name to the shared Utah helper.
ds = 'Mid1'
root = ROOT.format(ds=ds)
create_Utah_TN(root=root, ds=ds)

In [None]:
# Utah 'Elem1' dataset: same helper as the 'Mid1' cell, run in a separate cell.
ds = 'Elem1'
root = ROOT.format(ds=ds)
create_Utah_TN(root=root, ds=ds)

## Creating Static Networks

Now discard all temporal information and build one static hypergraph per dataset, containing every distinct hyperedge that appears in any of its temporal snapshots.

In [None]:
# Folder that holds one sub-folder per dataset. ROOT is a per-dataset template
# ending in '{ds}', so the folder to scan is its parent; listing ROOT itself
# would look for a directory literally named '{ds}'.
root = os.path.dirname(ROOT)

# Keep only entries whose name contains none of the excluded fragments
# ('.', 'ptho', 'non-temporal', '__'), in sorted order.
DATASETS = sorted(
    el for el in os.listdir(root)
    if not any(fragment in el for fragment in ('.', 'ptho', 'non-temporal', '__'))
)
print(len(DATASETS))
print(DATASETS)

In [None]:
# For every dataset, merge the hyperedges of all temporal snapshots into one
# static hypergraph and pickle it under <dataset>/processed/. Datasets whose
# static file already exists are skipped.
for i, dataset in enumerate(DATASETS):
    static_fname = f"{root}/{dataset}/processed/STATIC_{dataset}.pck"
    if os.path.isfile(static_fname):
        print(f"{i+1}. Already saved for {dataset}")
        continue

    # TN is iterated with .values() and each value must expose get_edges();
    # presumably a mapping from time step to hypergraph snapshot — verify
    # against utils_preprocessing.
    ts_fname = f'{root}/{dataset}/TS_{dataset}.pck'
    with open(ts_fname, 'rb') as f:
        TN = pk.load(f)

    # frozenset makes edges hashable and order-insensitive, so the same group
    # of nodes seen in several snapshots is kept only once.
    unique_edges = {frozenset(e) for h in TN.values() for e in h.get_edges()}
    # Largest hyperedges first. The original called sorted() without using
    # its return value, so the ordering was never actually applied.
    elist = sorted((list(e) for e in unique_edges), key=len, reverse=True)
    H = hgx.Hypergraph(edge_list=elist)
    # print(H)

    # Make sure the output folder exists before writing.
    os.makedirs(os.path.dirname(static_fname), exist_ok=True)
    with open(static_fname, 'wb') as f:
        pk.dump(H, f, protocol=-1)
    print(f"{i+1}. Saved for {dataset}")