In [None]:
import sys; sys.path.append('..')

In [None]:
import pickle
from collections import defaultdict
from itertools import accumulate
from pathlib import Path
import numpy as np
from tqdm import tqdm
from util import ds_names

In [None]:
# Dataset locations, relative to the notebook's working directory:
# raw inputs are read from `raw_data`; the incidence pickles produced
# below are written under `incident`, which is created if it is missing.
p_raw_data = Path('..', 'data', 'raw_data')
p_incident = Path('..', 'data', 'incident')
p_incident.mkdir(exist_ok=True)

def _read_ints(path):
    """Return the integers stored one per line in the text file at `path`.

    Blank or whitespace-only lines (for example a trailing empty line) are
    skipped rather than passed to `int`, which would raise ValueError.
    """
    with path.open() as f:
        return [int(line) for line in f if line.strip()]


# For every dataset, turn the flat (nverts, simplices) encoding into two
# incidence maps and pickle them into the dataset's output directory:
#   i2edges: edge id -> set of vertices in that edge
#   v2edges: vertex  -> set of ids of the edges that contain it
for ds_name in ds_names:
    p_raw_data_ds = p_raw_data / ds_name
    p_incident_ds = p_incident / ds_name
    p_incident_ds.mkdir(exist_ok=True)

    # nverts[i] is the number of consecutive entries of `simplices` that
    # belong to record i.
    nverts = _read_ints(p_raw_data_ds / '{}-nverts.txt'.format(ds_name))
    simplices = _read_ints(p_raw_data_ds / '{}-simplices.txt'.format(ds_name))
    m = len(nverts)

    # segments[i]:segments[i + 1] is the slice of `simplices` for record i.
    segments = [0] + list(accumulate(nverts))
    if segments[-1] != len(simplices):
        # Slicing would otherwise silently truncate or drop records, so
        # fail before anything is written for this dataset.
        raise ValueError(
            '{}: nverts sums to {} but simplices has {} entries'.format(
                ds_name, segments[-1], len(simplices)))

    edges = set()  # canonical (sorted, de-duplicated) vertex tuples seen so far
    i2edges = dict()
    v2edges = defaultdict(set)

    for start, stop in tqdm(zip(segments, segments[1:]), total=m):
        e = tuple(sorted(set(simplices[start:stop])))
        # Skip records with fewer than two distinct vertices and records
        # whose vertex set has already been stored.
        if len(e) < 2 or e in edges:
            continue
        edges.add(e)
        edge_id = len(i2edges)  # ids are assigned consecutively from 0
        i2edges[edge_id] = set(e)
        for v in e:
            v2edges[v].add(edge_id)

    degrees = [len(E_v) for E_v in v2edges.values()]
    sizes = [len(e) for e in i2edges.values()]

    with (p_incident_ds / 'i2edges.pkl').open('wb') as f:
        pickle.dump(i2edges, f)
    # NOTE: v2edges is pickled as a defaultdict(set), so it loads back as one.
    with (p_incident_ds / 'v2edges.pkl').open('wb') as f:
        pickle.dump(v2edges, f)

    # A dataset may yield no edges at all; report 0 / nan in that case
    # instead of letting max() raise on an empty sequence.
    avg_degree = np.mean(degrees) if degrees else float('nan')
    avg_size = np.mean(sizes) if sizes else float('nan')
    print(f'Dataset {ds_name}, |V| = {len(v2edges)}, |E| = {len(i2edges)},'
          f' max./avg. d(v) = {max(degrees, default=0)} / {avg_degree:.2f},'
          f' max./avg. |e| = {max(sizes, default=0)} / {avg_size:.2f}')