In [1]:
import os
import ogb
import json
import networkx as nx
import pandas as pd
from ogb.linkproppred import PygLinkPropPredDataset

from pact.spasmspace import SpasmSpace
from pact.graphwrapper import GraphWrapper
from pact.ui import default_progressbar
from pact.naive_exec import naive_pandas_plan_exec, _undir_df_degree_thres, sliced_pandas_homcount
import dill
import multiprocess as mp

import random

# Number of worker processes handed to the multiprocessing pool.
MAX_THREADS = 20
# File that receives the per-vertex K5 counts as JSON.
OUTPUT_FILE = 'collab/clique5_counts.json'
# Number of randomly chosen nodes to keep in the graph; None keeps the full graph.
_SPARSIFY = None

# provide path to file for K4 counts on collab
CLIQUE4_COUNTS = 'counts/collab/collab_clique4_counts.json'


# Setup Data

In [2]:
# Load the ogbl-collab link-prediction dataset rooted at dataset/ and keep its first entry.
dataset = PygLinkPropPredDataset(root='dataset/', name='ogbl-collab')
data = dataset[0]

In [3]:
# Build the graph from the training split only: every edge entry is turned
# into a list of plain Python scalars via .item().
train_edges = dataset.get_edge_split()['train']['edge']
edges = [[endpoint.item() for endpoint in edge] for edge in train_edges]
G = nx.from_edgelist(edges)
# Report number of nodes and number of edges.
print(G.order(), G.size())

def sparsify(sz, seed=None):
    """Replace the global graph ``G`` by the subgraph induced on ``sz`` random nodes.

    Args:
        sz: Number of nodes to keep. ``random.sample`` raises ``ValueError``
            when ``sz`` exceeds the number of nodes currently in ``G``.
        seed: Optional seed for a private random generator, making the
            sample reproducible. With ``None`` (the default) the
            module-level ``random`` state is used.

    Side effects:
        Rebinds the module-level ``G`` and prints the order and size of the
        resulting graph. Each call samples from the *current* ``G``, so
        calling it repeatedly shrinks the graph further.
    """
    global G
    # A dedicated generator keeps a seeded run independent of other users of `random`.
    rng = random if seed is None else random.Random(seed)
    subg = rng.sample(list(G.nodes), sz)
    G = nx.induced_subgraph(G, subg)
    print(G.order(), G.size())
    
# Sparsification is opt-in: it only runs when a target node count is configured.
sparsify_requested = _SPARSIFY is not None
if sparsify_requested:
    sparsify(_SPARSIFY)

235868 967632


In [4]:

# Read the precomputed K4 counts: a JSON object mapping a vertex key to a list of counts.
with open(CLIQUE4_COUNTS, 'r') as f:
    allcounts = json.load(f)

# Sum each vertex's counts once, keep only vertices with a positive total,
# and store that total divided by 24 under the integer vertex id.
vertex_totals = ((v, sum(k4count)) for v, k4count in allcounts.items())
ink4 = {int(v): total / 24 for v, total in vertex_totals if total > 0}

# Vertices with a positive K4 total.
K4_vertices_set = set(ink4)

In [5]:
K4_vertices = list(K4_vertices_set)
# Adjacency restricted to vertices with a positive K4 total: for each such
# vertex, keep only those of its neighbours in G that are in the same set.
neighbors = {
    vertex: K4_vertices_set.intersection(G.neighbors(vertex))
    for vertex in K4_vertices
}

In [6]:
# Worker pool for the per-vertex K5 counting below.
pool = mp.Pool(processes=MAX_THREADS)

def count_k5s_from(v1):
    """Count ordered tuples (v2, v3, v4, v5) that are pairwise adjacent and adjacent to ``v1``.

    Adjacency is read from the module-level ``neighbors`` mapping
    (vertex -> set of adjacent vertices). Each candidate set is the
    intersection of the previous candidates with the neighbours of the
    vertex just chosen, so every chosen vertex is adjacent to all earlier
    ones.

    The tuples are ordered: there is no ordering constraint between
    v2..v5, so one 5-vertex clique through ``v1`` contributes once per
    ordering of its other four vertices.

    Args:
        v1: Anchor vertex; must be a key of ``neighbors``.

    Returns:
        A pair ``(v1, count)`` so results can be matched to their vertex
        when they arrive out of order.
    """
    acc = 0
    N1 = neighbors[v1]
    for v2 in N1:
        N2 = N1 & neighbors[v2]
        for v3 in N2:
            N3 = N2 & neighbors[v3]
            for v4 in N3:
                # Every vertex left after this intersection completes one tuple,
                # so the size of the set is the number of choices for v5.
                acc += len(N3 & neighbors[v4])
    return v1, acc

# Fan the per-vertex counting out over the worker pool and collect the results
# as they arrive (in arbitrary order) into K5s: vertex -> [scaled count].
#
# NOTE(review): count_k5s_from iterates over (v2, v3, v4, v5) without any
# ordering constraint, so `count` appears to include a factor of 4! already.
# Confirm that the additional 4! below is intended before changing it; files
# written from K5s may be consumed at the current scale.
try:
    with default_progressbar() as progress:
        K5s = {}
        allvs = K4_vertices
        track = progress.track(pool.imap_unordered(count_k5s_from, allvs), total=len(allvs))
        for v, count in track:
            # we multiply by 4! to get the actual homomorphism count instead of the anchored subgraph count
            K5s[v] = [count * 4*3*2]
finally:
    # Release the worker processes whether or not the run succeeded.
    pool.terminate()
    pool.join()

Output()

In [10]:
# Make sure the destination directory exists; open() does not create it.
out_dir = os.path.dirname(OUTPUT_FILE)
if out_dir:
    os.makedirs(out_dir, exist_ok=True)

# Persist the counts; JSON serialisation turns the integer vertex keys into strings.
with open(OUTPUT_FILE, 'w') as f:
    json.dump(K5s, f)