Setup

In [1]:
import itertools
import numpy as np
from numpy.random import *
import graph_tool.all as gt

import time

from multiprocessing import Pool

import pandas as pd

import os

import pickle

n = 100000

p = 512 / 99999
#d = 512
d = (int) (p * (n-1))
k = (int) (d * 32/512)

#r_approx = 5420
r_approx = (int) ((n / 100000) * (pow(2, 16) * k / d))

#if n >= 10^5:
#    p = d / n
#else:
#    p = d / (n - 1)

print("N : {}".format(n))
print("D : {}".format(d))
print("K : {}".format(k))
print("R : {}".format(r_approx))



rng = np.random.default_rng(seed=42)


def add_fast_gnp_edges():
    num_edges = rng.binomial(n*(n-1)/2, p)
    sources = rng.integers(0, n, num_edges*2)
    targets = rng.integers(0, n, num_edges*2)
    mask = sources != targets # removes self-loops
    g.add_edge_list(np.column_stack((sources[mask], targets[mask])))

g = gt.Graph(directed=True)
g.add_vertex(n)
add_fast_gnp_edges()

g.vp["mode_f"] = g.new_vp("int")
g.vp["mode_f"].a = 0


global trackMinK
global trackMaxK
trackMinK = 100
trackMaxK = 0

global maxInterference
maxInterference = 0.0




def num_fired_neighbors(s_i):
    F = np.array(g.get_in_neighbors(s_i, [g.vp["mode_f"]])[:,1], dtype=int)
    return F.sum()

def update_graph_SJOIN(k):
    B = []
    V = g.get_vertices()

    with Pool() as pool:
        Ws = pool.map(num_fired_neighbors, V)

    for i in range(len(V)):

        w_i = Ws[i]

        if w_i >= k:
            B.append(V[i])
    return B


def SJOIN_one_step(A):
    global trackMinK
    global trackMaxK

    #n = 100000
    min_k = 15
    max_k = 45

    #n = 50000
    #min_k = 10
    #max_k = 24

    #n = 20000
    #min_k = 1
    #max_k = 15

    min_memory = None
    max_memory = None
    while True:
        mid_k = round((min_k+max_k)/2)
        for i in A:
            g.vp["mode_f"][i] = 1
        
        
        
        B = update_graph_SJOIN(mid_k)

        #print("({} - {}) {} : {}".format(min_k, max_k, mid_k, len(B)))


        g.vp["mode_f"].a = 0

        if len(B) > r_approx:
            min_k = mid_k+1
            max_memory = B
        else:
            max_k = mid_k-1
            min_memory = B

        if max_k<min_k:

            #print("{}, {}".format(len(min_memory), len(max_memory)))
            if len(max_memory)-r_approx > r_approx-len(min_memory):
                B = min_memory
                mid_k = max_k
            else:
                B = max_memory
                mid_k = min_k
            
            print("{} : {}".format(mid_k, len(B)))
            if mid_k > trackMaxK:
                trackMaxK = mid_k
            if mid_k < trackMinK:
                trackMinK = mid_k
            return B


def SJOIN_sequence(A, L):
    S = [A]
    for i in range(L-1):
        B = SJOIN_one_step(S[-1])
        S.append(B)
    return S

def SJOIN_interference_check(M, A_i, i):
    global maxInterference
    this_max = 0
    for B_i in range(i):
        A = M[A_i]
        B = M[B_i]
        intersect = len(set(A) & set(B))
        interference = intersect / len(A)
        
        if interference >= this_max:
            this_max = interference
        
    if this_max > maxInterference:
        maxInterference = this_max
    return this_max

def SJOIN_simulation(L, num):
    global maxInterference
    
    g.load("brain.graphml")

    state = np.load("rng.npy" ,allow_pickle='TRUE').item()
    rng.bit_generator.__setstate__(state)

    with open('M.pkl', 'rb') as f:
        M = pickle.load(f)

    df = None

    if os.path.isfile('output.xlsx'):
        print("File found")
        df = pd.read_excel('output.xlsx')
        maxInterference = df.loc[len(df.index)-1][0]
    else:
        print("Create new file")
        columns = ["Max Interference", "Interference"]
        for i in range(L):
            columns.append("Memory {}".format(i+1))
        df = pd.DataFrame(columns=columns)
        maxInterference = 0

    sequence_count = 1
    while True:
        print("Sequence {}".format(sequence_count))
        
        A = rng.choice(np.arange(0,n-1), size=r_approx, replace=False)
        S = SJOIN_sequence(A, L)
        M.extend(S)

        seq_max = SJOIN_interference_check(M, len(M)-len(S), len(M)-len(S))
        
        if seq_max > 0.5:
            df.to_excel('output.xlsx', index=False)
            return M
        for B_i in range(len(M)-len(S)+1, len(M)):
            inter = SJOIN_interference_check(M, B_i, B_i-1)
            if inter > seq_max:
                seq_max = inter
            if inter > 0.5:
                df.to_excel('output.xlsx', index=False)
                return M
        
        new_row = [maxInterference, seq_max]
        for s in S:
            new_row.append(len(s))

        df.loc[len(df.index)] = new_row

        if sequence_count >= num:
            df.to_excel('output.xlsx', index=False)
            g.save("brain.graphml")
            np.save("rng.npy", rng.bit_generator.state)
            with open('M.pkl', 'wb') as f:
                pickle.dump(M, f)
            return M
        
        sequence_count = sequence_count+1

def SJOIN_long_sequence(num):
    global maxInterference

    g.load("brain.graphml")

    state = np.load("rng.npy" ,allow_pickle='TRUE').item()
    rng.bit_generator.__setstate__(state)

    with open('M.pkl', 'rb') as f:
        M = pickle.load(f)

    df = None

    if os.path.isfile('output.xlsx'):
        print("File found")
        df = pd.read_excel('output.xlsx')
        maxInterference = df.loc[len(df.index)-1][0]
    else:
        print("Create new file")
        columns = ["Max Interference", "Interference", "Memory"]
        df = pd.DataFrame(columns=columns)
        maxInterference = 0

    if len(M) == 0:
        M.append(rng.choice(np.arange(0,n-1), size=r_approx, replace=False))
        new_row = [0, 0, r_approx]
        df.loc[len(df.index)] = new_row

    memory_count = 1
    while True:
        print("Memory {}".format(memory_count))

        A = M[-1]
        B = SJOIN_one_step(A)
        M.append(B)

        this_max = SJOIN_interference_check(M, len(M)-1, len(M)-2)

        if this_max > 0.5:
            df.to_excel('output.xlsx', index=False)
            return M
        
        new_row = [maxInterference, this_max, len(B)]
        df.loc[len(df.index)] = new_row

        if memory_count >= num:
            df.to_excel('output.xlsx', index=False)
            g.save("brain.graphml")
            np.save("rng.npy", rng.bit_generator.state)
            with open('M.pkl', 'wb') as f:
                pickle.dump(M, f)
            return M
        
        memory_count = memory_count + 1
        


    


N : 100000
D : 512
K : 32
R : 4096


Create New Graph

In [38]:
rng = np.random.default_rng(seed=42)

g = gt.Graph(directed=True)
g.add_vertex(n)
add_fast_gnp_edges()

g.vp["mode_f"] = g.new_vp("int")
g.vp["mode_f"].a = 0


M = []


g.save("brain.graphml")

np.save("rng.npy", rng.bit_generator.state)

with open('M.pkl', 'wb') as f:
    pickle.dump(M, f)

Execution

In [3]:
global trackMinK
global trackMaxK


#M = SJOIN_simulation(5, 50)
M = SJOIN_long_sequence(1000)
print("{} : {}".format(trackMinK, trackMaxK))

File found
Memory 1


  maxInterference = df.loc[len(df.index)-1][0]


27 : 3741
Memory 2
27 : 3438
Memory 3
25 : 3599
Memory 4
26 : 3610
Memory 5
26 : 3688
Memory 6
28 : 4596
Memory 7
32 : 3877
Memory 8
29 : 4803
Memory 9
35 : 4208
Memory 10
31 : 4823
Memory 11
35 : 4183
Memory 12
31 : 4683
Memory 13
34 : 4622
Memory 14
32 : 3915
Memory 15
28 : 3578
Memory 16
26 : 3338
Memory 17
26 : 4232
Memory 18
30 : 3503
Memory 19
27 : 4334
Memory 20
32 : 4464
Memory 21
33 : 4135
Memory 22
31 : 4104
Memory 23
29 : 3789
Memory 24
27 : 3993
Memory 25
30 : 4176
Memory 26
31 : 4535
Memory 27
33 : 4759
Memory 28
33 : 3684
Memory 29
28 : 4410
Memory 30
31 : 3482
Memory 31
25 : 4024
Memory 32
30 : 4522
Memory 33
33 : 4699
Memory 34
34 : 4787
Memory 35
33 : 3915
Memory 36
28 : 3479
Memory 37
25 : 4008
Memory 38
30 : 4451
Memory 39
31 : 3981
Memory 40
30 : 4213
Memory 41
31 : 4888
Memory 42
34 : 3356
Memory 43
26 : 4476
Memory 44
33 : 4129
Memory 45
29 : 4058
Memory 46
29 : 3244
Memory 47
24 : 3223
Memory 48
25 : 4838
Memory 49
35 : 4391
Memory 50
31 : 3362
Memory 51
26 : 462