In [3]:
import numpy as np
import scipy.sparse as sp
from numpy.random import *
from multiprocessing import Pool
import pandas as pd
import pickle
import time
import sys
import os
import math


# Module-level trackers shared with NeuroidalModel through `global` statements
# inside its methods. (`global` at module scope is a no-op, so plain
# assignments are all that is needed here.)

# Smallest / largest threshold chosen so far by SJOIN_one_step.
trackMinK, trackMaxK = 100, 0

# Largest interference reported so far by SJOIN_interference_check.
maxInterference = 0.0

# Not referenced by any code visible in this notebook.
trackTime = 0

class NeuroidalModel:
    """Random directed graph on ``n`` nodes plus a chain of "memories".

    A memory is a set of node indices. Each new memory is derived from the
    previous one by tallying, for every node ``j``, how many members ``a`` of
    the previous memory have ``g[a, j]`` set, and keeping the nodes whose
    tally reaches a threshold chosen so the new memory has about
    ``r_approx`` members.

    Attributes:
        n: number of nodes.
        p: edge probability, fixed at 512 / 99999.
        d: ``int(p * (n - 1))``.
        k: ``int(d * 32 / 512)``.
        r_approx: target memory size,
            ``int((n / 100000) * (2**16 * k / d))``.
        M: list of memories (sets of node indices) generated so far.
        rng: ``numpy.random.Generator`` seeded with ``new_seed``.
        g: dense ``(n, n)`` boolean adjacency matrix. NOTE: this needs n*n
            bytes (about 40 GB for n=200000); it is kept dense so existing
            pickles and external indexing of ``model.g`` keep working.

    The methods read and update the module-level globals ``trackMinK``,
    ``trackMaxK`` and ``maxInterference``, which must already exist.
    """

    def __init__(self, n, new_seed=42):
        """Derive the model parameters from ``n`` and sample the graph.

        Args:
            n: number of nodes.
            new_seed: seed for the random generator.

        Raises:
            ValueError: if ``n`` is so small that ``d`` is not positive
                (``r_approx`` divides by ``d``).
        """
        self.n = n
        self.p = 512 / 99999
        self.d = int(self.p * (n - 1))
        if self.d <= 0:
            # Previously surfaced as an opaque ZeroDivisionError below.
            raise ValueError(
                "n={} is too small: int(p * (n - 1)) must be positive".format(n)
            )
        self.k = int(self.d * 32 / 512)
        self.r_approx = int((n / 100000) * (pow(2, 16) * self.k / self.d))
        self.M = []
        # Spelled out via `np` so the class does not depend on a star import.
        self.rng = np.random.default_rng(seed=new_seed)
        self.g = self.create_sparse_gnp_graph().toarray()
        print(self.n)
        print(self.d)
        print(self.k)
        print(self.r_approx)

    def create_sparse_gnp_graph(self) -> sp.csr_matrix:
        """Sample a random directed graph as a boolean CSR matrix.

        Draws ``E ~ Binomial(n * (n - 1) // 2, p)`` and then ``2 * E``
        (source, target) pairs uniformly at random. Self-loops are dropped
        and repeated pairs collapse into a single True entry. Entry
        ``[target, source]`` is set for every surviving pair.
        """
        num_edges = self.rng.binomial(self.n * (self.n - 1) // 2, self.p)
        # Order of the two draws is kept so a given seed yields the same graph.
        sources = self.rng.integers(0, self.n, num_edges * 2)
        targets = self.rng.integers(0, self.n, num_edges * 2)
        keep = sources != targets
        data = np.ones(int(keep.sum()), dtype=bool)
        adjacency = sp.coo_matrix(
            (data, (targets[keep], sources[keep])),
            shape=(self.n, self.n),
            dtype=bool,
        )
        return adjacency.tocsr()

    def _link_tally(self, A):
        """Return an int array whose entry ``j`` counts the ``a`` in ``A`` with ``g[a, j]`` set."""
        tally = np.zeros(self.n, dtype=int)
        for member in A:
            # A boolean row adds 1 exactly where it is True.
            tally += self.g[member]
        return tally

    def SJOIN_interference_check(self, A, i):
        """Return the largest overlap fraction of ``A`` with ``M[0] .. M[i-1]``.

        The overlap with a stored memory ``B`` is ``len(A & B) / len(A)``.
        The module-level ``maxInterference`` is raised to the returned value
        when that value is larger. An empty ``A`` has no overlap and yields 0
        (it used to raise ZeroDivisionError).
        """
        global maxInterference
        this_max = 0
        if A:
            size = len(A)
            for index in range(i):
                overlap = len(A & self.M[index]) / size
                this_max = max(overlap, this_max)

        if this_max > maxInterference:
            maxInterference = this_max
        return this_max

    def next_memory(self, A, k):
        """Return the set of nodes whose tally over ``A`` is at least ``k``."""
        tally = self._link_tally(A)
        return set(np.flatnonzero(tally >= k).tolist())

    def SJOIN_one_step(self, A, min_k=15, max_k=45):
        """Pick a threshold by binary search and return the resulting memory.

        Searches thresholds in ``[min_k, max_k]`` for the boundary between
        memories larger than ``r_approx`` and memories that are not, then
        returns whichever of the two boundary memories is closer in size to
        ``r_approx`` (the larger one on a tie). If every probed threshold
        falls on the same side of ``r_approx``, the memory at the nearest
        end of the range is returned instead of crashing.

        Prints ``"<threshold> : <size>"`` and updates the module-level
        ``trackMinK`` / ``trackMaxK``.

        Args:
            A: the memory to derive from.
            min_k: lowest threshold to consider (default kept from the
                original hard-coded bound).
            max_k: highest threshold to consider (likewise).

        Raises:
            ValueError: if ``min_k > max_k``.
        """
        global trackMinK
        global trackMaxK

        if min_k > max_k:
            raise ValueError("min_k must not exceed max_k")

        # The tally does not depend on the threshold, so build it once
        # instead of once per probe.
        tally = self._link_tally(A)

        low, high = min_k, max_k
        size_above = None   # size at the largest threshold still above r_approx
        size_below = None   # size at the smallest threshold not above r_approx
        while low <= high:
            probe = (low + high) // 2
            size = int(np.count_nonzero(tally >= probe))
            if size > self.r_approx:
                low = probe + 1
                size_above = size
            else:
                high = probe - 1
                size_below = size

        # Here high == low - 1: size_above belongs to threshold `high`,
        # size_below to threshold `low`.
        if size_above is None:
            chosen_k = low
        elif size_below is None:
            chosen_k = high
        elif size_above - self.r_approx > self.r_approx - size_below:
            chosen_k = low
        else:
            chosen_k = high

        B = set(np.flatnonzero(tally >= chosen_k).tolist())
        print("{} : {}".format(chosen_k, len(B)))

        if chosen_k > trackMaxK:
            trackMaxK = chosen_k
        if chosen_k < trackMinK:
            trackMinK = chosen_k
        return B

    def SJOIN_one_step_v2(self, A):
        """Pick the threshold from a cumulative histogram and return the memory.

        ``at_least[t]`` is the number of nodes whose tally is >= ``t``
        (so ``at_least[0] == n``), followed by one trailing 0. The threshold
        is the first ``t`` with ``at_least[t] <= r_approx``, or ``t - 1`` when
        that neighbour is strictly closer to ``r_approx``.

        Fixes over the previous version: entry 0 is now ``n`` rather than 0,
        which keeps the array sorted as ``np.searchsorted`` requires (a
        sparsely linked ``A`` used to select every node); and the histogram
        grows with the data, so a tally of ``2 * k`` or more no longer raises
        IndexError.
        """
        tally = self._link_tally(A)

        histogram = np.bincount(tally, minlength=1)
        # Suffix sums give "number of nodes with tally >= t".
        at_least = np.append(histogram[::-1].cumsum()[::-1], 0)

        target = self.r_approx
        idx = int(np.searchsorted(-at_least, -target, side="left"))
        if idx > 0 and (
            idx == len(at_least)
            or abs(target - at_least[idx - 1]) < abs(target - at_least[idx])
        ):
            idx -= 1

        return set(np.flatnonzero(tally >= idx).tolist())

    def SJOIN_long_sequence(self, num):
        """Extend ``self.M`` step by step and log each step to ``output.xlsx``.

        If ``output.xlsx`` exists it is loaded and ``maxInterference`` resumes
        from the first column of its last row; otherwise an empty sheet is
        started. When ``self.M`` is empty, a seed memory of ``r_approx``
        distinct nodes is drawn uniformly from all ``n`` nodes (the previous
        ``arange(0, n - 1)`` could never pick node ``n - 1``).

        Each step appends ``SJOIN_one_step_v2(M[-1])`` to ``M``, measures its
        interference against every earlier memory except its immediate
        predecessor, and logs ``[maxInterference, interference, size]``.
        The loop stops once the interference reaches 0.5 or ``num`` steps
        have run (at least one step always runs). The sheet is written only
        at that point.

        Returns:
            ``self.M``.
        """
        global maxInterference

        if os.path.isfile('output.xlsx'):
            print("File found")
            df = pd.read_excel('output.xlsx')
            # Explicit positional lookup; `row[0]` on a labelled Series is
            # deprecated. An empty sheet restarts from 0.
            maxInterference = df.iloc[-1, 0] if len(df.index) > 0 else 0
        else:
            print("Create new file")
            df = pd.DataFrame(columns=["Max Interference", "Interference", "Memory"])
            maxInterference = 0

        if len(self.M) == 0:
            seed_nodes = self.rng.choice(self.n, size=self.r_approx, replace=False)
            self.M.append(set(seed_nodes))
            df.loc[len(df.index)] = [0, 0, self.r_approx]

        memory_count = 1
        while True:
            print("Memory {}".format(memory_count))

            B = self.SJOIN_one_step_v2(self.M[-1])
            self.M.append(B)

            # len(M) - 2 excludes both B itself and the memory it came from.
            this_max = self.SJOIN_interference_check(B, len(self.M) - 2)

            df.loc[len(df.index)] = [maxInterference, this_max, len(B)]

            if this_max >= 0.5 or memory_count >= num:
                df.to_excel('output.xlsx', index=False)
                return self.M

            memory_count += 1
            




In [4]:
# Build a 200000-node model with the default seed and snapshot it to
# model.pkl so later cells can reload it instead of regenerating the graph.
model = NeuroidalModel(n=200000)
with open('model.pkl', 'wb') as snapshot:
    pickle.dump(model, snapshot)

: 

In [2]:
# Reload the pickled model, extend its memory chain, and write the updated
# model back to the same file.
with open('model.pkl', 'rb') as snapshot:
    model = pickle.load(snapshot)

M = model.SJOIN_long_sequence(num=10000)

with open('model.pkl', 'wb') as snapshot:
    pickle.dump(model, snapshot)

# Report the module-level threshold trackers (only SJOIN_one_step updates them).
print("{} : {}".format(trackMinK, trackMaxK))

File found


  maxInterference = df.loc[len(df.index)-1][0]


Memory 1
31 : 3875
Memory 2
28 : 4675
Memory 3
33 : 4398
Memory 4
32 : 3336
Memory 5
25 : 4010
Memory 6
29 : 4291
Memory 7
31 : 3778
Memory 8
28 : 3613
Memory 9
27 : 3543
Memory 10
26 : 4681
Memory 11
33 : 4449
Memory 12
32 : 3705
Memory 13
27 : 4562
Memory 14
32 : 4904
Memory 15
35 : 3361
Memory 16
25 : 4387
Memory 17
31 : 4878
Memory 18
34 : 4765
Memory 19
34 : 3731
Memory 20
28 : 3208
Memory 21
24 : 4531
Memory 22
32 : 4591
Memory 23
33 : 3530
Memory 24
26 : 4504
Memory 25
32 : 4189
Memory 26
30 : 4473
Memory 27
32 : 4063
Memory 28
30 : 3267
Memory 29
25 : 3330
Memory 30
25 : 3895
Memory 31
28 : 4817
Memory 32
34 : 4153
Memory 33
30 : 4030
Memory 34
29 : 4559
Memory 35
33 : 3326
Memory 36
25 : 3982
Memory 37
29 : 3879
Memory 38
28 : 4741
Memory 39
34 : 3379
Memory 40
25 : 4577
Memory 41
33 : 3431
Memory 42
26 : 3415
Memory 43
26 : 3248
Memory 44
25 : 3188
Memory 45
24 : 4127
Memory 46
30 : 3818
Memory 47
28 : 4003
Memory 48
29 : 4184
Memory 49
30 : 4444
Memory 50
32 : 3762
Memory 51

In [2]:
# Reload the model and derive the next memory with both implementations,
# then print each size followed by the size of their intersection.
with open('model.pkl', 'rb') as snapshot:
    model = pickle.load(snapshot)

B1 = model.SJOIN_one_step(model.M[-1])
B2 = model.SJOIN_one_step_v2(model.M[-1])

for size in (len(B1), len(B2), len(B1 & B2)):
    print(size)

27 : 4327
4096
[     0 100000 100000 100000 100000  99996  99985  99931  99832  99562
  99020  98032  96309  93592  89436  84033  77273  69273  60355  50994
  42033  33555  25873  19280  13922   9718   6582   4327   2758   1690
   1031    594    356    212    113     58     37     24     13      6
      3      2      2      0      0      0      0      0      0      0
      0      0      0      0      0      0      0      0      0      0
      0      0      0      0]
27
4327
4327
4327


In [3]:
# Times one step of each implementation on the latest memory.
# NOTE: relies on `model` already being loaded by an earlier cell.
# perf_counter() is a monotonic high-resolution clock meant for measuring
# intervals; time.time() is wall-clock time and can jump if the system
# clock is adjusted.
st = time.perf_counter()
B1 = model.SJOIN_one_step(model.M[-1])
et = time.perf_counter()
print("V1 : {}".format(et-st))

st = time.perf_counter()
B2 = model.SJOIN_one_step_v2(model.M[-1])
et = time.perf_counter()
print("V2 : {}".format(et-st))

27 : 4327
V1 : 0.8129959106445312
V2 : 0.35044264793395996
