In [1]:
import csv
import random
from utils import *
from collections import defaultdict
import time
import matplotlib.pyplot as plt
#import seaborn as sns
import numpy as np

## The functions that implements our algorithm

In [144]:
def test_sparse_vertex(current_graph, eps=0.2):
    # sample log n neighbors for each vertex
    current_num_vertex = len(current_graph)
    num_sample = max((int)(5*np.log(current_num_vertex)), 20)
    sample_dict = {}
    for this_vertex in current_graph:
        sample_vertex_set = current_graph[this_vertex].getRandom(i=num_sample)
        sample_dict[this_vertex] = sample_vertex_set
    sparse_vertex_list = []
    # test sparsity for each vertex
    for this_vertex in current_graph:
        num_neighbor_diff = 0
        neighbors_this = sample_dict[this_vertex]
        for comp_vertex in sample_dict[this_vertex]:
            neighbors_comp = sample_dict[comp_vertex]
            dif1 = np.setdiff1d(neighbors_this, neighbors_comp)
            dif2 = np.setdiff1d(neighbors_comp, neighbors_this)
            total_diff = len(np.concatenate((dif1, dif2)))
            max_degree_between_two = max(current_graph[this_vertex].degree, 
                                         current_graph[comp_vertex].degree)
            if (total_diff>=eps*num_sample):
                num_neighbor_diff = num_neighbor_diff + 1
        if num_neighbor_diff >=eps*num_sample:
            sparse_vertex_list.append(this_vertex)
    
    # return the list of sparse vertices
    return sparse_vertex_list

In [306]:
def sparse_dense_decop(current_graph, eps=0.2):
    current_num_vertex = len(current_graph)
    # check sparse vertices
    current_sparse_vertice = test_sparse_vertex(current_graph, eps=eps)
    # sample from the dense vertices
    dense_subgraph = {vertex: current_graph[vertex] 
                      for vertex in current_graph if vertex not in current_sparse_vertice}
    anchor_vertex_dict = {}
    for this_vertex in dense_subgraph.keys():
        # rejection sampling
        dense_sample_prob = max((5*np.log(current_num_vertex))/dense_subgraph[this_vertex].degree, 0.05)
        if dense_sample_prob>=np.random.uniform(low=0.0, high=1.0):
            anchor_vertex_dict[this_vertex] = dense_subgraph[this_vertex]
    # recursively form almost-cliques
    num_sample = max((int)(5*np.log(current_num_vertex)), 20)
    AC_dict = {}
    # maintain a list of covered vertices
    covered_AC_vertex = []
    for this_anchor_vertex in anchor_vertex_dict.keys():
        if this_anchor_vertex in covered_AC_vertex:
            continue
        AC_dict[this_anchor_vertex] = []
        covered_AC_vertex.append(this_anchor_vertex)
        anchor_neighbor_samples = anchor_vertex_dict[this_anchor_vertex].getRandom(i=num_sample)
        for candidate_vertex in anchor_vertex_dict[this_anchor_vertex]:
            if (candidate_vertex in current_sparse_vertice) or (candidate_vertex in covered_AC_vertex):
                print(current_num_vertex)
                print(len(covered_AC_vertex))
                continue
            cand_neighbor_samples = dense_subgraph[candidate_vertex].getRandom(i=num_sample)
            # test whether their symmetric difference is large enough
            dif1 = np.setdiff1d(anchor_neighbor_samples, cand_neighbor_samples)
            dif2 = np.setdiff1d(cand_neighbor_samples, anchor_neighbor_samples)
            total_diff = len(np.concatenate((dif1, dif2)))
            max_degree_between_two = max(dense_subgraph[this_anchor_vertex].degree, 
                                         dense_subgraph[candidate_vertex].degree)
            if (total_diff<=2*eps*num_sample):
                AC_dict[this_anchor_vertex].append(candidate_vertex)
                covered_AC_vertex.append(candidate_vertex)
            # this line is for debugging purpose -- remove later
            else:
                # it appears that this line has never been entered...
                print('something interesting is happening!')
   
    dense_vertex_list = [v for v in current_graph if v not in current_sparse_vertice]
    
    return current_sparse_vertice, AC_dict

## Read the edges and maintain clustering

In [307]:
adjacency_list, edge_list = create_graph_from_csv("../data/email-Eu-core.csv")

In [308]:
'''
TODO

See if functions are doable

'''

'\nTODO\n\nSee if functions are doable\n\n'

In [305]:
no_edges = len(edge_list)  # No. of edges

prob_del = 0.2      # Probability to delete edge
eps_param = 0.1

current_graph = {}
current_edge_list = []

available_edge_list = np.random.permutation(edge_list).tolist()

stream_length = 2*no_edges

track_update_num = {}
track_update_benckmark = {}

for i in range(stream_length):
    # Insertion
    if available_edge_list: #
        current_edge_list.append(available_edge_list[i])
        u = available_edge_list[i][0]
        v = available_edge_list[i][1]
        if u not in current_graph.keys():
            current_graph[u] = OptList()
        current_graph[u].insert(v)
        if v not in current_graph.keys():
            current_graph[v] = OptList()
        current_graph[v].insert(u)
        available_edge_list.pop(0)
        
        # keep track of the benchmark for the updates
        if u not in track_update_benckmark:
            track_update_benckmark[u] = current_graph[u].degree
        if v not in track_update_benckmark:
            track_update_benckmark[v] = current_graph[v].degree
        # update the tracking of the updates on u and v
        if u not in track_update_num:
            track_update_num[u] = 1
        else:
            track_update_num[u] = track_update_num[u] + 1
            
        if v not in track_update_num:
            track_update_num[v] = 1
        else:
            track_update_num[v] = track_update_num[v] + 1
        
        
        '''
        Code for SDD and PIVOT goes here
        '''
        if (track_update_num[u]>max(100, eps_param*track_update_benckmark[u])):
            # tests
            current_sparse_vertex_list, almost_cliques = sparse_dense_decop(current_graph)
            # clear the number of updates
            track_update_num[u] = 0
            track_update_benckmark = current_graph[u].degree
            all_vertex_list = [v for v in current_graph]
            print('------------------------')
            AC_vertex_list = []
            for anchor_ver in almost_cliques.keys():
                AC_vertex_list.append(anchor_ver)
                for ac_ver in almost_cliques[anchor_ver]:
                    AC_vertex_list.append(ac_ver)
            AC_vertex_list = list(set(AC_vertex_list))
            recovered_vertex = np.concatenate((AC_vertex_list, current_sparse_vertex_list))
            print(np.setdiff1d(returned_dense_vertex,AC_vertex_list))
            print('===============================')
            print(np.setdiff1d(all_vertex_list,recovered_vertex))
            print('******************************')
            print('The number of almost-cliques is ', len(almost_cliques))
            break
        
    else:
        # We have run out of edges to insert
        edge_to_delete = np.random.choice(current_edge_list)
        
        u = edge_to_delete[0]
        v = edge_to_delete[1]
        current_graph[u].remove(v)
        current_graph[v].remove(u)
        
        available_edge_list.extend(edge_to_delete)
        current_edge_list.remove(edge_to_delete)
    
        
    
    
#     if np.random.binomial(1,prob_del):
#         # Deletion
#         print(current_edge_list)
#         edge_to_delete = np.random.choice(current_edge_list)
        
#         u = edge_to_delete[0]
#         v = edge_to_delete[1]
#         current_graph[u].remove(v)
#         current_graph[v].remove(u)
        
#         available_edge_list.extend(edge_to_delete)
#         current_edge_list.remove(edge_to_delete)
        



835
27
835
58
835
61
835
67
835
67
835
93
835
96
835
106
835
109
835
119
835
119
835
119
835
126
835
129
835
130
835
136
835
139
835
140
835
143
835
147
835
147
835
152
835
152
835
157
835
157
835
158
835
160
835
164
835
165
835
165
835
166
835
166
835
166
835
168
835
172
835
175
835
176
835
185
835
188
835
193
835
193
835
196
835
200
835
201
835
201
835
209
835
211
835
214
835
219
835
222
835
223
835
223
835
227
835
229
835
231
835
231
835
231
835
231
835
231
835
231
835
231
835
232
835
232
835
235
835
237
835
238
835
239
835
242
835
243
835
243
835
243
835
243
835
244
835
245
835
245
835
245
835
245
835
245
835
245
835
246
835
252
835
252
835
253
835
255
835
255
835
255
835
255
835
255
835
256
835
257
835
258
835
259
835
265
835
265
835
267
835
267
835
267
835
268
835
268
835
268
835
269
835
272
835
272
835
272
835
272
835
274
835
275
835
277
835
277
835
278
835
278
835
278
835
278
835
280
835
280
835
283
835
283
835
283
835
283
835
284
835
284
835
284
835
284
835
284
835
284
835
285

835
719
835
719
835
719
835
719
835
720
835
721
835
721
835
722
835
722
835
722
835
723
835
723
835
723
835
724
835
724
835
725
835
725
835
726
835
726
835
726
835
726
835
727
835
727
835
727
835
727
835
727
835
728
835
729
835
729
835
730
835
730
835
730
835
731
835
731
835
732
835
732
835
733
835
734
835
735
835
735
835
736
835
736
835
736
835
736
835
736
835
736
835
737
835
737
835
740
835
741
835
742
835
742
835
742
835
742
835
742
835
743
835
744
835
745
835
746
835
747
835
747
835
747
835
748
835
749
835
749
835
749
835
750
835
750
835
750
835
751
835
752
835
753
835
753
835
753
835
753
835
753
835
753
835
753
835
754
835
755
835
755
835
755
835
756
835
756
835
757
835
757
835
758
835
759
835
760
835
760
835
760
835
761
835
762
835
762
835
762
835
762
835
762
835
762
835
762
835
763
835
763
835
764
835
764
835
765
835
766
835
766
835
766
835
767
835
768
835
768
835
769
835
770
835
770
835
771
835
772
835
773
835
773
835
774
835
774
835
774
835
775
835
775
835
776
835
776
835
776
