In [None]:
%load_ext autoreload
%autoreload 2
import networkx as nx
from pysad.pysad.NodeInfo import NodeInfo, SynthNodeInfo
import pandas as pd

In [None]:
#import pysad
#import pysad.utils
import pysad.pysad.collect as collect
import pysad.pysad.synthesis as synthesis

# Creating the graph

In [None]:
# Directory the exported graph file is written to, and the name stored on the graph.
data_path, graphname = '../synthesis/', 'Barabasi-Albert'

In [None]:
# Number of nodes in the synthetic graph and edges attached from each new node.
nodes, edges_per_node = 1000, 2
G = nx.barabasi_albert_graph(n=nodes, m=edges_per_node)
G.graph['name'] = graphname
# Wrap the graph in the handle object that the collection routine receives below.
graph_handle = synthesis.SyntheticNetwork(G)
# NOTE(review): presumably the minimal degree a node needs to be collected -- confirm in pysad.
graph_handle.rules['min_degree'] = 3

In [None]:
# Run one collection starting from node 20; the four results are reused by the cells below.
exploration_depth = 5
total_node_list, total_nodes_df, total_edges_df, info_acc = collect.spiky_ball(
    [20],
    graph_handle,
    exploration_depth=exploration_depth,
    mode='constant',
    random_subset_size=2,
    spread_type='broad',
    node_acc=SynthNodeInfo(),
)

In [None]:
# Report how many nodes the collection above returned.
print('Total number of nodes in the spiky ball:',len(total_node_list))

## The graph

In [None]:
import pysad.pysad.graph as graph

In [None]:
# Pass the collected node and edge tables through the synthesis reshaping helpers.
node_df = synthesis.reshape_node_data(total_nodes_df)
edge_df = synthesis.reshape_edge_data(total_edges_df)

In [None]:
# Thresholds handed to graph_from_edgeslist (MIN_WEIGHT) and reduce_graph (MIN_DEGREE) below.
MIN_WEIGHT = 0
MIN_DEGREE = 1 # Minimal number of connections in the graph

# Write spiky ball info on the graph
# NOTE(review): G is rebound to the helpers' return value, so re-running this cell feeds
# the already annotated graph back in -- confirm the helpers are safe to apply twice.
G = graph.add_edges_attributes(G,edge_df)
G = graph.add_node_attributes(G,node_df)

# create the spiky ball graph (option)
# Gsp is built from the collected edge list only, then annotated, reduced with MIN_DEGREE
# and finally passed through handle_spikyball_neighbors with remove=True.
Gsp = graph.graph_from_edgeslist(edge_df, MIN_WEIGHT)
Gsp = graph.add_edges_attributes(Gsp,edge_df)
Gsp = graph.add_node_attributes(Gsp,node_df)
Gsp = graph.reduce_graph(Gsp,MIN_DEGREE)
Gsp = graph.handle_spikyball_neighbors(Gsp,graph_handle,remove=True)

In [None]:
# Export the annotated graph G to '<data_path><graphname>.gexf'.
import networkx as nx
import json
# Assemble the target file name from the configured directory and graph name.
graphfilename = ''.join((data_path, graphname, '.gexf'))
nx.write_gexf(G, graphfilename)
print('Wrote',graphfilename)


## Statistics of propagation

In [None]:
# Diameter of G; shown as the cell output.
nx.diameter(G)

In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
def ball_test(graph_handle,params):
    """Repeat the collection from one start node and count how often each node appears.

    Parameters
    ----------
    graph_handle
        Handle forwarded unchanged to ``collect.spiky_ball``.
    params : dict
        Must provide ``'nb_iter'`` (number of repetitions), ``'initial_node'``
        (start node), ``'exploration_depth'`` and ``'random_subset_size'``;
        the last two are forwarded to ``collect.spiky_ball``.

    Returns
    -------
    dict
        Maps each collected node to the number of runs whose node list
        contained it. The start node is excluded.
    """
    node_dic = {}
    for _ in range(params['nb_iter']):
        # Only the first return value (the list of collected nodes) is needed here.
        visited_nodes = collect.spiky_ball(
            [params['initial_node']],
            graph_handle,
            exploration_depth=params['exploration_depth'],
            mode='percent',
            random_subset_size=params['random_subset_size'],
            spread_type='sharp',
            node_acc=SynthNodeInfo(),
        )[0]
        for node in visited_nodes:
            node_dic[node] = node_dic.get(node, 0) + 1

    # Drop the start node from the statistics. pop() with a default avoids a
    # KeyError when no run recorded it (for instance when nb_iter is 0).
    node_dic.pop(params['initial_node'], None)
    return node_dic

In [None]:
# First configuration: 4 runs of depth 10 from node 20 with random_subset_size 0.1.
params = dict(nb_iter=4, exploration_depth=10, initial_node=20, random_subset_size=0.1)
node_dic1 = ball_test(graph_handle, params)
# Map every collected node to its degree in G.
degree_dic1 = {node: degree for node, degree in G.degree(node_dic1.keys())}

In [None]:
# Second configuration: 4 runs of depth 4 from node 20 with random_subset_size 1.
# This overwrites `params` from the previous cell.
params = dict(nb_iter=4, exploration_depth=4, initial_node=20, random_subset_size=1)
node_dic2 = ball_test(graph_handle, params)
# Map every collected node to its degree in G.
degree_dic2 = {node: degree for node, degree in G.degree(node_dic2.keys())}

## Degree distribution

In [None]:
# Averaged over iterations

def expand_occurences(node_dic,degree_dic):
    """Repeat each node's degree once per recorded occurrence of the node.

    Parameters
    ----------
    node_dic : dict
        Maps a node to the number of times it occurred.
    degree_dic : dict
        Maps a node to its degree.

    Returns
    -------
    dict
        Keys are consecutive integers starting at 0; values are the degrees,
        each one appearing as many times as its node occurred.
    """
    repeated_degrees = [
        degree_dic[node]
        for node, occurrences in node_dic.items()
        for _ in range(occurrences)
    ]
    return dict(enumerate(repeated_degrees))

# Expand both visit-count dictionaries: a node collected k times contributes its degree k times.
av_dic1 = expand_occurences(node_dic1,degree_dic1)
av_dic2 = expand_occurences(node_dic2,degree_dic2)


### Log-binning, adapted from this Stack Overflow answer: https://stackoverflow.com/questions/16489655/plotting-log-binned-network-degree-distributions

In [None]:
def drop_zeros(a_list):
    """Return a list holding only the strictly positive items of ``a_list``."""
    return list(filter(lambda item: item > 0, a_list))

def log_binning(counter_dict,bin_count=35):
    """Average a ``{key: value}`` mapping over logarithmically spaced bins of the keys.

    Parameters
    ----------
    counter_dict : dict
        Numeric keys mapped to numeric values (here: degree -> number of
        occurrences of that degree).
    bin_count : int, optional
        Number of bin edges generated with ``np.logspace`` (``bin_count - 1`` bins).

    Returns
    -------
    tuple of numpy.ndarray
        ``(bin_means_x, bin_means_y)``: the mean key and the mean value of the
        entries falling into each bin. A bin without any key evaluates to
        ``nan`` (0 divided by 0).
    """
    keys = list(counter_dict.keys())
    values = list(counter_dict.values())

    # Upper edge exponent: the larger of log10(max key) and log10(max value).
    upper_exp = max([np.log10(max(keys)), np.log10(max(values))])
    # Lower edge exponent: smallest strictly positive key, since log10(0) is undefined.
    lower_exp = np.log10(min(drop_zeros(keys)))
    edges = np.logspace(lower_exp, upper_exp, num=bin_count)

    # Based off of: http://stackoverflow.com/questions/6163334/binning-data-in-python-with-scipy-numpy
    keys_per_bin = np.histogram(keys, edges)[0]
    value_sums = np.histogram(keys, edges, weights=values)[0]
    key_sums = np.histogram(keys, edges, weights=keys)[0]

    return key_sums / keys_per_bin, value_sums / keys_per_bin

In [None]:
from collections import Counter
def degree_distribution(degree_dic, mode='log', density=True):
    """Compute the distribution of the values stored in ``degree_dic``.

    Parameters
    ----------
    degree_dic : dict
        Maps an arbitrary key to a degree; only the values are used.
    mode : {'log', 'lin'}, optional
        ``'lin'`` returns one point per distinct degree; ``'log'`` averages the
        counts over logarithmic bins via ``log_binning`` with 20 bin edges.
    density : bool, optional
        When true, every count is divided by ``len(degree_dic)``.

    Returns
    -------
    tuple
        ``(dd_x, dd_y)``: the degrees (or per-bin mean degrees) and the
        matching counts (or densities).

    Raises
    ------
    ValueError
        If ``mode`` is neither ``'lin'`` nor ``'log'``.
    """
    # Number of entries that share each degree value.
    count_dic = dict(Counter(degree_dic.values()))

    if mode == 'lin':    # linear bins
        dd_x, dd_y = list(count_dic.keys()), list(count_dic.values())
    elif mode == 'log':  # log bins
        dd_x, dd_y = log_binning(count_dic, 20)
    else:
        # Raising a bare string is itself a TypeError in Python 3; raise a real exception.
        raise ValueError('Unknown mode, use mode="lin" or mode="log".')

    if density:  # normalize
        dd_y = [v / len(degree_dic) for v in dd_y]
    return dd_x, dd_y

In [None]:
# Log-binned degree densities of both collections on log-log axes.
plt.xscale('log')
plt.yscale('log')
for av_dic, colour, series_label in ((av_dic1, 'r', 'spiky'), (av_dic2, 'b', 'snowy')):
    log_x, log_y = degree_distribution(av_dic, mode='log', density=True)
    plt.scatter(log_x, log_y, c=colour, marker='.', s=100, label=series_label)
plt.xlabel('Degree')
plt.ylabel('Density')
plt.title('Degree distribution of spiky and snow balls')
plt.legend()
plt.show()

## Degree stats

In [None]:
# Group the degrees of the nodes in node_dic1 by how many runs collected them.
count_dic = {}
# Upper bound (exclusive) on the visit counts scanned below; later plotting cells also read it.
nb_iter = 100 # ?
for it in range(nb_iter):
    # `degree_list` stays defined after the loop and is appended to by a later cell.
    degree_list = [degree_dic1[node] for node, val in node_dic1.items() if val == it]
    if degree_list:
        count_dic[it] = degree_list

In [None]:
# Row vectors of shape (1, match_size): observed visit counts, and the mean and
# standard deviation of the degrees recorded for each of them.
match_size = len(count_dic)
x = np.array(list(count_dic.keys()), dtype=float).reshape(1, match_size)
x_mean = np.array(
    [np.mean(np.array(group)) for group in count_dic.values()], dtype=float
).reshape(1, match_size)
x_std = np.array(
    [np.std(np.array(group)) for group in count_dic.values()], dtype=float
).reshape(1, match_size)

In [None]:
# Display the visit counts that have at least one node (array of shape (1, match_size)).
x

In [None]:
import matplotlib.pyplot as plt
# The bundled seaborn styles were renamed to 'seaborn-v0_8-*' in matplotlib 3.6 and the
# old names were removed later, so pick whichever spelling this installation provides.
for _style_name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid'):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break

In [None]:
# Mean degree (markers) with its standard deviation (bars) for every observed visit count.
plt.errorbar(x[0], x_mean[0], yerr=x_std[0], marker='^', linestyle='None')
plt.yscale('log')
plt.xlabel('Nb of visits')
plt.ylabel('Degree (mean and deviation)')
plt.show()

In [None]:
# One sample per visit count from 1 to the largest observed count; a count without
# any node is represented by a single 0 so that box positions match the counts.
data = [
    count_dic[visits] if visits in count_dic else np.array([0])
    for visits in range(1, max(count_dic.keys()) + 1)
]
fig7, ax7 = plt.subplots()
ax7.boxplot(data)
ax7.set_title('Multiple Samples with Different sizes')
ax7.set_ylim(0, 100)
plt.show()

In [None]:
import seaborn as sns

In [None]:
# Same per-visit-count degree samples drawn with seaborn, y axis limited to 0..90.
sns.boxplot(data=data)
plt.ylim(0,90)

## Visits vs. degree

In [None]:
def degrees_visits(node_dic,degree_dic):
    """Align visit counts and degrees of the nodes in ``node_dic`` as two row vectors.

    Parameters
    ----------
    node_dic : dict
        Maps a node to its number of visits.
    degree_dic : dict
        Maps a node to its degree; must contain every key of ``node_dic``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(degrees, visits)``, both float arrays of shape ``(1, len(node_dic))``
        following the iteration order of ``node_dic``.
    """
    nodes = list(node_dic)
    row_shape = (1, len(nodes))
    visits = np.array([node_dic[node] for node in nodes], dtype=float).reshape(row_shape)
    degrees = np.array([degree_dic[node] for node in nodes], dtype=float).reshape(row_shape)
    return degrees, visits

In [None]:
# Aligned (degrees, visits) row vectors for the two collections.
degrees1,visits1 = degrees_visits(node_dic1, degree_dic1)
degrees2,visits2 = degrees_visits(node_dic2, degree_dic2)

In [None]:
# Normalise by the number of runs that produced the counts (params['nb_iter']).
# The separate `nb_iter` variable is only a loop bound of the "Degree stats" section
# and does not equal the number of runs.
plt.scatter(degrees2, visits2 / params['nb_iter'])
plt.xlabel('Degrees')
plt.ylabel('Ratio of visits')
plt.show()

In [None]:
# Normalise by the number of runs rather than by the unrelated `nb_iter` loop bound.
# `params` holds the second configuration at this point; it uses the same 'nb_iter'
# as the configuration that produced visits1.
plt.scatter(degrees1, visits1 / params['nb_iter'])
plt.xscale('log')
plt.xlabel('Degrees')
plt.ylabel('Ratio of visits')
plt.show()

## Appearance with respect to degree

In [None]:
# Start from a fresh list under the name the loop and the following cells actually use,
# so this cell no longer relies on a `degree_list` left over from an earlier cell and
# gives the same result when re-run.
degree_list = []
for node in node_dic1:
    # NOTE(review): each degree is repeated degree-many times; if the intent is one entry
    # per appearance of the node, the bound should be node_dic1[node] -- confirm.
    for occur in range(degree_dic1[node]):
        degree_list.append(degree_dic1[node])

In [None]:
# Fifty logarithmically spaced bin edges between 10**1 and 10**2.
bins = np.logspace(1, 2, num=50)
# Width of every bin (difference between consecutive edges).
widths = np.diff(bins)

plt.hist(degree_list, bins=bins)
plt.xscale('log')
plt.yscale('log')
plt.xlabel('Degree')
plt.show()

In [None]:
# Display the complete expanded degree list built above.
degree_list