# Module

In [1]:
# From the repository
from util import *
from read_data import *
data_names = list(name2file_name.keys())
print(data_names)

# Basic modules
import os
import glob
import networkx as nx
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from graph_tool.all import *

#name = "wiod2016"
name = data_names[3]
print("We are going to use: " + name)
# Enable original_format to use the format as originally provided
data_dict = get_data(name,original_format=False)
data_dict.keys()
#df_nodes = data_dict["df_nodes"]
df_edges = data_dict["df_edges"]

df_edges = df_edges[["source","target"]].drop_duplicates()
cond = df_edges["source"] != df_edges["target"]
df_edges = df_edges.loc[cond].copy()

['blogcatalog', 'homosapiens', 'wikipos', 'enron', 'unvote', 'untrade', 'uslegis_net', 'uslegis_net_small_dyn', 'uslegis_net_dyn', 'uslegis_hyp_dyn', 'contacts', 'dawn_net', 'dawn_hyp', 'ndc_net', 'ndc_hyp', 'coauth_dblp_net', 'coauth_dblp_hyp', 'wiod2016', 'wiod2013', 'wiodlong', 'eth', 'bitcoinalpha', 'bitcoinotc', 'uscourt']
We are going to use: enron


# Create Grah Object

In [2]:
g = Graph()
uni_nodes = list(set(df_edges["source"]))
uni_nodes.extend(list(set(df_edges["target"])))
uni_nodes = list(set(uni_nodes))  

node2index = dict()
for i in range(len(uni_nodes)):
    node2index.update({uni_nodes[i]:i})
    

g.add_vertex(len(uni_nodes))
weight = g.new_edge_property("double")


for i in range(len(df_edges)):
    
    source_index = node2index[df_edges["source"].iloc[i]]
    target_index = node2index[df_edges["target"].iloc[i]]
    e = g.add_edge(g.vertex(source_index),g.vertex(target_index))
    weight[e] = 1

g.edge_properties["weight"] = weight

# Approximation but it works well

In [3]:
%%time
state_ndc = minimize_nested_blockmodel_dl(g, state_args=dict(deg_corr=False))
state_dc  = minimize_nested_blockmodel_dl(g, state_args=dict(deg_corr=True))

print("Non-degree-corrected DL:\t", state_ndc.entropy())
print("Degree-corrected DL:\t", state_dc.entropy())

Non-degree-corrected DL:	 7663.2652567340165
Degree-corrected DL:	 7678.185149344292
CPU times: user 2min 50s, sys: 554 ms, total: 2min 50s
Wall time: 6.97 s


# If you want to refine your result

In [4]:
%%time
if 1 == 0:
    state_ndc = NestedBlockState(g,state_args=dict(deg_corr=False))
    state_dc = NestedBlockState(g,state_args=dict(deg_corr=True))


    # Now we run 1000 sweeps of the MCMC

    dS, nmoves = 0, 0
    for i in range(100):
        ret = state_ndc.multiflip_mcmc_sweep(niter=10)
        dS += ret[0]
        nmoves += ret[1]
    print("Change in description length:", dS)
    print("Number of accepted vertex moves:", nmoves)


    dS, nmoves = 0, 0
    for i in range(100):
        ret = state_dc.multiflip_mcmc_sweep(niter=10)
        dS += ret[0]
        nmoves += ret[1]
    print("Change in description length:", dS)
    print("Number of accepted vertex moves:", nmoves)


    print("Non-degree-corrected DL:\t", state_ndc.entropy())
    print("Degree-corrected DL:\t", state_dc.entropy())

CPU times: user 5 µs, sys: 0 ns, total: 5 µs
Wall time: 11.7 µs


# View Result

In [5]:
#state_ndc.print_summary()
levels = state_ndc.get_levels()
num_levels = len(levels)
for s in levels:
    print(s)
    if s.get_N() == 1:
        break

<BlockState object with 14 blocks (14 nonempty), degree-corrected, for graph <Graph object, directed, with 182 vertices and 3007 edges, 1 internal edge property, at 0x14e17c22e250>, at 0x14e17c245d30>
<BlockState object with 5 blocks (5 nonempty), for graph <Graph object, directed, with 14 vertices and 148 edges, at 0x14e17c245220>, at 0x14e17c236a30>
<BlockState object with 3 blocks (3 nonempty), for graph <Graph object, directed, with 5 vertices and 24 edges, at 0x14e17c1d2d00>, at 0x14e17c23bcd0>
<BlockState object with 1 blocks (1 nonempty), for graph <Graph object, directed, with 3 vertices and 9 edges, at 0x14e17c1fc730>, at 0x14e17c1ee4f0>


In [6]:
node_id = 4
r = node_id
for i in range(num_levels):
    r = levels[i].get_blocks()[r]    # group membership of node 46 in level 0
    print("Group membership of node " + str(node_id) + " in level " + str(i+1) + ": " + str(r))

Group membership of node 4 in level 1: 3
Group membership of node 4 in level 2: 2
Group membership of node 4 in level 3: 2
Group membership of node 4 in level 4: 0


In [7]:
df_nodes = pd.DataFrame(node2index.items())
df_nodes.columns = ["Id","index"]
for choose_level in range(num_levels):
    
    b = levels[choose_level].get_blocks()

    out = []
    for i in range(len(df_nodes)):
        ind = df_nodes["index"].iloc[i]
        out.append(b[ind])

    df_nodes["sbm_" + str(choose_level)] = out

In [8]:
df_nodes

Unnamed: 0,Id,index,sbm_0,sbm_1,sbm_2,sbm_3
0,1,0,0,0,0,0
1,2,1,1,1,1,0
2,3,2,1,1,2,0
3,4,3,2,2,0,0
4,5,4,3,2,0,0
...,...,...,...,...,...,...
177,180,177,1,0,0,0
178,181,178,1,0,0,0
179,182,179,4,0,0,0
180,183,180,2,0,0,0


In [9]:
df_nodes.to_csv("./tables/df_nested_sbm.csv",index=False)