# Encontro 13: Medidas de Centralidade

Importando a biblioteca:

In [20]:
import sys
sys.path.append('..')

from random import choice
from itertools import permutations

import pandas as pd
import networkx as nx

import socnet as sn

Configurando a biblioteca:

In [21]:
# Node drawing settings for socnet (the colour is presumably an RGB triple,
# in which case (255, 255, 255) is white — TODO confirm against socnet).
sn.node_size = 10
sn.node_color = (255, 255, 255)

# Edge drawing settings (same assumption about the colour triple).
sn.edge_width = 1
sn.edge_color = (192, 192, 192)

# Placement of node labels relative to their nodes.
sn.node_label_position = 'top center'

Carregando rede de casamentos entre famílias de Florença durante a Renascença.

J. F. Padgett e C. K. Ansell. *Robust action and the rise of the Medici, 1400–1434.* American Journal of
Sociology 98, págs. 1259-1319, 1993.

In [22]:
# Load the marriage network from the GML file. has_pos=True presumably tells
# socnet to use node positions stored in the file — TODO confirm.
# `g` is a module-level graph that later cells (e.g. simul) read directly.
g = sn.load_graph('Renaissance.gml', has_pos=True)

# Draw the graph; nlab=True presumably enables node labels — TODO confirm.
sn.show_graph(g, nlab=True)

Função que registra, em cada nó, seus sucessores em geodésicas de $s$ a $t$.

In [23]:
def set_geodesic_successors(g, s, t):
    """Record, on every node, its successors along geodesics from s to t.

    The 'geodesic_successors' attribute of every node is reset to an empty
    set. Then, for each shortest path from s to t enumerated by
    nx.all_shortest_paths, every node on the path gets the node that
    immediately follows it added to its set.
    """
    for node in g.nodes:
        g.nodes[node]['geodesic_successors'] = set()

    for path in nx.all_shortest_paths(g, s, t):
        # Pair each node of the path with the node right after it.
        for current, following in zip(path, path[1:]):
            g.nodes[current]['geodesic_successors'].add(following)

Funções que representam uma escolha aleatória de sucessor para diferentes tipos de trajetórias.

In [24]:
# Pense que o atributo 'passages' abaixo indica quantas
# vezes um fluxo já passou por um nó ou por uma aresta.

def random_geodesic_successor(g, n):
    """Pick at random one of the geodesic successors recorded on node n.

    Reads the node's 'geodesic_successors' attribute. When that collection
    is empty, choice raises IndexError.
    """
    candidates = list(g.nodes[n]['geodesic_successors'])
    return choice(candidates)

def random_path_successor(g, n):
    """Pick at random a neighbor of n whose node 'passages' counter is 0.

    Restricting the choice to nodes not yet passed through keeps the
    trajectory a path (no repeated nodes). When no such neighbor exists,
    choice raises IndexError.
    """
    unvisited = []
    for neighbor in g.neighbors(n):
        if g.nodes[neighbor]['passages'] == 0:
            unvisited.append(neighbor)
    return choice(unvisited)

def random_trail_successor(g, n):
    """Pick at random a neighbor of n reachable through an unused edge.

    An edge counts as unused while its 'passages' counter is 0, which keeps
    the trajectory a trail (no repeated edges). When every incident edge has
    already been used, choice raises IndexError.
    """
    unused = []
    for neighbor in g.neighbors(n):
        if g.edges[n, neighbor]['passages'] == 0:
            unused.append(neighbor)
    return choice(unused)

def random_walk_successor(g, n):
    """Pick at random any neighbor of n, with no restriction on revisits.

    This is the successor rule for a walk. When n has no neighbors, choice
    raises IndexError.
    """
    neighbors = list(g.neighbors(n))
    return choice(neighbors)

Função que faz uma simulação de fluxo de $s$ a $t$, que pode ou não ser bem-sucedida.

In [25]:
def simulate_single_flow(g, s, t, func_traj, difusao):
    """Run one flow simulation from s towards t and count its steps.

    Parameters:
        g: graph whose nodes and edges receive the 'passages' counters and
            whose nodes receive the 'owner' flag (all reset on every call).
        s: source node; starts as the only owner, with 'passages' equal to 1.
        t: target node; the simulation succeeds once a step reaches it.
        func_traj: callable ``func_traj(g, n)`` returning the successor
            chosen for owner n, or raising IndexError when n has no valid
            successor.
        difusao: value written to an owner's 'owner' flag after its turn.
            False means the node gives the resource away (transfer); True
            means it keeps a copy (duplication).

    Returns:
        The number of steps taken when t is reached, or -1 when a step
        reaches no node at all (the attempt failed).
    """
    # Reset the per-node state: no passages and no owners.
    for n in g.nodes:
        g.nodes[n]['passages'] = 0
        g.nodes[n]['owner'] = False

    # The flow starts at s, which counts as already passed through once.
    g.nodes[s]['passages'] = 1
    g.nodes[s]['owner'] = True

    # Reset the per-edge passage counters.
    for n, m in g.edges:
        g.edges[n, m]['passages'] = 0

    # Simulate the flow, counting the total number of steps.
    steps = 0

    while True:
        # Nodes that the flow manages to reach during the current step.
        reached = set()

        # Snapshot the current owners before any flag changes in this step.
        owners = [n for n in g.nodes if g.nodes[n]['owner']]

        for n in owners:
            # The owner keeps or loses the resource according to difusao.
            # This happens even when it turns out to have no successor.
            g.nodes[n]['owner'] = difusao

            try:
                # Randomly choose one of the successors.
                m = func_traj(g, n)
            except IndexError:
                # No valid successor: this owner contributes nothing.
                continue

            # Count the passage through the node and through the edge.
            g.nodes[m]['passages'] += 1
            g.edges[n, m]['passages'] += 1

            # Register that this node was reached.
            reached.add(m)

        # Every reached node becomes an owner of the resource.
        for n in reached:
            g.nodes[n]['owner'] = True

        # This concludes the current step of the simulation.
        steps += 1

        # Reaching t ends the simulation successfully.
        if t in reached:
            return steps

        # Reaching nobody ends the simulation unsuccessfully.
        if not reached:
            return -1

Função que faz simulações de fluxo de $s$ a $t$ até uma ser bem-sucedida.

In [26]:
def simulate_successful_flow(g, s, t, func_traj, difusao):
    """Repeat single-flow simulations from s to t until one succeeds.

    The geodesic successors for the pair (s, t) are recorded once up front.
    simulate_single_flow is then called repeatedly for as long as it
    reports failure (-1). Returns the step count of the first success.
    """
    set_geodesic_successors(g, s, t)

    steps = simulate_single_flow(g, s, t, func_traj, difusao)
    while steps == -1:
        steps = simulate_single_flow(g, s, t, func_traj, difusao)
    return steps

Função que faz simulações de fluxo para todo $s$ e $t$ possíveis, e tira disso um *closeness simulado* e um *betweenness simulado*.

In [27]:
def simulate_all_flows(g, func_traj, difusao):
    """Simulate a successful flow for every ordered pair and derive centralities.

    For every ordered pair (s, t) of distinct nodes, one successful flow is
    simulated. The step count is added to the 'closeness' total of s, and
    the 'passages' counter of every node other than s and t is added to that
    node's 'betweenness' total. Both totals are then normalized so they can
    be compared with the analytic centralities.

    Parameters:
        g: graph whose nodes receive the 'closeness' and 'betweenness'
            attributes (overwritten on every call).
        func_traj: successor-choice function forwarded to the simulation.
        difusao: diffusion flag forwarded to the simulation.

    On graphs with fewer than three nodes the normalization denominators
    are zero; the affected attribute is set to 0.0 instead of raising
    ZeroDivisionError.
    """
    for n in g.nodes:
        g.nodes[n]['closeness'] = 0
        g.nodes[n]['betweenness'] = 0

    for s, t in permutations(g.nodes, 2):
        steps = simulate_successful_flow(g, s, t, func_traj, difusao)

        g.nodes[s]['closeness'] += steps
        for n in g.nodes:
            if n != s and n != t:
                g.nodes[n]['betweenness'] += g.nodes[n]['passages']

    # Normalizations needed to compare with the analytic results.
    num_nodes = g.number_of_nodes()
    # Number of ordered (s, t) pairs that exclude a given node.
    pair_count = (num_nodes - 1) * (num_nodes - 2)

    for n in g.nodes:
        total_steps = g.nodes[n]['closeness']
        if total_steps:
            g.nodes[n]['closeness'] = (num_nodes - 1) / total_steps
        else:
            # Single-node graph: no flows were simulated from this node.
            g.nodes[n]['closeness'] = 0.0

        if pair_count:
            g.nodes[n]['betweenness'] /= pair_count
        else:
            # Fewer than three nodes: no node can be an intermediary.
            g.nodes[n]['betweenness'] = 0.0

Média de *closeness simulado* e *betweenness simulado* para muitas repetições da simulação acima.

In [28]:
# TIMES = 1000
# func_traj = random_geodesic_successor
# difusao = False

def simul(TIMES, func_traj, difusao, coluna):
    """Average the simulated centralities over TIMES runs and tabulate them.

    This function works on the module-level graph ``g`` and reads the
    module-level dictionaries ``cc`` and ``bc`` (analytic closeness and
    betweenness), so all three must exist before it is called.

    Parameters:
        TIMES: number of repetitions of simulate_all_flows. It is used as a
            divisor, so 0 raises ZeroDivisionError.
        func_traj: successor-choice function forwarded to the simulation.
        difusao: diffusion flag forwarded to the simulation.
        coluna: name of the data frame column to sort by.

    Returns:
        A pandas DataFrame with one row per node (label, simulated and
        analytic closeness, simulated and analytic betweenness), sorted by
        ``coluna`` in descending order.
    """
    for n in g.nodes:
        g.nodes[n]['mean_closeness'] = 0
        g.nodes[n]['mean_betweenness'] = 0

    for _ in range(TIMES):
        simulate_all_flows(g, func_traj, difusao)

        # Accumulate this run's results; they are overwritten by the next run.
        for n in g.nodes:
            g.nodes[n]['mean_closeness'] += g.nodes[n]['closeness']
            g.nodes[n]['mean_betweenness'] += g.nodes[n]['betweenness']

    for n in g.nodes:
        g.nodes[n]['mean_closeness'] /= TIMES
        g.nodes[n]['mean_betweenness'] /= TIMES

    df = pd.DataFrame({
        'família': [g.nodes[n]['label'] for n in g.nodes],
        'closeness simulado': [g.nodes[n]['mean_closeness'] for n in g.nodes],
        'closeness analítico': [cc[n] for n in g.nodes],
        'betweenness simulado': [g.nodes[n]['mean_betweenness'] for n in g.nodes],
        'betweenness analítico': [bc[n] for n in g.nodes],
    })
    return df.sort_values(coluna, ascending=False)

Cálculo de *closeness* e *betweenness* a partir das funções prontas da NetworkX, para comparação.

In [29]:
# Analytic closeness centrality of each node, computed by NetworkX.
# simul() reads this module-level name when building its data frame.
cc = nx.closeness_centrality(g)

# Analytic betweenness centrality of each node, computed by NetworkX.
# simul() reads this module-level name when building its data frame.
bc = nx.betweenness_centrality(g)

A construção do data frame, só para comparar mais facilmente, é feita dentro da função `simul` definida acima.

E agora, vamos pensar um pouco...

* Onde você precisa mudar o código para usar uma *trajetória* que não seja a *geodésica*? (caminho, trilha, passeio)

* Onde você precisa mudar o código para usar uma *difusão* que não seja a *transferência*? (duplicação)

Considere então a seguinte **hipótese**:

>Quando consideramos outros tipos de trajetória e outros tipos de difusão, os nós com maior *closeness simulado* e *betweenness simulado* não são necessariamente os nós com maior *closeness* e *betweenness* segundo as fórmulas clássicas (que correspondem ao uso de geodésica e transferência na simulação).

Queremos:

1. Operacionalização e teste dessa hipótese. (Objetivo 3)
2. Interpretação dos resultados na linguagem de Análise de Redes Sociais. (Objetivo 4)

Um *feedback* da atividade sobre *coreness no Jazz* será dado em breve, para vocês terem uma melhor referência do item 2.

In [30]:
# Geodesic trajectories with transfer (difusao=False): 100 repetitions,
# rows sorted by simulated closeness.
simul(100, random_geodesic_successor, False, "closeness simulado")

Unnamed: 0,betweenness analítico,betweenness simulado,closeness analítico,closeness simulado,família
6,0.521978,0.522527,0.56,0.56,medici
2,0.212454,0.210385,0.482759,0.482759,albizzi
7,0.091575,0.09022,0.482759,0.482759,tornabuon
9,0.086081,0.090659,0.482759,0.482759,ridolfi
3,0.260073,0.255714,0.466667,0.466667,guadagni
13,0.115385,0.115,0.4375,0.4375,barbadori
11,0.075092,0.078681,0.424242,0.424242,strozzi
8,0.120879,0.120989,0.4,0.4,bischeri
5,0.142857,0.142857,0.388889,0.388889,salviati
14,0.087912,0.087033,0.388889,0.388889,castellan


In [31]:
# Geodesic trajectories with duplication (difusao=True): 100 repetitions,
# rows sorted by simulated closeness.
simul(100, random_geodesic_successor, True,"closeness simulado")

Unnamed: 0,betweenness analítico,betweenness simulado,closeness analítico,closeness simulado,família
6,0.521978,1.386044,0.56,0.56,medici
2,0.212454,0.573681,0.482759,0.482759,albizzi
7,0.091575,0.243791,0.482759,0.482759,tornabuon
9,0.086081,0.247473,0.482759,0.482759,ridolfi
3,0.260073,0.69033,0.466667,0.466667,guadagni
13,0.115385,0.317363,0.4375,0.4375,barbadori
11,0.075092,0.206044,0.424242,0.424242,strozzi
8,0.120879,0.313956,0.4,0.4,bischeri
5,0.142857,0.406593,0.388889,0.388889,salviati
14,0.087912,0.231703,0.388889,0.388889,castellan


In [32]:
# Path trajectories (no repeated nodes) with transfer (difusao=False):
# 100 repetitions, rows sorted by simulated closeness.
simul(100, random_path_successor, False,"closeness simulado")

Unnamed: 0,betweenness analítico,betweenness simulado,closeness analítico,closeness simulado,família
6,0.521978,0.628846,0.56,0.305928,medici
3,0.260073,0.40033,0.466667,0.267572,guadagni
9,0.086081,0.363901,0.482759,0.259995,ridolfi
2,0.212454,0.226978,0.482759,0.256858,albizzi
14,0.087912,0.383901,0.388889,0.256628,castellan
7,0.091575,0.333132,0.482759,0.256323,tornabuon
5,0.142857,0.142857,0.388889,0.246463,salviati
13,0.115385,0.244066,0.4375,0.241467,barbadori
10,0.0,0.0,0.368421,0.240796,acciaiuol
8,0.120879,0.367857,0.4,0.240239,bischeri


In [33]:
# Path trajectories (no repeated nodes) with duplication (difusao=True):
# 100 repetitions, rows sorted by simulated closeness.
simul(100, random_path_successor, True,"closeness simulado")

Unnamed: 0,betweenness analítico,betweenness simulado,closeness analítico,closeness simulado,família
7,0.091575,0.679945,0.482759,0.280956,tornabuon
9,0.086081,0.698462,0.482759,0.276036,ridolfi
13,0.115385,0.578407,0.4375,0.268469,barbadori
6,0.521978,0.821044,0.56,0.266934,medici
3,0.260073,0.742857,0.466667,0.265593,guadagni
8,0.120879,0.649286,0.4,0.26556,bischeri
11,0.075092,0.678077,0.424242,0.263235,strozzi
14,0.087912,0.614396,0.388889,0.256068,castellan
2,0.212454,0.616703,0.482759,0.255755,albizzi
12,0.021978,0.52,0.35,0.253315,peruzzi


In [34]:
# Trail trajectories (no repeated edges) with transfer (difusao=False):
# 100 repetitions, rows sorted by simulated closeness.
simul(100, random_trail_successor, False,"closeness simulado")

Unnamed: 0,betweenness analítico,betweenness simulado,closeness analítico,closeness simulado,família
6,0.521978,0.701044,0.56,0.276711,medici
2,0.212454,0.267143,0.482759,0.27651,albizzi
3,0.260073,0.412802,0.466667,0.26739,guadagni
13,0.115385,0.267033,0.4375,0.26029,barbadori
7,0.091575,0.323462,0.482759,0.255878,tornabuon
12,0.021978,0.231209,0.35,0.240739,peruzzi
9,0.086081,0.334451,0.482759,0.23892,ridolfi
5,0.142857,0.142857,0.388889,0.235841,salviati
8,0.120879,0.349121,0.4,0.231593,bischeri
11,0.075092,0.327857,0.424242,0.228511,strozzi


In [35]:
# Trail trajectories (no repeated edges) with duplication (difusao=True):
# 100 repetitions, rows sorted by simulated closeness.
simul(100, random_trail_successor, True,"closeness simulado")

Unnamed: 0,betweenness analítico,betweenness simulado,closeness analítico,closeness simulado,família
7,0.091575,0.927308,0.482759,0.266076,tornabuon
9,0.086081,0.926538,0.482759,0.262047,ridolfi
13,0.115385,0.601813,0.4375,0.257968,barbadori
6,0.521978,1.24511,0.56,0.252182,medici
8,0.120879,0.787253,0.4,0.252101,bischeri
3,0.260073,0.897692,0.466667,0.251982,guadagni
11,0.075092,0.885659,0.424242,0.251122,strozzi
2,0.212454,0.684835,0.482759,0.245398,albizzi
14,0.087912,0.833352,0.388889,0.243222,castellan
12,0.021978,0.591209,0.35,0.236425,peruzzi


In [36]:
# Walk trajectories (any neighbor allowed) with transfer (difusao=False):
# 100 repetitions, rows sorted by simulated closeness.
simul(100, random_walk_successor, False,"closeness simulado")

Unnamed: 0,betweenness analítico,betweenness simulado,closeness analítico,closeness simulado,família
4,0.0,0.704231,0.285714,0.040108,pazzi
5,0.142857,1.548681,0.388889,0.038815,salviati
0,0.0,0.744615,0.333333,0.038613,ginori
6,0.521978,5.050824,0.56,0.03793,medici
10,0.0,0.757143,0.368421,0.037,acciaiuol
9,0.086081,2.467582,0.482759,0.036817,ridolfi
2,0.212454,2.461319,0.482759,0.036692,albizzi
1,0.0,0.743626,0.325581,0.036507,lambertes
13,0.115385,1.633626,0.4375,0.036499,barbadori
3,0.260073,3.319231,0.466667,0.036166,guadagni


In [37]:
# Walk trajectories (any neighbor allowed) with duplication (difusao=True):
# 100 repetitions, rows sorted by simulated closeness.
simul(100, random_walk_successor, True,"closeness simulado")

Unnamed: 0,betweenness analítico,betweenness simulado,closeness analítico,closeness simulado,família
7,0.091575,2.398901,0.482759,0.171209,tornabuon
6,0.521978,6.924286,0.56,0.170529,medici
9,0.086081,2.521209,0.482759,0.168175,ridolfi
13,0.115385,1.483681,0.4375,0.166961,barbadori
2,0.212454,3.053571,0.482759,0.161015,albizzi
11,0.075092,2.71478,0.424242,0.159081,strozzi
10,0.0,0.591758,0.368421,0.156493,acciaiuol
8,0.120879,2.757967,0.4,0.156366,bischeri
3,0.260073,4.504231,0.466667,0.155031,guadagni
14,0.087912,3.132802,0.388889,0.153486,castellan


In [38]:
# Geodesic trajectories with transfer (difusao=False): 100 repetitions,
# rows sorted by simulated betweenness.
simul(100, random_geodesic_successor, False, "betweenness simulado")

Unnamed: 0,betweenness analítico,betweenness simulado,closeness analítico,closeness simulado,família
6,0.521978,0.522967,0.56,0.56,medici
3,0.260073,0.255604,0.466667,0.466667,guadagni
2,0.212454,0.212033,0.482759,0.482759,albizzi
5,0.142857,0.142857,0.388889,0.388889,salviati
8,0.120879,0.12,0.4,0.4,bischeri
13,0.115385,0.11533,0.4375,0.4375,barbadori
9,0.086081,0.089451,0.482759,0.482759,ridolfi
14,0.087912,0.088901,0.388889,0.388889,castellan
7,0.091575,0.088571,0.482759,0.482759,tornabuon
11,0.075092,0.077473,0.424242,0.424242,strozzi


In [39]:
# Geodesic trajectories with duplication (difusao=True): 100 repetitions,
# rows sorted by simulated betweenness.
simul(100, random_geodesic_successor, True,"betweenness simulado")

Unnamed: 0,betweenness analítico,betweenness simulado,closeness analítico,closeness simulado,família
6,0.521978,1.388462,0.56,0.56,medici
3,0.260073,0.689066,0.466667,0.466667,guadagni
2,0.212454,0.572088,0.482759,0.482759,albizzi
5,0.142857,0.406593,0.388889,0.388889,salviati
13,0.115385,0.316044,0.4375,0.4375,barbadori
8,0.120879,0.312143,0.4,0.4,bischeri
9,0.086081,0.251264,0.482759,0.482759,ridolfi
7,0.091575,0.243187,0.482759,0.482759,tornabuon
14,0.087912,0.231538,0.388889,0.388889,castellan
11,0.075092,0.208407,0.424242,0.424242,strozzi


In [40]:
# Path trajectories (no repeated nodes) with transfer (difusao=False):
# 100 repetitions, rows sorted by simulated betweenness.
simul(100, random_path_successor, False,"betweenness simulado")

Unnamed: 0,betweenness analítico,betweenness simulado,closeness analítico,closeness simulado,família
6,0.521978,0.62989,0.56,0.306293,medici
3,0.260073,0.398846,0.466667,0.267602,guadagni
14,0.087912,0.383462,0.388889,0.256854,castellan
11,0.075092,0.370824,0.424242,0.240057,strozzi
8,0.120879,0.365,0.4,0.250833,bischeri
9,0.086081,0.362692,0.482759,0.255026,ridolfi
7,0.091575,0.324725,0.482759,0.252932,tornabuon
12,0.021978,0.246593,0.35,0.224927,peruzzi
13,0.115385,0.243516,0.4375,0.235534,barbadori
2,0.212454,0.231319,0.482759,0.261697,albizzi


In [41]:
# Path trajectories (no repeated nodes) with duplication (difusao=True):
# 100 repetitions, rows sorted by simulated betweenness.
simul(100, random_path_successor, True,"betweenness simulado")

Unnamed: 0,betweenness analítico,betweenness simulado,closeness analítico,closeness simulado,família
6,0.521978,0.819835,0.56,0.268929,medici
3,0.260073,0.739396,0.466667,0.267421,guadagni
9,0.086081,0.696923,0.482759,0.276186,ridolfi
7,0.091575,0.682033,0.482759,0.278616,tornabuon
11,0.075092,0.677363,0.424242,0.264226,strozzi
8,0.120879,0.648187,0.4,0.262647,bischeri
14,0.087912,0.619231,0.388889,0.257338,castellan
2,0.212454,0.615824,0.482759,0.256393,albizzi
13,0.115385,0.581374,0.4375,0.265803,barbadori
12,0.021978,0.523571,0.35,0.253673,peruzzi


In [42]:
# Trail trajectories (no repeated edges) with transfer (difusao=False):
# 100 repetitions, rows sorted by simulated betweenness.
simul(100, random_trail_successor, False,"betweenness simulado")

Unnamed: 0,betweenness analítico,betweenness simulado,closeness analítico,closeness simulado,família
6,0.521978,0.698736,0.56,0.290384,medici
3,0.260073,0.402857,0.466667,0.264994,guadagni
14,0.087912,0.38033,0.388889,0.227608,castellan
8,0.120879,0.345055,0.4,0.231327,bischeri
11,0.075092,0.334011,0.424242,0.225671,strozzi
9,0.086081,0.331923,0.482759,0.242388,ridolfi
7,0.091575,0.313571,0.482759,0.2512,tornabuon
13,0.115385,0.267692,0.4375,0.268069,barbadori
2,0.212454,0.263516,0.482759,0.270006,albizzi
12,0.021978,0.229615,0.35,0.242317,peruzzi


In [43]:
# Trail trajectories (no repeated edges) with duplication (difusao=True):
# 100 repetitions, rows sorted by simulated betweenness.
simul(100, random_trail_successor, True,"betweenness simulado")

Unnamed: 0,betweenness analítico,betweenness simulado,closeness analítico,closeness simulado,família
6,0.521978,1.263242,0.56,0.255495,medici
7,0.091575,0.927308,0.482759,0.263213,tornabuon
9,0.086081,0.925934,0.482759,0.262181,ridolfi
3,0.260073,0.889231,0.466667,0.255873,guadagni
11,0.075092,0.883516,0.424242,0.251704,strozzi
14,0.087912,0.822967,0.388889,0.243887,castellan
8,0.120879,0.779945,0.4,0.252332,bischeri
2,0.212454,0.687308,0.482759,0.246673,albizzi
13,0.115385,0.604066,0.4375,0.253892,barbadori
12,0.021978,0.589231,0.35,0.23454,peruzzi


In [44]:
# Walk trajectories (any neighbor allowed) with transfer (difusao=False):
# 100 repetitions, rows sorted by simulated betweenness.
simul(100, random_walk_successor, False,"betweenness simulado")

Unnamed: 0,betweenness analítico,betweenness simulado,closeness analítico,closeness simulado,família
6,0.521978,5.070604,0.56,0.034492,medici
3,0.260073,3.333846,0.466667,0.037022,guadagni
7,0.091575,2.49478,0.482759,0.034404,tornabuon
8,0.120879,2.481593,0.4,0.036409,bischeri
9,0.086081,2.480714,0.482759,0.034549,ridolfi
2,0.212454,2.477308,0.482759,0.036556,albizzi
11,0.075092,2.467802,0.424242,0.035602,strozzi
14,0.087912,2.45022,0.388889,0.0341,castellan
13,0.115385,1.625165,0.4375,0.036389,barbadori
12,0.021978,1.602857,0.35,0.036085,peruzzi


In [45]:
# Walk trajectories (any neighbor allowed) with duplication (difusao=True):
# 100 repetitions, rows sorted by simulated betweenness.
simul(100, random_walk_successor, True,"betweenness simulado")

Unnamed: 0,betweenness analítico,betweenness simulado,closeness analítico,closeness simulado,família
6,0.521978,6.916209,0.56,0.169008,medici
3,0.260073,4.528297,0.466667,0.161194,guadagni
14,0.087912,3.153956,0.388889,0.151515,castellan
2,0.212454,3.085714,0.482759,0.164322,albizzi
8,0.120879,2.783352,0.4,0.157047,bischeri
11,0.075092,2.722198,0.424242,0.157393,strozzi
9,0.086081,2.528736,0.482759,0.164939,ridolfi
7,0.091575,2.429121,0.482759,0.170247,tornabuon
5,0.142857,1.811923,0.388889,0.142319,salviati
12,0.021978,1.681703,0.35,0.147333,peruzzi
