# Encontro 13: Medidas de Centralidade

Importando a biblioteca:

In [1]:
import sys
sys.path.append('..')

from random import choice
from itertools import permutations

import pandas as pd
import networkx as nx

import socnet as sn
from tqdm import tqdm
import matplotlib.pyplot as plt
from IPython.display import display, HTML


Configurando a biblioteca:

In [2]:
sn.node_size = 10
sn.node_color = (255, 255, 255)

sn.edge_width = 1
sn.edge_color = (192, 192, 192)

sn.node_label_position = 'top center'

Carregando rede de casamentos entre famílias de Florença durante a Renascença.

J. F. Padgett e C. K. Ansell. *Robust action and the rise of the Medici, 1400–1434.* American Journal of
Sociology 98, págs. 1259-1319, 1993.

In [3]:
g = sn.load_graph('Renaissance.gml', has_pos=True)

sn.show_graph(g, nlab=True)

Função que registra, em cada nó, seus sucessores em geodésicas de $s$ a $t$.

In [4]:
def set_geodesic_successors(g, s, t):
    for n in g.nodes:
        g.nodes[n]['geodesic_successors'] = set()

    for p in nx.all_shortest_paths(g, s, t):
        for i in range(len(p) - 1):
            g.nodes[p[i]]['geodesic_successors'].add(p[i + 1])

Funções que representam uma escolha aleatória de sucessor para diferentes tipos de trajetórias.

In [5]:
# Pense que o atributo 'passages' abaixo indica quantas
# vezes um fluxo já passou por um nó ou por uma aresta.

def random_geodesic_successor(g, n):
    return choice([m for m in g.nodes[n]['geodesic_successors']])

def random_path_successor(g, n):
    return choice([m for m in g.neighbors(n) if g.nodes[m]['passages'] == 0])

def random_trail_successor(g, n):
    return choice([m for m in g.neighbors(n) if g.edges[n, m]['passages'] == 0])

def random_walk_successor(g, n):
    return choice([m for m in g.neighbors(n)])

Função que faz uma simulação de fluxo de $s$ a $t$, que pode ou não ser bem-sucedida.

In [6]:
def simulate_single_flow(g, s, t, difusion='transfer', traj='geodesic'):
    # Inicializa o atributo 'passages' de cada nó.
    for n in g.nodes:
        g.nodes[n]['passages'] = 0
    g.nodes[s]['passages'] = 1

    # Inicializa o atributo 'passages' de cada aresta.
    for n, m in g.edges:
        g.edges[n, m]['passages'] = 0

    # Inicializa s como o único dono do insumo.
    for n in g.nodes:
        g.nodes[n]['owner'] = False
    g.nodes[s]['owner'] = True

    # Simula o fluxo, contando o número total de passos.

    steps = 0

    while True:
        # O conjunto reached representa todos os nós
        # que o fluxo consegue alcançar no passo atual.
        reached = set()

        # Verifica cada um dos donos atuais do insumo.

        owners = [n for n in g.nodes if g.nodes[n]['owner']]

        for n in owners:
            # Escolhe aleatoriamente um dos sucessores.
            try:
                m = random_geodesic_successor(g, n)
            except IndexError:
                continue

            if difusion == 'transfer':
                # Deixa de ser dono do insumo.
                g.nodes[n]['owner'] = False
            try:
                if traj == 'geodesic':
                    # Escolhe aleatoriamente um dos sucessores.
                    m = random_geodesic_successor(g, n)
                elif traj == 'path':
                    m = random_path_successor(g, n)
                elif traj == 'trail':
                    m = random_trail_successor(g, n)
                else:
                    m = random_walk_successor(g, n)
                    
                # Incrementa o atributo 'passages' do nó.
                g.nodes[m]['passages'] += 1

                # Incrementa o atributo 'passages' da aresta.
                g.edges[n, m]['passages'] += 1

                # Registra que consegue alcançar esse nó.
                reached.add(m)
            except Exception as err:
                pass

        # Todo nó alcançado passa a ser dono do insumo.

        for n in reached:
            g.nodes[n]['owner'] = True

        # Isso conclui o passo atual da simulação.
        steps += 1

        # Se o passo alcançou t, chegamos ao fim da simulação.
        # Ela foi bem-sucedida: devolvemos o número de passos.
        if t in reached:
            return steps

        # Se o passo não alcançou ninguém, chegamos ao fim da
        # simulação. Ela não foi bem-sucedida: devolvemos -1.
        if not reached:
            return -1

Função que faz simulações de fluxo de $s$ a $t$ até uma ser bem-sucedida.

In [7]:
def simulate_successful_flow(g, s, t, difusion, traj):
    set_geodesic_successors(g, s, t)

    while True:
        steps = simulate_single_flow(g, s, t, difusion, traj)

        if steps != -1:
            return steps

Função que faz simulações de fluxo para todo $s$ e $t$ possíveis, e tira disso um *closeness simulado* e um *betweenness simulado*.

In [8]:
def simulate_all_flows(g, difusion, traj):
    for n in g.nodes:
        g.nodes[n]['closeness'] = 0
        g.nodes[n]['betweenness'] = 0

    for s, t in permutations(g.nodes, 2):
        steps = simulate_successful_flow(g, s, t, difusion, traj)

        g.nodes[s]['closeness'] += steps
        for n in g.nodes:
            if n != s and n != t:
                g.nodes[n]['betweenness'] += g.nodes[n]['passages']

    # Normalizações necessárias para comparar com os
    # resultados analíticos. Não precisa entender.
    for n in g.nodes:
        g.nodes[n]['closeness'] = (g.number_of_nodes() - 1) / g.nodes[n]['closeness']
        g.nodes[n]['betweenness'] /= (g.number_of_nodes() - 1) * (g.number_of_nodes() - 2)

Média de *closeness simulado* e *betweenness simulado* para muitas repetições da simulação acima.

In [9]:
# TIMES = 100


# for n in g.nodes:
#     g.nodes[n]['mean_closeness'] = 0
#     g.nodes[n]['mean_betweenness'] = 0

# for _ in range(TIMES):
#     simulate_all_flows(g)

#     for n in g.nodes:
#         g.nodes[n]['mean_closeness'] += g.nodes[n]['closeness']
#         g.nodes[n]['mean_betweenness'] += g.nodes[n]['betweenness']

# for n in g.nodes:
#     g.nodes[n]['mean_closeness'] /= TIMES
#     g.nodes[n]['mean_betweenness'] /= TIMES

Cálculo de *closeness* e *betweenness* a partir das funções prontas da NetworkX, para comparação.

In [10]:
# cc = nx.closeness_centrality(g)

# bc = nx.betweenness_centrality(g)

Construção de data frame só para comparar mais facilmente.

In [11]:
# pd.DataFrame({
#     'família': [g.nodes[n]['label'] for n in g.nodes],
#     'closeness simulado': [g.nodes[n]['mean_closeness'] for n in g.nodes],
#     'closeness analítico': [cc[n] for n in g.nodes],
#     'betweenness simulado': [g.nodes[n]['mean_betweenness'] for n in g.nodes],
#     'betweenness analítico': [bc[n] for n in g.nodes],
# })

In [20]:
def test (g, difusion, traj):
    TIMES = 100


    for n in g.nodes:
        g.nodes[n]['mean_closeness'] = 0
        g.nodes[n]['mean_betweenness'] = 0

    for _ in tqdm(range(TIMES)):
        simulate_all_flows(g, difusion, traj)

        for n in g.nodes:
            g.nodes[n]['mean_closeness'] += g.nodes[n]['closeness']
            g.nodes[n]['mean_betweenness'] += g.nodes[n]['betweenness']

    for n in g.nodes:
        g.nodes[n]['mean_closeness'] /= TIMES
        g.nodes[n]['mean_betweenness'] /= TIMES
        
    cc = nx.closeness_centrality(g)

    bc = nx.betweenness_centrality(g)
    string = "{} com {}".format(difusion, traj)
    print(string)
    return string ,{
    'família': [g.nodes[n]['label'] for n in g.nodes],
    'closeness simulado': [g.nodes[n]['mean_closeness'] for n in g.nodes],
    'closeness analítico': [cc[n] for n in g.nodes],
    'dif closeness': [cc[n] - g.nodes[n]['mean_closeness'] for n in g.nodes],
    'betweenness simulado': [g.nodes[n]['mean_betweenness'] for n in g.nodes],
    'betweenness analítico': [bc[n] for n in g.nodes],
    'dif betweenness': [bc[n] - g.nodes[n]['mean_betweenness'] for n in g.nodes]
    }

In [21]:
trajectory = ['path', 'trail', 'walk', 'geodesic']
# trajectory = ['path']

difusion = ['transfer', 'duplicate']

dataframes = []
for i in range(len(trajectory)):
    for j in range(len(difusion)):
        title,result = test(g,difusion[j], trajectory[i])
        dataframes.append(result)
        ts = pd.DataFrame(result)
        display(ts)

100%|██████████| 100/100 [00:21<00:00,  4.58it/s]

transfer com path





Unnamed: 0,família,closeness simulado,closeness analítico,dif closeness,betweenness simulado,betweenness analítico,dif betweenness
0,ginori,0.333333,0.333333,3.885781e-16,0.0,0.0,0.0
1,lambertes,0.325581,0.325581,-3.330669e-16,0.0,0.0,0.0
2,albizzi,0.482759,0.482759,-8.326673e-16,0.20967,0.212454,0.002783883
3,guadagni,0.466667,0.466667,-5.5511150000000004e-17,0.261099,0.260073,-0.001025641
4,pazzi,0.282009,0.285714,0.003705495,0.0,0.0,0.0
5,salviati,0.383132,0.388889,0.005756526,0.142857,0.142857,1.665335e-16
6,medici,0.547062,0.56,0.01293846,0.517582,0.521978,0.004395604
7,tornabuon,0.476966,0.482759,0.005793103,0.099945,0.091575,-0.008369963
8,bischeri,0.382912,0.4,0.01708771,0.123956,0.120879,-0.003076923
9,ridolfi,0.482759,0.482759,-8.326673e-16,0.099011,0.086081,-0.0129304


100%|██████████| 100/100 [00:05<00:00, 17.50it/s]

duplicate com path





Unnamed: 0,família,closeness simulado,closeness analítico,dif closeness,betweenness simulado,betweenness analítico,dif betweenness
0,ginori,0.21369,0.333333,0.119643,0.17533,0.0,-0.17533
1,lambertes,0.225875,0.325581,0.099706,0.160879,0.0,-0.160879
2,albizzi,0.23313,0.482759,0.249628,0.543846,0.212454,-0.331392
3,guadagni,0.247581,0.466667,0.219085,0.457582,0.260073,-0.197509
4,pazzi,0.192428,0.285714,0.093286,0.068901,0.0,-0.068901
5,salviati,0.209934,0.388889,0.178955,0.348022,0.142857,-0.205165
6,medici,0.235189,0.56,0.324811,0.690165,0.521978,-0.168187
7,tornabuon,0.259259,0.482759,0.223499,0.561868,0.091575,-0.470293
8,bischeri,0.244283,0.4,0.155717,0.34456,0.120879,-0.223681
9,ridolfi,0.252031,0.482759,0.230727,0.507363,0.086081,-0.421282


100%|██████████| 100/100 [00:23<00:00,  4.53it/s]

transfer com trail





Unnamed: 0,família,closeness simulado,closeness analítico,dif closeness,betweenness simulado,betweenness analítico,dif betweenness
0,ginori,0.333333,0.333333,3.885781e-16,0.0,0.0,0.0
1,lambertes,0.325581,0.325581,-3.330669e-16,0.0,0.0,0.0
2,albizzi,0.482759,0.482759,-8.326673e-16,0.209066,0.212454,0.003388278
3,guadagni,0.466667,0.466667,-5.5511150000000004e-17,0.259615,0.260073,0.0004578755
4,pazzi,0.282352,0.285714,0.003362637,0.0,0.0,0.0
5,salviati,0.384335,0.388889,0.004554285,0.142857,0.142857,1.665335e-16
6,medici,0.550286,0.56,0.009714058,0.520604,0.521978,0.001373626
7,tornabuon,0.478092,0.482759,0.004666667,0.099231,0.091575,-0.007655678
8,bischeri,0.381661,0.4,0.01833903,0.123132,0.120879,-0.002252747
9,ridolfi,0.482759,0.482759,-8.326673e-16,0.097747,0.086081,-0.01166667


100%|██████████| 100/100 [00:06<00:00, 15.13it/s]

duplicate com trail





Unnamed: 0,família,closeness simulado,closeness analítico,dif closeness,betweenness simulado,betweenness analítico,dif betweenness
0,ginori,0.215553,0.333333,0.11778,0.17,0.0,-0.17
1,lambertes,0.220021,0.325581,0.10556,0.158352,0.0,-0.158352
2,albizzi,0.237757,0.482759,0.245001,0.553846,0.212454,-0.341392
3,guadagni,0.24195,0.466667,0.224717,0.47511,0.260073,-0.215037
4,pazzi,0.188275,0.285714,0.097439,0.069011,0.0,-0.069011
5,salviati,0.206691,0.388889,0.182198,0.347088,0.142857,-0.204231
6,medici,0.241056,0.56,0.318944,0.765549,0.521978,-0.243571
7,tornabuon,0.245517,0.482759,0.237242,0.648571,0.091575,-0.556996
8,bischeri,0.241856,0.4,0.158144,0.35011,0.120879,-0.229231
9,ridolfi,0.233792,0.482759,0.248967,0.592088,0.086081,-0.506007


100%|██████████| 100/100 [00:33<00:00,  3.02it/s]

transfer com walk





Unnamed: 0,família,closeness simulado,closeness analítico,dif closeness,betweenness simulado,betweenness analítico,dif betweenness
0,ginori,0.239937,0.333333,0.093396,0.0,0.0,0.0
1,lambertes,0.234991,0.325581,0.090591,0.0,0.0,0.0
2,albizzi,0.445464,0.482759,0.037295,0.287033,0.212454,-0.074579
3,guadagni,0.399103,0.466667,0.067563,0.342692,0.260073,-0.082619
4,pazzi,0.1587,0.285714,0.127015,0.0,0.0,0.0
5,salviati,0.33358,0.388889,0.055309,0.250879,0.142857,-0.108022
6,medici,0.473225,0.56,0.086775,0.625659,0.521978,-0.103681
7,tornabuon,0.430053,0.482759,0.052705,0.144505,0.091575,-0.05293
8,bischeri,0.281058,0.4,0.118942,0.161484,0.120879,-0.040604
9,ridolfi,0.436613,0.482759,0.046145,0.133022,0.086081,-0.046941


100%|██████████| 100/100 [00:08<00:00, 11.95it/s]

duplicate com walk





Unnamed: 0,família,closeness simulado,closeness analítico,dif closeness,betweenness simulado,betweenness analítico,dif betweenness
0,ginori,0.124228,0.333333,0.209105,0.433242,0.0,-0.433242
1,lambertes,0.129949,0.325581,0.195632,0.318077,0.0,-0.318077
2,albizzi,0.138684,0.482759,0.344074,1.722912,0.212454,-1.510458
3,guadagni,0.14435,0.466667,0.322317,1.913242,0.260073,-1.653168
4,pazzi,0.108813,0.285714,0.176902,0.312143,0.0,-0.312143
5,salviati,0.122396,0.388889,0.266493,1.265989,0.142857,-1.123132
6,medici,0.142253,0.56,0.417747,3.299341,0.521978,-2.777363
7,tornabuon,0.14782,0.482759,0.334939,1.477033,0.091575,-1.385458
8,bischeri,0.141076,0.4,0.258924,0.993407,0.120879,-0.872527
9,ridolfi,0.138435,0.482759,0.344324,1.361813,0.086081,-1.275733


100%|██████████| 100/100 [00:03<00:00, 25.61it/s]

transfer com geodesic





Unnamed: 0,família,closeness simulado,closeness analítico,dif closeness,betweenness simulado,betweenness analítico,dif betweenness
0,ginori,0.333333,0.333333,3.885781e-16,0.0,0.0,0.0
1,lambertes,0.325581,0.325581,-3.330669e-16,0.0,0.0,0.0
2,albizzi,0.482759,0.482759,-8.326673e-16,0.211264,0.212454,0.001190476
3,guadagni,0.466667,0.466667,-5.5511150000000004e-17,0.256209,0.260073,0.003864469
4,pazzi,0.285714,0.285714,2.775558e-16,0.0,0.0,0.0
5,salviati,0.388889,0.388889,3.330669e-16,0.142857,0.142857,1.665335e-16
6,medici,0.56,0.56,-6.661338e-16,0.522198,0.521978,-0.0002197802
7,tornabuon,0.482759,0.482759,-8.326673e-16,0.089011,0.091575,0.002564103
8,bischeri,0.4,0.4,7.771561e-16,0.11956,0.120879,0.001318681
9,ridolfi,0.482759,0.482759,-8.326673e-16,0.090385,0.086081,-0.004304029


100%|██████████| 100/100 [00:04<00:00, 24.58it/s]

duplicate com geodesic





Unnamed: 0,família,closeness simulado,closeness analítico,dif closeness,betweenness simulado,betweenness analítico,dif betweenness
0,ginori,0.333333,0.333333,3.885781e-16,0.0,0.0,0.0
1,lambertes,0.325581,0.325581,-3.330669e-16,0.0,0.0,0.0
2,albizzi,0.482759,0.482759,-8.326673e-16,0.570549,0.212454,-0.358095
3,guadagni,0.466667,0.466667,-5.5511150000000004e-17,0.68978,0.260073,-0.429707
4,pazzi,0.285714,0.285714,2.775558e-16,0.0,0.0,0.0
5,salviati,0.388889,0.388889,3.330669e-16,0.406593,0.142857,-0.263736
6,medici,0.56,0.56,-6.661338e-16,1.388736,0.521978,-0.866758
7,tornabuon,0.482759,0.482759,-8.326673e-16,0.247363,0.091575,-0.155788
8,bischeri,0.4,0.4,7.771561e-16,0.314341,0.120879,-0.193462
9,ridolfi,0.482759,0.482759,-8.326673e-16,0.248132,0.086081,-0.162051


In [44]:
from scipy.stats import ttest_ind, ttest_rel

In [23]:
dataframes

[{'família': ['ginori',
   'lambertes',
   'albizzi',
   'guadagni',
   'pazzi',
   'salviati',
   'medici',
   'tornabuon',
   'bischeri',
   'ridolfi',
   'acciaiuol',
   'strozzi',
   'peruzzi',
   'barbadori',
   'castellan'],
  'closeness simulado': [0.3333333333333329,
   0.32558139534883757,
   0.482758620689656,
   0.46666666666666673,
   0.28200879120879124,
   0.3831323631323631,
   0.5470615384615393,
   0.4769655172413798,
   0.38291229132307436,
   0.482758620689656,
   0.3623106546854941,
   0.42424242424242414,
   0.35000000000000064,
   0.4375,
   0.3865765765765765],
  'closeness analítico': [0.3333333333333333,
   0.32558139534883723,
   0.4827586206896552,
   0.4666666666666667,
   0.2857142857142857,
   0.3888888888888889,
   0.56,
   0.4827586206896552,
   0.4,
   0.4827586206896552,
   0.3684210526315789,
   0.42424242424242425,
   0.35,
   0.4375,
   0.3888888888888889],
  'dif closeness': [3.885780586188048e-16,
   -3.3306690738754696e-16,
   -8.326672684688674e

In [45]:
# results = []

# for i in range(len(dataframes)):
#     if(i == 6):
#         continue
    

In [53]:
results_betweenness = []
results_closeness = []
geodesic_transfer = dataframes[6]


In [54]:
index = 0

for i in range(len(trajectory)):
    for j in range(len(difusion)):
        if(index == 6):
            pass
        else:
            t_test_closeness = ttest_rel(dataframes[index]['closeness simulado'], geodesic_transfer['closeness simulado'])
            t_test_betweenness = ttest_rel(dataframes[index]['betweenness simulado'], geodesic_transfer['betweenness simulado'])
            string = "{} com {}".format(difusion[j], trajectory[i])
            print(string)
            print(t_test_closeness)
            print(t_test_betweenness)
            print()
            results_betweenness.append(t_test_betweenness)
            results_closeness.append(t_test_betweenness)
        index +=1
            
        
       

transfer com path
Ttest_relResult(statistic=-2.6264200402191173, pvalue=0.019925046473814732)
Ttest_relResult(statistic=1.4301975625161518, pvalue=0.174598352984419)

duplicate com path
Ttest_relResult(statistic=-11.141892783725963, pvalue=2.4095522941281495e-08)
Ttest_relResult(statistic=8.747237533996547, pvalue=4.771408000894475e-07)

transfer com trail
Ttest_relResult(statistic=-2.422148887927411, pvalue=0.029585542084858387)
Ttest_relResult(statistic=1.3868963546630604, pvalue=0.18715995476469102)

duplicate com trail
Ttest_relResult(statistic=-11.3832996987942, pvalue=1.835538728006037e-08)
Ttest_relResult(statistic=7.725408599004582, pvalue=2.05153906654044e-06)

transfer com walk
Ttest_relResult(statistic=-11.106396102584622, pvalue=2.5089252932285144e-08)
Ttest_relResult(statistic=4.402229096099289, pvalue=0.0006022251468877877)

duplicate com walk
Ttest_relResult(statistic=-16.325870282930005, pvalue=1.6512975668271723e-10)
Ttest_relResult(statistic=6.030818589066589, pvalue=

E agora, vamos pensar um pouco...

* Onde você precisa mudar o código para usar uma *trajetória* que não seja a *geodésica*? (caminho, trilha, passeio)

* Onde você precisa mudar o código para usar uma *difusão* que não seja a *transferência*? (duplicação)

Considere então a seguinte **hipótese**:

>Quando consideramos outros tipos de trajetória e outros tipos de difusão, os nós com maior *closeness simulado* e *betweenness simulado* não são necessariamente os nós com maior *closeness* e *betweenness* segundo as fórmulas clássicas. (que correspondem ao uso de geodésica e transferência na simulação)

Queremos:

1. Operacionalização e teste dessas hipótese. (Objetivo 3)
2. Interpretação dos resultados na linguagem de Análise de Redes Sociais (Objetivo 4)

Um *feedback* da atividade sobre *coreness no Jazz* será dado em breve, para vocês terem uma melhor referência do item 2.