# Encontro 13: Medidas de Centralidade

Importando a biblioteca:

In [1]:
import sys
sys.path.append('..')

from random import choice
from itertools import permutations

import pandas as pd
import networkx as nx

import socnet as sn

Configurando a biblioteca:

In [2]:
sn.node_size = 10
sn.node_color = (255, 255, 255)

sn.edge_width = 1
sn.edge_color = (192, 192, 192)

sn.node_label_position = 'top center'

Carregando rede de casamentos entre famílias de Florença durante a Renascença.

J. F. Padgett e C. K. Ansell. *Robust action and the rise of the Medici, 1400–1434.* American Journal of
Sociology 98, págs. 1259-1319, 1993.

In [3]:
g = sn.load_graph('Renaissance.gml', has_pos=True)

sn.show_graph(g, nlab=True)

Função que registra, em cada nó, seus sucessores em geodésicas de $s$ a $t$.

In [4]:
def set_geodesic_successors(g, s, t):
    for n in g.nodes:
        g.nodes[n]['geodesic_successors'] = set()

    for p in nx.all_shortest_paths(g, s, t):
        for i in range(len(p) - 1):
            g.nodes[p[i]]['geodesic_successors'].add(p[i + 1])

Funções que representam uma escolha aleatória de sucessor para diferentes tipos de trajetórias.

In [5]:
# Pense que o atributo 'passages' abaixo indica quantas
# vezes um fluxo já passou por um nó ou por uma aresta.

def random_geodesic_successor(g, n):
    return choice([m for m in g.nodes[n]['geodesic_successors']])

def random_path_successor(g, n):
    return choice([m for m in g.neighbors(n) if g.nodes[m]['passages'] == 0])

def random_trail_successor(g, n):
    return choice([m for m in g.neighbors(n) if g.edges[n, m]['passages'] == 0])

def random_walk_successor(g, n):
    return choice([m for m in g.neighbors(n)])

Função que faz uma simulação de fluxo de $s$ a $t$, que pode ou não ser bem-sucedida.

In [6]:
def simulate_single_flow(g, s, t, func_traj, difusao):
    # Inicializa o atributo 'passages' de cada nó.
    for n in g.nodes:
        g.nodes[n]['passages'] = 0
    g.nodes[s]['passages'] = 1

    # Inicializa o atributo 'passages' de cada aresta.
    for n, m in g.edges:
        g.edges[n, m]['passages'] = 0

    # Inicializa s como o único dono do insumo.
    for n in g.nodes:
        g.nodes[n]['owner'] = False
    g.nodes[s]['owner'] = True

    # Simula o fluxo, contando o número total de passos.

    steps = 0

    while True:
        # O conjunto reached representa todos os nós
        # que o fluxo consegue alcançar no passo atual.
        reached = set()

        # Verifica cada um dos donos atuais do insumo.

        owners = [n for n in g.nodes if g.nodes[n]['owner']]

        for n in owners:
            # Deixa de ser dono do insumo.
            g.nodes[n]['owner'] = difusao ###
            
            try:
                # Escolhe aleatoriamente um dos sucessores.
                m = func_traj(g, n) ##
            except IndexError:
                continue
            
            # Deixa de ser dono do insumo.
            g.nodes[n]['owner'] = difusao ###
            
            # Incrementa o atributo 'passages' do nó.
            g.nodes[m]['passages'] += 1

            # Incrementa o atributo 'passages' da aresta.
            g.edges[n, m]['passages'] += 1

            # Registra que consegue alcançar esse nó.
            reached.add(m)

            
        # Todo nó alcançado passa a ser dono do insumo.

        for n in reached:
            g.nodes[n]['owner'] = True

        # Isso conclui o passo atual da simulação.
        steps += 1

        # Se o passo alcançou t, chegamos ao fim da simulação.
        # Ela foi bem-sucedida: devolvemos o número de passos.
        if t in reached:
            return steps

        # Se o passo não alcançou ninguém, chegamos ao fim da
        # simulação. Ela não foi bem-sucedida: devolvemos -1.
        if not reached:
            return -1

Função que faz simulações de fluxo de $s$ a $t$ até uma ser bem-sucedida.

In [7]:
def simulate_successful_flow(g, s, t, func_traj, difusao):
    set_geodesic_successors(g, s, t)

    while True:
        steps = simulate_single_flow(g, s, t, func_traj, difusao)

        if steps != -1:
            return steps

Função que faz simulações de fluxo para todo $s$ e $t$ possíveis, e tira disso um *closeness simulado* e um *betweenness simulado*.

In [8]:
def simulate_all_flows(g,func_traj, difusao):
    for n in g.nodes:
        g.nodes[n]['closeness'] = 0
        g.nodes[n]['betweenness'] = 0

    for s, t in permutations(g.nodes, 2):
        steps = simulate_successful_flow(g, s, t, func_traj, difusao)

        g.nodes[s]['closeness'] += steps
        for n in g.nodes:
            if n != s and n != t:
                g.nodes[n]['betweenness'] += g.nodes[n]['passages']

    # Normalizações necessárias para comparar com os
    # resultados analíticos. Não precisa entender.
    for n in g.nodes:
        g.nodes[n]['closeness'] = (g.number_of_nodes() - 1) / g.nodes[n]['closeness']
        g.nodes[n]['betweenness'] /= (g.number_of_nodes() - 1) * (g.number_of_nodes() - 2)

Média de *closeness simulado* e *betweenness simulado* para muitas repetições da simulação acima.

In [9]:
# TIMES = 1000
# func_traj = random_geodesic_successor
# difusao = False

def simul(TIMES, func_traj, difusao):
    for n in g.nodes:
        g.nodes[n]['mean_closeness'] = 0
        g.nodes[n]['mean_betweenness'] = 0

    for _ in range(TIMES):
        simulate_all_flows(g, func_traj, difusao)

        for n in g.nodes:
            g.nodes[n]['mean_closeness'] += g.nodes[n]['closeness']
            g.nodes[n]['mean_betweenness'] += g.nodes[n]['betweenness']

    for n in g.nodes:
        g.nodes[n]['mean_closeness'] /= TIMES
        g.nodes[n]['mean_betweenness'] /= TIMES
        
    df = pd.DataFrame({
    'família': [g.nodes[n]['label'] for n in g.nodes],
    'closeness simulado': [g.nodes[n]['mean_closeness'] for n in g.nodes],
    'closeness analítico': [cc[n] for n in g.nodes],
    'betweenness simulado': [g.nodes[n]['mean_betweenness'] for n in g.nodes],
    'betweenness analítico': [bc[n] for n in g.nodes],
    })
    return df
#     return df.sort_values(coluna, ascending=0)#.describe()

Cálculo de *closeness* e *betweenness* a partir das funções prontas da NetworkX, para comparação.

In [10]:
cc = nx.closeness_centrality(g)

bc = nx.betweenness_centrality(g)

Construção de data frame só para comparar mais facilmente.

E agora, vamos pensar um pouco...

* Onde você precisa mudar o código para usar uma *trajetória* que não seja a *geodésica*? (caminho, trilha, passeio)

* Onde você precisa mudar o código para usar uma *difusão* que não seja a *transferência*? (duplicação)

Considere então a seguinte **hipótese**:

>Quando consideramos outros tipos de trajetória e outros tipos de difusão, os nós com maior *closeness simulado* e *betweenness simulado* não são necessariamente os nós com maior *closeness* e *betweenness* segundo as fórmulas clássicas. (que correspondem ao uso de geodésica e transferência na simulação)

Queremos:

1. Operacionalização e teste dessas hipótese. (Objetivo 3)
2. Interpretação dos resultados na linguagem de Análise de Redes Sociais (Objetivo 4)

Um *feedback* da atividade sobre *coreness no Jazz* será dado em breve, para vocês terem uma melhor referência do item 2.

In [11]:
from scipy import stats

In [12]:
bm = simul(100, random_geodesic_successor, False)
bmc = bm["closeness simulado"]
bmb = bm["betweenness simulado"]
bm.describe()

Unnamed: 0,betweenness analítico,betweenness simulado,closeness analítico,closeness simulado
count,15.0,15.0,15.0,15.0
mean,0.115751,0.115751,0.411834,0.411834
std,0.136863,0.136289,0.074293,0.074293
min,0.0,0.0,0.285714,0.285714
25%,0.010989,0.010769,0.359211,0.359211
50%,0.087912,0.089066,0.4,0.4
75%,0.131868,0.131813,0.474713,0.474713
max,0.521978,0.521813,0.56,0.56


In [13]:
bm.sort_values('betweenness simulado', ascending=0)

Unnamed: 0,betweenness analítico,betweenness simulado,closeness analítico,closeness simulado,família
6,0.521978,0.521813,0.56,0.56,medici
3,0.260073,0.256923,0.466667,0.466667,guadagni
2,0.212454,0.208516,0.482759,0.482759,albizzi
5,0.142857,0.142857,0.388889,0.388889,salviati
8,0.120879,0.120769,0.4,0.4,bischeri
13,0.115385,0.115714,0.4375,0.4375,barbadori
7,0.091575,0.091868,0.482759,0.482759,tornabuon
9,0.086081,0.089066,0.482759,0.482759,ridolfi
14,0.087912,0.088352,0.388889,0.388889,castellan
11,0.075092,0.078846,0.424242,0.424242,strozzi


In [14]:
bm.sort_values('closeness simulado', ascending=0)

Unnamed: 0,betweenness analítico,betweenness simulado,closeness analítico,closeness simulado,família
6,0.521978,0.521813,0.56,0.56,medici
2,0.212454,0.208516,0.482759,0.482759,albizzi
7,0.091575,0.091868,0.482759,0.482759,tornabuon
9,0.086081,0.089066,0.482759,0.482759,ridolfi
3,0.260073,0.256923,0.466667,0.466667,guadagni
13,0.115385,0.115714,0.4375,0.4375,barbadori
11,0.075092,0.078846,0.424242,0.424242,strozzi
8,0.120879,0.120769,0.4,0.4,bischeri
5,0.142857,0.142857,0.388889,0.388889,salviati
14,0.087912,0.088352,0.388889,0.388889,castellan


In [15]:
gd = simul(100, random_geodesic_successor, True)
gdc = gd["closeness simulado"]
gdb = gd["betweenness simulado"]
gd.describe()

Unnamed: 0,betweenness analítico,betweenness simulado,closeness analítico,closeness simulado
count,15.0,15.0,15.0,15.0
mean,0.115751,0.311462,0.411834,0.411834
std,0.136863,0.364122,0.074293,0.074293
min,0.0,0.0,0.285714,0.285714
25%,0.010989,0.02728,0.359211,0.359211
50%,0.087912,0.246374,0.4,0.4
75%,0.131868,0.362363,0.474713,0.474713
max,0.521978,1.386538,0.56,0.56


In [16]:
gd.sort_values('betweenness simulado', ascending=0)

Unnamed: 0,betweenness analítico,betweenness simulado,closeness analítico,closeness simulado,família
6,0.521978,1.386538,0.56,0.56,medici
3,0.260073,0.692033,0.466667,0.466667,guadagni
2,0.212454,0.568791,0.482759,0.482759,albizzi
5,0.142857,0.406593,0.388889,0.388889,salviati
13,0.115385,0.318132,0.4375,0.4375,barbadori
8,0.120879,0.316099,0.4,0.4,bischeri
7,0.091575,0.246978,0.482759,0.482759,tornabuon
9,0.086081,0.246374,0.482759,0.482759,ridolfi
14,0.087912,0.230055,0.388889,0.388889,castellan
11,0.075092,0.205769,0.424242,0.424242,strozzi


In [17]:
gd.sort_values('closeness simulado', ascending=0)

Unnamed: 0,betweenness analítico,betweenness simulado,closeness analítico,closeness simulado,família
6,0.521978,1.386538,0.56,0.56,medici
2,0.212454,0.568791,0.482759,0.482759,albizzi
7,0.091575,0.246978,0.482759,0.482759,tornabuon
9,0.086081,0.246374,0.482759,0.482759,ridolfi
3,0.260073,0.692033,0.466667,0.466667,guadagni
13,0.115385,0.318132,0.4375,0.4375,barbadori
11,0.075092,0.205769,0.424242,0.424242,strozzi
8,0.120879,0.316099,0.4,0.4,bischeri
5,0.142857,0.406593,0.388889,0.388889,salviati
14,0.087912,0.230055,0.388889,0.388889,castellan


In [18]:
stats.ttest_ind(bmc,gdc)

Ttest_indResult(statistic=0.0, pvalue=1.0)

In [19]:
stats.ttest_ind(bmb,gdb)

Ttest_indResult(statistic=-1.9495873366410785, pvalue=0.06130505846536749)

In [20]:
pt = simul(100, random_path_successor, False)
ptc = pt["closeness simulado"]
ptb = pt["betweenness simulado"]
pt.describe()

Unnamed: 0,betweenness analítico,betweenness simulado,closeness analítico,closeness simulado
count,15.0,15.0,15.0,15.0
mean,0.115751,0.248619,0.411834,0.242545
std,0.136863,0.188477,0.074293,0.026557
min,0.0,0.0,0.285714,0.200979
25%,0.010989,0.071429,0.359211,0.227446
50%,0.087912,0.253352,0.4,0.242136
75%,0.131868,0.372225,0.474713,0.256957
max,0.521978,0.628407,0.56,0.300654


In [21]:
pt.sort_values('betweenness simulado', ascending=0)

Unnamed: 0,betweenness analítico,betweenness simulado,closeness analítico,closeness simulado,família
6,0.521978,0.628407,0.56,0.300654,medici
3,0.260073,0.406264,0.466667,0.273422,guadagni
14,0.087912,0.387527,0.388889,0.255311,castellan
8,0.120879,0.372637,0.4,0.249186,bischeri
11,0.075092,0.371813,0.424242,0.231126,strozzi
9,0.086081,0.360824,0.482759,0.259563,ridolfi
7,0.091575,0.331703,0.482759,0.258603,tornabuon
12,0.021978,0.253352,0.35,0.223766,peruzzi
13,0.115385,0.246154,0.4375,0.237003,barbadori
2,0.212454,0.227747,0.482759,0.251054,albizzi


In [22]:
pt.sort_values('closeness simulado', ascending=0)

Unnamed: 0,betweenness analítico,betweenness simulado,closeness analítico,closeness simulado,família
6,0.521978,0.628407,0.56,0.300654,medici
3,0.260073,0.406264,0.466667,0.273422,guadagni
9,0.086081,0.360824,0.482759,0.259563,ridolfi
7,0.091575,0.331703,0.482759,0.258603,tornabuon
14,0.087912,0.387527,0.388889,0.255311,castellan
2,0.212454,0.227747,0.482759,0.251054,albizzi
8,0.120879,0.372637,0.4,0.249186,bischeri
5,0.142857,0.142857,0.388889,0.242136,salviati
10,0.0,0.0,0.368421,0.240449,acciaiuol
13,0.115385,0.246154,0.4375,0.237003,barbadori


In [23]:
stats.ttest_ind(bmc,ptc)

Ttest_indResult(statistic=8.3102732977874538, pvalue=4.8339583834601444e-09)

In [24]:
stats.ttest_ind(bmb,ptb)

Ttest_indResult(statistic=-2.2124541631886769, pvalue=0.035261493050459052)

In [25]:
pd1 = simul(100, random_path_successor, True)
pdc = pd1["closeness simulado"]
pdb = pd1["betweenness simulado"]
pd1.describe()

Unnamed: 0,betweenness analítico,betweenness simulado,closeness analítico,closeness simulado
count,15.0,15.0,15.0,15.0
mean,0.115751,0.565941,0.411834,0.251751
std,0.136863,0.160751,0.074293,0.021453
min,0.0,0.254121,0.285714,0.20485
25%,0.010989,0.417088,0.359211,0.234145
50%,0.087912,0.613132,0.4,0.260596
75%,0.131868,0.681841,0.474713,0.26522
max,0.521978,0.823187,0.56,0.279277


In [26]:
pd1.sort_values('betweenness simulado', ascending=0)

Unnamed: 0,betweenness analítico,betweenness simulado,closeness analítico,closeness simulado,família
6,0.521978,0.823187,0.56,0.26612,medici
3,0.260073,0.742692,0.466667,0.26369,guadagni
9,0.086081,0.699451,0.482759,0.275591,ridolfi
7,0.091575,0.682802,0.482759,0.279277,tornabuon
11,0.075092,0.680879,0.424242,0.264672,strozzi
8,0.120879,0.651593,0.4,0.265409,bischeri
14,0.087912,0.619286,0.388889,0.256496,castellan
2,0.212454,0.613132,0.482759,0.260596,albizzi
13,0.115385,0.586374,0.4375,0.265032,barbadori
12,0.021978,0.524835,0.35,0.25348,peruzzi


In [27]:
pd1.sort_values('closeness simulado', ascending=0)

Unnamed: 0,betweenness analítico,betweenness simulado,closeness analítico,closeness simulado,família
7,0.091575,0.682802,0.482759,0.279277,tornabuon
9,0.086081,0.699451,0.482759,0.275591,ridolfi
6,0.521978,0.823187,0.56,0.26612,medici
8,0.120879,0.651593,0.4,0.265409,bischeri
13,0.115385,0.586374,0.4375,0.265032,barbadori
11,0.075092,0.680879,0.424242,0.264672,strozzi
3,0.260073,0.742692,0.466667,0.26369,guadagni
2,0.212454,0.613132,0.482759,0.260596,albizzi
14,0.087912,0.619286,0.388889,0.256496,castellan
12,0.021978,0.524835,0.35,0.25348,peruzzi


In [28]:
stats.ttest_ind(bmc,pdc)

Ttest_indResult(statistic=8.017743970641396, pvalue=9.8927754462101546e-09)

In [29]:
stats.ttest_ind(bmb,pdb)

Ttest_indResult(statistic=-8.2732232065940199, pvalue=5.2896388052324323e-09)

In [30]:
tt = simul(100, random_trail_successor, False)
ttc = tt["closeness simulado"]
ttb = tt["betweenness simulado"]
tt.describe()

Unnamed: 0,betweenness analítico,betweenness simulado,closeness analítico,closeness simulado
count,15.0,15.0,15.0,15.0
mean,0.115751,0.246586,0.411834,0.240741
std,0.136863,0.195124,0.074293,0.02548
min,0.0,0.0,0.285714,0.197433
25%,0.010989,0.071429,0.359211,0.225408
50%,0.087912,0.267308,0.4,0.241368
75%,0.131868,0.33522,0.474713,0.25994
max,0.521978,0.704505,0.56,0.285168


In [31]:
tt.sort_values('betweenness simulado', ascending=0)

Unnamed: 0,betweenness analítico,betweenness simulado,closeness analítico,closeness simulado,família
6,0.521978,0.704505,0.56,0.285168,medici
3,0.260073,0.402527,0.466667,0.269881,guadagni
14,0.087912,0.375,0.388889,0.231549,castellan
8,0.120879,0.335769,0.4,0.228222,bischeri
9,0.086081,0.33467,0.482759,0.250678,ridolfi
11,0.075092,0.328736,0.424242,0.230025,strozzi
7,0.091575,0.315659,0.482759,0.247274,tornabuon
2,0.212454,0.267308,0.482759,0.269203,albizzi
13,0.115385,0.266978,0.4375,0.271373,barbadori
12,0.021978,0.22478,0.35,0.241368,peruzzi


In [32]:
tt.sort_values('closeness simulado', ascending=0)

Unnamed: 0,betweenness analítico,betweenness simulado,closeness analítico,closeness simulado,família
6,0.521978,0.704505,0.56,0.285168,medici
13,0.115385,0.266978,0.4375,0.271373,barbadori
3,0.260073,0.402527,0.466667,0.269881,guadagni
2,0.212454,0.267308,0.482759,0.269203,albizzi
9,0.086081,0.33467,0.482759,0.250678,ridolfi
7,0.091575,0.315659,0.482759,0.247274,tornabuon
5,0.142857,0.142857,0.388889,0.243159,salviati
12,0.021978,0.22478,0.35,0.241368,peruzzi
14,0.087912,0.375,0.388889,0.231549,castellan
11,0.075092,0.328736,0.424242,0.230025,strozzi


In [33]:
stats.ttest_ind(bmc,ttc)

Ttest_indResult(statistic=8.4368729414471169, pvalue=3.5580340351345788e-09)

In [34]:
stats.ttest_ind(bmb,ttb)

Ttest_indResult(statistic=-2.1290062618420067, pvalue=0.042186030136462362)

In [35]:
td = simul(100, random_trail_successor, True)
tdc = td["closeness simulado"]
tdb = td["betweenness simulado"]
td.describe()

Unnamed: 0,betweenness analítico,betweenness simulado,closeness analítico,closeness simulado
count,15.0,15.0,15.0,15.0
mean,0.115751,0.677278,0.411834,0.241711
std,0.136863,0.279481,0.074293,0.018113
min,0.0,0.264396,0.285714,0.202306
25%,0.010989,0.405907,0.359211,0.229347
50%,0.087912,0.683077,0.4,0.248907
75%,0.131868,0.887665,0.474713,0.254011
max,0.521978,1.254286,0.56,0.266139


In [36]:
td.sort_values('betweenness simulado', ascending=0)

Unnamed: 0,betweenness analítico,betweenness simulado,closeness analítico,closeness simulado,família
6,0.521978,1.254286,0.56,0.254913,medici
9,0.086081,0.92511,0.482759,0.262992,ridolfi
7,0.091575,0.921374,0.482759,0.266139,tornabuon
3,0.260073,0.890549,0.466667,0.255715,guadagni
11,0.075092,0.88478,0.424242,0.25139,strozzi
14,0.087912,0.817912,0.388889,0.24291,castellan
8,0.120879,0.779505,0.4,0.249762,bischeri
2,0.212454,0.683077,0.482759,0.248907,albizzi
13,0.115385,0.598681,0.4375,0.253109,barbadori
12,0.021978,0.586703,0.35,0.237539,peruzzi


In [37]:
td.sort_values('closeness simulado', ascending=0)

Unnamed: 0,betweenness analítico,betweenness simulado,closeness analítico,closeness simulado,família
7,0.091575,0.921374,0.482759,0.266139,tornabuon
9,0.086081,0.92511,0.482759,0.262992,ridolfi
3,0.260073,0.890549,0.466667,0.255715,guadagni
6,0.521978,1.254286,0.56,0.254913,medici
13,0.115385,0.598681,0.4375,0.253109,barbadori
11,0.075092,0.88478,0.424242,0.25139,strozzi
8,0.120879,0.779505,0.4,0.249762,bischeri
2,0.212454,0.683077,0.482759,0.248907,albizzi
14,0.087912,0.817912,0.388889,0.24291,castellan
12,0.021978,0.586703,0.35,0.237539,peruzzi


In [38]:
stats.ttest_ind(bmc,tdc)

Ttest_indResult(statistic=8.6163194266791407, pvalue=2.3127537404839404e-09)

In [39]:
stats.ttest_ind(bmb,tdb)

Ttest_indResult(statistic=-6.9942169239565635, pvalue=1.3199298743884696e-07)

In [40]:
wt = simul(100, random_walk_successor, False)
wtc = wt["closeness simulado"]
wtb = wt["betweenness simulado"]
wt.describe()

Unnamed: 0,betweenness analítico,betweenness simulado,closeness analítico,closeness simulado
count,15.0,15.0,15.0,15.0
mean,0.115751,2.051,0.411834,0.036663
std,0.136863,1.162449,0.074293,0.001732
min,0.0,0.718516,0.285714,0.034197
25%,0.010989,1.164753,0.359211,0.035562
50%,0.087912,2.416978,0.4,0.036251
75%,0.131868,2.467555,0.474713,0.037359
max,0.521978,5.050604,0.56,0.04099


In [41]:
wt.sort_values('betweenness simulado', ascending=0)

Unnamed: 0,betweenness analítico,betweenness simulado,closeness analítico,closeness simulado,família
6,0.521978,5.050604,0.56,0.036041,medici
3,0.260073,3.267363,0.466667,0.035037,guadagni
9,0.086081,2.500989,0.482759,0.037437,ridolfi
7,0.091575,2.482527,0.482759,0.036251,tornabuon
11,0.075092,2.452582,0.424242,0.036272,strozzi
2,0.212454,2.436154,0.482759,0.037059,albizzi
8,0.120879,2.43478,0.4,0.036012,bischeri
14,0.087912,2.416978,0.388889,0.035109,castellan
13,0.115385,1.606429,0.4375,0.035287,barbadori
12,0.021978,1.567143,0.35,0.034197,peruzzi


In [42]:
wt.sort_values('closeness simulado', ascending=0)

Unnamed: 0,betweenness analítico,betweenness simulado,closeness analítico,closeness simulado,família
4,0.0,0.718516,0.285714,0.04099,pazzi
1,0.0,0.753571,0.325581,0.038732,lambertes
10,0.0,0.776374,0.368421,0.038401,acciaiuol
9,0.086081,2.500989,0.482759,0.037437,ridolfi
0,0.0,0.747857,0.333333,0.037281,ginori
2,0.212454,2.436154,0.482759,0.037059,albizzi
11,0.075092,2.452582,0.424242,0.036272,strozzi
7,0.091575,2.482527,0.482759,0.036251,tornabuon
6,0.521978,5.050604,0.56,0.036041,medici
8,0.120879,2.43478,0.4,0.036012,bischeri


In [43]:
stats.ttest_ind(bmc,wtc)

Ttest_indResult(statistic=19.552771002118625, pvalue=7.3351411335073947e-18)

In [44]:
stats.ttest_ind(bmb,wtb)

Ttest_indResult(statistic=-6.4038919162246106, pvalue=6.2164986716400501e-07)

In [45]:
wd = simul(100, random_walk_successor, True)
wdc = wd["closeness simulado"]
wdb = wd["betweenness simulado"]
wd.describe()

Unnamed: 0,betweenness analítico,betweenness simulado,closeness analítico,closeness simulado
count,15.0,15.0,15.0,15.0
mean,0.115751,2.394344,0.411834,0.154397
std,0.136863,1.676575,0.074293,0.013179
min,0.0,0.587088,0.285714,0.127472
25%,0.010989,1.140714,0.359211,0.144943
50%,0.087912,2.432418,0.4,0.153653
75%,0.131868,2.876676,0.474713,0.165615
max,0.521978,6.959066,0.56,0.171596


In [46]:
wd.sort_values('betweenness simulado', ascending=0)

Unnamed: 0,betweenness analítico,betweenness simulado,closeness analítico,closeness simulado,família
6,0.521978,6.959066,0.56,0.170869,medici
3,0.260073,4.492857,0.466667,0.165853,guadagni
14,0.087912,3.129451,0.388889,0.149206,castellan
2,0.212454,3.014451,0.482759,0.159935,albizzi
8,0.120879,2.738901,0.4,0.152015,bischeri
11,0.075092,2.71978,0.424242,0.156242,strozzi
9,0.086081,2.562582,0.482759,0.171596,ridolfi
7,0.091575,2.432418,0.482759,0.171451,tornabuon
5,0.142857,1.841429,0.388889,0.142261,salviati
12,0.021978,1.675879,0.35,0.144701,peruzzi


In [47]:
wd.sort_values('closeness simulado', ascending=0)

Unnamed: 0,betweenness analítico,betweenness simulado,closeness analítico,closeness simulado,família
9,0.086081,2.562582,0.482759,0.171596,ridolfi
7,0.091575,2.432418,0.482759,0.171451,tornabuon
6,0.521978,6.959066,0.56,0.170869,medici
3,0.260073,4.492857,0.466667,0.165853,guadagni
13,0.115385,1.476099,0.4375,0.165376,barbadori
2,0.212454,3.014451,0.482759,0.159935,albizzi
11,0.075092,2.71978,0.424242,0.156242,strozzi
10,0.0,0.587088,0.368421,0.153653,acciaiuol
8,0.120879,2.738901,0.4,0.152015,bischeri
14,0.087912,3.129451,0.388889,0.149206,castellan


In [48]:
stats.ttest_ind(bmc,wdc)

Ttest_indResult(statistic=13.214199738152377, pvalue=1.4835214927520742e-13)

In [49]:
stats.ttest_ind(bmc,wdb)

Ttest_indResult(statistic=-4.5752201329396991, pvalue=8.8517679262207902e-05)

A hiopotese nula (as duas amostras sao iguais) é rejeitada quando o p-value é menor que 0.05.

Dessa forma, a única situação onde não há rejeição da hipótese nula (tanto no closeness quanto no betweeness) é no caso de geodésica com difusão, uma vez que seu p-valor é maior que 5%.

Com isso, é possível afirmar que há diferença entre as simulações.

Se ordenarmos as familias por closeness e depois por betweenness percebe-se para closeness a ordem sofre quando comparada ao betweennes onde há familias quase sem alteracoes na sua posicao.

Closenees
é mais dificil achar um contancia e ele altera em praticamente todas as simulacoes.
Ao analisar o closeness a familia medici se mantem no topo, sendo uma exceção junto com a familia pazzi que esta sempre na ultima posicao.
Uma suposicao pra essa ocorrencia poderia ser a localizacao dessas familias no grafos, enquanto a medici tem muitas conexoes (degree alto) a pazzi tem somente uma a qual faz poucas conexoes

Betweeness
é mais constante as posicoes nas simulacoes, com medice tambem no topo e acciaiuol no final.
as familias no topo estao em sua maioria no centro do grafo enquanto as que possuem um valor de betweennees menor estao na periferia.
