In [1]:
import torch
import random
import numpy as np
from sklearn.utils import check_random_state
import pandas as pd


# One seed shared by every library so a fresh "Restart & Run All" is reproducible.
SEED = 42

# Python's built-in RNG.
random.seed(SEED)

# NumPy's global RNG. pandas draws from NumPy, and scikit-learn falls back to
# this global state whenever random_state=None, so this call covers all three.
np.random.seed(SEED)

# check_random_state(seed) is deliberately not called here: it only builds and
# returns a RandomState instance and seeds nothing when the result is discarded.
# Pass random_state=SEED to scikit-learn APIs that need their own generator.

# PyTorch; kept as the last expression so the cell still displays the generator.
torch.manual_seed(SEED)

<torch._C.Generator at 0x7f16063c4070>

In [2]:
from src.load_data import get_data
from src.EchoGAE import EchoGAE_algorithm
from src.echo_chamber_measure import EchoChamberMeasure


# TODO: Work on baselines
from networkx.algorithms.community import asyn_fluidc
from src.baselines.RWC_jit import RWC
# from src.baselines.RWC import RWC  # Use this import instead if numba is not installed (it is slower).

from src.baselines.polarization_index import add_ideology_to_graph, opinion_model, get_polarization_index

  from .autonotebook import tqdm as notebook_tqdm


In [23]:
# Names of the available datasets; the chosen name is used to build the data path.
datasets = ["gun", "abortion", "super_bowl", "sxsw"]

# Analyse the last dataset in the list ("sxsw").
ds = datasets[-1]
print("Dataset: ", ds)

Dataset:  sxsw


# Echo Chamber Score:

In [24]:
# Load the graph, user embeddings, labels, AllSides scores and node-id map
# for the selected dataset from its folder under data/.
data_dir = f"data/{ds}/"
G, users_embeddings, labels, allsides_scores, node_id_map = get_data(data_dir)

In [25]:
# Graph size.
print("Number of nodes: ", G.number_of_nodes())
print("Number of edges: ", G.number_of_edges())
print("")

# The community count is taken as the largest label plus one.
num_of_communities = labels.max() + 1
print("Number of communities: ", num_of_communities)

# Size of each community: count the entries of `labels` equal to its id.
for community_id in range(num_of_communities):
    community_size = (labels == community_id).sum()
    print(f"Number of nodes in community {community_id}: ", community_size)
print("")

# How many users come with an AllSides score.
print("Number of users with AllSides score: ", len(allsides_scores))

Number of nodes:  2436
Number of edges:  5325

Number of communities:  6
Number of nodes in community 0:  1532
Number of nodes in community 1:  34
Number of nodes in community 2:  85
Number of nodes in community 3:  717
Number of nodes in community 4:  54
Number of nodes in community 5:  14

Number of users with AllSides score:  200


In [37]:
# Run EchoGAE on the graph and the input user embeddings.
# show_progress=True logs one line of AUC/AP per epoch (see the output below).
user_emb = EchoGAE_algorithm(
    G,
    user_embeddings=users_embeddings,
    show_progress=True,
    hidden_channels=20,
    out_channels=10,
    epochs=300,
)

Epoch: 001, AUC: 0.7465, AP: 0.7775
Epoch: 002, AUC: 0.7641, AP: 0.7877
Epoch: 003, AUC: 0.7707, AP: 0.7915
Epoch: 004, AUC: 0.7652, AP: 0.7891
Epoch: 005, AUC: 0.7542, AP: 0.7835
Epoch: 006, AUC: 0.7392, AP: 0.7761
Epoch: 007, AUC: 0.7275, AP: 0.7706
Epoch: 008, AUC: 0.7215, AP: 0.7682
Epoch: 009, AUC: 0.7198, AP: 0.7680
Epoch: 010, AUC: 0.7211, AP: 0.7691
Epoch: 011, AUC: 0.7230, AP: 0.7706
Epoch: 012, AUC: 0.7259, AP: 0.7734
Epoch: 013, AUC: 0.7294, AP: 0.7765
Epoch: 014, AUC: 0.7368, AP: 0.7817
Epoch: 015, AUC: 0.7559, AP: 0.7925
Epoch: 016, AUC: 0.7870, AP: 0.8101
Epoch: 017, AUC: 0.8194, AP: 0.8307
Epoch: 018, AUC: 0.8412, AP: 0.8465
Epoch: 019, AUC: 0.8531, AP: 0.8563
Epoch: 020, AUC: 0.8590, AP: 0.8614
Epoch: 021, AUC: 0.8624, AP: 0.8645
Epoch: 022, AUC: 0.8654, AP: 0.8662
Epoch: 023, AUC: 0.8678, AP: 0.8663
Epoch: 024, AUC: 0.8680, AP: 0.8643
Epoch: 025, AUC: 0.8648, AP: 0.8595
Epoch: 026, AUC: 0.8579, AP: 0.8529
Epoch: 027, AUC: 0.8534, AP: 0.8497
Epoch: 028, AUC: 0.8544, AP:

In [38]:
# Build the echo-chamber measure from the embeddings returned by
# EchoGAE_algorithm (user_emb) and the community labels loaded with the dataset.
ecm = EchoChamberMeasure(user_emb, labels)

In [39]:
# Bare last expression so the notebook renders the returned index as the cell output.
ecm.echo_chamber_index()

0.46833934688724504

# Baselines:

## 1. Random Walk Controversy Score

In [40]:
# Split the graph into k=2 communities with asynchronous fluid communities;
# the result is materialised into a list so it can be indexed.
mem = list(asyn_fluidc(G, k=2, seed=42, max_iter=1000))

# Node arrays for the first and second community, in that order.
nodes_0, nodes_1 = (np.array(list(side)) for side in mem[:2])

In [41]:
# Random Walk Controversy baseline on G, using the two node partitions
# nodes_0 and nodes_1; the returned score is shown as the cell output.
RWC(G, nodes_0, nodes_1)

0.485014087571752

## 2. Polarization Index:

In [42]:
def add_ideology_to_graph_22(G, ideologies_dict, default_ideology=0):
    """Return a copy of ``G`` whose nodes carry an ``"ideology"`` attribute.

    Parameters
    ----------
    G : graph
        Input graph. It is copied first, so the caller's graph is not modified.
    ideologies_dict : mapping
        Maps node -> ideology value. Keys that are not nodes of ``G`` are ignored.
    default_ideology : optional
        Value assigned to nodes missing from ``ideologies_dict`` (default ``0``,
        which matches the previous hard-coded behaviour).

    Returns
    -------
    tuple
        ``(graph_copy, core_nodes)`` where ``core_nodes`` lists, in graph
        iteration order, the nodes whose ideology came from ``ideologies_dict``.
    """
    graph = G.copy()
    core_nodes = []
    for node in graph.nodes():
        if node in ideologies_dict:
            # Known ideology: copy it over and remember the node as a core node.
            graph.nodes[node]["ideology"] = ideologies_dict[node]
            core_nodes.append(node)
        else:
            # Unknown ideology: fall back to the configurable default.
            graph.nodes[node]["ideology"] = default_ideology
    return graph, core_nodes

In [43]:
# Attach the AllSides scores as node ideologies; nodes that have a score are the core nodes.
G_ven, core_nodes = add_ideology_to_graph_22(G, ideologies_dict=allsides_scores)

# Run the opinion model from the core nodes.
opinions = opinion_model(G_ven, core_nodes=core_nodes)

# get_polarization_index returns an indexable result; only its first element is kept.
polarization_result = get_polarization_index(opinions)
polarization_index = polarization_result[0]

In [44]:
# Display the polarization index computed in the previous cell.
polarization_index

0.0015141484435409224