In [3]:
import networkx as nx
import matplotlib.pyplot as plt
import json
import numpy as np

In [4]:
# let's try to use this twitter dataset
# it's in data/congress-twitter
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC10493874/

def load_congress_data_from_json(json_file_path):
    with open(json_file_path, 'r') as file:
        data = json.load(file)
    data = data[0]

    inList = data['inList']
    inWeight = data['inWeight']
    outList = data['outList']
    outWeight = data['outWeight']
    usernameList = data['usernameList']

    return inList, inWeight, outList, outWeight, usernameList

def create_congress_graph(inList, inWeight, outList, outWeight, usernameList):
    G = nx.DiGraph()
    for i, username in enumerate(usernameList):
        G.add_node(i, username=username)
    for i in range(len(inList)):
        for j, in_node in enumerate(inList[i]):
            G.add_edge(in_node, i, weight=inWeight[i][j])
        for j, out_node in enumerate(outList[i]):
            G.add_edge(i, out_node, weight=outWeight[i][j])
    return G

In [5]:
inList, inWeight, outList, outWeight, usernameList = load_congress_data_from_json('data/congress-twitter/congress_network_data.json')
G = create_congress_graph(inList, inWeight, outList, outWeight, usernameList)
print(G)

DiGraph with 475 nodes and 13289 edges


In [6]:
# sanity checks
print(G.number_of_nodes())
print(G.number_of_edges())

# from congress.edgelist,
# should be 0 4 {'weight': 0.002105263157894737}
# and 4 0 {'weight': 0.0036496350364963502}
print(G.nodes[0])
print(G.nodes[4])
print(G.get_edge_data(0, 4))
print(G.get_edge_data(4, 0))

475
13289
{'username': 'SenatorBaldwin'}
{'username': 'SenBlumenthal'}
{'weight': 0.002105263157894737}
{'weight': 0.0036496350364963502}


In [42]:
# try implementing model from the community detection paper
def community_detection(G, alpha=0.1, R=1.0, delta=0.2):
    n = len(G.nodes())
    x0 = np.random.rand(n) # generate random opinions
    print('x0 =', x0)
    x = x0.copy()
    # in the future, let's use their method of generating opinions
    rho = 1 - alpha*delta # rho is convergence rate
    A = nx.to_numpy_array(G) # adjacency matrix

    t = 0
    while True:
        t += 1
        Ni = [np.where(A[i]!=0)[0] for i in range(n)] # compute neighbor sets

        x_prev = x.copy()

        if t % 10000 == 0:
            print('t =', t)
        # update opinions 
        for i in range(n):
            if len(Ni[i]) > 0:
                x[i] = x[i] + alpha * np.mean(x[Ni[i]] - x[i])
                if t % 10000 == 0:
                    print('x[{}] = {}'.format(i, x[i]))
                    
        if np.max(np.abs(x - x_prev)) < 1e-12:
            print('converged at t =', t)
            print('x =', x)
            break
        # update rho 
        rho *= rho
    
    x = [0.12741559, 0.1274119, 0.10524521, 0.17017279, 0.14870213, 0.17017279, 0.12741577, 0.12723148, 0.12716827, 0.12722516]
    print('x =', x)
    # generate final communities
    communities = {}
    for i, opinion in enumerate(x):
        for key in communities.keys():
            if abs(key - opinion) < 1e-4:
                communities[key].append(i)
                break
        else:
            communities[opinion] = [i]
    communities_list = list(communities.values())
    communities_list.sort(key=len, reverse=True)
    return communities_list

In [43]:
subgraph = G.subgraph([0, 4, 5, 6, 7, 8, 9, 10, 11, 12])
communities = community_detection(subgraph)
print(communities)

x0 = [0.54917165 0.69575432 0.17767742 0.29325098 0.28559031 0.3181052
 0.34854873 0.09052964 0.88956782 0.45996672]
converged at t = 1246
x = [0.22448668 0.22448668 0.17767742 0.3181052  0.27129594 0.3181052
 0.22448668 0.22448668 0.22448668 0.22448668]
x = [0.12741559, 0.1274119, 0.10524521, 0.17017279, 0.14870213, 0.17017279, 0.12741577, 0.12723148, 0.12716827, 0.12722516]
[[0, 1, 6], [7, 8, 9], [3, 5], [2], [4]]
