In [3]:
import networkx as nx
import matplotlib.pyplot as plt
import json
import numpy as np

In [4]:
# let's try to use this twitter dataset
# it's in data/congress-twitter
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC10493874/

def load_congress_data_from_json(json_file_path):
    with open(json_file_path, 'r') as file:
        data = json.load(file)
    data = data[0]

    inList = data['inList']
    inWeight = data['inWeight']
    outList = data['outList']
    outWeight = data['outWeight']
    usernameList = data['usernameList']

    return inList, inWeight, outList, outWeight, usernameList

def create_congress_graph(inList, inWeight, outList, outWeight, usernameList):
    G = nx.DiGraph()
    for i, username in enumerate(usernameList):
        G.add_node(i, username=username)
    for i in range(len(inList)):
        for j, in_node in enumerate(inList[i]):
            G.add_edge(in_node, i, weight=inWeight[i][j])
        for j, out_node in enumerate(outList[i]):
            G.add_edge(i, out_node, weight=outWeight[i][j])
    return G

In [5]:
inList, inWeight, outList, outWeight, usernameList = load_congress_data_from_json('data/congress-twitter/congress_network_data.json')
G = create_congress_graph(inList, inWeight, outList, outWeight, usernameList)
print(G)

DiGraph with 475 nodes and 13289 edges


In [6]:
# sanity checks
print(G.number_of_nodes())
print(G.number_of_edges())

# from congress.edgelist,
# should be 0 4 {'weight': 0.002105263157894737}
# and 4 0 {'weight': 0.0036496350364963502}
print(G.nodes[0])
print(G.nodes[4])
print(G.get_edge_data(0, 4))
print(G.get_edge_data(4, 0))

475
13289
{'username': 'SenatorBaldwin'}
{'username': 'SenBlumenthal'}
{'weight': 0.002105263157894737}
{'weight': 0.0036496350364963502}


In [93]:
# try implementing model from the community detection paper
def community_detection(G, alpha=0.1, R=1.0, delta=0.1):
    n = len(G.nodes())
    x0 = np.random.rand(n) # generate random opinions
    print('x0 =', x0)
    x = x0.copy()
    # in the future, let's use their method of generating opinions
    rho = 1 - alpha*delta # rho is convergence rate
    A = nx.to_numpy_array(G) # adjacency matrix

    Ni_0 = [np.where(A[i]!=0)[0] for i in range(n)] # compute neighbor sets

    t = 0
    while True:
        t += 1

        # update confidence neighborhood
        # this is wrong
        Ni = Ni_0.copy()
        for i in range(n):
            for j in Ni_0[i]:
                if abs(x[i] - x[j]) > R*rho**t:
                    Ni[i] = np.delete(Ni[i], np.where(Ni[i]==j)[0][0])

        x_prev = x.copy()

        if t % 10000 == 0:
            print('t =', t)
        # update opinions 
        for i in range(n):
            if len(Ni[i]) > 0:
                x[i] = x[i] + alpha * np.mean(x[Ni[i]] - x[i])
                if t % 10000 == 0:
                    print('x[{}] = {}'.format(i, x[i]))
                    
        if np.max(np.abs(x - x_prev)) < 1e-12:
            print('converged at t =', t)
            print('x =', x)
            break
        # update rho 
        rho *= rho
    
    # generate final communities
    communities = {}
    for i, opinion in enumerate(x):
        for key in communities.keys():
            if abs(key - opinion) < 1e-4:
                communities[key].append(i)
                break
        else:
            communities[opinion] = [i]
    communities_list = list(communities.values())
    communities_list.sort(key=len, reverse=True)
    return communities_list

In [94]:
karate_club = nx.karate_club_graph()
print(karate_club)
communities_list = community_detection(karate_club)
print(communities_list)

Graph named "Zachary's Karate Club" with 34 nodes and 78 edges
x0 = [0.69907103 0.3634365  0.04987303 0.62022632 0.28551137 0.79392739
 0.38909158 0.00630134 0.72451832 0.62168237 0.88047167 0.69761287
 0.26129732 0.82010641 0.72457774 0.51175075 0.00971304 0.75943874
 0.89375495 0.34122449 0.97441369 0.01220099 0.54819316 0.81448049
 0.56320354 0.78448477 0.08602732 0.35991089 0.66137208 0.29552046
 0.12180801 0.32183769 0.16783848 0.96258837]
converged at t = 8
x = [0.61035183 0.37876098 0.23276858 0.52545447 0.42157922 0.68151504
 0.41954991 0.18155028 0.60455752 0.58246022 0.7483835  0.66583291
 0.41071997 0.70507718 0.67182634 0.53270955 0.22197049 0.65033763
 0.78488261 0.45074212 0.82747804 0.21346459 0.55422843 0.69849288
 0.5310756  0.69141914 0.2889177  0.45909234 0.5860228  0.37996418
 0.28825429 0.4371561  0.32584467 0.78706912]
[[0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14], [15], [16], [17], [18], [19], [20], [21], [22], [23], [24], [25], 

In [95]:
subgraph = G.subgraph([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
communities = community_detection(G)
print(communities)

x0 = [0.59886956 0.99698614 0.59453348 0.23621477 0.17371208 0.19381058
 0.52596697 0.7860803  0.53979627 0.16767861 0.03966246 0.83293405
 0.40560284 0.11731636 0.43344755 0.28816712 0.03615742 0.37933049
 0.88047524 0.16290633 0.66582554 0.62763997 0.2381386  0.96606822
 0.15317465 0.99253332 0.70984276 0.63224392 0.47665071 0.8391338
 0.94645736 0.92224846 0.64690438 0.35042938 0.68637622 0.53072343
 0.10464374 0.3605196  0.11158007 0.24611246 0.12743077 0.2922116
 0.35370483 0.00878736 0.90436207 0.41019165 0.1088725  0.10042015
 0.77055449 0.24232182 0.95313606 0.69277065 0.54429593 0.83600529
 0.64193057 0.05579075 0.9366455  0.51772098 0.65293201 0.24433729
 0.70448498 0.44710328 0.31460324 0.17092703 0.68884628 0.74008845
 0.73687049 0.31763887 0.7679834  0.06381189 0.0495859  0.87414663
 0.45855297 0.8246143  0.24016295 0.30317097 0.46119947 0.79303853
 0.14369566 0.51180392 0.44325285 0.12651785 0.13126197 0.508102
 0.1951951  0.45555951 0.57332936 0.29115867 0.5450217  0.908

In [84]:
A = nx.to_numpy_array(G)
print(A[0])

[0.         0.         0.         0.         0.00210526 0.
 0.         0.         0.         0.         0.         0.
 0.00210526 0.         0.         0.         0.         0.
 0.00210526 0.         0.         0.         0.         0.
 0.         0.00421053 0.         0.         0.         0.
 0.00210526 0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.00631579 0.
 0.         0.         0.         0.         0.         0.
 0.         0.00210526 0.         0.         0.00210526 0.00421053
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.00210526 0.         0.00421053 0.00210526
 0.         0.         0.         0.         0.         0.
 0.         0.00210526 0.00210526 0.00631579 0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.    