In [1]:
import random
import re
from collections import Counter

import igraph as ig
import scipy.sparse

from maxent_graph import bicm, poibin

ModuleNotFoundError: No module named 'maxent_graph'

Let's try projecting a bipartite network of senators and bills onto the senators. The data comes from the 116th Congress: https://github.com/unitedstates/congress/wiki/bills

Read the graph in and construct a bipartite matrix using igraph.

In [None]:
# Base name of the GraphML file stored under ../data (extension is added below).
gn = "my_senate_116_bipartite"
g = ig.read(f"../data/{gn}.graphml", format="graphml")

# Everything downstream treats the graph as two-mode, so stop here if it is not.
assert g.is_bipartite()

# Only element 0 of get_incidence()'s result is used; it is converted to a
# CSR sparse matrix.
incidence_matrix = g.get_incidence()[0]
B = scipy.sparse.csr_matrix(incidence_matrix)

In [None]:
# Show B's repr as the cell output to sanity-check the matrix that was just built.
B

By convention, the projection is onto the rows. I want to project onto the senators, who are currently on the columns, so I transpose the matrix.

In [None]:
# Swap rows and columns so that the senators end up on the rows.
# Caution: this rebinds B, so running the cell a second time undoes the swap.
B = B.transpose()

Solve the equations to get the fitnesses for each node on both sides.

In [None]:
# Alternative entry point kept for reference: bicm.solve_equations_kitchen_sink(B)
solver_options = {"initial_guess_option": 2, "method": "lm"}
sol_bundle = bicm.solve_equations(B, **solver_options)

In [None]:
# Show the (rows, columns) dimensions of B as the cell output.
B.shape

Using the previously computed fitnesses, construct the projection using a particular p-value. To keep a record of *how* significant the edge is according to the null model, we keep the 'surprise' of the edge as a weight.

In [None]:
# Significance threshold handed to construct_projection as its p_val argument.
p_val = 0.05
new_A = bicm.construct_projection(B, sol_bundle, p_val=p_val)

Let's visualize the results using igraph. Getting this weighted adjacency matrix into igraph is a bit annoying, but here goes.

https://igraph.org/python/doc/tutorial/tutorial.html#layouts-and-plotting

In [None]:
# Row/column indices of every nonzero entry; each (row, col) pair becomes an edge.
sources, targets = new_A.nonzero()
edgelist = [(int(src), int(dst)) for src, dst in zip(sources, targets)]

# The graph is built from an explicit edge list; for background see
# https://github.com/igraph/python-igraph/issues/168
# NOTE(review): if new_A stores both (i, j) and (j, i), every edge is added
# twice to this graph -- confirm what construct_projection returns.
new_g = ig.Graph(n=new_A.shape[0], edges=edgelist)

# Read the weights back in the same (row, col) order used for the edge list so
# that each edge gets its own 'surprise' value.
new_g.es['surprise'] = new_A[sources, targets].toarray()[0]

In [None]:
# (vertex count, edge count) of the projected graph, shown as the cell output.
(new_g.vcount(), new_g.ecount())

In [None]:
# python-igraph takes its random numbers from Python's `random` module, so
# seeding it here makes the layout (and therefore the figure) repeatable
# across runs and across re-executions of this cell.
random.seed(42)

# Force-directed layout weighted by edge 'surprise'.
# Other layouts tried: layout_kamada_kawai(), layout_lgl(), and
# layout_drl(weights=new_g.es["surprise"]) -- the last one was judged unusable.
l = new_g.layout_fruchterman_reingold(weights=new_g.es["surprise"])

Leiden is a good, quick community detection algorithm.

In [None]:
# python-igraph takes its random numbers from Python's `random` module, so
# seeding it here makes the partition (and the cluster colours derived from it)
# repeatable across runs and across re-executions of this cell.
random.seed(42)

# Leiden community detection on the projected graph, weighted by edge 'surprise'.
clusters = new_g.community_leiden(
    objective_function='CPM',
    weights=new_g.es["surprise"],
    resolution_parameter=0.5,
    beta=0.01,
    initial_membership=None,
    n_iterations=10,
    node_weights=None,
)

In [None]:
# Print the members of each detected community, one community per line.
for members in clusters:
    print(members)

In [None]:
# https://carto.com/carto-colors/
PASTEL_CARTO = ['#66C5CC','#F6CF71','#F89C74','#DCB0F2','#87C55F','#9EB9F3',
                '#FE88B1','#C9DB74','#8BE0A4','#B497E7','#D3B484','#B3B3B3']

In [None]:
# Each community needs its own palette entry, so the palette must be at least
# as long as the number of communities.
assert len(PASTEL_CARTO) >= len(clusters)

In [None]:
# Invert the clustering: community_assignment[v] is the index of the community
# that contains vertex v (vertices not listed in any community stay at 0).
community_assignment = [0] * new_g.vcount()

for community_index, members in enumerate(clusters):
    for vertex_id in members:
        community_assignment[vertex_id] = community_index

In [None]:
# Colour every vertex with the palette entry of its community.
vertex_colors = [PASTEL_CARTO[community] for community in community_assignment]
new_g.vs['color'] = vertex_colors

In [None]:
# Labels are expected to contain "(xx-P)": two characters, a dash, and a party
# letter (D, R or I) inside parentheses. The party letter is capture group 1.
# Raw string so that "\(" reaches the regex engine without relying on Python
# passing an invalid escape sequence through unchanged.
party_re = re.compile(r".*\(..-(D|R|I)\)")

# NOTE(review): this assumes the first B.shape[0] vertices of g are exactly the
# vertices on B's rows, in the same order -- confirm against the graph file.
senator_names = g.vs['name'][:(B.shape[0])]

party = []
unparsed_names = []
for n in senator_names:
    m = party_re.match(n)
    if m is None:
        # Collect every offending label so they can all be reported at once,
        # instead of failing on the first one with an opaque AttributeError.
        unparsed_names.append(n)
        continue
    party.append(m.group(1))

if unparsed_names:
    # party must line up one-to-one with senator_names, so a partial result is
    # not usable downstream; stop with an explicit error.
    raise ValueError(
        f"could not parse a party from {len(unparsed_names)} name(s): {unparsed_names!r}"
    )

# Tally of senators per party, shown as the cell output.
Counter(party)

In [None]:
# Peek at the first label to eyeball its format.
senator_names[0]

In [None]:
def map_party(p):
    """Return the vertex shape name used to draw a senator of party ``p``.

    Parameters
    ----------
    p : str
        Party code: ``'R'``, ``'D'`` or ``'I'``.

    Returns
    -------
    str
        ``'square'`` for ``'R'``, ``'triangle'`` for ``'D'``, ``'circle'``
        for ``'I'``.

    Raises
    ------
    ValueError
        If ``p`` is not one of the three known codes. An explicit exception is
        used instead of ``assert False`` so the check still fires when Python
        runs with assertions disabled (``-O``) and so the message names the
        offending value.
    """
    shape_by_party = {
        'R': 'square',
        'D': 'triangle',
        'I': 'circle',
    }
    try:
        return shape_by_party[p]
    except (KeyError, TypeError):
        # TypeError covers unhashable inputs, which are just as invalid.
        raise ValueError(f"unknown party code: {p!r}") from None

In [None]:
# One vertex shape per senator, in the same order as `party`.
shapes = list(map(map_party, party))

In [None]:
# Show the first five shapes as a quick check of the mapping.
shapes[:5]

In [None]:
# Attach the drawing attributes to the vertices: shape, text label, and a
# uniform label size of 7 for every vertex.
vertex_count = new_g.vcount()
new_g.vs['shape'] = shapes
new_g.vs['label'] = senator_names
new_g.vs['label_size'] = [7] * vertex_count

In [None]:
# Draw the projected graph with the layout computed earlier; the colour, shape
# and label attributes were set on new_g.vs in the preceding cells.
ig.plot(new_g, layout=l, vertex_size=10)