In [2]:
import pandas as pd
import networkx as nx
import igraph as ig
import numpy as np
import matplotlib.pyplot as plt
try:
    import leidenalg as la
except ModuleNotFoundError:
        !conda install leidenalg

import pickle

from community import community_louvain as louvain # pip install python-louvain


CUSTOM_FORMAT = {"font_size": 8,
                "node_size": 80, "node_color": "#ff0000", "edgecolors": "#911a07",
                "width": 0.4, "edge_color": "black"}

In [3]:
with open("../../state_files/PyPi Dataframe V4.pickle", "rb") as f:
    df = pickle.load(f)

G = nx.read_gexf('../../state_files/PyPi Network V4.gexf')
G_ig = ig.Graph.TupleList(G.edges(), directed=True) # Mejor manera que la que teníamos. Conserva nombres.

In [23]:
G_ig

<igraph.Graph at 0x158f0344740>

# Comunidades

In [3]:
def clusters_to_list(clusters, G) -> dict:
    """
    Arma un diccionario con los nombres de los nodos
    como keys y el id del cluster como valor.
    """
    dic = {}
    clusters_list = []
    for i, cluster in enumerate(sorted(list(clusters), key = len, reverse = True)):
        nodes_in_cluster = []
        for node in cluster:
            nodes_in_cluster.append(G.vs[node]['_nx_name'])
        clusters_list.append(nodes_in_cluster)
    return clusters_list

In [4]:
# Louvain no se puede hacer porque no funciona para redes dirigidas.
# Fast Greedy tampoco se puede hacer en redes dirigidas.

infomap = False
edge_betweenes = False
leidenalg = True

# Infomap.
if infomap:
    try:
        with open("com_im.pickle", "rb") as f:
            com_im = pickle.load(f)

    except FileNotFoundError:
        com_im = G_ig.community_infomap()
        with open("com_im.pickle", "wb") as f:
            pickle.dump(com_im, f)
        print("Infomap done.")

# Edge Betweeness
if edge_betweenes:
    try:
        with open("com_bt.pickle", "rb") as f:
            com_bt = pickle.load(f)
    except FileNotFoundError:
        com_bt = G_ig.community_edge_betweenness(clusters = None, directed = True, weights = None)
        with open("com_bt.pickle", "wb") as f:
            pickle.dump(com_bt, f)
        print("Edge Betweeness done.")

# Leiden Algorithm
if leidenalg:
    try:
        with open("com_la.pickle", "rb") as f:
            com_la = pickle.load(f)
    except FileNotFoundError:
        com_la = la.find_partition(G_ig, la.ModularityVertexPartition)
        # with open("com_la.pickle", "wb") as f:
        #     pickle.dump(com_la, f)
        print("Leiden Algorithm done.")


Leiden Algorithm done.


# LeidenAlg

In [5]:
def get_communities(com_ig) -> list:
    
    communities = []
    for com in com_ig:
        communities.append(com)

    communities = sorted(communities, reverse=True, key=len)
    
    return communities

In [6]:
communities = get_communities(com_la)

In [None]:
G_ig_com0 = G_ig.subgraph(communities[0])
G_com0 = nx.subgraph(G, G_ig_com0.vs['name'])

# Infomap

In [41]:
communities = []
for com in com_im:
    communities.append(com)
communities = sorted(communities, reverse=True, key=len)

print('Tamanio de comunidades:')
print('-----------------------')
for com in communities[:10]:
    print(len(com))

Tamanio de comunidades:
-----------------------
100563
475
383
319
239
223
192
187
163
150


In [45]:
G_ig_com0 = G_ig.subgraph(communities[0])

In [52]:
com_im_com0 = G_ig_com0.community_infomap()

communities_com0 = []
for com in com_im_com0:
    communities_com0.append(com)
communities_com0 = sorted(communities_com0, reverse=True, key=len)

print('Tamanio de comunidades:')
print('-----------------------')
for com in communities_com0[:10]:
    print(len(com))

Tamanio de comunidades:
-----------------------
99502
5
5
4
4
3
3
3
3
3


In [53]:
G_ig_com0_com0 = G_ig.subgraph(communities_com0[0])
com_im_com0_com0 = G_ig_com0_com0.community_infomap()

communities_com0_com0 = []
for com in com_im_com0_com0:
    communities_com0_com0.append(com)
communities_com0_com0 = sorted(communities_com0_com0, reverse=True, key=len)

print('Tamanio de comunidades:')
print('-----------------------')
for com in communities_com0_com0[:10]:
    print(len(com))

Tamanio de comunidades:
-----------------------
64280
475
192
184
170
162
121
120
75
70


In [36]:
nodes_com0 = communities[0]
G_com0 = nx.subgraph(G, nodes_com0)

### Código de Eitu

In [8]:
clusters = com_fg.as_clustering()
communities = clusters_to_list(clusters, G_ig)
for i in range(15):
    print(len(communities[i]))
nodes_to_plot = communities[10]
G_com = G.subgraph(nodes_to_plot)
nx.draw(G_com, labels={i: str(i) for i in range(len(list(G_com.nodes())))}, **CUSTOM_FORMAT)
fig, axs = plt.subplots(2, 2)
axs = axs.flatten()
for i, com_ix in enumerate(range(8, 12)):
    nodes_to_plot = communities[com_ix]
    G_com = G.subgraph(nodes_to_plot)
    nx.draw_kamada_kawai(G_com, ax=axs[i], **CUSTOM_FORMAT)

TypeError: 'Graph' object is not iterable