In [None]:
import networkx as nx 

# Load the graph that the cells below partition and plot from its GML file.
G = nx.read_gml('graphs/G_dir_alpha0.8_beta1.0.gml')

In [None]:
import numpy as np
def edge_color(ar):
    """Build black RGBA colours whose opacity is taken from ``ar``.

    Parameters
    ----------
    ar : array-like or scalar
        One value per edge, used as the alpha channel. Lists, tuples and
        scalars are accepted as well as NumPy arrays. matplotlib expects
        RGBA components in the range [0, 1]; the values are not rescaled
        or clipped here.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n, 4)`` where ``n`` is the size of the last axis
        of ``ar``. The RGB columns are 0 (black) and the last column holds
        the values of ``ar``.
    """
    # Coerce first so that plain Python sequences and scalars, which have no
    # ``.shape`` attribute, are handled the same way as arrays.
    alpha = np.atleast_1d(np.asarray(ar, dtype=float))
    ret = np.zeros((alpha.shape[-1], 4))
    ret[:, -1] = alpha
    return ret

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from collections import Counter 
def plot_community(G, partition=None, min_elem=None,  **kwargs):
    """Draw ``G`` with a spring layout, optionally coloured by community.

    Parameters
    ----------
    G : networkx graph
        Graph to draw. Every edge must carry a ``'weight'`` attribute; it is
        passed to the layout and used as the opacity of the drawn edge.
    partition : dict, optional
        Mapping ``node -> integer community id``. When given (and non-empty),
        nodes are coloured by community and, for every community, the node
        with the highest degree centrality inside that community is
        highlighted and labelled.
    min_elem : int, optional
        When given together with ``partition``, communities with fewer than
        ``min_elem`` nodes are removed from the drawing.
    **kwargs
        Forwarded to ``nx.spring_layout`` (for example ``seed=7``).

    Returns
    -------
    networkx graph
        A copy of ``G`` without the nodes of the removed communities.

    Raises
    ------
    ValueError
        If ``partition`` is given but does not cover every drawn node.
    """
    G_com = G.copy()

    # Group the nodes by community in a single pass over the partition
    # instead of rescanning the whole partition once per community.
    members = {}
    if partition:
        for node, community in partition.items():
            members.setdefault(community, []).append(node)

    # Keep large communities only (>= min_elem nodes).
    node_to_del = set()
    if min_elem is not None:
        for community_nodes in members.values():
            if len(community_nodes) < min_elem:
                node_to_del.update(community_nodes)
    G_com.remove_nodes_from(node_to_del)

    # Fail early, before a figure is opened, if some node has no community.
    if partition:
        missing = [node for node in G_com.nodes() if node not in partition]
        if missing:
            raise ValueError(
                f"partition has no community for {len(missing)} node(s), "
                f"e.g. {missing[0]!r}"
            )

    plt.figure(figsize=(6.4*2, 4.8*2), dpi=500)

    # The layout is computed on the full graph ``G`` rather than on the
    # filtered copy; only the positions of the kept nodes are used.
    pos = nx.spring_layout(G, weight='weight', **kwargs)
    pos_com = {node: pos[node] for node in G_com.nodes()}

    # Edge weights become the alpha channel of the (black) edge colours.
    edge_weights = np.array([G_com[u][v]['weight'] for u, v in G_com.edges()])
    edge_colors = edge_color(1.*edge_weights)

    if partition:
        # Colours must follow the node order of G_com (the order networkx
        # draws them in), not the iteration order of the partition dict.
        node_colors = [partition[node] for node in G_com.nodes()]

        # Pick one "center" per remaining community: the member with the
        # highest degree centrality within the community's subgraph.
        centers = []
        for community_nodes in members.values():
            kept = [node for node in community_nodes if node in G_com]
            if not kept:
                continue  # community was filtered out or is not in G
            d_c = nx.degree_centrality(G_com.subgraph(kept))
            centers.append(max(d_c, key=d_c.get))

        node_cm = plt.get_cmap('gist_rainbow', max(partition.values()) + 1)

        # Draw nodes, edges and the labels of the center nodes.
        nx.draw_networkx(
            G_com, pos_com,
            edge_color=edge_colors,
            width=1, node_size=2,
            cmap=node_cm, node_color=node_colors,
            labels={center: center for center in centers},
            font_size=5, font_color='blue'
        )

        # Draw the center nodes again, larger and outlined.
        nx.draw_networkx_nodes(
            G_com,
            nodelist=centers,
            pos={node: pos_com[node] for node in centers},
            cmap=node_cm, node_color=[partition[node] for node in centers],
            node_size=50, edgecolors='black', alpha=0.5
        )
    else:
        nx.draw(
            G_com, pos_com, with_labels=False,
            edge_color=edge_colors,
            width=1, node_size=2
        )

    # Show the graph
    plt.show()

    return G_com


# from community import community_louvain
# plot_community(G, seed=7)
# partition = community_louvain.best_partition(G)

# plot_community(G, partition, seed=7)

# plot_community(G, partition, 5, seed=7)


In [None]:

# Preview the RGBA edge colours produced for the loaded graph.
# The original cell read from ``G_flex``, which is never defined in this
# notebook and fails on a fresh kernel; ``G`` is the graph loaded above.
edge_weights = np.array([G[u][v]['weight'] for u, v in G.edges()])
max_weight = max(edge_weights)

edge_color(edge_weights)

In [None]:

from community import community_louvain

# Detect communities with the Louvain method. ``random_state`` fixes the
# otherwise random node ordering so the partition, and the figures below,
# are the same on every Restart & Run All (the layout is already seeded).
# NOTE(review): best_partition rejects directed graphs and the GML file name
# contains "dir" -- confirm that G is undirected before relying on this cell.
partition = community_louvain.best_partition(G, random_state=7)

# All communities first, then only those with at least 10 / 20 nodes.
for min_elem in (None, 10, 20):
    plot_community(G, partition, min_elem, seed=7)

# Kept outside the loop so the returned graph stays the cell's last value.
plot_community(G, partition, 30, seed=7)

# Legacy


In [None]:
# nodes: author - main focus (the subject appearing most often in their papers)
# edges: author1 - author2 - co-occurrence
import functools as ft
# Read the raw subject file, one list entry per line (newlines included).
with open('subjects.txt', 'r') as f:
    data = list(f)

def map_string(s):
    """Split one raw line into its subject text and its paper numbers.

    The line is broken into tokens on commas and whitespace. Everything
    before the first all-digit token is the subject; that token and
    everything after it are the papers.

    Parameters
    ----------
    s : str
        One raw line, e.g. ``"Computer Science, AI 12 34\\n"``.

    Returns
    -------
    tuple of (str, str)
        ``(subject, papers)``, each a space-joined string of tokens. When
        the line has no all-digit token, ``papers`` is ``''`` and the whole
        line is the subject; a blank line yields ``('', '')``.
    """
    # Commas only act as separators, so treating them as whitespace gives the
    # same tokens as splitting on ',' and then on whitespace.
    tokens = s.replace(',', ' ').split()

    # Index of the first numeric token; len(tokens) when there is none, so
    # that nothing is cut off the subject (and empty input does not fail).
    split_at = next(
        (idx for idx, token in enumerate(tokens) if token.isdigit()),
        len(tokens),
    )

    subject, papers = ' '.join(tokens[:split_at]), ' '.join(tokens[split_at:])
    return subject, papers


# Parse every raw line into a (subject, papers) pair.
data = [map_string(line) for line in data]
import pandas as pd 

# One row per parsed line, written out without the index column.
df = pd.DataFrame(data, columns=['subject', 'papers'])

df.to_csv('subjects.csv', index=False)

In [None]:
import pandas as pd 
# Load the author table; the ``author`` column holds comma-separated names.
df = pd.read_csv('authors.csv')

# Upper-cased, stripped text before the first comma goes to ``first`` and the
# piece after it to ``last``. The vectorised ``.str`` accessors yield NaN for
# a missing author or a name without a comma, where indexing
# ``x.split(',')[1]`` inside ``apply`` would raise.
name_parts = df['author'].str.split(',')
df['first'] = name_parts.str[0].str.strip().str.upper()
df['last'] = name_parts.str[1].str.strip().str.upper()


# Unique (first, last) pairs, ordered so that equal ``first`` values are adjacent.
author_list = (
    df[['first', 'last']]
    .drop_duplicates(subset=['first', 'last'])
    .sort_values(by=['first', 'last'])
)

# True on every row whose ``first`` differs from the previous row's.
author_list['change_group'] = author_list['first'].ne(author_list['first'].shift())

# Running count of those changes: a 1-based id shared by rows with equal ``first``.
author_list['group_name'] = author_list['change_group'].cumsum()

# author_list.drop(columns='change_group').to_csv('author_uniq.csv', index=0)