In [None]:
import scipy.io
# Read the raw MATLAB file. scipy.io.loadmat returns a dict-like object keyed
# by the variable names stored in the file.
# NOTE(review): presumably the NIPS volumes 1-12 dataset — confirm the source.
mat = scipy.io.loadmat('data/nips12raw_str602.mat')

# Last expression of the cell, so the notebook displays the available keys.
mat.keys()

In [None]:
import os 

# You can change graph name here
# (base name shared by the input GML file and the output folder below)
graph_name = "G_dir_alpha0.8_08-12"

# graph_path is the GML file read by the graph-loading cell; vis_path is the
# folder every figure of this notebook is saved under.
graph_path = f'graphs/{graph_name}.gml'
vis_path = f'pdf/vis/{graph_name}' 
# Create the output folder up front; exist_ok=True keeps the cell re-runnable.
os.makedirs(vis_path, exist_ok=True)

# Run visualization

In [None]:
import networkx as nx 
import numpy as np
from collaboration_network import utils 

# Read the graph stored at graph_path (GML format)
G = nx.read_gml(graph_path)

# Gather the 'weight' attribute of every edge straight from the edge data,
# then remember the largest one (used as the upper bound of later plots)
weights = [attrs['weight'] for _, _, attrs in G.edges(data=True)]
max_weight = np.max(weights)

In [None]:
# Plot weight distribution (cumulative): for each threshold, the fraction of
# edges whose weight is at least that threshold.
import numpy as np
from matplotlib import pyplot as plt

start = 0.0
stop = max_weight
steps = 20

# np.linspace returns exactly `steps` thresholds and the last one is exactly
# `stop`. np.arange with a floating-point step can produce one element more or
# less and overshoot `stop`, which made the curve end at a spurious 0.
linkage_thres, step = np.linspace(start, stop, steps, retstep=True)

# Collect the weights once so each threshold is a single vectorised comparison
# instead of a fresh Python pass over all edges.
edge_weights = np.array([G[u][v]['weight'] for u, v in G.edges()], dtype=float)
total_edges = edge_weights.size

num_edges = [
    np.count_nonzero(edge_weights >= thres) / total_edges
    for thres in linkage_thres
]

# Start from a fresh figure so the curve is never drawn on top of a figure
# left over from a previous cell.
plt.figure()
plt.plot(linkage_thres, num_edges)
plt.xlabel('weight')
plt.ylabel('P(w_(u,v) >= weight)')
# File name (including its spelling) is kept unchanged so existing outputs and
# anything reading them keep working.
plt.savefig(f'{vis_path}/weight_dist_cummulative.pdf')

## Plot weight distribution

In [None]:
# Plot weight frequency: histogram of all edge weights

import numpy as np 

from matplotlib import pyplot as plt 

# One weight per edge, read directly from the edge attribute dicts
w = np.array([attrs['weight'] for _, _, attrs in G.edges(data=True)])

# Explicit figure/axes pair rather than the implicit pyplot state machine
fig, ax = plt.subplots(dpi=500)
# plt.bar(["0", "1"], [len(w[w==0]), len(w[w==1])])
ax.hist(w, bins=10)
ax.set_xlabel('weight')
ax.set_ylabel('count')
# plt.show()
fig.savefig(f'{vis_path}/weight_count.pdf')

# np.quantile(w, 0.5)


## Visualize communities

In [None]:
def edge_color(ar, clip=1):
    """Turn edge weights into black RGBA colours whose opacity encodes weight.

    Parameters
    ----------
    ar : array-like of numbers
        Edge weights (a 1-D sequence). The input is never modified.
    clip : float, optional
        Weight that maps to full opacity; larger weights are capped at this
        value. Must be positive.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(ar), 4)``. The RGB channels are 0 and the alpha
        channel is ``min(weight, clip) / clip``.

    Raises
    ------
    ValueError
        If ``clip`` is not positive (the scaling would divide by zero or flip
        the sign of the opacity).
    """
    if clip <= 0:
        raise ValueError(f"clip must be positive, got {clip!r}")

    # Convert to float so integer inputs are not truncated by a fractional
    # clip, and so plain lists are accepted.
    weights = np.atleast_1d(np.asarray(ar, dtype=float))

    # np.minimum allocates a new array, so the caller's data stays untouched
    # (the previous in-place assignment silently clipped the caller's array).
    alpha = np.minimum(weights, clip) / clip

    ret = np.zeros((weights.shape[-1], 4))
    ret[:, -1] = alpha
    return ret

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from collections import Counter 

def plot_community(G, partition=None, min_elem=None, outpath=None,  **kwargs):
    """Draw ``G`` with a spring layout, optionally coloured by community.

    Parameters
    ----------
    G : networkx.Graph or networkx.DiGraph
        Graph to draw. Every edge must carry a numeric ``'weight'`` attribute;
        it drives both the layout and the edge opacity.
    partition : dict, optional
        Mapping ``node -> community id`` whose keys are nodes of ``G``. The
        ids are used as colour values and ``max(id) + 1`` sizes the colour
        map, so they are assumed to be non-negative integers (TODO confirm
        against the algorithms in use). When ``None`` or empty, nodes are
        drawn in the default colour and no community centre is highlighted.
    min_elem : int, optional
        Only honoured when ``partition`` is not ``None``: connected components
        of ``G`` (weakly connected ones for a directed graph) with fewer than
        ``min_elem`` nodes are left out of the drawing.
    outpath : str, optional
        File the figure is saved to. When falsy the figure is shown instead.
    **kwargs
        Forwarded to ``nx.spring_layout`` (e.g. ``seed``).

    Returns
    -------
    networkx graph
        Copy of ``G`` without the nodes removed by the ``min_elem`` filter.
    """
    G_com = G.copy()

    # Nodes of connected components smaller than ``min_elem``. A set keeps the
    # later bookkeeping linear instead of quadratic.
    node_to_del = set()
    if min_elem is not None and partition is not None:
        # nx.connected_components is not implemented for directed graphs, so
        # use weak connectivity (edge direction ignored) for those.
        if G.is_directed():
            components = nx.weakly_connected_components(G)
        else:
            components = nx.connected_components(G)
        for node_set in components:
            if len(node_set) < min_elem:
                node_to_del.update(node_set)

    # Keep only the nodes of the large components
    G_com.remove_nodes_from(node_to_del)

    plt.figure(figsize=(6.4*2, 4.8*2), dpi=500)

    # The layout is computed on the full graph, so node positions do not
    # depend on the ``min_elem`` filter; then keep the surviving nodes only.
    pos = nx.spring_layout(G, weight='weight', **kwargs)
    pos_com = {node: pos[node] for node in G_com.nodes()}

    # An empty edge list is fine here: nothing below needs a maximum weight.
    edge_weights = np.array(
        [G_com[u][v]['weight'] for u, v in G_com.edges()], dtype=float
    )

    centers = []           # most central node of each drawn community
    center_node_size = []  # marker size of each centre, aligned with `centers`

    if partition:
        # Community of every node that is actually drawn
        common_partition = {
            node: comm for node, comm in partition.items() if node in G_com
        }

        # Group the drawn nodes by community in a single pass, following the
        # node order of G_com.
        colored_nodes = [node for node in G_com.nodes() if node in common_partition]
        members = {}
        for node in colored_nodes:
            members.setdefault(common_partition[node], []).append(node)

        # Centre of a community = node with the highest degree centrality
        # inside the community's induced subgraph. Its marker grows with the
        # community size.
        for community_nodes in members.values():
            d_c = nx.degree_centrality(G_com.subgraph(community_nodes))
            centers.append(max(d_c, key=d_c.get))
            center_node_size.append(35 + 20*len(community_nodes))

        node_cm = plt.get_cmap('gist_rainbow', max(partition.values()) + 1, )

        # node_color must follow the order of nodelist; the order of the
        # partition dict is unrelated to the node order of the graph.
        nx.draw_networkx_nodes(
            G_com, pos_com,
            nodelist=colored_nodes,
            node_size=5,
            cmap=node_cm,
            node_color=[common_partition[node] for node in colored_nodes],
            edgecolors=None, alpha=0.7,
        )
    else:
        nx.draw_networkx_nodes(
            G_com, pos_com,
            node_size=5,
            edgecolors=None, alpha=0.7
        )

    # Edges are drawn after the plain nodes and before the centre markers.
    # A copy of the weights is passed because edge_color may clip its
    # argument in place.
    nx.draw_networkx_edges(
        G_com, pos_com,
        edge_color=edge_color(1.*edge_weights),
        width=0.3
    )

    if centers:
        # One size per centre (previously only the size computed for the last
        # community was applied to every centre).
        nx.draw_networkx_nodes(
            G_com,
            nodelist=centers,
            pos={node: pos_com[node] for node in centers},
            cmap=node_cm, node_color=[common_partition[node] for node in centers],
            node_size=center_node_size, edgecolors='black', alpha=0.5,
        )

    # Show the graph, or save it when an output path is given
    if not outpath:
        plt.show()
    else:
        plt.savefig(outpath)

    return G_com


In [None]:

from community import community_louvain
from collaboration_network import algorithms as alg 

# Candidate resolution values (not referenced anywhere else in this cell)
resolutions = [0.2, 0.4, 0.6, 0.8, 1.0]

algorithm = alg.CDLibAlgorithm('louvain', dict(randomize=7), name=f'louvain')

# Baseline drawing without community colours
plot_community(G, seed=7, outpath=f'{vis_path}/unlabeled.pdf')

# Each entry: (output folder, extra keyword arguments for the algorithm).
# The first run uses the original graph, the others prune with w_min.
for res_dir, algo_kwargs in (
    (f'{vis_path}/orig', {}),
    (f'{vis_path}/prune_wmin-0.1', dict(w_min=0.1)),
    (f'{vis_path}/prune_wmin-0.2', dict(w_min=0.2)),
):
    os.makedirs(res_dir, exist_ok=True)
    partition = algorithm(G, **algo_kwargs)

    # Full graph first, then only components with at least 5 nodes
    plot_community(G, partition, seed=7, outpath=f'{res_dir}/{algorithm.name}_labeled_full.pdf')
    plot_community(G, partition, 5, seed=7, outpath=f'{res_dir}/{algorithm.name}_labeled_gt_5.pdf')


In [None]:

from community import community_louvain
from collaboration_network import algorithms as alg 

# Candidate resolution values (not referenced anywhere else in this cell)
resolutions = [0.2, 0.4, 0.6, 0.8, 1.0]

algorithm = alg.LouvainMD(random_state=7, name=f'louvain_md')

# Baseline drawing without community colours
plot_community(G, seed=7, outpath=f'{vis_path}/unlabeled.pdf')

# Each entry: (output folder, extra keyword arguments for the algorithm).
# The first run uses the original graph, the others prune with w_min.
for res_dir, algo_kwargs in (
    (f'{vis_path}/orig', {}),
    (f'{vis_path}/prune_wmin-0.1', dict(w_min=0.1)),
    (f'{vis_path}/prune_wmin-0.2', dict(w_min=0.2)),
):
    os.makedirs(res_dir, exist_ok=True)
    partition = algorithm(G, **algo_kwargs)

    # Full graph first, then only components with at least 5 nodes
    plot_community(G, partition, seed=7, outpath=f'{res_dir}/{algorithm.name}_labeled_full.pdf')
    plot_community(G, partition, 5, seed=7, outpath=f'{res_dir}/{algorithm.name}_labeled_gt_5.pdf')


In [None]:

from community import community_louvain
from collaboration_network import algorithms as alg 

# Candidate resolution values (not referenced anywhere else in this cell)
resolutions = [0.2, 0.4, 0.6, 0.8, 1.0]

algorithm = alg.CDLibAlgorithm('girvan_newman', dict(level=3), name=f'girvan_newman')

# Baseline drawing without community colours
plot_community(G, seed=7, outpath=f'{vis_path}/unlabeled.pdf')

# Each entry: (output folder, extra keyword arguments for the algorithm).
# The first run uses the original graph, the others prune with w_min.
for res_dir, algo_kwargs in (
    (f'{vis_path}/orig', {}),
    (f'{vis_path}/prune_wmin-0.1', dict(w_min=0.1)),
    (f'{vis_path}/prune_wmin-0.2', dict(w_min=0.2)),
):
    os.makedirs(res_dir, exist_ok=True)
    partition = algorithm(G, **algo_kwargs)

    # Full graph first, then only components with at least 5 nodes
    plot_community(G, partition, seed=7, outpath=f'{res_dir}/{algorithm.name}_labeled_full.pdf')
    plot_community(G, partition, 5, seed=7, outpath=f'{res_dir}/{algorithm.name}_labeled_gt_5.pdf')


# Legacy


In [None]:
# # nodes: author - main focus (max appearance in paper)
# # edges: author1 - author2 - co-occur
# import functools as ft
# with open('subjects.txt', 'r') as f:
#     data = f.readlines()

# def map_string(s):
#     s = s.strip().split(',')

#     s = list(map(lambda x: x.strip().split(), s)) 

#     s = ft.reduce(lambda x,y: x+y, s, [])

#     for i in range(len(s)):
#         if s[i].isdigit():
#             break 

#     subject, papers= ' '.join(s[:i]), ' '.join(s[i:])
#     return subject, papers


# data = list(map(map_string, data))
# import pandas as pd 

# df = pd.DataFrame(dict(
#     subject=[d[0] for d in data],
#     papers=[d[1] for d in data]
# ))

# df.to_csv('subjects.csv', index=0)

In [None]:
# import pandas as pd 
# df = pd.read_csv('authors.csv')
# df['first'] = df.author.apply(lambda x: x.split(',')[0].strip().upper())
# df['last'] = df.author.apply(lambda x: x.split(',')[1].strip().upper())


# author_list = df[['first', 'last']]

# author_list = author_list.drop_duplicates(subset=['first', 'last']).sort_values(by=['first', 'last'])

# author_list['change_group'] = author_list['first'] != author_list.shift()['first']

# author_list['group_name'] = author_list['change_group'].cumsum()

# # author_list.drop(columns='change_group').to_csv('author_uniq.csv', index=0)