###### PREPARATION GRAPH ########### 

#' prepGraph
#' 
#' @description This function reads graphs from a file and 
#' prepares them for the analysis.
#'
#' @param file The input file containing the graph.
#' @param file.format Character constant giving the file format. Edgelist, 
#' pajek, graphml, gml, ncol, lgl, dimacs, graphdb and igraph are
#' supported.
#' @param numbers A logical value indicating if the names of the nodes are 
#' values. This argument is settable for the edgelist format. 
#' The default is FALSE.
#' @param directed A logical value indicating if is a directed graph. The 
#' default is FALSE.
#' @param header A logical value indicating whether the file contains 
#' the names of the variables as its first line. This argument is settable 
#' for the edgelist format. The default is FALSE.
#' @param verbose flag for verbose output (default as FALSE).
#' @return An igraph object that does not contain loops or multiple edges.
#' @import igraph
#' @importFrom utils read.table
#' @export
#'
#' @examples
#' #install.packages("robin")
#' 
#' #If there are problems with the installation try:
#' # if (!requireNamespace("BiocManager", quietly = TRUE))
#' #     install.packages("BiocManager")
#' # BiocManager::install("gprege")
#' # install.packages("robin")   
#'                      
#' my_file <- system.file("example/football.gml", package="robin")
#' graph <- prepGraph(file=my_file, file.format="gml")

In [11]:
import igraph as ig
import pandas as pd
import os

def prep_graph(file, file_format='gml', numbers=False, directed=False, header=False, verbose=False):
    """Read a graph from ``file`` and clean it up for the analysis.

    After loading, vertices of degree zero are deleted and the graph is
    simplified (self-loops and multiple edges removed).

    Parameters
    ----------
    file : str
        Path of the file containing the graph.
    file_format : str
        One of "edgelist", "pajek", "ncol", "lgl", "graphml", "dimacs",
        "graphdb", "gml", "dl", "igraph".
    numbers : bool
        Only used with ``file_format="edgelist"``: when true the file is
        parsed with pandas as a table of string vertex names instead of
        being handed to igraph's edge-list reader.
    directed : bool
        Whether to build a directed graph. Only forwarded to readers that
        take this option; the other formats are read as stored in the file.
    header : bool
        Only used together with ``numbers=True``: whether the first line of
        the edge list holds column names and must be skipped.
    verbose : bool
        Print the selected file format.

    Returns
    -------
    The cleaned igraph graph.

    Raises
    ------
    ValueError
        If ``file_format`` is not one of the accepted names.
    """
    # NOTE(review): "igraph" is accepted for parity with the R documentation;
    # confirm that python-igraph's Graph.Read recognises it.
    file_formats = ["edgelist", "pajek", "ncol", "lgl", "graphml", "dimacs", "graphdb", "gml", "dl", "igraph"]
    if file_format not in file_formats:
        raise ValueError("Invalid file_format. Expected one of: %s" % file_formats)

    if verbose:
        print("Detected file format:", file_format)

    # Readers that take a `directed` keyword. Forwarding it to the others
    # (e.g. pajek, graphml) makes Graph.Read fail with a TypeError.
    directed_readers = ("edgelist", "ncol", "lgl", "dimacs", "graphdb", "dl")

    if file_format == "gml":
        net = ig.Graph.Read_GML(file)
    elif file_format == "edgelist" and numbers:
        # quoting=3 is csv.QUOTE_NONE: quote characters are kept verbatim.
        # `header` was previously ignored, so a header row became an edge.
        edge = pd.read_table(file, header=0 if header else None, dtype=str, quoting=3)
        net = ig.Graph.TupleList(edge.to_numpy(), directed=directed)
    elif file_format in directed_readers:
        net = ig.Graph.Read(file, format=file_format, directed=directed)
    else:
        net = ig.Graph.Read(file, format=file_format)

    # Drop isolated vertices; one degree() call covers every vertex.
    isolated = [idx for idx, deg in enumerate(net.degree()) if deg == 0]
    net.delete_vertices(isolated)

    # simplify() works on `net` itself, so return `net` explicitly.
    net.simplify()
    return net

In [17]:
# Example: build the relative path to the football GML file and load it
# through prep_graph using the "gml" reader.
my_file = os.path.join("datasets", "football.gml")
graph = prep_graph(file=my_file, file_format="gml")

####### GRAPH RANDOM #########
#' random
#'
#' @description This function randomly rewires the edges while preserving the original graph's 
#' degree distribution.
#' @param graph The output of prepGraph.
#' @param verbose flag for verbose output (default as FALSE)
#' 
#' @return An igraph object, a randomly rewired graph.
#' @import igraph
#' @export
#'
#' @examples 
#' my_file <- system.file("example/football.gml", package="robin")
#' graph <- prepGraph(file=my_file, file.format="gml")
#' graphRandom <- random(graph=graph)

In [19]:
def random_graph(graph, verbose=False):
    """Return a randomly rewired copy of ``graph``.

    The number of rewiring trials equals the number of edges of ``graph``.
    The input graph is left untouched.

    Parameters
    ----------
    graph : igraph graph
        The output of ``prep_graph``.
    verbose : bool
        Print a progress message.

    Returns
    -------
    A new graph object holding the rewired edges.
    """
    if verbose:
        print("Randomizing the graph edges.")

    trials = graph.ecount()  # one rewiring trial per edge

    # Graph.rewire() modifies the graph in place and returns None, so work
    # on a copy and hand that copy back instead of the call's return value.
    graph_random = graph.copy()
    graph_random.rewire(n=trials)

    return graph_random

In [20]:
# Example: pass the graph loaded above to random_graph and keep the result.
graph_random = random_graph(graph=graph)

###### COMMUNITY METHOD ######    
#' methodCommunity
#' 
#' @description This function detects the community structure of a graph.
#' To detect the community structure the user can choose one of the methods implemented 
#' in igraph.
#' @param graph The output of prepGraph.
#' @param method The clustering method, one of "walktrap", "edgeBetweenness", 
#' "fastGreedy", "louvain", "spinglass", "leadingEigen", "labelProp", "infomap",
#' "optimal", "leiden", "other".
#' @param FUN in case the @method parameter is "other" there is the possibility 
#' to use a personal function passing its name through this parameter.
#' The personal function has to take as input the @graph and the @weights 
#' (that can be NULL), and has to return a community object.
#' @param weights  Optional positive weight vector. If the graph has a weight 
#' edge attribute, then this is used by default. Supply NA here if the graph 
#' has a weight edge attribute, but you want to ignore it. Larger edge weights
#' correspond to stronger connections. This argument is not settable for 
#' "infomap" method.
#' @param steps The number of steps to take, this is actually the number of 
#' tries to make a step. It is not a particularly useful parameter. This 
#' argument is settable only for "leadingEigen" and "walktrap" method.
#' @param spins Integer constant, the number of spins to use. This is the upper 
#' limit for the number of communities. It is not a problem to supply a 
#' (reasonably) big number here, in which case some spin states will be 
#' unpopulated. This argument is settable only for "spinglass" method.
#' @param e.weights If not NULL, then a numeric vector of edge weights. 
#' The length must match the number of edges in the graph. By default the 
#' ‘weight’ edge attribute is used as weights. If it is not present, then all
#' edges are considered to have the same weight. Larger edge weights correspond 
#' to stronger connections. This argument is settable only for "infomap"
#'  method.
#' @param v.weights If not NULL, then a numeric vector of vertex weights. The
#' length must match the number of vertices in the graph. By default the 
#' ‘weight’ vertex attribute is used as weights. If it is not present, then all
#' vertices are considered to have the same weight. A larger vertex weight means
#' a larger probability that the random surfer jumps to that vertex. This 
#' argument is settable only for "infomap" method.
#' @param nb.trials The number of attempts to partition the network (can be any
#' integer value equal or larger than 1). This argument is settable only for
#' "infomap" method.
#' @param resolution only for "louvain" and "leiden". Optional resolution 
#' parameter that allows the user to adjust the resolution parameter of the 
#' modularity function that the algorithm uses internally. Lower values 
#' typically yield fewer, larger clusters (default is 1).
#' @param directed Logical constant, whether to calculate directed edge 
#' betweenness for directed graphs. This argument is settable only for 
#' "edgeBetweenness" method.
#' @param verbose flag for verbose output (default as FALSE)
#'
#' @return A Communities object.
#' @import igraph
#' @export
#'
#' @examples 
#' my_file <- system.file("example/football.gml", package="robin")
#' graph <- prepGraph(file=my_file, file.format="gml")
#' methodCommunity (graph=graph, method="louvain") 

In [27]:
def method_community(graph,
                     method="louvain",
                     FUN=None,
                     directed=False,
                     weights=None,
                     steps=4,
                     spins=25,
                     e_weights=None,
                     v_weights=None,
                     nb_trials=10,
                     resolution=1,
                     verbose=False):
    """Detect the community structure of ``graph`` with the chosen method.

    Parameters
    ----------
    graph : igraph graph
        The output of ``prep_graph``.
    method : str
        Case-insensitive name: "walktrap", "edgeBetweenness", "fastGreedy",
        "louvain", "spinglass", "leadingEigen", "labelProp", "infomap",
        "optimal", "leiden" or "other".
    FUN : callable, optional
        Required when ``method="other"``; called as ``FUN(graph, weights)``
        and its result is returned unchanged.
    directed : bool
        Forwarded to the edge-betweenness method only.
    weights : optional
        Edge weights. When omitted, "walktrap", "edgeBetweenness" and
        "fastGreedy" fall back to the graph's "weight" edge attribute if
        the graph has one.
    steps : int
        Forwarded to "walktrap". For "leadingEigen" the default value 4 is
        treated as "not set" (no limit on the number of splits).
    spins : int
        Forwarded to "spinglass".
    e_weights, v_weights, nb_trials :
        Forwarded to "infomap" as edge weights, vertex weights and trials.
    resolution : number
        Forwarded to "leiden", and to "louvain" when different from 1.
    verbose : bool
        Print the method being applied.

    Returns
    -------
    The community object produced by the selected method (dendrogram-based
    methods are converted with ``as_clustering()``).

    Raises
    ------
    ValueError
        If ``method`` is not a known name.
    TypeError
        If ``method="other"`` and ``FUN`` is not callable.
    """
    method = method.lower()

    if verbose:
        print(f"Applying community method {method}")

    if weights is None and method in ("walktrap", "edgebetweenness", "fastgreedy"):
        # graph.es["weight"] raises KeyError on unweighted graphs, so only
        # read the attribute when it is actually present.
        if "weight" in graph.edge_attributes():
            weights = graph.es["weight"]

    if method == "optimal":
        # python-igraph names this method community_optimal_modularity.
        return graph.community_optimal_modularity(weights=weights)

    if method == "louvain":
        # Forward the resolution only when it was changed from the default,
        # so the default call keeps its original argument list.
        extra = {} if resolution == 1 else {"resolution": resolution}
        return graph.community_multilevel(weights=weights, return_levels=False, **extra)

    if method == "walktrap":
        return graph.community_walktrap(weights=weights, steps=steps).as_clustering()

    if method == "spinglass":
        return graph.community_spinglass(weights=weights, spins=spins)

    if method == "leadingeigen":
        # The default steps=4 is a sentinel meaning "no limit". Otherwise
        # `steps` counts splits, while python-igraph takes a target number
        # of communities; k splits yield at most k + 1 communities.
        if steps == 4 or steps < 0:
            clusters = None
        else:
            clusters = steps + 1
        return graph.community_leading_eigenvector(clusters=clusters, weights=weights)

    if method == "edgebetweenness":
        return graph.community_edge_betweenness(weights=weights, directed=directed).as_clustering()

    if method == "fastgreedy":
        return graph.community_fastgreedy(weights=weights).as_clustering()

    if method == "labelprop":
        return graph.community_label_propagation(weights=weights)

    if method == "infomap":
        return graph.community_infomap(edge_weights=e_weights, vertex_weights=v_weights, trials=nb_trials)

    if method == "leiden":
        return graph.community_leiden(weights=weights, resolution_parameter=resolution)

    if method == "other":
        if not callable(FUN):
            raise TypeError('method="other" requires FUN to be a callable taking (graph, weights).')
        return FUN(graph, weights)

    raise ValueError("Invalid community detection method.")

In [28]:
# Example: run community detection on the loaded graph with the "louvain" method.
communities = method_community(graph=graph, method="louvain")

##### MEMBERSHIP COMMUNITIES ######    
#' membershipCommunities
#' 
#' @description This function computes the membership vector of the community 
#' structure. To detect the community structure the user can choose one of the methods implemented 
#' in igraph.
#' @param graph The output of prepGraph.
#' @param method The clustering method, one of "walktrap", "edgeBetweenness", 
#' "fastGreedy", "louvain", "spinglass", "leadingEigen", "labelProp", "infomap",
#' "optimal", "leiden","other".
#' @param FUN in case the @method parameter is "other" there is the possibility 
#' to use a personal function passing its name through this parameter.
#' The personal function has to take as input the @graph and the @weights 
#' (that can be NULL), and has to return a community object.
#' @param weights  Optional positive weight vector. If the graph has a weight 
#' edge attribute, then this is used by default. Supply NA here if the graph 
#' has a weight edge attribute, but you want to ignore it. Larger edge weights
#' correspond to stronger connections. This argument is not settable for 
#' "infomap" method.
#' @param steps The number of steps to take, this is actually the number of 
#' tries to make a step. It is not a particularly useful parameter. This 
#' argument is settable only for "leadingEigen" and "walktrap" method.
#' @param spins Integer constant, the number of spins to use. This is the upper 
#' limit for the number of communities. It is not a problem to supply a 
#' (reasonably) big number here, in which case some spin states will be 
#' unpopulated. This argument is settable only for "spinglass" method.
#' @param e.weights If not NULL, then a numeric vector of edge weights. 
#' The length must match the number of edges in the graph. By default the 
#' ‘weight’ edge attribute is used as weights. If it is not present, then all
#' edges are considered to have the same weight. Larger edge weights correspond 
#' to stronger connections.  This argument is settable only for "infomap"
#'  method.
#' @param v.weights If not NULL, then a numeric vector of vertex weights. The
#' length must match the number of vertices in the graph. By default the 
#' ‘weight’ vertex attribute is used as weights. If it is not present, then all
#' vertices are considered to have the same weight. A larger vertex weight means
#' a larger probability that the random surfer jumps to that vertex. This 
#' argument is settable only for "infomap" method.
#' @param nb.trials The number of attempts to partition the network (can be any
#' integer value equal or larger than 1). This argument is settable only for
#' "infomap" method.
#' @param directed Logical constant, whether to calculate directed edge 
#' betweenness for directed graphs. This argument is settable only for 
#' "edgeBetweenness" method.
#' @param resolution only for "louvain" and "leiden". Optional resolution 
#' parameter, lower values typically yield fewer, larger clusters (default=1).
#' 
#' @return Returns a numeric vector, one number for each vertex in the graph; 
#' the membership vector of the community structure.
#' @import igraph
#' @export
#'
#' @examples 
#' my_file <- system.file("example/football.gml", package="robin")
#' graph <- prepGraph(file=my_file, file.format="gml")
#' membershipCommunities (graph=graph, method="louvain")

In [30]:
def membership_communities(graph,
                           method="louvain",
                           FUN=None,
                           directed=False,
                           weights=None,
                           steps=4,
                           spins=25,
                           e_weights=None,
                           v_weights=None,
                           nb_trials=10,
                           resolution=1):
    """Return the membership vector found by ``method_community``.

    Every argument is forwarded unchanged to ``method_community`` (the
    method name is lower-cased first); see that function for their meaning.

    Returns
    -------
    The ``membership`` attribute of the detected community object.
    """
    detection_args = dict(
        graph=graph,
        method=method.lower(),
        FUN=FUN,
        directed=directed,
        weights=weights,
        steps=steps,
        spins=spins,
        e_weights=e_weights,
        v_weights=v_weights,
        nb_trials=nb_trials,
        resolution=resolution,
    )
    detected = method_community(**detection_args)
    return detected.membership

In [31]:
# Example: "louvain" detection on the loaded graph, keeping only the
# membership that membership_communities returns.
membership = membership_communities(graph=graph, method="louvain")

################ PLOT GRAPH ###############
#' plotGraph
#'
#' @description Graphical interactive representation of the network.
#' @param graph The output of prepGraph.
#'
#' @return Creates an interactive plot, a D3 JavaScript network graph.
#' @import networkD3
#' @export
#'
#' @examples 
#' my_file <- system.file("example/football.gml", package="robin")
#' graph <- prepGraph(file=my_file, file.format="gml")
#' plotGraph (graph)

In [72]:
from pyvis.network import Network

def plot_graph(graph):
    """Show ``graph`` as an interactive pyvis network.

    The graph is converted with ``to_networkx()``, loaded into a
    notebook-mode pyvis ``Network`` with the physics control panel enabled,
    and shown under the name "graph.html".

    Returns whatever ``display`` returns.
    """
    # NOTE(review): `display` is not imported here; presumably the IPython
    # notebook built-in. Confirm before using outside a notebook.
    canvas = Network(notebook=True)
    nx_view = graph.to_networkx()
    canvas.from_nx(nx_view)
    canvas.show_buttons(filter_=['physics'])
    rendered = canvas.show("graph.html")
    return display(rendered)

In [73]:
# Example: render the loaded graph; plot_graph targets "graph.html".
plot_graph(graph)

graph.html


######################## PLOT COMMUNITIES ##############
#' plotComm
#' 
#' @description Graphical interactive representation of the network and its 
#' communities.
#' 
#' @param graph The output of prepGraph.
#' @param members A membership vector of the community structure, the output of
#' membershipCommunities. 
#'
#' @return Creates an interactive plot with colorful communities, a D3 
#' JavaScript network graph.
#' @import networkD3 
#' @importFrom methods is
#' @export
#'
#' @examples
#' my_file <- system.file("example/football.gml", package="robin")
#' graph <- prepGraph(file=my_file, file.format="gml")
#' members <- membershipCommunities (graph=graph, method="louvain")
#' plotComm(graph, members)

In [80]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

def plot_communities(graph, members):
    """Show ``graph`` as an interactive pyvis network coloured by community.

    Parameters
    ----------
    graph : igraph graph
        The output of ``prep_graph``.
    members : sequence
        Community label of every vertex, indexed by vertex index (the
        output of ``membership_communities``).

    The network is shown under the name "communities.html"; the function
    returns whatever ``display`` returns.
    """
    # NOTE(review): `display` is not imported here; presumably the IPython
    # notebook built-in. Confirm before using outside a notebook.
    network = Network(notebook=True)
    network.from_nx(graph.to_networkx())
    network.show_buttons(filter_=['physics'])

    # One rainbow colour per distinct community label.
    unique_groups = np.unique(members)
    palette = plt.cm.rainbow(np.linspace(0, 1, len(unique_groups)))
    group_colors = {group: mcolors.to_hex(rgba) for group, rgba in zip(unique_groups, palette)}

    # pyvis appends nodes in the order it meets them while walking the
    # edges, so a node's position in `network.nodes` is not its vertex
    # index. Resolve each node through its own identifier instead.
    # NOTE(review): assumes to_networkx() keys nodes by igraph vertex index
    # (and records it as `_igraph_index` when available); confirm for
    # graphs that carry custom networkx node names.
    for node in network.nodes:
        vertex_index = node.get('_igraph_index', node['id'])
        node['color'] = group_colors[members[vertex_index]]

    return display(network.show("communities.html"))

In [81]:
# Example: take the membership of the communities detected earlier and use
# it to colour the plot.
members = communities.membership
plot_communities(graph, members)

communities.html
