In [15]:
import numpy as np
import random
import igraph
import networkx as nx
import csv
import json
import pandas as pd
import time
import matplotlib as plt

### Data extraction

In [16]:
# Directory prefix for every data file; file names are appended to it verbatim.
# Raw string: the backslash is literal instead of relying on the unrecognised
# escape sequence "\d", which newer Python versions warn about.
path = r'..\data'

In [17]:
def load_data(path, filename):
    """Load a JSON file, print the size of its content and return it.

    The file location is the plain string concatenation ``path + filename``,
    so ``filename`` has to carry its own leading path separator.

    Parameters
    ----------
    path : str
        Directory prefix.
    filename : str
        File name, appended verbatim to ``path``.

    Returns
    -------
    object
        Whatever ``json.load`` decodes from the file.
    """
    # JSON text is UTF-8; without an explicit encoding the platform default
    # (e.g. cp1252 on Windows) would be used and could mis-decode the file.
    with open(path + filename, encoding="utf-8") as json_file:
        data = json.load(json_file)
    # Quick sanity check: number of top-level entries.
    print(len(data))
    return data

In [18]:
# Load the genre files of the three data sets (HR, HU, RO).
# Raw strings keep the leading backslash literal instead of relying on the
# unrecognised escape sequences "\H" and "\R".
data_HR = load_data(path, r'\HR_genres.json')
data_HU = load_data(path, r'\HU_genres.json')
data_RO = load_data(path, r'\RO_genres.json')

54573
47538
41773


In [19]:
# Read the HR edge list as rows of strings.
# newline="" is the mode the csv module documents for files given to csv.reader;
# the raw string avoids the unrecognised escape sequence "\H".
with open(path + r'\HR_edges.csv', "r", newline="") as f:
    reader = csv.reader(f)
    next(reader, None)  # drop the first row (presumably the column header)
    edges = list(reader)

In [20]:
def create_graph(path, filename, weighted = False, directed = False):
    """Build an igraph graph from a CSV edge list.

    The file at ``path + filename`` is read with ``csv.reader``. Its first row
    is dropped and the first two columns of every remaining row are parsed as
    integer vertex ids (source, target).

    Parameters
    ----------
    path : str
        Directory prefix.
    filename : str
        File name, appended verbatim to ``path``.
    weighted : bool, optional
        If true, each edge weight is the value returned by
        ``Graph.similarity_jaccard`` for its two endpoints; otherwise every
        edge gets weight 1.
    directed : bool, optional
        If true, a directed graph is built.

    Returns
    -------
    igraph.Graph
        Graph with ``max id + 1`` vertices and an edge attribute ``"weight"``.
        A file without any edge rows yields an empty graph.
    """
    # newline="" is the mode the csv module documents for reader input.
    with open(path + filename, "r", newline="") as f:
        reader = csv.reader(f)
        next(reader, None)  # drop the first row (presumably the column header)
        # csv.reader returns blank lines as empty lists; skip them instead of
        # failing on row[0].
        edges = [(int(row[0]), int(row[1])) for row in reader if row]

    # Vertex ids are used directly as igraph vertex indices, so the graph
    # needs max id + 1 vertices. default=-1 makes an empty edge list give 0.
    Nb_nodes = max((max(edge) for edge in edges), default=-1) + 1

    g = igraph.Graph(n=Nb_nodes, directed=bool(directed))
    g.add_edges(edges)
    if weighted and edges:
        g.es["weight"] = g.similarity_jaccard(pairs = edges)
    else:
        g.es["weight"] = 1
    return g

### Community detection

Useful link (community detection in Python): https://yoyoinwanderland.github.io/2017/08/08/Community-Detection-in-Python/

In [14]:
def compare_all(path, filename):
    """Run several igraph community-detection methods and print a comparison.

    The graph is built with ``create_graph`` for every combination of
    weighted / non-weighted and directed / non-directed. On the undirected
    graph Fast greedy, Walktrap, leading eigenvector ("Spectral clustering"),
    Label propagation, Louvain (multilevel) and Infomap are run; on the
    directed graph only Infomap is run. For each method one line is printed
    with the elapsed time in seconds, the number of communities and the
    modularity score.

    Parameters
    ----------
    path : str
        Directory prefix, forwarded to ``create_graph``.
    filename : str
        Edge-list file name, forwarded to ``create_graph``.

    Returns
    -------
    None
        Results are only printed.
    """
    # Each entry is (label, runner); a runner takes (graph, edge weights) and
    # returns a clustering object exposing ``membership`` and ``len()``.
    undirected_only = [
        ("Fast greedy", lambda g, w: g.community_fastgreedy(weights = w).as_clustering()),
        ("Walktrap", lambda g, w: g.community_walktrap(weights = w).as_clustering()),
        ("Spectral clustering", lambda g, w: g.community_leading_eigenvector(weights = w)),
        ("Label propagation", lambda g, w: g.community_label_propagation(weights = w)),
        ("Louvain", lambda g, w: g.community_multilevel(weights = w)),
    ]
    any_direction = [
        ("Infomap", lambda g, w: g.community_infomap(edge_weights = w)),
        # Girvan-Newman (g.community_edge_betweenness(weights = w)) is disabled.
    ]

    for weight in (True, False):
        print("Weighted" if weight else "Non weighted")
        for direct in (True, False):
            graph = create_graph(path, filename, weight, direct)
            weights = graph.es["weight"]
            if direct:
                print("Directed")
                # Communities found on the directed graph are scored on the
                # undirected graph built from the same edge list.
                scoring_graph = create_graph(path, filename, weight, directed = False)
                algorithms = any_direction
            else:
                print("Non directed")
                scoring_graph = graph
                algorithms = undirected_only + any_direction

            # Run everything first, then report, so the result lines stay
            # together. The timer is started right before each method, which
            # also covers Infomap on the directed graph.
            results = []
            for label, run in algorithms:
                start = time.perf_counter()
                clustering = run(graph, weights)
                results.append((label, time.perf_counter() - start, clustering))

            for label, elapsed, clustering in results:
                # NOTE: modularity is computed without edge weights, even for
                # the weighted runs.
                modularity = scoring_graph.modularity(clustering.membership)
                print("{}: time = {:.0f}, Number of communities = {}, Modularity score = {:.3f}".format(label, elapsed, len(clustering), modularity))
                print("")

Weighted
Directed
im ok
Infomap: time = 552, Number of communities = 3527, Modularity score = 0.433
Non directed
fs ok
wt ok
Sc ok
Lp ok
lv ok
im ok
Fast greedy: time = 45, Number of communities = 64, Modularity score = 0.711
Walktrap: time = 773, Number of communities = 1263, Modularity score = 0.670
Spectral clustering: time = 30, Number of communities = 1, Modularity score = 0.000
Label propagation: time = 7, Number of communities = 3347, Modularity score = 0.540
Louvain: time = 8, Number of communities = 57, Modularity score = 0.726
Infomap: time = 428, Number of communities = 3058, Modularity score = 0.504
Non weighted
Directed
im ok
Infomap: time = 796, Number of communities = 2892, Modularity score = 0.485
Non directed
fs ok
wt ok
Sc ok
Lp ok
lv ok
im ok
Fast greedy: time = 326, Number of communities = 147, Modularity score = 0.579
Walktrap: time = 736, Number of communities = 1073, Modularity score = 0.694
Spectral clustering: time = 105, Number of communities = 21, Modularity 

In [None]:
# Compare all methods on the HR edge list. Raw string: the leading backslash
# is literal instead of relying on the unrecognised escape sequence "\H".
compare_all(path, r'\HR_edges.csv')