## Graph Based Dataset
### Louvain’s Algorithm for Community Detection
https://github.com/taynaud/python-louvain

In [1]:
import networkx as nx
import os
import pandas as pd
import json

In [2]:

def load_graph_dataset(data_path="../data/deezer_ego_nets"):
    with open(os.path.join(data_path, "deezer_edges.json")) as f:
        graph = nx.MultiGraph({
            node: [element[1] for element in neighbors]
            for node, neighbors in json.load(f).items()
        })
    target = pd.read_csv(os.path.join(data_path, "deezer_target.csv"))
    return graph, target

graph, target = load_graph_dataset()

In [3]:
len(graph.nodes)

9988

In [3]:
mat = nx.adjacency_matrix(graph)

In [None]:
from sklearn.decomposition import TruncatedSVD
tsvd = TruncatedSVD(n_components=10)

In [5]:
import numpy as np
X = mat.todense()
rows = np.random.randint(X.shape[0], size=1000)
X = X[rows, :]

In [6]:
from sklearn.decomposition import PCA, FastICA
data = PCA(n_components=0.98).fit_transform(X)



In [7]:
data.shape

(1000, 68)

In [25]:
from sklearn.cluster import KMeans
model = KMeans(2, max_iter=1000)
labels = model.fit_predict(data)

In [26]:
from sklearn.metrics import silhouette_score
print(silhouette_score(data,labels))

ValueError: Number of labels is 1. Valid values are 2 to n_samples - 1 (inclusive)

In [None]:
# import random
# k = 5
# sampled_nodes = random.sample(graph.nodes, k)
# sampled_graph = graph.subgraph(sampled_nodes)
# nx.draw_networkx(graph, arrows=True, with_labels=False)

In [6]:
from community import community_louvain

comms = community_louvain.best_partition(graph)

In [7]:
import numpy as np
unique_coms = np.unique(list(comms.values()))
unique_coms

array([0, 1, 2, 3])

In [None]:
# unique_coms = np.unique(list(comms.values()))
# cmap = {
#     0 : 'maroon',
#     1 : 'teal',
#     2 : 'black', 
#     3 : 'orange',
# }

# node_cmap = [cmap[v] for _,v in comms.items()]

# pos = nx.spring_layout(graph)
# nx.draw(graph, pos, node_size = 75, alpha = 0.8, node_color=node_cmap)
# plt.show()