In [None]:
import pandas as pd
import numpy as np
from sklearn.mixture import BayesianGaussianMixture
from sklearn.cluster import DBSCAN,KMeans

import clustergraph.clustergraph as cg
import clustergraph.distances as d
import clustergraph.plotting as cp
import clustergraph.metric_distortion as md


# Diabetes dataset: read the CSV, then keep the column names and the raw values
diabete = pd.read_csv("data/diabete.csv", sep=",")
headers_diabete = diabete.columns.values
X_diabete = diabete.to_numpy()

# The first five columns are used as the feature matrix X;
# the column at index 5 is stored separately as X_labels.
X, X_labels = X_diabete[:, :5], X_diabete[:, 5]

# Metric distortion of a ClusterGraph (score)

## Creation of the K-Nearest Neighbors Graph

In [None]:
%%time

from sklearn.neighbors import NearestNeighbors
import networkx as nx


# Number of neighbours requested per point.
# NOTE(review): X is passed explicitly to kneighbors_graph below; per the scikit-learn
# documentation a query point may then be returned as its own neighbour, so after the
# self edges are removed each point may keep only nb - 1 "real" neighbours — confirm
# that this is the intended neighbourhood size.
nb = 4

# `radius` only sets the default for radius-based queries; it is not used by the
# k-neighbours query performed here.
neigh = NearestNeighbors(n_neighbors = nb,  radius = 1)
neigh.fit(X=X)

# Sparse adjacency matrix whose stored entries are the distances to the neighbours
nn_adjacency = neigh.kneighbors_graph(X= X , n_neighbors = nb, mode='distance')

# The distances are stored on the edges under the attribute name 'label'
nn_Graph = nx.from_scipy_sparse_array(nn_adjacency, edge_attribute = 'label')

## remove self edges
# Only the self-loops that actually exist are removed: calling remove_edge(node, node)
# on every node raises NetworkXError as soon as one node has no self-loop
# (e.g. duplicated points that are not returned as their own neighbour).
# The edges are materialised in a list first because the graph is modified while
# the self-loops are being enumerated.
nn_Graph.remove_edges_from(list(nx.selfloop_edges(nn_Graph)))

# Displayed as the cell output: number of connected components of the k-NN graph
nx.number_connected_components(nn_Graph)

## Metric distortion depending on the clustering algorithms and the metric used

### KMeans

In [None]:
# Fit KMeans with 15 clusters (fixed seed) and get one label per row of X
model_KM = KMeans(n_clusters=15, random_state=42)
prediction_KM = model_KM.fit_predict(X)

# Get the clusters from the predictions
clusters = cg.get_clusters_from_scikit(prediction_KM)

# Distance-between-clusters object, built with the "avg" cluster metric
distance_clusters_ = (
    d.Creation_distances(clusters=clusters, X=X, metric_clusters="avg")
    .get_distance_cluster()
)

# Build the ClusterGraph and compute the distances between all the clusters
cluster_g_KM = cg.ClusterGraph(distance_clusters_)
cluster_g_KM.distances_clusters(normalize=False)

# Score the ClusterGraph against the k-NN graph; 'label' is the edge attribute
# under which the k-NN graph stores its distances
km_dist = md.metric_distortion_edges_CG(
    cluster_g_KM.graph,
    nn_Graph,
    variable='label',
)

print("Score for ClusterGraph from KMeans using average distance : ", km_dist)

### GAUSSIAN MIXTURE

In [None]:
# Fit a Bayesian Gaussian mixture with 5 components (fixed seed) and label each row of X
model_GM = BayesianGaussianMixture(n_components=5, random_state=42)
prediction_GM = model_GM.fit_predict(X)

# Get the clusters from the predictions
clusters = cg.get_clusters_from_scikit(prediction_GM)

# Distance-between-clusters object, built with the "min" cluster metric
distance_clusters_ = (
    d.Creation_distances(clusters=clusters, X=X, metric_clusters="min")
    .get_distance_cluster()
)

# Build the ClusterGraph and compute the distances between all the clusters
cluster_g_GM = cg.ClusterGraph(distance_clusters_)
cluster_g_GM.distances_clusters(normalize=False)

# Score the ClusterGraph against the k-NN graph; 'label' is the edge attribute
# under which the k-NN graph stores its distances
gm_dist = md.metric_distortion_edges_CG(
    cluster_g_GM.graph,
    nn_Graph,
    variable='label',
)

print("Score for ClusterGraph from Gaussian Mixture using the minimum distance : ", gm_dist)

## Visualization of two ClusterGraphs

Plot comparing the location of the points between two ClusterGraphs

In [None]:
# First graph (left panel): ClusterGraph built from KMeans
info_1 = cluster_g_KM.graph
number_edges_wanted_1 = 29
t_left = "ClusterGraph by using KMeans"

# Second graph (right panel): ClusterGraph built from the Gaussian Mixture
info_2 = cluster_g_GM.graph
number_edges_wanted_2 = 6
t_right = 'ClusterGraph by using Gaussian Mixture'

# Draw both ClusterGraphs side by side, each with its own requested number of edges
cp.show_graphs(
    info_1,
    nb_edges_1=number_edges_wanted_1,
    nb_edges_2=number_edges_wanted_2,
    info_graph_2=info_2,
    title_left=t_left,
    title_right=t_right,
    choice_col_type='points_covered',
    color_non_selected_nodes='white',
    font_size_edges_labels='10px',
    MIN_SCALE=5,
    MAX_SCALE=25,
)
