# Cluster Visualisation

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.spatial import ConvexHull

In [None]:
# Read the exported clustering results into a DataFrame.
# The path is relative, so it is resolved against the current working directory.
data = pd.read_csv(filepath_or_buffer="../data/export/clustering_results.csv")

In [None]:
# Function for visualisation of clustering results
def visualise_clusters(data, cluster_column, x_column, y_column, x_label, y_label, title="Cluster Visualisation", show_contour=False):
    """
    Visualise the clustering results using a scatter plot.

    Every cluster label is assigned one discrete colour sampled from the
    'viridis' palette. When ``show_contour`` is True, the convex hull of each
    cluster is filled with that same colour, so hulls and points match.

    Args:
        data (pd.DataFrame): The input data for clustering.
        cluster_column (str): The column name containing cluster labels.
        x_column (str): The column name to use for the x-axis.
        y_column (str): The column name to use for the y-axis.
        x_label (str): The label to use for the x-axis.
        y_label (str): The label to use for the y-axis.
        title (str): The title of the plot.
        show_contour (bool): Whether to show the convex hull (contour) for each cluster.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # Imported locally so the function does not depend on a new file-level import.
    from scipy.spatial import QhullError

    # One fixed colour per cluster label, shared by the points and the hulls.
    # Missing labels are dropped because they cannot be matched to any row.
    labels = sorted(data[cluster_column].dropna().unique())
    palette = dict(zip(labels, sns.color_palette("viridis", n_colors=len(labels))))

    plt.figure(figsize=(5, 4))
    ax = sns.scatterplot(
        data=data,
        x=x_column,
        y=y_column,
        hue=cluster_column,
        # With no labels at all the mapping is empty; fall back to the named palette.
        palette=palette or "viridis",
        s=50,
    )

    ax.set_title(title)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    ax.legend(title="Cluster")

    # Add filled convex hulls if show_contour is True
    if show_contour:
        for cluster, colour in palette.items():
            # Skip noise points for DBSCAN
            if cluster == -1:
                continue

            # Rows with a missing coordinate cannot take part in a hull.
            points = (
                data.loc[data[cluster_column] == cluster, [x_column, y_column]]
                .dropna()
                .values
            )

            # ConvexHull requires at least 3 points
            if len(points) < 3:
                continue

            try:
                hull = ConvexHull(points)
            except QhullError:
                # Degenerate cluster (collinear or coincident points) has no
                # 2-D hull; keep the scatter and just skip the outline.
                continue

            outline = points[hull.vertices]
            # Fill with the cluster's own colour, semi-transparent
            ax.fill(outline[:, 0], outline[:, 1], color=colour, alpha=0.2)

    plt.show()

In [None]:
# Function for visualisation of *all* clustering
def visualise_all_clusters(data, x_column, y_column, x_label, y_label, show_contour=False, clustering_methods=None):
    """
    Visualise all clusters in the data, one figure per clustering method.

    Args:
        data (pd.DataFrame): The input data for clustering.
        x_column (str): The column name to use for the x-axis.
        y_column (str): The column name to use for the y-axis.
        x_label (str): The label to use for the x-axis.
        y_label (str): The label to use for the y-axis.
        show_contour (bool): Whether to show the convex hull (contour) for each cluster.
        clustering_methods (iterable of (str, str) or None): Pairs of
            (cluster label column, plot title) to draw. When None, the four
            default methods (KMeans, DBSCAN, Agglomerative, Gaussian Mixture
            Model) are used.

    Returns:
        None. Each figure is shown by ``visualise_clusters``.
    """

    # None rather than a list default, so no mutable object is shared between calls.
    if clustering_methods is None:
        clustering_methods = [
            ("KMeans_Cluster", "KMeans"),
            ("DBSCAN_Cluster", "DBSCAN"),
            ("Agglomerative_Cluster", "Agglomerative"),
            ("GMM_Cluster", "Gaussian Mixture Model"),
        ]

    # Iterate over each clustering method and visualise it
    for cluster_column, title in clustering_methods:
        visualise_clusters(
            data,
            cluster_column=cluster_column,
            x_column=x_column,
            y_column=y_column,
            x_label=x_label,
            y_label=y_label,
            title=title,
            show_contour=show_contour,
        )

In [None]:
# Plot every clustering method for the two chosen columns of the results
# table, with the convex-hull contour of each cluster enabled.
visualise_all_clusters(
    data,
    x_column="cual es tu promedio actual",
    y_column="cuantas horas al dia usas dispositivos electronicos para fines personales",
    x_label="Promedio",
    y_label="Horas Diarias en Dispositivos",
    show_contour=True,
)