In [None]:
import os
import pandas as pd
import seaborn as sns

import numpy as np

import matplotlib.pyplot as plt

from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline

import plotly.express as px

In [2]:
# Load the experiments table from JSON; the path is relative to the
# notebook's working directory.
experimentos = pd.read_json(r'dados/experimentos.json')

In [4]:
# Preprocessing stage: min-max scale the inputs, then project them onto two
# principal components (seeded so the projection is reproducible).
preprocessor = Pipeline(
    steps=[
        ("scaler", MinMaxScaler()),
        ("pca", PCA(n_components=2, random_state=42)),
    ]
)

In [5]:
# Clustering stage: a single-step pipeline wrapping KMeans with three
# clusters, k-means++ initialisation and a fixed seed.
clusterer = Pipeline(
    steps=[
        (
            "kmeans",
            KMeans(
                n_clusters=3,
                init="k-means++",
                n_init=50,
                max_iter=500,
                random_state=42,
            ),
        ),
    ]
)

In [6]:
# End-to-end pipeline: preprocessing first, clustering second. The step names
# are used by later cells to reach into the fitted stages.
pipe = Pipeline(
    steps=[
        ("preprocessor", preprocessor),
        ("clusterer", clusterer),
    ]
)

In [None]:
# Fit the full pipeline (preprocessing followed by KMeans) on `features`.
# NOTE(review): `features` is not defined in any cell visible here, and the
# `experimentos` frame loaded earlier is never used -- presumably a cell that
# derives `features` from `experimentos` is missing. Confirm and restore it;
# otherwise this raises NameError on Restart Kernel -> Run All.
pipe.fit(features)

In [8]:
# Project the features through the fitted preprocessing stage only (no
# clustering), giving the coordinates the KMeans step was trained on.
preprocessed_data = pipe.named_steps["preprocessor"].transform(features)

In [None]:
# Cluster labels stored on the fitted KMeans estimator; the bare name on the
# last line displays them as the cell output.
predicted_labels = pipe.named_steps["clusterer"].named_steps["kmeans"].labels_
predicted_labels

In [None]:
# Silhouette score of the predicted labels, evaluated on the preprocessed
# (scaled + PCA) data rather than on the raw features.
silhouette_score(X=preprocessed_data, labels=predicted_labels)

In [11]:
# Two-column frame holding the PCA projection of every sample.
# - Reuses `preprocessed_data` instead of running the transform a second time.
# - Adopts the index of `features` (which must be a pandas object, since it is
#   passed to pd.concat further down) so that the column-wise concat aligns
#   row by row. With a default RangeIndex here, any non-default index on
#   `features` would make that concat produce NaN-padded, misaligned rows.
pcadf = pd.DataFrame(
    preprocessed_data,
    columns=["Componente 1", "Componente 2"],
    index=features.index,
)

In [12]:
# Attach the KMeans label of each sample as a new column; the labels are
# assigned positionally, in the same row order as the projection.
pcadf["Cluster Predito"] = pipe.named_steps["clusterer"].named_steps["kmeans"].labels_

In [None]:
# Static scatter plot of the two PCA components, colored by predicted cluster.
# The column names must match the ones actually present in `pcadf`
# ("Componente 1", "Componente 2", "Cluster Predito"); the previous
# "component_1" / "component_2" / "predicted_cluster" names do not exist in
# the frame. x and y are passed by keyword because recent seaborn releases no
# longer accept them positionally.
scat = sns.scatterplot(
    data=pcadf,
    x="Componente 1",
    y="Componente 2",
    hue="Cluster Predito",
    s=50,
    palette="Set2",
)

scat.set_title("Clustering")
# Anchor the legend's upper-left corner just outside the right edge of the
# axes so it does not cover any points.
plt.legend(bbox_to_anchor=(1.05, 1), loc="upper left", borderaxespad=0.0)
plt.show()

In [None]:
# Place the original features side by side with the PCA components and the
# cluster column (rows are matched on index), then display the result.
complete_df = pd.concat(objs=[features, pcadf], axis="columns")
complete_df

In [28]:
# Persist the combined frame (features, PCA components, cluster label) as
# JSON; the path is relative to the notebook's working directory.
complete_df.to_json(r'dados/dataframe-clusters.json')

In [None]:
# Interactive version of the cluster scatter plot. The cluster column is
# mapped through a three-stop continuous color scale.
fig = px.scatter(
    data_frame=complete_df,
    x="Componente 1",
    y="Componente 2",
    color="Cluster Predito",
    color_continuous_scale=(
        'rgb(3, 5, 18)',
        'rgb(72, 134, 187)',
        'rgb(114, 184, 205)',
    ),
)
fig.show()