In [29]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler


In [30]:
def kmeans_clustering(n_clusters=3, random_state=42, n_init=10):
    """Cluster the iris measurements with K-Means on standardized features.

    Parameters
    ----------
    n_clusters : int, default 3
        Number of clusters passed to ``KMeans``.
    random_state : int or None, default 42
        Seed passed to ``KMeans`` so repeated runs give the same labels.
    n_init : int, default 10
        Number of centroid initializations passed to ``KMeans``.

    Returns
    -------
    tuple
        ``(df, target_names)`` where ``df`` holds the unscaled iris feature
        columns plus an integer ``'Cluster'`` column with the assigned label,
        and ``target_names`` is ``iris.target_names`` from ``load_iris()``.
    """
    iris = load_iris()
    df = pd.DataFrame(iris.data, columns=iris.feature_names)

    # Fit on standardized features; the scaled array is used only for
    # clustering, so the returned frame keeps the original measurements.
    df_scaled = StandardScaler().fit_transform(df)

    kmeans = KMeans(n_clusters=n_clusters, random_state=random_state, n_init=n_init)
    df['Cluster'] = kmeans.fit_predict(df_scaled)
    return df, iris.target_names

In [32]:
def plot_2d_scatter(df, output_path="static/plot_2d.png"):
    """Save a 2D sepal-length vs. sepal-width scatter plot coloured by cluster.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``'sepal length (cm)'``,
        ``'sepal width (cm)'`` and ``'Cluster'``.
    output_path : str or path-like, default "static/plot_2d.png"
        Where the image is written. Missing parent directories are created.

    Returns
    -------
    None
        The figure is written to ``output_path`` and then closed.
    """
    target = Path(output_path)
    # savefig does not create directories; without this a fresh checkout
    # with no "static/" folder fails with FileNotFoundError.
    target.parent.mkdir(parents=True, exist_ok=True)

    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        sns.scatterplot(
            data=df,
            x='sepal length (cm)',
            y='sepal width (cm)',
            hue='Cluster',
            palette='viridis',
            ax=ax,
        )
        ax.set_xlabel("Sepal Length (cm)")
        ax.set_ylabel("Sepal Width (cm)")
        ax.set_title("K-Means Clustering (2D View)")
        fig.savefig(target)
    finally:
        # Always release the figure, even if plotting or saving raised.
        plt.close(fig)
    

In [33]:
def plot_3d_scatter(df, output_path="static/plot_3d.html"):
    """Save an interactive 3D scatter plot of the clustered data as HTML.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``'sepal length (cm)'``,
        ``'sepal width (cm)'``, ``'petal length (cm)'`` and ``'Cluster'``.
    output_path : str or path-like, default "static/plot_3d.html"
        Where the HTML file is written. Missing parent directories are created.

    Returns
    -------
    None
        The figure is written to ``output_path``.
    """
    target = Path(output_path)
    # write_html does not create directories; make sure the folder exists
    # so a missing "static/" does not raise FileNotFoundError.
    target.parent.mkdir(parents=True, exist_ok=True)

    fig = px.scatter_3d(
        df,
        x='sepal length (cm)',
        y='sepal width (cm)',
        z='petal length (cm)',
        # Cast labels to str so the colour is built from string values
        # rather than the integer cluster ids.
        color=df['Cluster'].astype(str),
        title="K-Means Clustering (3D View)",
    )
    fig.write_html(str(target))
    

In [34]:
# Cluster the iris data, then write the static 2D scatter plot.
# `sample` receives the second return value of kmeans_clustering(),
# i.e. iris.target_names.
df, sample = kmeans_clustering()
plot_2d_scatter(df=df)



In [36]:
# Reuse `df` from the 2D-plot cell instead of refitting K-Means: the fit is
# seeded, so a second call only repeats the same computation.
# Requires the cell that assigns `df, sample = kmeans_clustering()` to have run.
plot_3d_scatter(df)


KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.

