# 🧠 Clustering Real User Mouse Activity
This notebook applies KMeans clustering to real user activity data using mouse behavior features.

In [None]:
# ✅ Step 1: Import libraries
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import seaborn as sns
import os

In [None]:
# ✅ Step 2: Load real user CSV data
# The path is relative, so it resolves against the kernel's working directory
# (normally the folder containing this notebook).
real_data_path = "../../data/real/km_stat_2025_04_18.csv"  # Update if needed
df = pd.read_csv(real_data_path)
# Preview the first rows to sanity-check that the file parsed as expected.
df.head()

In [None]:
# ✅ Step 3: Feature extraction from mouse movement
# `extract_features_dataframe` lives in the project-local `feature_extraction`
# module, which must be importable from the kernel's working directory / sys.path.
from feature_extraction import extract_features_dataframe

# Build the table that is fed to KMeans in Step 4. Its schema is defined by the
# helper and is not visible here; Step 5 reads the 'avg_speed' and
# 'mouse_box_area' columns from it.
# NOTE(review): Step 4 writes one label per `feature_df` row back onto `df`, so
# this presumably returns exactly one row per row of `df`, in the same order —
# TODO confirm against feature_extraction.
feature_df = extract_features_dataframe(df)
feature_df.head()

In [None]:
# ✅ Step 4: Apply KMeans clustering
# The labels are written back onto `df`, so the feature table must contain
# exactly one row per row of `df`. Check this up front and fail with a clear
# message instead of an opaque length-mismatch error from the assignment.
if len(feature_df) != len(df):
    raise ValueError(
        f"feature_df has {len(feature_df)} rows but df has {len(df)}; "
        "cluster labels cannot be aligned row-for-row"
    )

# n_init is pinned explicitly: scikit-learn changed its default from 10 to
# 'auto' in version 1.4 (and warns about the implicit value in 1.2-1.3), so
# leaving it unset makes the resulting labels depend on the installed version
# even with a fixed random_state.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)

# fit_predict returns one integer label (0..2) per input row.
df['cluster'] = kmeans.fit_predict(feature_df)

In [None]:
# ✅ Step 5: Visualize the clusters
# Scatter of two extracted features, one colour per KMeans label, drawn
# through an explicit Axes handle rather than the implicit pyplot state.
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(
    x=feature_df['avg_speed'],
    y=feature_df['mouse_box_area'],
    hue=df['cluster'],
    palette='Set1',
    ax=ax,
)
ax.set(
    title="Mouse Activity Clustering by Speed and Box Area",
    xlabel="Average Speed",
    ylabel="Mouse Box Area",
)
ax.legend(title="Cluster")
ax.grid(True)
plt.show()

In [None]:
# ✅ Step 6: Save the clustered output
# Writes `df` (the loaded rows plus the 'cluster' column added in Step 4) to a
# separate CSV, so the input file from Step 2 is not overwritten. The path is
# relative and resolves against the kernel's working directory.
output_path = "../../data/real/km_stat_2025_04_18_clustered.csv"
# index=False: do not write the DataFrame index as an extra column.
df.to_csv(output_path, index=False)
print("✅ Saved clustered file to:", output_path)