# VERA Clustering Visualization (3D)

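This notebook visualizes the VERA communication personas in 3D. Videos are grouped with K-Means clustering (K=4) on the standardized features, and PCA is then used to project them onto three principal components for plotting.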

In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from pathlib import Path

# Configuration
# Location of the per-video feature table, relative to the working directory.
DATA_PATH = Path("data/clustering_dataset/master_vector_data_set.csv")

# Columns that make up the clustering feature vector. The trailing comment on
# each entry names the communication trait the column is used to represent.
FEATURES = [
    "body_gesture_activity_mean",   # Energy
    "body_posture_openness_mean",   # Confidence
    "body_body_sway_mean",          # Stability
    "audio_wpm",                    # Pacing
    "face_head_speed_mean",         # Engagement
    "face_smile_mean",              # Warmth
    "audio_pitch_std_st",           # Expressiveness
]

# Persona Definitions (K=4)
# Keys are cluster ids as strings, because the labels are cast with
# astype(str) before being mapped. Values are the legend text shown in the plot.
# NOTE(review): K-Means label numbering is arbitrary, so these descriptions
# presumably match the run with the fixed seed on the current dataset --
# re-verify them whenever the data, features, or seed change.
PERSONA_MAP = {
    "0": "High head_speed, High sway",
    "1": "Low gesture_activity, Low sway",
    "2": "High gesture_activity, High smile",
    # Was "High pitchd": the short name of audio_pitch_std_st is pitch_std.
    "3": "High pitch_std, Low wpm",
}

In [None]:
# 1. Load Data
# One row per video; the row count is echoed as a quick sanity check.
df = pd.read_csv(DATA_PATH)
print(f"Loaded {len(df)} videos.")

# 2. Preprocess
# Mean-impute: each missing value is replaced by the average of its own column.
feature_frame = df[FEATURES]
column_means = feature_frame.mean()
X = feature_frame.fillna(column_means)

# Video names as strings (not referenced again in the cells shown here).
video_ids = df["video_name"].astype(str)

# Scale
# Standardize the features before clustering and PCA.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [None]:
# 3. Clustering (K=4)
# The seed is fixed and 50 initializations are tried, so reruns on the same
# data give the same cluster labels.
kmeans = KMeans(n_clusters=4, n_init=50, random_state=42)
kmeans.fit(X_scaled)
labels = kmeans.labels_

# Map labels to Persona Names
# The id is stored as a string so it matches the string keys of PERSONA_MAP.
df["cluster_id"] = labels.astype(str)
df["cluster"] = df["cluster_id"].map(PERSONA_MAP)

# 4. PCA for 3D Visualization
# PCA is fitted on the same scaled matrix that was clustered; it is only used
# to obtain three plotting coordinates.
pca = PCA(n_components=3)
components = pca.fit_transform(X_scaled)

# Store each principal component as its own DataFrame column.
for axis, column in enumerate(("PC1", "PC2", "PC3")):
    df[column] = components[:, axis]

# Total share of variance retained by the three plotted components.
explained_var = pca.explained_variance_ratio_.sum()
print(f"Explained Variance: {explained_var:.2%}")

In [None]:
# 5. Interactive 3D Plot
title_text = f"VERA Personas (K=4) - 3D PCA Projection (Explained Variance: {explained_var:.1%})"

# Keyword arguments for the scatter plot, gathered in one place.
scatter_options = {
    # Axes: the three principal components.
    "x": "PC1",
    "y": "PC2",
    "z": "PC3",
    # One colour per persona description; the legend is titled "Persona Group".
    "color": "cluster",
    "labels": {"cluster": "Persona Group"},
    # Hover shows the video name plus every raw feature value.
    "hover_name": "video_name",
    "hover_data": FEATURES,
    "title": title_text,
    "opacity": 0.8,
    # No `size` column is given, so this presumably has no visible effect;
    # the marker size is set explicitly below.
    "size_max": 10,
}
fig = px.scatter_3d(df, **scatter_options)

# Uniform marker size for all points.
fig.update_traces(marker={"size": 5})
fig.show()