In [None]:
import json
import os

import numpy as np
import pandas as pd
import plotly.express as px
from sentence_transformers import SentenceTransformer
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

In [None]:
# Load the per-student grade records from disk.
# The encoding is pinned to UTF-8 (the standard JSON text encoding) instead of
# relying on the platform locale default, which on some systems (e.g. cp1252
# on Windows) would garble or fail to decode non-ASCII explanation text.
# NOTE(review): the path is relative to the kernel's working directory.
with open("student_grades.json", "r", encoding="utf-8") as f:
    student_grades = json.load(f)

In [None]:

# Sentence-embedding model used to turn each free-text explanation into a vector.
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# Canonical skill order, taken from the first student encountered. Every
# student's grades are read in this order so that grade column i refers to the
# same skill for everyone, regardless of the key order inside each JSON record.
skill_order = None

# Maps student_id -> 1-D feature vector: [grade per skill..., mean explanation embedding...]
student_vectors = {}
for student_id, skills in student_grades.items():
    # The "global grade" entry is skipped; only per-skill entries become features.
    student_skills = [skill for skill in skills if skill != "global grade"]

    if skill_order is None:
        skill_order = student_skills
        if not skill_order:
            raise ValueError(
                f"Student {student_id!r} has no per-skill entries to build features from"
            )
    elif set(student_skills) != set(skill_order):
        # A different skill set would either misalign the grade columns or
        # produce vectors of different lengths that cannot be stacked later.
        raise ValueError(
            f"Student {student_id!r} has skills {sorted(student_skills)}, "
            f"expected {sorted(skill_order)}"
        )

    grades = [skills[skill]["grade"] for skill in skill_order]
    explanations = [skills[skill]["explanation"] for skill in skill_order]

    # Mean embedding of the explanations (averaged across this student's skills)
    explanation_embeddings = embedder.encode(explanations)
    explanation_vector = np.mean(explanation_embeddings, axis=0)

    # Concatenate grades and embeddings
    full_vector = np.concatenate([grades, explanation_vector])
    student_vectors[student_id] = full_vector

### Clustering
# Stack the per-student feature vectors into a (students x features) matrix;
# student_ids keeps the matching row order because both come from the same dict.
X = np.array(list(student_vectors.values()))
student_ids = list(student_vectors.keys())

# Standardize every feature column to zero mean and unit variance.
# NOTE(review): each column is scaled independently, so every embedding
# dimension ends up weighted the same as a grade column — confirm that this
# balance between grades and explanation text is intended.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# n_init is set explicitly: its default changed between scikit-learn releases
# (10 restarts before 1.4, 'auto' afterwards), which would otherwise make the
# cluster assignment depend on the installed version and emit a FutureWarning
# on 1.2-1.3.
kmeans = KMeans(n_clusters=2, n_init=10, random_state=42)
clusters = kmeans.fit_predict(X_scaled)

# One row per student with the cluster label assigned by KMeans.
df_clusters = pd.DataFrame({
    "student_id": student_ids,
    "cluster": clusters
})

print(df_clusters)


In [None]:
## PCA for visualization
# Project the standardized feature matrix onto its first two principal
# components so the clustering can be drawn as a 2-D scatter plot.
# (PCA and plotly.express are imported in the notebook's top import cell.)
pca = PCA(n_components=2, random_state=42)
X_pca = pca.fit_transform(X_scaled)

# Plotting frame: one row per student with its cluster and 2-D coordinates.
# The cluster label is cast to str so plotly colours it as a discrete
# category instead of a continuous numeric scale.
df_clusters = pd.DataFrame({
    "student_id": student_ids,
    "cluster": clusters,
    "PC1": X_pca[:, 0],
    "PC2": X_pca[:, 1]
}).astype({"cluster": str})

# Share of total variance captured by each plotted component, shown on the
# axes so the reader can judge how faithful the 2-D view is.
pc1_share, pc2_share = pca.explained_variance_ratio_

fig = px.scatter(
    df_clusters,
    x="PC1",
    y="PC2",
    color="cluster",
    text="student_id",
    title="Student clustering with KMeans (PCA)",
    labels={
        "PC1": f"PC1 ({pc1_share:.1%} of variance)",
        "PC2": f"PC2 ({pc2_share:.1%} of variance)",
    },
)
# Draw the student-id labels above the markers instead of on top of them.
# Applied before the centroid trace is added so only the student points change.
fig.update_traces(textposition="top center")

# The KMeans centroids live in the standardized feature space, so the same
# fitted PCA projection maps them into the plot's coordinates.
centroids_pca = pca.transform(kmeans.cluster_centers_)
fig.add_scatter(
    x=centroids_pca[:, 0],
    y=centroids_pca[:, 1],
    mode="markers",
    marker=dict(color="black", size=15, symbol="x"),
    name="Centroids"
)

fig.show()
