In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

In [None]:
# Read the score export for each of the three exams into its own DataFrame.
score_files = ("Exam_1_scores.csv", "Exam_2_scores.csv", "Exam_3_scores.csv")
exam1, exam2, exam3 = (pd.read_csv(csv_path) for csv_path in score_files)

In [None]:
# Retain only the rows whose 'Status' column equals 'Graded'.
exam1 = exam1.loc[exam1['Status'].eq('Graded')]
exam2 = exam2.loc[exam2['Status'].eq('Graded')]
exam3 = exam3.loc[exam3['Status'].eq('Graded')]

In [None]:
# Normalize scores to percentage of max points:
#   percentage = 'Total Score' / 'Max Points' * 100
#
# `assign` returns a new DataFrame with the extra column instead of writing a
# column into the existing frame. The frames at this point come from a
# boolean-mask selection in an earlier cell, and item assignment on such a
# selection raises pandas' SettingWithCopyWarning on pandas versions without
# Copy-on-Write.
# NOTE(review): rows where 'Max Points' is 0 or missing would yield inf/NaN
# here — confirm the exports never contain such rows.
exam1 = exam1.assign(Exam1=exam1['Total Score'] / exam1['Max Points'] * 100)
exam2 = exam2.assign(Exam2=exam2['Total Score'] / exam2['Max Points'] * 100)
exam3 = exam3.assign(Exam3=exam3['Total Score'] / exam3['Max Points'] * 100)


In [None]:
# Reduce each frame to two columns: the 'SID' key and that exam's
# normalized score column.
exam1 = exam1.loc[:, ['SID', 'Exam1']]
exam2 = exam2.loc[:, ['SID', 'Exam2']]
exam3 = exam3.loc[:, ['SID', 'Exam3']]


In [None]:
# Join the three per-exam frames on 'SID'. pandas' merge defaults to an inner
# join, so only SIDs present in all three frames are kept.
first_two_exams = pd.merge(exam1, exam2, on='SID')
data = pd.merge(first_two_exams, exam3, on='SID')

# Feature matrix for clustering: the three normalized exam score columns.
feature_columns = ['Exam1', 'Exam2', 'Exam3']
X = data[feature_columns]

In [None]:
# Using elbow method to determine best k: fit K-Means once per candidate k
# and record the fitted model's inertia_ so the values can be compared.
#
# n_init is passed explicitly. scikit-learn changed its default from 10 to
# 'auto' in version 1.4 (and emits a FutureWarning about it in 1.2/1.3), so
# relying on the default makes the curve depend on the installed version.
# 10 restarts matches the long-standing historical default.
inertias = []
Ks = range(2, 9)
for k in Ks:
    km = KMeans(n_clusters=k, n_init=10, random_state=42)
    km.fit(X)
    inertias.append(km.inertia_)

# Plot Elbow curve: inertia against k; the "elbow" is where adding more
# clusters stops reducing inertia substantially.
plt.figure(figsize=(6, 4))
plt.plot(Ks, inertias, marker='o')
plt.xlabel('Number of Clusters (k)')
plt.ylabel('Inertia')
plt.title('Elbow Method: Inertia vs. k')
plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:
# Run K-Means with 3 clusters and store each row's cluster label in a new
# 'Cluster' column of `data`.
# NOTE(review): k=3 is presumably the value read off the elbow plot — confirm.
#
# n_init is passed explicitly: scikit-learn's default changed from 10 to
# 'auto' in version 1.4, so pinning it keeps the cluster assignment the same
# across library versions and avoids the FutureWarning raised by 1.2/1.3.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
data['Cluster'] = kmeans.fit_predict(X)

In [None]:
# Score the clustering: silhouette of the feature matrix against the labels
# stored in data['Cluster'], printed to two decimal places.
cluster_labels = data['Cluster']
score = silhouette_score(X, labels=cluster_labels)
print(f"Silhouette Score: {score:.2f}")

In [None]:
# 3D scatter plot of clusters: one point per row of `data`, positioned by the
# three exam percentages and colored by the row's 'Cluster' label.
fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(projection='3d')
scatter = ax.scatter(
    data['Exam1'],
    data['Exam2'],
    data['Exam3'],
    c=data['Cluster'],
    cmap='viridis',
    alpha=0.7,
)
ax.set(
    xlabel='Exam 1 (%)',
    ylabel='Exam 2 (%)',
    zlabel='Exam 3 (%)',
    title='K-Means Clustering of Exam Scores',
)
# Build the legend from the scatter's own color mapping (one entry per label).
legend_handles, legend_labels = scatter.legend_elements()
ax.legend(legend_handles, legend_labels, title="Cluster")
fig.tight_layout()
plt.show()