# Nearest Centroid Classifier on Personality Dataset
This notebook implements a Nearest Centroid Classifier (NCC) in NumPy and uses it to classify personality type (Introvert vs. Extrovert) from the features in the personality dataset.

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

## Data Loading, Preprocessing, and Feature Selection

In [None]:
# Load the raw data and encode the Yes/No answers and the target label as integers.
df = pd.read_csv("./data/personality_dataset.csv")
df = df.replace({
    "Stage_fear": {"Yes": 1, "No": 0},
    "Drained_after_socializing": {"Yes": 1, "No": 0},
    "Personality": {"Introvert": 0, "Extrovert": 1}
})

# Feature columns fed to the classifier.
selected_columns = [
    "Time_spent_Alone", "Stage_fear", "Social_event_attendance", "Going_outside", 
    "Drained_after_socializing", "Friends_circle_size", "Post_frequency"
]

# Cast the features to float explicitly. `replace` can leave the encoded
# Yes/No columns as object dtype (pandas has deprecated the implicit
# downcast), which would propagate into an object-dtype X and break the
# distance computation. The cast also fails loudly on any unmapped string.
features = df[selected_columns].astype(float)

# Impute missing values with the column mean.
# NOTE(review): this also mean-imputes the two binary columns, producing
# fractional values; confirm that is intended rather than mode imputation.
df[selected_columns] = features.fillna(features.mean())

X = df[selected_columns].to_numpy(dtype=float)
y = df["Personality"].to_numpy()

## Nearest Centroid Classifier Implementation

In [None]:
def fit_ncc(X,y):
    """Compute one centroid (per-feature mean) for every class in ``y``.

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_features)
        Training samples.
    y : ndarray of shape (n_samples,)
        Class label of each sample.

    Returns
    -------
    cids : ndarray
        Sorted unique class labels found in ``y``.
    mu : ndarray of shape (n_classes, n_features), float64
        ``mu[k]`` is the mean of all rows of ``X`` labelled ``cids[k]``.
    """
    cids = np.unique(y)
    n_features = X.shape[1]
    # One mean vector per class, in the same order as ``cids``.
    centroids = [X[y == label].mean(axis=0) for label in cids]
    # The reshape keeps the (0, n_features) shape when there are no classes.
    mu = np.array(centroids, dtype=float).reshape(len(cids), n_features)
    return cids, mu

def predict_ncc(X, mu, cids=None):
    """Assign each sample to its nearest centroid (Euclidean distance).

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Samples to classify.
    mu : array-like of shape (n_classes, n_features)
        Class centroids, e.g. as returned by ``fit_ncc``.
    cids : array-like of shape (n_classes,), optional
        Class label of each centroid row. When given, the predicted labels
        ``cids[k]`` are returned instead of the centroid indices ``k``, so
        the result is correct for any label encoding, not only 0..K-1.

    Returns
    -------
    ndarray of shape (n_samples,)
        Index of the nearest centroid for each sample, or the corresponding
        class label when ``cids`` is provided. Ties go to the lowest index.
    """
    X = np.asarray(X, dtype=float)
    mu = np.asarray(mu, dtype=float)
    # Broadcast (n_samples, 1, d) against (1, n_classes, d) to get every
    # sample-to-centroid difference at once, then reduce over the feature axis.
    NCdist = np.linalg.norm(X[:, None, :] - mu[None, :, :], axis=2)
    nearest = np.argmin(NCdist, axis=1)
    if cids is None:
        return nearest
    return np.asarray(cids)[nearest]

## Evaluation on Full Dataset

In [26]:
# Fit and score on the same samples: this is a training (resubstitution)
# accuracy, so treat it as an optimistic figure.
cids, mu = fit_ncc(X, y)
# predict_ncc yields centroid row indices; index into cids to recover the
# actual class labels before comparing with y. Without this the comparison is
# only valid when the labels happen to be encoded as 0..K-1.
y_pred = cids[predict_ncc(X, mu)]
accuracy_full = np.mean(y_pred == y)
print("Accuracy on full dataset:", accuracy_full)

Accuracy on full dataset: 0.9344827586206896


## Train-Test Split Evaluation

In [25]:
# Hold out 20% of the samples; the fixed random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Centroids are estimated from the training portion only.
cids, mu = fit_ncc(X_train, y_train)
# predict_ncc yields centroid row indices; index into cids to recover the
# actual class labels before comparing with y_test. Without this the
# comparison is only valid when the labels happen to be encoded as 0..K-1.
y_pred = cids[predict_ncc(X_test, mu)]
accuracy_test = np.mean(y_pred == y_test)
print("Accuracy on test set:", accuracy_test)

Accuracy on test set: 0.9293103448275862
