In [1]:
# -------------------------------
# SVM Classification of Penguin Sex
# -------------------------------
import seaborn as sns
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix

# --- Data -------------------------------------------------------------
# Fetch the seaborn "penguins" table, then discard every row that has a
# missing value in any column.
penguins = sns.load_dataset("penguins")
penguins = penguins.dropna()

# The four numeric measurements used as predictors.
FEATURE_COLUMNS = [
    "bill_length_mm",
    "bill_depth_mm",
    "flipper_length_mm",
    "body_mass_g",
]
X = penguins[FEATURE_COLUMNS]

# --- Target -----------------------------------------------------------
# Turn the `sex` labels into integer codes. LabelEncoder numbers the
# classes in sorted order; `le.classes_` holds the code -> label mapping.
y = penguins["sex"]
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# --- Hold-out split ---------------------------------------------------
# 70% train / 30% test, with a fixed seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    random_state=0,
    test_size=0.3,
)

# --- Feature scaling --------------------------------------------------
# Learn the scaling statistics from the training rows only, then apply
# that same transform to both partitions so that no information from the
# test set leaks into the preprocessing step.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# --- Model ------------------------------------------------------------
# RBF-kernel support vector classifier.
# NOTE: per the scikit-learn docs, `random_state` only influences SVC
# when `probability=True`; it is kept here so the call is unchanged.
svm_clf = SVC(C=1.0, kernel="rbf", gamma="scale", random_state=0)
svm_clf.fit(X_train_scaled, y_train)

# --- Evaluation on the held-out rows ----------------------------------
y_pred = svm_clf.predict(X_test_scaled)
acc = accuracy_score(y_test, y_pred)
# Rows of the matrix are true classes, columns are predicted classes.
cm = confusion_matrix(y_test, y_pred)

print(f"SVM Test Accuracy: {acc:.4f}")
print("Confusion Matrix:", cm, sep="\n")
print("-" * 40)

# `support_` holds the indices of the training samples that ended up as
# support vectors, so its length is the total support-vector count.
print(f"Number of support vectors: {len(svm_clf.support_)}")


SVM Test Accuracy: 0.9500
Confusion Matrix:
[[57  3]
 [ 2 38]]
----------------------------------------
Number of support vectors: 94
