In [1]:
# =============================================================
# SVM with Polynomial Kernel (From Scratch)
# Predict Student Performance (Pass/Fail)
# =============================================================

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, recall_score, f1_score

# -------------------------------------------------------------
# 1️⃣ Load Dataset
# -------------------------------------------------------------
file_path = "student_performance_dataset_20.csv" # Update if necessary
try:
    df = pd.read_csv(file_path)
    print("✅ Dataset loaded successfully.")

    # Use relevant features
    X = df[["Study_Hours_per_Week", "Attendance_Rate", "Internal_Scores"]].values
    # Convert target Pass/Fail to 1/-1 for SVM
    y = np.where(df["Pass_Fail"] == "Pass", 1, -1)

    # Standardize features (crucial for SVM)
    scaler = StandardScaler()
    X = scaler.fit_transform(X)

    # Split data into train-test sets
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    print(f"Training samples: {X_train.shape[0]}")
    print(f"Testing samples: {X_test.shape[0]}")

except FileNotFoundError:
    print(f"❌ Error: File '{file_path}' not found. Please upload it.")
except KeyError as e:
    print(f"❌ Error: Missing column in dataset: {e}")

✅ Dataset loaded successfully.
Training samples: 566
Testing samples: 142


In [2]:
# -------------------------------------------------------------
# 2️⃣ Define Polynomial Kernel (From Scratch)
# -------------------------------------------------------------
def polynomial_kernel(X1, X2, degree=3, c=1):
    """
    Computes Polynomial Kernel:
    K(x1, x2) = (x1 · x2 + c)^degree
    """
    # X1 shape: (n_samples_1, n_features)
    # X2 shape: (n_samples_2, n_features)
    # Result shape: (n_samples_1, n_samples_2)
    return (np.dot(X1, X2.T) + c) ** degree


# -------------------------------------------------------------
# 3️⃣ Define Simplified SVM Class (From Scratch)
# -------------------------------------------------------------
class SVMPolynomialScratch:
    def __init__(self, C=1.0, degree=3, c=1, lr=0.001, epochs=500):
        self.C = C          # Regularization parameter
        self.degree = degree # Polynomial degree
        self.c = c          # Kernel constant
        self.lr = lr        # Learning rate
        self.epochs = epochs # Training iterations

    def fit(self, X, y):
        n_samples = X.shape[0]
        # Precompute Kernel Matrix for all pairs in training data
        self.K = polynomial_kernel(X, X, self.degree, self.c)
        
        # Initialize dual variables (alphas)
        self.alpha = np.zeros(n_samples)

        # Simple Gradient Descent for Dual Problem (simplified approach)
        for _ in range(self.epochs):
            for i in range(n_samples):
                # SVM decision function in dual form: sum(alpha_j * y_j * K(x_j, x_i))
                margin = y[i] * np.sum(self.alpha * y * self.K[:, i])
                
                if margin >= 1:
                     # Correctly classified outside margin -> decay alpha slightly
                    self.alpha[i] -= self.lr * self.alpha[i]
                else:
                    # Misclassified or inside margin -> increase alpha (capped by C)
                    self.alpha[i] += self.lr * (self.C - self.alpha[i])

        # Identify support vectors (points with non-zero alpha)
        idx = self.alpha > 1e-5
        self.support_vectors_ = X[idx]
        self.support_labels_ = y[idx]
        self.support_alphas_ = self.alpha[idx]

    def project(self, X):
        # For prediction, we only need the support vectors
        # K_matrix shape: (n_samples_X, n_support_vectors)
        K_matrix = polynomial_kernel(X, self.support_vectors_, self.degree, self.c)
        
        # Prediction = sum(alpha_i * y_i * K(x_i, x_new)) for all support vectors
        return np.dot(K_matrix, self.support_alphas_ * self.support_labels_)

    def predict(self, X):
        return np.sign(self.project(X))

print("✅ SVM classes defined successfully.")

✅ SVM classes defined successfully.


In [3]:
if 'X_train' in locals():
    # -------------------------------------------------------------
    # 4️⃣ Train and Evaluate Model
    # -------------------------------------------------------------
    print("Training SVM with Polynomial Kernel...")
    
    # Initialize and train
    svm_poly = SVMPolynomialScratch(C=1.0, degree=2, lr=0.001, epochs=1000)
    svm_poly.fit(X_train, y_train)
    print(f"Training complete. Found {len(svm_poly.support_alphas_)} support vectors.")

    # Predict on test set
    y_pred = svm_poly.predict(X_test)

    # Evaluation Metrics
    # pos_label=1 ensures we are looking at metrics for the "Pass" class
    precision = precision_score(y_test, y_pred, pos_label=1, zero_division=0)
    recall = recall_score(y_test, y_pred, pos_label=1, zero_division=0)
    f1 = f1_score(y_test, y_pred, pos_label=1, zero_division=0)

    # -------------------------------------------------------------
    # 5️⃣ Print Results
    # -------------------------------------------------------------
    print("\n=== Polynomial Kernel SVM (From Scratch) Results ===")
    print(f"Precision (Pass): {precision:.4f}")
    print(f"Recall (Pass):    {recall:.4f}")
    print(f"F1-Score (Pass):  {f1:.4f}")

Training SVM with Polynomial Kernel...
Training complete. Found 545 support vectors.

=== Polynomial Kernel SVM (From Scratch) Results ===
Precision (Pass): 0.7821
Recall (Pass):    0.8592
F1-Score (Pass):  0.8188
