# In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

# --- Cell 1: Generate Synthetic Data ---
# Build a reproducible toy dataset describing student performance.
np.random.seed(42)
data_size = 500

# Raw features. The draw order (study time, absences, internal scores, then
# noise) is what ties the values to the seed, so it must not be rearranged.
study_time = np.random.uniform(1, 20, data_size)       # hours per week
absences = np.random.randint(0, 30, data_size)         # whole days, 0..29
internal_scores = np.random.uniform(0, 50, data_size)  # marks out of 50

# Latent score behind the label: each feature is rescaled by its upper bound,
# scores and study time push the value up, absences pull it down, and Gaussian
# noise (sigma = 0.2) blurs the boundary. Despite its name, `probability` is
# an unbounded score -- nothing clips it to [0, 1].
merit = internal_scores / 50 + study_time / 20
penalty = absences / 30
noise = np.random.normal(0, 0.2, data_size)
probability = merit - penalty + noise

# Target: Pass (1) when the latent score exceeds 0.8, otherwise Fail (0).
y = (probability > 0.8).astype(int)

# Feature table, one row per student.
X = pd.DataFrame(
    dict(
        study_time=study_time,
        absences=absences,
        internal_scores=internal_scores,
    )
)

# --- Cell 2: Preprocess Data ---

# The hinge-loss updates used for training work with labels in {-1, +1};
# 2*y - 1 sends 0 -> -1 and 1 -> +1 without touching y itself.
y_svm = 2 * y - 1

# 70/30 split, stratified so both partitions keep the same class balance.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_svm,
    test_size=0.3,
    random_state=42,
    stratify=y_svm,
)

# 1. Standardise the features. The scaler is fitted on the training rows only
#    and then applied unchanged to the test rows.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 2. Degree-2 polynomial expansion (no constant column): the scaled features
#    plus their squares and pairwise products. A *linear* SVM trained on these
#    explicit features can learn a quadratic decision boundary, which is the
#    primal-form counterpart of using a degree-2 polynomial kernel.
poly = PolynomialFeatures(degree=2, include_bias=False).fit(X_train_scaled)
X_train_poly = poly.transform(X_train_scaled)
X_test_poly = poly.transform(X_test_scaled)

# --- Cell 3: SVM "from scratch" Implementation (Sequential) ---
# Plain linear SVM trained by per-sample gradient steps on the L2-regularised
# hinge loss, run on the polynomial-expanded features (X_train_poly).

# --- Hyperparameters ---
learning_rate = 0.001
lambda_param = 0.01  # Strength of the L2 penalty on w
n_iters = 1000       # Number of full passes over the training set

# --- Initialization ---
n_samples, n_features = X_train_poly.shape
w = np.zeros(n_features)  # Weights
b = 0                     # Bias

# --- Gradient Descent Training Loop ---
# Samples are visited in their stored order on every pass (no shuffling), and
# w and b are updated after each individual sample.
for _epoch in range(n_iters):
    for sample, label in zip(X_train_poly, y_train):

        # Functional margin y_i * (w . x_i + b); a value >= 1 means the sample
        # lies on the correct side, outside the margin.
        margin = label * (np.dot(sample, w) + b)

        # The regulariser contributes to every step.
        grad_w = 2 * lambda_param * w
        grad_b = 0

        # Samples inside the margin or on the wrong side add the hinge term.
        if not margin >= 1:
            grad_w = grad_w - label * sample
            grad_b = -label

        # Step against the gradient.
        w = w - learning_rate * grad_w
        b = b - learning_rate * grad_b

# --- Cell 4: Predict ---

# Signed decision value w . x + b for every row of the *polynomial* test features
linear_output = np.dot(X_test_poly, w) + b

# Predicted class in {-1, +1} is the sign of the decision value
# (np.sign returns 0 for a value of exactly 0).
y_pred = np.sign(linear_output)

# --- Cell 5: Evaluate Performance ---

# Map labels back to {0, 1} for metrics
y_test_mapped = (y_test + 1) // 2  # {-1 -> 0, 1 -> 1}

# Only a strictly positive sign counts as Pass (1); -1 and 0 both become
# Fail (0), which is exactly what (y_pred + 1) // 2 produced. The result is
# cast to int so predictions and ground truth share an integer dtype, and the
# former "replace -1 with 0" step is gone because this mapping can never
# produce -1.
y_pred_mapped = (y_pred > 0).astype(int)


# --- Cell 6: Display Results ---

accuracy = accuracy_score(y_test_mapped, y_pred_mapped)
cm = confusion_matrix(y_test_mapped, y_pred_mapped)

# Per-class scores: with average=None each call returns one value per entry of
# `labels`, so index 0 is Fail and index 1 is Pass. zero_division=0 reports 0
# for an undefined score.
per_class_kwargs = dict(average=None, labels=[0, 1], zero_division=0)
precision = precision_score(y_test_mapped, y_pred_mapped, **per_class_kwargs)
recall = recall_score(y_test_mapped, y_pred_mapped, **per_class_kwargs)
f1 = f1_score(y_test_mapped, y_pred_mapped, **per_class_kwargs)

print("--- Model Evaluation Results (SVM From Scratch w/ Poly Features) ---")
print(f"Accuracy: {accuracy:.4f}")
print("\n--- Confusion Matrix ---")
print(cm)
print("\n--- Performance Metrics ---")

# Small fixed-width table, one row per class.
target_names = ['Fail (0)', 'Pass (1)']
print(f"{'':<10} {'precision':<10} {'recall':<10} {'f1-score':<10}")
print("-" * 40)
for class_name, class_precision, class_recall, class_f1 in zip(target_names, precision, recall, f1):
    print(f"{class_name:<10} {class_precision:<10.4f} {class_recall:<10.4f} {class_f1:<10.4f}")

# Headline numbers for the "Pass" class (label 1)
precision_pass, recall_pass, f1_pass = precision[1], recall[1], f1[1]

print("\n--- Key Metrics (for 'Pass' class) ---")
print(f"Precision: {precision_pass:.4f}")
print(f"Recall:    {recall_pass:.4f}")
print(f"F1-Score:  {f1_pass:.4f}")


