In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load dataset
data = pd.read_csv("student.csv")

# Print the first 5 rows of the dataset
print("First 5 rows of the dataset:\n")
print(data.head())

# Select relevant numeric features
features = ['Study_Hours_per_Week', 'Attendance_Rate', 'Internal_Scores']
target = 'Pass_Fail'

# Encode target variable: Pass=1, Fail=-1
data[target] = data[target].map({'Pass': 1, 'Fail': -1})

# Series.map() yields NaN for every value missing from the mapping, so a
# misspelt or missing label would otherwise flow into training unnoticed.
if data[target].isna().any():
    raise ValueError(
        f"Column {target!r} contains values other than 'Pass'/'Fail'"
    )

# Extract feature matrix and labels
X = data[features].values.astype(float)
y = data[target].values.astype(int)

# Train-test split (80-20): the first 80% of the rows, in file order, are
# used for training and the remainder for testing (no shuffling).
split = int(0.8 * len(X))

# Normalize features using statistics of the training rows only, so that no
# information about the held-out rows leaks into preprocessing.
train_mean = X[:split].mean(axis=0)
train_std = X[:split].std(axis=0)
# A constant feature has zero spread; dividing by 1 leaves it at 0 after
# centering instead of producing NaN/inf.
train_std[train_std == 0] = 1.0
X = (X - train_mean) / train_std

X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

# Define polynomial kernel
def polynomial_kernel(x1, x2, degree=3, coef0=1):
    """Return the polynomial kernel value ``(x1 . x2 + coef0) ** degree``.

    Args:
        x1: First operand, passed to ``np.dot`` as-is.
        x2: Second operand, passed to ``np.dot`` as-is.
        degree: Exponent applied to the shifted dot product.
        coef0: Constant added to the dot product before exponentiation.

    Returns:
        The shifted dot product raised to ``degree``.
    """
    shifted_similarity = np.dot(x1, x2) + coef0
    return shifted_similarity ** degree

# SVM with Polynomial Kernel (from scratch, no sklearn)
class SVM_Polynomial:
    """Binary classifier with a polynomial kernel, trained by iterative updates.

    The decision function is
    ``f(x) = sum_j alpha_j * y_j * k(x_j, x) + b`` with
    ``k(u, v) = (u . v + coef0) ** degree``. Labels must be encoded as
    +1 / -1.

    Args:
        C: Shrinkage strength applied to ``alpha_i`` once sample ``i``
            satisfies the margin (``y_i * f(x_i) >= 1``).
        degree: Exponent of the polynomial kernel.
        lr: Step size used for both the ``alpha`` and the bias updates.
        epochs: Number of full passes over the training samples.
        coef0: Constant added to the dot product inside the kernel. The
            default of 1 matches the value previously hard-wired through
            the kernel's default argument.

    Attributes set by ``fit``: ``alpha`` (one coefficient per training
    sample), ``b`` (intercept), ``X`` / ``y`` (training data) and ``K``
    (training Gram matrix).
    """

    def __init__(self, C=1.0, degree=3, lr=0.001, epochs=1000, coef0=1):
        self.C = C
        self.degree = degree
        self.lr = lr
        self.epochs = epochs
        self.coef0 = coef0

    def _kernel(self, A, B):
        """Return the matrix of kernel values between rows of A and rows of B."""
        return (A @ B.T + self.coef0) ** self.degree

    def fit(self, X, y):
        """Learn ``alpha`` and ``b`` from training data.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).
            y: 1-D array-like of n_samples labels, each +1 or -1.

        Raises:
            ValueError: If the shapes are inconsistent or a label is not
                +1 / -1 (this also rejects NaN labels).
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        if y.shape != (X.shape[0],):
            raise ValueError("y must be 1-D with one label per row of X")
        if not np.all(np.isin(y, (-1, 1))):
            raise ValueError("labels must be encoded as +1 / -1")

        n_samples = X.shape[0]
        self.alpha = np.zeros(n_samples)
        self.b = 0
        self.X = X
        self.y = y

        # Compute Gram matrix (Kernel Matrix) in one vectorised step.
        self.K = self._kernel(X, X)

        # Gradient-based training
        for _ in range(self.epochs):
            for i in range(n_samples):
                # Functional margin of sample i under the current model;
                # computed once and reused for the test and the step.
                margin = y[i] * (np.sum(self.alpha * y * self.K[:, i]) + self.b)
                if margin < 1:
                    # Margin violated: grow alpha_i in proportion to the shortfall.
                    self.alpha[i] += self.lr * (1 - margin)
                else:
                    # Margin satisfied: shrink alpha_i towards zero.
                    self.alpha[i] -= self.lr * self.C * self.alpha[i]
            # Intercept step on the summed residual y_i - f(x_i). The residual
            # is taken against the full decision value (kernel score + b);
            # without the b term the intercept receives no feedback from its
            # own value and can drift away instead of settling.
            scores = self.K @ (self.alpha * y) + self.b
            self.b += self.lr * np.sum(y - scores)

    def project(self, X):
        """Return the raw decision values ``f(x)`` for each row of X."""
        X = np.asarray(X, dtype=float)
        # Only training samples with a non-negligible coefficient contribute.
        support = self.alpha > 1e-6
        weights = self.alpha[support] * self.y[support]
        return self._kernel(X, self.X[support]) @ weights + self.b

    def predict(self, X):
        """Return predicted labels (+1.0 or -1.0) for each row of X.

        A decision value of exactly zero is assigned to the +1 class so
        that the output never contains 0, which is not a valid label.
        """
        return np.where(self.project(X) >= 0, 1.0, -1.0)

# Train the SVM model
svm_poly = SVM_Polynomial(C=1.0, degree=3, lr=0.001, epochs=500)
svm_poly.fit(X_train, y_train)

# Predict on test set
y_pred = svm_poly.predict(X_test)

# Evaluation metrics ("Pass" = +1 is treated as the positive class)
TP = np.sum((y_test == 1) & (y_pred == 1))
TN = np.sum((y_test == -1) & (y_pred == -1))
FP = np.sum((y_test == -1) & (y_pred == 1))
FN = np.sum((y_test == 1) & (y_pred == -1))

# Guard empty denominators explicitly rather than adding an epsilon, which
# would slightly bias every reported value.
precision = TP / (TP + FP) if (TP + FP) > 0 else 0.0
recall = TP / (TP + FN) if (TP + FN) > 0 else 0.0
f1 = (2 * precision * recall / (precision + recall)
      if (precision + recall) > 0 else 0.0)

print("\nConfusion Matrix:")
print(f"TP: {TP}, FP: {FP}")
print(f"FN: {FN}, TN: {TN}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")

# Confusion Matrix Visualization
# Rows are the actual class and columns the predicted class, matching the
# tick labels below: an actual Pass predicted as Fail (FN) belongs in the
# first row, an actual Fail predicted as Pass (FP) in the second.
cm = np.array([[TP, FN],
               [FP, TN]])

plt.figure(figsize=(5,4))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=['Predicted Pass', 'Predicted Fail'],
            yticklabels=['Actual Pass', 'Actual Fail'])
plt.title("Confusion Matrix - SVM with Polynomial Kernel")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()

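# Captured cell output (note: it names 'cancer.csv', which does not match the
# "student.csv" path loaded above, so it is likely left over from an earlier run):
# FileNotFoundError: [Errno 2] No such file or directory: 'cancer.csv'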