<a href="https://colab.research.google.com/github/inderpreetsingh01/ml_machine_coding/blob/main/kfold_cross_validation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import numpy as np

def k_fold_split(X, y, k=5, shuffle=True, seed=None):
    """
    Splits data into k folds
    """
    n_samples = len(X)
    indices = np.arange(n_samples)

    if shuffle:
        rng = np.random.default_rng(seed)
        rng.shuffle(indices)

    fold_sizes = np.full(k, n_samples // k, dtype=int)
    fold_sizes[:n_samples % k] += 1  # distribute remainder
    current = 0
    folds = []
    for fold_size in fold_sizes:
        start, stop = current, current + fold_size
        folds.append(indices[start:stop])
        current = stop
    return folds


def k_fold_cross_validation(model_class, X, y, k=5, metric="accuracy", **model_params):
    """
    Perform K-Fold Cross Validation
    - model_class: class of model (must implement fit, predict)
    - X: features (numpy array)
    - y: labels
    - k: number of folds
    - metric: "accuracy", "f1", "precision", "recall"
    - model_params: params to initialize the model
    """
    folds = k_fold_split(X, y, k)
    scores = []

    for i in range(k):
        # Prepare train/validation split
        val_idx = folds[i]
        train_idx = np.hstack([folds[j] for j in range(k) if j != i])
        X_train, y_train = X[train_idx], y[train_idx]
        X_val, y_val = X[val_idx], y[val_idx]

        # Train model
        model = model_class(**model_params)
        model.fit(X_train, y_train)

        # Predictions
        y_pred = model.predict(X_val)

        # Metric
        if metric == "accuracy":
            score = np.mean(y_pred == y_val)
        else:
            raise NotImplementedError(f"Metric {metric} not yet implemented")
        scores.append(score)
        print(f"Fold {i+1}/{k}: {metric} = {score:.4f}")

    print(f"Mean {metric}: {np.mean(scores):.4f}")
    return scores

In [4]:
# Simple Logistic Regression using numpy
class LogisticRegression:
    def __init__(self, lr=0.1, n_iter=1000):
        self.lr = lr
        self.n_iter = n_iter

    def _sigmoid(self, z):
        return 1 / (1 + np.exp(-z))

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0

        for _ in range(self.n_iter):
            linear_model = np.dot(X, self.weights) + self.bias
            y_pred = self._sigmoid(linear_model)

            # Gradient descent
            dw = (1 / n_samples) * np.dot(X.T, (y_pred - y))
            db = (1 / n_samples) * np.sum(y_pred - y)

            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict(self, X):
        linear_model = np.dot(X, self.weights) + self.bias
        y_pred = self._sigmoid(linear_model)
        return np.where(y_pred >= 0.5, 1, 0)

In [5]:
# Dummy dataset
X = np.array([[0],[1],[2],[3],[4],[5],[6],[7],[8],[9]])
y = np.array([0,0,0,0,1,1,1,1,1,1])

# Run 5-fold CV
scores = k_fold_cross_validation(LogisticRegression, X, y, k=5, metric="accuracy", lr=0.1, n_iter=1000)

Fold 1/5: accuracy = 1.0000
Fold 2/5: accuracy = 0.5000
Fold 3/5: accuracy = 1.0000
Fold 4/5: accuracy = 1.0000
Fold 5/5: accuracy = 1.0000
Mean accuracy: 0.9000


In [9]:
# ===== Classification Metrics =====
def accuracy_score(y_true, y_pred):
    return np.mean(y_true == y_pred)

def precision_score(y_true, y_pred):
    tp = np.sum((y_true == 1) & (y_pred == 1))
    fp = np.sum((y_true == 0) & (y_pred == 1))
    return tp / (tp + fp + 1e-10)

def recall_score(y_true, y_pred):
    tp = np.sum((y_true == 1) & (y_pred == 1))
    fn = np.sum((y_true == 1) & (y_pred == 0))
    return tp / (tp + fn + 1e-10)

def f1_score(y_true, y_pred):
    p = precision_score(y_true, y_pred)
    r = recall_score(y_true, y_pred)
    return 2 * p * r / (p + r + 1e-10)

def roc_auc_score(y_true, y_prob):
    """Compute ROC-AUC using trapezoidal integration"""
    desc_score_indices = np.argsort(-y_prob)
    y_true = y_true[desc_score_indices]

    tpr = []
    fpr = []
    P = np.sum(y_true == 1)
    N = np.sum(y_true == 0)
    tp, fp = 0, 0

    for i in range(len(y_true)):
        if y_true[i] == 1:
            tp += 1
        else:
            fp += 1
        tpr.append(tp / P)
        fpr.append(fp / N)

    return np.trapz(np.array(tpr), np.array(fpr))

def top_k_accuracy_score(y_true, y_pred_probs, k=3):
    top_k_preds = np.argsort(y_pred_probs, axis=1)[:, -k:]
    hits = np.array([y_true[i] in top_k_preds[i] for i in range(len(y_true))])
    return np.mean(hits)


# ===== Regression Metrics =====
def mse(y_true, y_pred):
    return np.mean((y_true - y_pred) ** 2)

def mae(y_true, y_pred):
    return np.mean(np.abs(y_true - y_pred))

def r2_score(y_true, y_pred):
    ss_res = np.sum((y_true - y_pred) ** 2)
    ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)
    return 1 - ss_res / (ss_tot + 1e-10)

In [7]:
def k_fold_split(X, y, k=5, shuffle=True, seed=None):
    n_samples = len(X)
    indices = np.arange(n_samples)

    if shuffle:
        rng = np.random.default_rng(seed)
        rng.shuffle(indices)

    fold_sizes = np.full(k, n_samples // k, dtype=int)
    fold_sizes[:n_samples % k] += 1
    folds, current = [], 0
    for fold_size in fold_sizes:
        start, stop = current, current + fold_size
        folds.append(indices[start:stop])
        current = stop
    return folds


def k_fold_cross_validation(model_class, X, y, k=5, metric=accuracy_score, **model_params):
    """
    - model_class: must implement fit(), predict(), optionally predict_proba()
    - metric: metric function
    - Works for classification & regression
    """
    folds = k_fold_split(X, y, k)
    scores = []

    for i in range(k):
        val_idx = folds[i]
        train_idx = np.hstack([folds[j] for j in range(k) if j != i])
        X_train, y_train = X[train_idx], y[train_idx]
        X_val, y_val = X[val_idx], y[val_idx]

        # Train model
        model = model_class(**model_params)
        model.fit(X_train, y_train)

        # Predictions
        if metric == roc_auc_score or metric == top_k_accuracy_score:
            if hasattr(model, "predict_proba"):
                y_pred = model.predict_proba(X_val)
            else:
                raise ValueError("Model must implement predict_proba for ROC-AUC/Top-K")
        else:
            y_pred = model.predict(X_val)

        # Compute metric
        if metric == top_k_accuracy_score:
            score = metric(y_val, y_pred, k=3)
        elif metric == roc_auc_score:
            score = metric(y_val, y_pred[:, 1])
        else:
            score = metric(y_val, y_pred)

        scores.append(score)
        print(f"Fold {i+1}/{k}: {metric.__name__} = {score:.4f}")

    print(f"Mean {metric.__name__}: {np.mean(scores):.4f}")
    return scores

In [8]:
# Dummy dataset
X = np.array([[0],[1],[2],[3],[4],[5],[6],[7],[8],[9]])
y = np.array([0,0,0,0,1,1,1,1,1,1])

class LogisticRegression:
    def __init__(self, lr=0.1, n_iter=1000):
        self.lr = lr
        self.n_iter = n_iter

    def _sigmoid(self, z):
        return 1 / (1 + np.exp(-z))

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0

        for _ in range(self.n_iter):
            linear_model = np.dot(X, self.weights) + self.bias
            y_pred = self._sigmoid(linear_model)

            dw = (1 / n_samples) * np.dot(X.T, (y_pred - y))
            db = (1 / n_samples) * np.sum(y_pred - y)

            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict(self, X):
        linear_model = np.dot(X, self.weights) + self.bias
        y_pred = self._sigmoid(linear_model)
        return np.where(y_pred >= 0.5, 1, 0)

    def predict_proba(self, X):
        linear_model = np.dot(X, self.weights) + self.bias
        return np.column_stack([1 - self._sigmoid(linear_model), self._sigmoid(linear_model)])


# Run CV with multiple metrics
print("=== Accuracy ===")
k_fold_cross_validation(LogisticRegression, X, y, k=5, metric=accuracy_score, lr=0.1, n_iter=1000)

print("\n=== F1 ===")
k_fold_cross_validation(LogisticRegression, X, y, k=5, metric=f1_score, lr=0.1, n_iter=1000)

print("\n=== ROC-AUC ===")
k_fold_cross_validation(LogisticRegression, X, y, k=5, metric=roc_auc_score, lr=0.1, n_iter=1000)

=== Accuracy ===
Fold 1/5: accuracy_score = 0.5000
Fold 2/5: accuracy_score = 1.0000
Fold 3/5: accuracy_score = 1.0000
Fold 4/5: accuracy_score = 1.0000
Fold 5/5: accuracy_score = 1.0000
Mean accuracy_score: 0.9000

=== F1 ===
Fold 1/5: f1_score = 1.0000
Fold 2/5: f1_score = 0.0000
Fold 3/5: f1_score = 1.0000
Fold 4/5: f1_score = 0.6667
Fold 5/5: f1_score = 0.6667
Mean f1_score: 0.6667

=== ROC-AUC ===


  fpr.append(fp / N)
  auc = np.trapz(tpr, fpr)


Fold 1/5: roc_auc_score = nan
Fold 2/5: roc_auc_score = 1.0000
Fold 3/5: roc_auc_score = nan
Fold 4/5: roc_auc_score = 1.0000
Fold 5/5: roc_auc_score = nan
Mean roc_auc_score: nan


  tpr.append(tp / P)


[np.float64(nan),
 np.float64(1.0),
 np.float64(nan),
 np.float64(1.0),
 np.float64(nan)]

In [10]:
class LinearRegression:
    def __init__(self, lr=0.01, n_iter=1000):
        self.lr = lr
        self.n_iter = n_iter

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0

        for _ in range(self.n_iter):
            y_pred = np.dot(X, self.weights) + self.bias
            dw = (1 / n_samples) * np.dot(X.T, (y_pred - y))
            db = (1 / n_samples) * np.sum(y_pred - y)
            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict(self, X):
        return np.dot(X, self.weights) + self.bias


# Regression CV
X = np.linspace(0, 10, 20).reshape(-1, 1)
y = 3 * X.flatten() + 5 + np.random.randn(20)  # noisy linear relation

print("\n=== Linear Regression (MSE) ===")
k_fold_cross_validation(LinearRegression, X, y, k=5, metric=mse, lr=0.01, n_iter=1000)

print("\n=== Linear Regression (R2) ===")
k_fold_cross_validation(LinearRegression, X, y, k=5, metric=r2_score, lr=0.01, n_iter=1000)


=== Linear Regression (MSE) ===
Fold 1/5: mse = 2.0243
Fold 2/5: mse = 1.8645
Fold 3/5: mse = 0.3516
Fold 4/5: mse = 0.1207
Fold 5/5: mse = 1.9784
Mean mse: 1.2679

=== Linear Regression (R2) ===
Fold 1/5: r2_score = 0.8521
Fold 2/5: r2_score = 0.9912
Fold 3/5: r2_score = 0.9834
Fold 4/5: r2_score = 0.9688
Fold 5/5: r2_score = 0.9878
Mean r2_score: 0.9566


[np.float64(0.8520538085845042),
 np.float64(0.9911521411476494),
 np.float64(0.9833944545571156),
 np.float64(0.9687506065233892),
 np.float64(0.9877911154860132)]