In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score
from sklearn.preprocessing import StandardScaler
from scipy.stats import ks_2samp
import unittest

# ---------------- Sample Data Simulation ----------------

def generate_data(n_samples=100, drift=False, anomaly=False, seed=42):
    """Simulate a three-feature binary-classification dataset.

    Features are drawn from a standard normal distribution and the label is
    1 when ``f1 + f2 > 0`` on the *undrifted* features. Drift and anomalies
    are applied after the labels are computed, so they perturb the features
    without changing the targets.

    Args:
        n_samples: Number of rows to generate.
        drift: If True, add N(2.0, 0.5) noise to every feature value.
        anomaly: If True, add N(10.0, 5.0) noise to the first five rows
            (or to all rows when fewer than five exist).
        seed: Seed for the private random generator. The default of 42
            reproduces the values produced by seeding NumPy globally with 42.

    Returns:
        A ``(features, target)`` tuple: a DataFrame with columns
        ``f1``, ``f2``, ``f3`` and an integer Series named ``"target"``.
    """
    # A private RandomState yields the same stream as np.random.seed(seed)
    # followed by np.random.normal, but leaves NumPy's global generator
    # untouched for the caller.
    rng = np.random.RandomState(seed)
    X = rng.normal(loc=0.0, scale=1.0, size=(n_samples, 3))
    y = (X[:, 0] + X[:, 1] > 0).astype(int)

    if drift:
        X += rng.normal(loc=2.0, scale=0.5, size=X.shape)  # simulate drift

    if anomaly:
        X[:5] += rng.normal(loc=10.0, scale=5.0, size=X[:5].shape)  # simulate anomalies

    return pd.DataFrame(X, columns=['f1', 'f2', 'f3']), pd.Series(y, name="target")

# ------------------ Model Training ----------------------

def train_model(X_train, y_train):
    """Fit a random-forest classifier on the given training data.

    Args:
        X_train: Training features.
        y_train: Training labels.

    Returns:
        The fitted ``RandomForestClassifier``, or ``None`` if construction or
        fitting raised; the error is printed rather than propagated.
    """
    try:
        # fit() returns the estimator itself, so the fitted model can be
        # handed back directly.
        return RandomForestClassifier(random_state=42).fit(X_train, y_train)
    except Exception as err:
        print(f"Model training error: {err}")
    return None

# ------------ 1. Model Performance Drift Monitoring ------------

def monitor_model_performance(model, X_prod, y_prod, baseline_metrics, threshold=0.05):
    """Compare production accuracy and precision against baseline values.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_prod: Production features.
        y_prod: True labels for the production features.
        baseline_metrics: Mapping with ``'accuracy'`` and ``'precision'`` keys.
        threshold: Largest absolute metric difference still considered stable.

    Returns:
        True when both metrics are within ``threshold`` of their baselines.
        False when either metric moved by more than ``threshold`` or when any
        step raised (the error is printed, not propagated).
    """
    try:
        predictions = model.predict(X_prod)
        prod_accuracy = accuracy_score(y_prod, predictions)
        prod_precision = precision_score(y_prod, predictions)

        print(f"Prod Accuracy: {prod_accuracy:.2f}, Baseline: {baseline_metrics['accuracy']:.2f}")
        print(f"Prod Precision: {prod_precision:.2f}, Baseline: {baseline_metrics['precision']:.2f}")

        # any() stops at the first metric that moved too far, so precision is
        # only compared when accuracy is still within tolerance.
        observed = (('accuracy', prod_accuracy), ('precision', prod_precision))
        drifted = any(
            abs(baseline_metrics[metric] - value) > threshold
            for metric, value in observed
        )
        if not drifted:
            return True

        print("⚠️ Model performance drift detected.")
        return False
    except Exception as err:
        print(f"Error monitoring model performance: {err}")
        return False

# ----------- 2. Feature Distribution Drift Monitoring -----------

def monitor_feature_drift(X_train, X_prod, p_value_threshold=0.05):
    """Run a two-sample KS test per feature to look for distribution drift.

    Each column of ``X_train`` is compared against the column of the same
    name in ``X_prod``; the p-value is printed for every feature.

    Args:
        X_train: Reference (training) feature DataFrame.
        X_prod: Production feature DataFrame.
        p_value_threshold: A feature is flagged when its p-value is below this.

    Returns:
        True when no feature is flagged. False when at least one feature is
        flagged or when any step raised (the error is printed, not propagated).
    """
    try:
        flagged = []
        for name in X_train.columns:
            p_value = ks_2samp(X_train[name], X_prod[name]).pvalue
            print(f"{name} KS test p-value: {p_value:.4f}")
            if p_value < p_value_threshold:
                print(f"⚠️ Drift detected in feature '{name}'")
                flagged.append(name)
        # An empty list means every feature passed the test.
        return not flagged
    except Exception as err:
        print(f"Error monitoring feature drift: {err}")
        return False

# ----------- 3. Anomaly Detection in Production Features ----------------

def detect_anomalies(X_prod, model, threshold=3.0):
    """Flag rows of ``X_prod`` that contain at least one extreme feature value.

    Features are standardized with a scaler fitted on ``X_prod`` itself, and a
    row is flagged when any of its absolute z-scores exceeds ``threshold``.
    Because the mean and standard deviation come from the same data being
    checked, very large outliers inflate the scale and can hide milder ones.

    Args:
        X_prod: Production features (2-D, numeric).
        model: Unused; kept so existing callers keep working.
        threshold: Absolute z-score above which a value counts as anomalous.

    Returns:
        A boolean array with one entry per row of ``X_prod``. If anything
        raises, the error is printed and an all-False boolean array of the
        same length is returned (empty when ``X_prod`` has no length).
    """
    try:
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(X_prod)
        z_scores = np.abs(X_scaled)
        anomalies = (z_scores > threshold).any(axis=1)

        print(f"Detected {anomalies.sum()} anomalies.")
        return anomalies
    except Exception as e:
        print(f"Error in anomaly detection: {e}")
        # Return the same kind of object as the success path (a boolean
        # ndarray) so callers can treat both outcomes uniformly, and do not
        # let the fallback itself raise when X_prod has no len().
        try:
            n_rows = len(X_prod)
        except TypeError:
            n_rows = 0
        return np.zeros(n_rows, dtype=bool)


# ---------------------- Unit Tests ----------------------

class TestMonitoringPipeline(unittest.TestCase):
    """Checks for the three monitors on simulated data.

    Every monitor returns False both when it detects a problem and when it
    swallows an internal error, so asserting only on the return type cannot
    tell a working monitor from a broken one. Each drift test therefore also
    runs a no-drift case that must return True.
    """

    def setUp(self):
        # Training data is the reference; production data has drift and
        # anomalies injected.
        self.X_train, self.y_train = generate_data()
        self.X_prod, self.y_prod = generate_data(drift=True, anomaly=True)
        self.model = train_model(self.X_train, self.y_train)
        # train_model signals failure by returning None; fail here with a
        # clear message instead of an AttributeError on .predict below.
        self.assertIsNotNone(self.model, "train_model returned None")
        train_predictions = self.model.predict(self.X_train)
        self.baseline_metrics = {
            'accuracy': accuracy_score(self.y_train, train_predictions),
            'precision': precision_score(self.y_train, train_predictions)
        }

    def test_performance_drift(self):
        result = monitor_model_performance(self.model, self.X_prod, self.y_prod, self.baseline_metrics)
        self.assertIsInstance(result, bool)
        self.assertFalse(result)

        # Scoring the data the baseline was computed on must report no drift;
        # this only passes when the monitor ran without an internal error.
        stable = monitor_model_performance(self.model, self.X_train, self.y_train, self.baseline_metrics)
        self.assertIs(stable, True)

    def test_feature_drift(self):
        result = monitor_feature_drift(self.X_train, self.X_prod)
        self.assertIsInstance(result, bool)
        self.assertFalse(result)

        # Identical samples cannot drift from themselves.
        self.assertIs(monitor_feature_drift(self.X_train, self.X_train), True)

    def test_anomaly_detection(self):
        anomalies = detect_anomalies(self.X_prod, self.model)
        self.assertEqual(len(anomalies), len(self.X_prod))
        self.assertTrue(anomalies.dtype == bool)
        # The production set has anomalies injected, so some row must be flagged.
        self.assertTrue(anomalies.any())


# ------------------------ Run Pipeline ------------------------

if __name__ == "__main__":
    # Demo run: train on clean data, then exercise each monitor against a
    # drifted, anomaly-injected "production" set before running the tests.
    print("---- ML Monitoring Pipeline Execution ----")

    X_train, y_train = generate_data()
    X_prod, y_prod = generate_data(drift=True, anomaly=True)
    model = train_model(X_train, y_train)

    # Baseline metrics are measured on the training data itself.
    train_predictions = model.predict(X_train)
    baseline_metrics = dict(
        accuracy=accuracy_score(y_train, train_predictions),
        precision=precision_score(y_train, train_predictions),
    )

    print("\n1️⃣ Checking Model Performance Drift:")
    monitor_model_performance(model, X_prod, y_prod, baseline_metrics)

    print("\n2️⃣ Checking Feature Distribution Drift:")
    monitor_feature_drift(X_train, X_prod)

    print("\n3️⃣ Checking Anomalies in Predictions:")
    anomalies = detect_anomalies(X_prod, model)

    print("\n---- Running Unit Tests ----")
    # exit=False keeps the interpreter (or notebook kernel) alive after the
    # test run; the argv placeholder stops unittest parsing the real argv.
    unittest.main(argv=['first-arg-is-ignored'], exit=False)


---- ML Monitoring Pipeline Execution ----

1️⃣ Checking Model Performance Drift:
Prod Accuracy: 0.43, Baseline: 1.00
Prod Precision: 0.43, Baseline: 1.00
⚠️ Model performance drift detected.

2️⃣ Checking Feature Distribution Drift:
f1 KS test p-value: 0.0000
⚠️ Drift detected in feature 'f1'
f2 KS test p-value: 0.0000
⚠️ Drift detected in feature 'f2'
f3 KS test p-value: 0.0000
⚠️ Drift detected in feature 'f3'

3️⃣ Checking Anomalies in Predictions:
Detected 4 anomalies.

---- Running Unit Tests ----


..

Detected 4 anomalies.
f1 KS test p-value: 0.0000
⚠️ Drift detected in feature 'f1'
f2 KS test p-value: 0.0000
⚠️ Drift detected in feature 'f2'
f3 KS test p-value: 0.0000
⚠️ Drift detected in feature 'f3'


.
----------------------------------------------------------------------
Ran 3 tests in 0.508s

OK


Prod Accuracy: 0.43, Baseline: 1.00
Prod Precision: 0.43, Baseline: 1.00
⚠️ Model performance drift detected.
