<a href="https://colab.research.google.com/github/anshupandey/MSA-analytics/blob/main/Model_Monitoring/Lab5_Model_Monitoring_Dashboard.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Lab 5: Model Monitoring Dashboard Development
**Objective**: Create a dashboard to monitor KPIs and detect anomalies in model behavior.

This lab focuses on building a simple dashboard that simulates real-time monitoring of Accuracy, Precision, Recall, F1-Score, and the Population Stability Index (PSI).

In [None]:
import pandas as pd

# Location of the lab's CSV file (raw file served from GitHub).
url = (
    "https://raw.githubusercontent.com/anshupandey/MSA-analytics/"
    "refs/heads/main/datasets/Ocean_Hull_Insurance_datasetv2.csv"
)

# Read the remote CSV into a DataFrame and preview its first rows.
df = pd.read_csv(filepath_or_buffer=url)
df.head()

## Simulate Predictions and Calculate Metrics

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Separate the target column from the predictor columns.
X = df.drop('Claim_Occurred', axis=1)
y = df['Claim_Occurred']

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Assign every predictor to exactly one transformer. Selecting by the generic
# 'number' kind (rather than listing int64/float64/object explicitly) ensures
# columns with other dtypes (int32, bool, category, string, ...) are not left
# out of both lists and then silently dropped by the ColumnTransformer.
numeric_features = X.select_dtypes(include='number').columns.tolist()
categorical_features = X.select_dtypes(exclude='number').columns.tolist()

# Scale numeric columns; one-hot encode the rest. handle_unknown='ignore'
# lets the encoder accept categories at predict time that it did not see in fit.
preprocessor = ColumnTransformer([
    ('num', StandardScaler(), numeric_features),
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
])

# Preprocessing and classifier are chained so both are fitted on the
# training rows only.
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', LogisticRegression(max_iter=1000))
])

pipeline.fit(X_train, y_train)
y_pred = pipeline.predict(X_test)

# Evaluation metrics on the held-out rows.
# NOTE(review): the metric calls use scikit-learn's default binary averaging,
# which presumably matches a 0/1 'Claim_Occurred' label — confirm against the data.
metrics_dict = {
    'Accuracy': accuracy_score(y_test, y_pred),
    'Precision': precision_score(y_test, y_pred),
    'Recall': recall_score(y_test, y_pred),
    'F1-Score': f1_score(y_test, y_pred)
}

# One row per metric, displayed as the cell output.
metrics_df = pd.DataFrame.from_dict(metrics_dict, orient='index', columns=['Value'])
metrics_df

## Compute PSI for Monitoring Feature Drift

In [None]:
import numpy as np

def calculate_psi(expected, actual, buckets=10):
    """Compute the Population Stability Index (PSI) between two samples.

    PSI = sum over bins of (e - a) * ln(e / a), where ``e`` and ``a`` are the
    shares of ``expected`` and ``actual`` falling into each bin.

    Numeric (non-boolean) data is binned using quantile cut points taken from
    ``expected`` only; the same cut points are then applied to ``actual``.
    Binning each sample by its own quantiles would give both samples
    near-uniform bin shares and therefore a PSI of about 0 regardless of drift.
    All other dtypes are compared value by value.

    Parameters
    ----------
    expected : pandas.Series or array-like
        Reference sample (e.g. training data).
    actual : pandas.Series or array-like
        Sample to compare against the reference.
    buckets : int, default 10
        Target number of quantile bins for numeric data. Fewer bins are used
        when tied values make some cut points coincide.

    Returns
    -------
    float
        The PSI value; ``nan`` if either sample has no non-missing values.
    """
    eps = 0.0001  # floor for empty bins so the log term stays finite

    # Missing values carry no bin/category information; drop them up front.
    expected = pd.Series(expected).dropna()
    actual = pd.Series(actual).dropna()
    if expected.empty or actual.empty:
        return float('nan')

    is_numeric = (
        pd.api.types.is_numeric_dtype(expected)
        and not pd.api.types.is_bool_dtype(expected)
    )

    if is_numeric:
        expected_values = expected.to_numpy(dtype=float)
        actual_values = actual.to_numpy(dtype=float)

        # Interior cut points only (the 0% and 100% quantiles are excluded),
        # so values outside the reference range fall into the outermost bins.
        # np.unique collapses cut points that coincide because of ties.
        interior_quantiles = np.linspace(0, 1, buckets + 1)[1:-1]
        cut_points = np.unique(np.quantile(expected_values, interior_quantiles))
        n_bins = len(cut_points) + 1

        # side='left' places a value equal to a cut point in the lower bin.
        expected_idx = np.searchsorted(cut_points, expected_values, side='left')
        actual_idx = np.searchsorted(cut_points, actual_values, side='left')
        e_perc = np.bincount(expected_idx, minlength=n_bins) / len(expected_values)
        a_perc = np.bincount(actual_idx, minlength=n_bins) / len(actual_values)
    else:
        expected_dist = expected.value_counts(normalize=True)
        actual_dist = actual.value_counts(normalize=True)
        # Sort by string form so the summation order is the same on every run.
        categories = sorted(set(expected_dist.index).union(actual_dist.index), key=str)
        e_perc = np.array([expected_dist.get(cat, 0.0) for cat in categories], dtype=float)
        a_perc = np.array([actual_dist.get(cat, 0.0) for cat in categories], dtype=float)

    # Replace zero shares (bin or category absent from one sample) with eps.
    e_perc = np.where(e_perc == 0, eps, e_perc)
    a_perc = np.where(a_perc == 0, eps, a_perc)

    return float(np.sum((e_perc - a_perc) * np.log(e_perc / a_perc)))

# Positional 70/30 cut of the frame: the leading rows serve as the reference
# ("train") sample and the trailing rows as the "current" sample.
split_index = int(len(df) * 0.7)
df_train, df_current = df.iloc[:split_index], df.iloc[split_index:]

# One PSI record per column, with the target column left out.
psi_results = [
    {'Feature': col, 'PSI': calculate_psi(df_train[col], df_current[col])}
    for col in df.columns
    if col != 'Claim_Occurred'
]

# Largest PSI first, displayed as the cell output.
psi_df = pd.DataFrame(psi_results).sort_values('PSI', ascending=False)
psi_df

## Simulate Dashboard Output

In [None]:
import matplotlib.pyplot as plt

# Horizontal bar chart of the model evaluation metrics.
metrics_ax = metrics_df.plot.barh(legend=False)
metrics_ax.set_title("Model Evaluation Metrics")
metrics_ax.set_xlabel("Score")
metrics_ax.grid(True)
plt.show()

# Horizontal bar chart of the per-feature PSI values.
psi_ax = psi_df.set_index('Feature').plot.barh(legend=False)
psi_ax.set_title("Feature Stability via PSI")
psi_ax.set_xlabel("PSI Value")
psi_ax.grid(True)
plt.show()

## Simulated Alerts for KPI Deviations
An alert is raised when any feature's PSI exceeds 0.2 or when the model's F1-Score falls below 0.7.

In [None]:
# Model-quality check: start the list with an alert if F1-Score is under 0.7.
alerts = ["⚠️ F1-Score below threshold!"] if metrics_dict['F1-Score'] < 0.7 else []

# Drift check: add one alert for each feature whose PSI is above 0.2.
alerts.extend(
    f"⚠️ High PSI detected in {row['Feature']} (PSI = {row['PSI']:.3f})"
    for row in psi_df.to_dict('records')
    if row['PSI'] > 0.2
)

# Show the alerts, or an all-clear message when none were raised.
alerts or ["✅ All metrics are within acceptable range."]