# Cell 1 - Markdown
"""
# ⚙️ 06 – Governance Pipeline: Risk & Compliance for ML

This notebook outlines a simplified, modular AI governance process for credit risk ML models.  
It includes model metadata logging, risk classification, bias detection, and basic audit traceability (explainability is not covered by the steps below).

These principles support regulatory compliance and internal accountability (e.g. under the EU AI Act, BaFin requirements, or ISO/IEC 42001).
"""

In [1]:
# Cell 2 - Code - Imports
import os

import pandas as pd

# `src` is imported right after the directory change, so it is presumably located
# one level above the notebook's directory. Only step up when `src` is not already
# visible from the working directory: an unconditional chdir("..") climbs one more
# level every time this cell is re-run and then breaks the imports below.
if not os.path.isdir("src"):
    os.chdir("..")

from src.data_loader import load_and_preprocess_data
from src.model_trainer import train_model
from src.risk_assessment import classify_model_risk, detect_bias
from src.governance import log_model_metadata, generate_audit_entry

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Cell 3 - Code - Load data and train model with dtype fix for integer columns

import pandas as pd
import numpy as np

columns = ["Age", "Sex", "Job", "Housing", "Saving accounts", "Checking account", "Credit amount", "Duration", "Purpose"]


def _ints_to_float64(frame):
    """Return a copy of ``frame`` with every int32/int64 column cast to float64.

    Columns of any other dtype are left as they are.
    """
    int_cols = frame.select_dtypes(include=["int32", "int64"]).columns
    return frame.astype({col: np.float64 for col in int_cols})


def _as_float_target(values):
    """Wrap ``values`` in a Series with a fresh 0..n-1 index.

    The Series is cast to float64 only when its dtype is int32 or int64.
    """
    target = pd.Series(values).reset_index(drop=True)
    if target.dtype in ["int32", "int64"]:
        target = target.astype(np.float64)
    return target


X_train_np, X_test_np, y_train_np, y_test_np = load_and_preprocess_data()

# The loader's return types are not visible from this notebook. Resetting to a
# positional index keeps the feature frames and the targets aligned row-by-row
# even if the loader hands back pandas objects that carry their own index labels.
X_train = _ints_to_float64(pd.DataFrame(X_train_np, columns=columns).reset_index(drop=True))
X_test = _ints_to_float64(pd.DataFrame(X_test_np, columns=columns).reset_index(drop=True))

y_train = _as_float_target(y_train_np)
y_test = _as_float_target(y_test_np)

model = train_model(X_train, y_train)

Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 789.10it/s]


# Cell 4 - Markdown - Log Model Metadata (Traceability)
"""
### 1. Log Model Metadata (Traceability)
"""

In [3]:
# Cell 5 - Code - Record the model's identifying details for the governance report
model_metadata = {
    "model_name": "RandomForestClassifier",
    "version": "1.0",
    "data_source": "German Credit Risk",
    "owner": "ML Security Analyst",
    "purpose": "Credit risk scoring",
}
# Each key is forwarded as a keyword argument, in the order listed above.
log_model_metadata(**model_metadata)

Model metadata logged.


# Cell 6 - Markdown - Classify Model Risk
"""
### 2. Classify Model Risk (e.g. per EU AI Act)
"""

In [4]:
# Cell 7 - Code - Risk classification
# Features passed to the classifier as sensitive attributes.
sensitive_features = ["Sex", "Age"]

risk_level = classify_model_risk(purpose="credit_scoring", sensitive_features=sensitive_features)
print("Risk classification:", risk_level)


Risk classification: High Risk (EU AI Act – Annex III)


# Cell 8 - Markdown - Fairness & Bias Detection
"""
### 3. Fairness & Bias Detection (e.g. demographic parity)
"""

In [5]:
# Cell 8 - Code - Working copy of the test features for the bias analysis
# X_test is already a DataFrame carrying the feature column names (it was built
# that way when the data was loaded), so an independent copy is all that is needed
# here; repeating the column list and the pandas import only invites drift.
X_test_df = X_test.copy()

In [6]:
# Cell 9 - Code - Bias detection report
# Pair labels with feature rows by position. Building a Series from an existing
# Series together with `index=` makes pandas re-align by index label, which yields
# NaN (or mismatched rows) whenever the two indexes differ; the NaN filter below
# would then hide the problem. Stripping the labels first avoids that.
y_test_values = pd.Series(y_test).to_numpy()
if len(y_test_values) != len(X_test_df):
    raise ValueError(
        f"y_test has {len(y_test_values)} rows but X_test_df has {len(X_test_df)}"
    )
y_test_series = pd.Series(y_test_values, index=X_test_df.index)

# Keep only rows where the label and every feature value are present.
mask = y_test_series.notna() & X_test_df.notna().all(axis=1)
X_test_clean = X_test_df[mask]
y_test_clean = y_test_series[mask]

# Run the bias check on the cleaned rows, with "Sex" as the sensitive feature.
bias_report = detect_bias(
    X_test_clean,
    y_test_clean,
    sensitive_feature="Sex",
    model=model
)

bias_report

{np.float64(0.6702800625998365): 0.625, np.float64(-1.4919136877222166): 0.556}

# Cell 10 - Markdown - Create audit entry
"""
### 4. Create audit entry  
Generate an auditable JSON log that can be stored in GRC tools.
"""

In [7]:
# Cell 11 - Code - Generate audit entry
# Collect the audit fields first, then forward them as keyword arguments.
audit_fields = dict(
    model_name="RandomForestClassifier",
    risk_level=risk_level,
    bias=bias_report,
)
generate_audit_entry(**audit_fields)

Audit entry generated.


# Cell 12 - Markdown - Summary
"""
## Summary

- Logged metadata for traceability  
- Classified model risk based on usage & sensitivity  
- Detected potential bias based on demographic group  
- Generated audit entry for review/compliance  

This pipeline demonstrates a governance layer on top of any AI system – especially valuable in finance and regulated industries.
"""