# In [4]:
import time
import uuid
import numpy as np
import pandas as pd
import json
import requests
from evidently.report import Report
from evidently.metric_preset import DataDriftPreset
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, matthews_corrcoef, balanced_accuracy_score, jaccard_score, cohen_kappa_score

class AIModelProcessor:
    """Collect evaluation metrics, drift reports and logs for a fitted model.

    The processor keeps the training data as a reference DataFrame (feature
    columns plus a ``"target"`` column) and compares later inputs against it.
    """

    # Accuracy is a fraction in [0, 1]; anything below this raises a "High" alert.
    ACCURACY_ALERT_THRESHOLD = 0.9

    def __init__(self, model, model_version="1.0", model_type="Model", training_data=None, column_names=None):
        """Store the model and, optionally, register training data.

        Args:
            model: Fitted estimator; ``predict`` is called on it when agent
                data is analysed and it is passed to ``cross_val_score``.
            model_version: Version label copied into agent records.
            model_type: Free-form type label copied into agent records.
            training_data: Optional ``(x_train, y_train)`` pair forwarded to
                ``update_training_data``.
            column_names: Feature column names used when building DataFrames.
        """
        self.model = model
        self.model_type = model_type
        self.model_version = model_version
        self.column_names = column_names
        self.x_train = None
        self.y_train = None
        self.training_data = None
        # Explicit None/length check: truth-testing an array-like is ambiguous.
        if training_data is not None and len(training_data) > 0:
            self.update_training_data(*training_data, column_names=column_names)

    def update_training_data(self, x_train, y_train, column_names=None):
        """Replace the stored training set and rebuild the reference frame.

        Args:
            x_train: Feature matrix (array-like or DataFrame).
            y_train: Labels, one per row of ``x_train``.
            column_names: Feature column names for the reference frame.
        """
        self.x_train = x_train
        self.y_train = y_train
        self.training_data = pd.DataFrame(x_train, columns=column_names)
        # Assign positionally: a Series carrying a shuffled index (e.g. from
        # train_test_split) would otherwise be index-aligned against the
        # frame and end up misplaced or NaN.
        self.training_data["target"] = np.asarray(y_train)
        self.column_names = column_names
        print("Training data updated successfully.")

    def process_agent_data(self, input_data, decision_outcome, user_feedback=None):
        """Wrap one batch of inputs/outcomes in an event record and analyse it.

        Args:
            input_data: Feature rows handed to the model.
            decision_outcome: Labels the model predictions are compared with.
            user_feedback: Optional feedback stored in the record.

        Returns:
            The dictionary produced by ``analyze_agent_data``.
        """
        event_id = f"EVT-{uuid.uuid4().hex[:8]}"
        agent_data = {
            "Model": self.model,
            "Timestamp": time.time(),
            "Event ID": event_id,
            "AI Model Version": self.model_version,
            "Model Type": self.model_type,
            "Input Data": input_data,
            "Decision Outcome": decision_outcome,
            "User Feedback": user_feedback,
        }
        return self.analyze_agent_data(agent_data)

    def analyze_agent_data(self, agent_data, actual_model=None, reference_data=None):
        """Score the model on an agent record and attach a drift report.

        Args:
            agent_data: Mapping with at least ``"Input Data"`` and
                ``"Decision Outcome"`` keys.
            actual_model: Estimator to evaluate; defaults to ``self.model``.
            reference_data: Reference frame for the drift report; defaults to
                the stored training data.

        Returns:
            Dict with alert severity, performance metrics, a rationale
            placeholder and the drift report JSON (under two keys).
        """
        model = self.model if actual_model is None else actual_model
        inputs = agent_data["Input Data"]
        outcomes = agent_data["Decision Outcome"]

        # Predict once and reuse the predictions for every metric.
        predictions = model.predict(inputs)
        model_performance_metrics = {
            "accuracy": accuracy_score(outcomes, predictions),
            "precision": precision_score(outcomes, predictions, average="weighted"),
            "recall": recall_score(outcomes, predictions, average="weighted"),
        }

        test_data = pd.DataFrame(inputs, columns=self.column_names)
        test_data["target"] = np.asarray(outcomes)
        drift_report = self.evidentlyAi(test_data, reference_data=reference_data)

        low_accuracy = model_performance_metrics["accuracy"] < self.ACCURACY_ALERT_THRESHOLD
        processed_data = {
            "Alert Severity": "High" if low_accuracy else "Low",
            "Model Performance Metrics": model_performance_metrics,
            "Decision Rationale": "Placeholder or explanation based on model interpretation methods.",
            "AI Model Drift": drift_report,
            "Data Drift Report": drift_report,
        }

        return processed_data

    def evidentlyAi(self, test_data, reference_data=None):
        """Run a data-drift report of ``test_data`` against reference data.

        Args:
            test_data: Current data (DataFrame or array-like of feature rows).
            reference_data: Reference data; defaults to the stored training
                frame.

        Returns:
            The report serialised by ``Report.json()``.

        Raises:
            ValueError: If no reference data is available.
        """
        if reference_data is None:
            reference_data = self.training_data
        if reference_data is None:
            raise ValueError(
                "No reference data available; call update_training_data() first."
            )

        if not isinstance(test_data, pd.DataFrame):
            test_data = pd.DataFrame(test_data, columns=self.column_names)
        if not isinstance(reference_data, pd.DataFrame):
            reference_data = pd.DataFrame(reference_data, columns=self.column_names)

        # drop() returns new frames, so the stored training data keeps its
        # "target" column for later use.
        current = test_data.drop(columns=["target"], errors="ignore")
        reference = reference_data.drop(columns=["target"], errors="ignore")

        report = Report(metrics=[DataDriftPreset()])
        report.run(current_data=current, reference_data=reference)
        return report.json()

    def calculate_metrics(self, y_true, y_pred, y_prob):
        """Compute classification metrics and print them.

        Args:
            y_true: Ground-truth labels.
            y_pred: Predicted labels.
            y_prob: Predicted probabilities: a 1-D array of positive-class
                scores, or a 2-D ``(n_samples, n_classes)`` matrix.

        Returns:
            Dict mapping metric names to values.
        """
        probabilities = np.asarray(y_prob)
        if probabilities.ndim == 1:
            auc_scores = probabilities
        elif probabilities.shape[1] > 2:
            # Multi-class: roc_auc_score needs the full probability matrix.
            auc_scores = probabilities
        else:
            # Binary: use the second column, as before.
            auc_scores = probabilities[:, 1]

        metrics = {
            "Accuracy": accuracy_score(y_true, y_pred),
            "Precision": precision_score(y_true, y_pred, average="weighted"),
            "Recall": recall_score(y_true, y_pred, average="weighted"),
            "F1 Score": f1_score(y_true, y_pred, average="weighted"),
            "AUC-ROC": roc_auc_score(y_true, auc_scores, multi_class="ovr"),
            "Matthews Correlation Coefficient": matthews_corrcoef(y_true, y_pred),
            "Balanced Accuracy": balanced_accuracy_score(y_true, y_pred),
            "Jaccard Index": jaccard_score(y_true, y_pred, average="weighted"),
            "Cohen's Kappa": cohen_kappa_score(y_true, y_pred),
        }
        print("Metrics calculated:", metrics)
        return metrics

    def cross_validation_metrics(self, X, y):
        """Run 5-fold accuracy cross-validation of ``self.model`` and print it.

        Returns:
            The array of per-fold scores from ``cross_val_score``.
        """
        cross_val_scores = cross_val_score(self.model, X, y, cv=5, scoring="accuracy")
        print(f"Cross-validation scores: {cross_val_scores}")
        print(f"Mean cross-validation score: {np.mean(cross_val_scores)}")
        return cross_val_scores

    @staticmethod
    def _json_default(value):
        """Convert NumPy scalars/arrays to plain Python for ``json.dumps``."""
        if isinstance(value, np.generic):
            return value.item()
        if isinstance(value, np.ndarray):
            return value.tolist()
        raise TypeError(
            f"Object of type {type(value).__name__} is not JSON serializable"
        )

    def save_log(self, log_data, log_file="model_logs.json"):
        """Append one timestamped log entry to ``log_file``.

        Entries are written as JSON Lines (one JSON object per line) so the
        file stays parseable after any number of appends. Failures are
        reported on stdout rather than raised.

        Args:
            log_data: JSON-serialisable payload (NumPy scalars and arrays are
                converted automatically).
            log_file: Path of the log file to append to.
        """
        timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
        log_entry = {
            "Timestamp": timestamp,
            "Log Data": log_data,
        }

        try:
            # Serialise before opening the file so a failure cannot leave a
            # half-written entry behind.
            line = json.dumps(log_entry, default=self._json_default)
            with open(log_file, "a", encoding="utf-8") as f:
                f.write(line + "\n")
        except (OSError, TypeError, ValueError) as e:
            print(f"Error saving log to file: {e}")

# Example Usage
def train_and_process_model(model, X_train, y_train, X_test, y_test, column_names):
    """Evaluate a fitted model on held-out data and log the results.

    Registers the training data with an ``AIModelProcessor``, computes test
    metrics and cross-validation scores, builds a drift report for the test
    features, and hands everything to ``save_log``.

    Args:
        model: Fitted estimator exposing ``predict`` and ``predict_proba``.
        X_train: Training features.
        y_train: Training labels.
        X_test: Held-out features.
        y_test: Held-out labels.
        column_names: Feature column names for the processor's DataFrames.
    """
    tracker = AIModelProcessor(model=model)
    tracker.update_training_data(X_train, y_train, column_names=column_names)

    # Hold-out evaluation: hard labels plus class probabilities.
    predicted_labels = model.predict(X_test)
    predicted_probabilities = model.predict_proba(X_test)
    test_metrics = tracker.calculate_metrics(
        y_test, predicted_labels, predicted_probabilities
    )

    fold_scores = tracker.cross_validation_metrics(X_train, y_train)

    # Assemble the log payload key by key, in the order the log expects.
    log_payload = {"Model Metrics": test_metrics}
    log_payload["Cross Validation Scores"] = fold_scores.tolist()
    log_payload["Data Drift Report"] = tracker.evidentlyAi(X_test)
    tracker.save_log(log_payload)

# Example: load the loan dataset and split off the label column.
file_path = 'loan_data.csv'
df = pd.read_csv(file_path)
y = df["loan_status"]
X = df.drop(columns=['loan_status'])

# Replace each object-typed column with its integer category codes.
for col in X.columns:
    if X[col].dtype != 'object':
        continue
    X[col] = X[col].astype('category').cat.codes

# Hold out 20% of the rows, then standardise using statistics learned from
# the training split only.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

from sklearn.neighbors import KNeighborsClassifier
model = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

train_and_process_model(model, X_train, y_train, X_test, y_test, X.columns.tolist())

# Output: FileNotFoundError: [Errno 2] No such file or directory: 'loan_data.csv'