In [1]:
import pandas as pd
import numpy as np
import joblib
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

# Load the persisted classifier and the held-out test split.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load model artifacts that come from a trusted source.
best_model = joblib.load('models/random_forest_model.pkl')
X_test = pd.read_csv('data/processed/X_test.csv')
# The labels are taken from the first column of the CSV, as a Series.
y_test = pd.read_csv('data/processed/y_test.csv').iloc[:, 0]

y_pred = best_model.predict(X_test)
cm = confusion_matrix(y_test, y_pred)

# ravel() only unpacks into (tn, fp, fn, tp) for a 2x2 matrix. If a class is
# absent from both y_test and y_pred, or the problem is not binary, fail with
# a readable message instead of an opaque unpacking error.
if cm.shape != (2, 2):
    raise ValueError(
        f"Expected a 2x2 confusion matrix for binary classification, got shape {cm.shape}"
    )
tn, fp, fn, tp = cm.ravel()

# Derive accuracy from the confusion matrix so the report can never drift
# away from the model/data actually loaded above.
n_samples = len(y_test)
accuracy = (tp + tn) / n_samples

# Business calculations
# NOTE(review): the cost model below charges false positives with default
# losses and false negatives with lost opportunity. That presumably means the
# positive class (the label that sorts last) stands for "approve" -- confirm
# against the label encoding used at training time.
avg_loan_amount = 25000000  # ₹2.5 Cr
default_rate = 0.05
loss_given_default = 0.70        # fraction of the loan amount lost per default
opportunity_rate = 0.02          # fraction of the loan amount forgone per false negative
savings_per_application = 450    # processing saving counted per test-set row

loss_per_default = avg_loan_amount * loss_given_default
opportunity_cost = avg_loan_amount * opportunity_rate
processing_savings = n_samples * savings_per_application

potential_loss = fp * default_rate * loss_per_default
opportunity_loss = fn * opportunity_cost
net_benefit = processing_savings - potential_loss - opportunity_loss

print("BUSINESS IMPACT ANALYSIS")
print(f"Model Accuracy: {accuracy:.2%}")
print(f"Correctly classified: {tp + tn}/{n_samples}")
print(f"Potential bad loan losses: ₹{potential_loss:,.0f}")
print(f"Missed opportunities: ₹{opportunity_loss:,.0f}")
print(f"Processing cost savings: ₹{processing_savings:,.0f}")
print(f"Net business benefit: ₹{net_benefit:,.0f}")

BUSINESS IMPACT ANALYSIS
Model Accuracy: 99.65%
Correctly classified: 851/854
Potential bad loan losses: ₹0
Missed opportunities: ₹1,500,000
Processing cost savings: ₹384,300
Net business benefit: ₹-1,115,700
