In [5]:
import pandas as pd
import joblib
import numpy as np
from datetime import datetime

print("="*80)
print("üè¶ CREDIT SCORING SYSTEM - XGBoost Model Demo")
print("="*80)
print(f"üìÖ Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")

üè¶ CREDIT SCORING SYSTEM - XGBoost Model Demo
üìÖ Date: 2025-11-13 01:03:14



In [6]:
# Load the trained XGBoost model and optimal threshold
print("Loading XGBoost model...")

xgb_model = joblib.load(r"C:\Users\Asus\Documents\GitHub\Credit-Scoring\output\models\lgb_model_optimized.pkl")
metadata = joblib.load(r"C:\Users\Asus\Documents\GitHub\Credit-Scoring\output\models\ensemble_comparison_metadata.pkl")

# Get XGBoost threshold
threshold = metadata['models']['xgboost']['threshold']

print("‚úÖ Model loaded successfully!")
print(f"‚úÖ Optimal threshold: {threshold:.3f}")
print(f"‚úÖ Total features: {len(xgb_model.feature_names_in_)}\n")

Loading XGBoost model...
‚úÖ Model loaded successfully!
‚úÖ Optimal threshold: 0.860
‚úÖ Total features: 64



In [7]:
# User-friendly feature configuration
FEATURE_CONFIG = [
    {
        "field": "age_years",
        "label": "üéÇ Age",
        "question": "How old is the customer?",
        "description": "Customer's age in years. Most applicants are between 25-60 years old.",
        "unit": "years",
        "range": (18, 75),
        "default": 35,
        "examples": "Example: 30, 45, 52"
    },
    {
        "field": "employment_years",
        "label": "üíº Work Experience",
        "question": "How many years has the customer been working?",
        "description": "Total years of employment (including all jobs, can be non-consecutive).",
        "unit": "years",
        "range": (0, 50),
        "default": 5,
        "examples": "Example: 3 (3 years), 10 (10 years), 0 (fresh graduate)"
    },
    {
        "field": "AMT_INCOME_TOTAL",
        "label": "üí∞ Annual Income",
        "question": "What is the customer's annual income?",
        "description": "Total yearly income from all sources (salary, business, etc.)",
        "unit": "USD",
        "range": (10000, 1000000),
        "default": 50000,
        "examples": "Example: 30000, 60000, 120000",
        "display_format": "${:,.0f}"
    },
    {
        "field": "AMT_CREDIT",
        "label": "üè¶ Requested Loan Amount",
        "question": "How much money does the customer want to borrow?",
        "description": "The total loan amount requested by the customer.",
        "unit": "USD",
        "range": (5000, 500000),
        "default": 150000,
        "examples": "Example: 50000, 150000, 300000",
        "display_format": "${:,.0f}"
    },
    {
        "field": "credit_income_ratio",
        "label": "üìä Loan-to-Income Ratio",
        "question": "Loan amount compared to annual income",
        "description": "How many times bigger is the loan compared to yearly income. Lower is better.\n"
                      "  ‚Ä¢ Below 2: Low risk (loan is 2x or less than income)\n"
                      "  ‚Ä¢ 2-5: Medium risk\n"
                      "  ‚Ä¢ Above 5: High risk (loan is much bigger than income)",
        "unit": "times",
        "range": (0, 10),
        "default": 2.5,
        "examples": "Example: 1.5 (loan = 1.5x income), 3.0 (loan = 3x income)",
        "auto_calculate": True,
        "formula": "AMT_CREDIT / AMT_INCOME_TOTAL"
    },
    {
        "field": "total_utilization",
        "label": "üí≥ Credit Usage Rate",
        "question": "How much credit is currently being used?",
        "description": "Percentage of credit limit being used across all cards/loans.\n"
                      "  ‚Ä¢ 0-30%: Excellent (low usage)\n"
                      "  ‚Ä¢ 30-50%: Good (moderate usage)\n"
                      "  ‚Ä¢ 50-80%: Fair (high usage)\n"
                      "  ‚Ä¢ 80-100%: Poor (maxed out)\n"
                      "  ‚Ä¢ >100%: Critical (over limit)",
        "unit": "%",
        "range": (0, 200),
        "default": 45,
        "examples": "Example: 30 (using 30%), 75 (using 75%), 120 (over limit by 20%)",
        "cast": lambda v: float(v) / 100,
        "display_format": "{:.0f}%"
    },
    {
        "field": "dpd_mean",
        "label": "‚è∞ Average Days Late",
        "question": "On average, how many days late are payments?",
        "description": "Average number of days customer pays AFTER the due date (last 12 months).\n"
                      "  ‚Ä¢ 0 days: Always on time (excellent)\n"
                      "  ‚Ä¢ 1-5 days: Occasionally late (good)\n"
                      "  ‚Ä¢ 6-15 days: Frequently late (fair)\n"
                      "  ‚Ä¢ 15-30 days: Very late (poor)\n"
                      "  ‚Ä¢ >30 days: Seriously delinquent (bad)",
        "unit": "days",
        "range": (0, 90),
        "default": 3,
        "examples": "Example: 0 (always on time), 5 (few days late), 30 (month late)"
    },
    {
        "field": "cc_avg_utilization",
        "label": "üí≥ Credit Card Usage",
        "question": "What percentage of credit card limit is being used?",
        "description": "How much of the credit card limit is currently spent.\n"
                      "  ‚Ä¢ 0-10%: Excellent (barely using cards)\n"
                      "  ‚Ä¢ 10-30%: Good (healthy usage)\n"
                      "  ‚Ä¢ 30-60%: Fair (moderate usage)\n"
                      "  ‚Ä¢ 60-90%: Poor (high usage)\n"
                      "  ‚Ä¢ >90%: Critical (almost maxed out)",
        "unit": "%",
        "range": (0, 100),
        "default": 42,
        "examples": "Example: 25 (using 25%), 60 (using 60%), 95 (almost full)",
        "cast": lambda v: float(v) / 100,
        "display_format": "{:.0f}%"
    },
    {
        "field": "NAME_EDUCATION_TYPE_Higher education",
        "label": "üéì Education Level",
        "question": "Does the customer have higher education (college/university)?",
        "description": "Higher education includes: Bachelor's degree, Master's, PhD, or equivalent.",
        "unit": "",
        "range": (0, 1),
        "default": 1,
        "examples": "Enter 1 for YES, 0 for NO",
        "cast": int,
        "is_binary": True
    },
    {
        "field": "NAME_INCOME_TYPE_Working",
        "label": "üíº Employment Status",
        "question": "Is the customer currently employed (working)?",
        "description": "Select YES if customer has a regular job. Select NO for self-employed, retired, student, unemployed.",
        "unit": "",
        "range": (0, 1),
        "default": 1,
        "examples": "Enter 1 for YES (employed), 0 for NO (other)",
        "cast": int,
        "is_binary": True
    },
]

print("‚úÖ Feature configuration loaded!")
print(f"‚úÖ Total questions to ask: {len([c for c in FEATURE_CONFIG if not c.get('auto_calculate')])}\n")

‚úÖ Feature configuration loaded!
‚úÖ Total questions to ask: 9



In [8]:
def ask_value(cfg: dict, context: dict = None) -> float:
    """
    Ask user for input with validation and helpful prompts
    
    Parameters:
    -----------
    cfg : dict
        Configuration dictionary for the field
    context : dict
        Previously collected values (for auto-calculation)
    
    Returns:
    --------
    float : The validated value
    """
    # Check if this field should be auto-calculated
    if cfg.get("auto_calculate") and context:
        formula = cfg.get("formula", "")
        if formula:
            try:
                # Simple formula evaluation
                if "/" in formula:
                    parts = formula.split("/")
                    numerator = context.get(parts[0].strip())
                    denominator = context.get(parts[1].strip())
                    if numerator and denominator:
                        value = numerator / denominator
                        print(f"\n{cfg['label']}")
                        print(f"  ‚ÑπÔ∏è  Auto-calculated: {value:.2f} {cfg.get('unit', '')}")
                        print(f"  üìê Formula: {formula}")
                        return value
            except:
                pass
    
    cast = cfg.get("cast", float)
    low, high = cfg.get("range", (None, None))
    default = cfg.get("default")
    unit = cfg.get("unit", "")
    is_binary = cfg.get("is_binary", False)
    
    # Build prompt
    print("\n" + "‚îÄ"*60)
    print(f"{cfg['label']}")
    print(f"‚ùì {cfg['question']}")
    
    if is_binary:
        prompt = f"Enter (1=YES, 0=NO)"
    else:
        prompt = f"Enter value"
        if unit:
            prompt += f" in {unit}"
        if low is not None and high is not None:
            prompt += f" (range: {low}-{high})"
    
    if default is not None:
        prompt += f" [default: {default}]"
    
    prompt += "\nüí° Type '?' for more info, or press Enter for default: "
    
    while True:
        raw = input(prompt).strip()
        
        # Show help
        if raw == "?":
            print("\nüìñ Help:")
            print(f"   {cfg['description']}")
            if cfg.get('examples'):
                print(f"   {cfg['examples']}")
            print()
            continue
        
        # Use default
        if raw == "":
            if default is None:
                print("‚ùå This field is required. Please enter a value or type '?' for help.")
                continue
            value = default
            print(f"‚úì Using default: {default}")
        else:
            # Parse input
            raw = raw.replace(",", "").replace("$", "").strip()
            try:
                value = cast(raw)
            except ValueError:
                print(f"‚ùå Invalid input. Please enter a valid number.")
                continue
        
        # Validate range
        if low is not None and value < low:
            print(f"‚ùå Value too low. Minimum is {low}.")
            continue
        if high is not None and value > high:
            print(f"‚ùå Value too high. Maximum is {high}.")
            continue
        
        # Display confirmation
        display_format = cfg.get("display_format", "{}")
        if is_binary:
            display_value = "YES" if value == 1 else "NO"
        else:
            display_value = display_format.format(value)
        print(f"‚úÖ Confirmed: {display_value}")
        
        return value

print("üìù User input function ready!")

üìù User input function ready!


In [9]:
# Interactive data collection
print("\n" + "="*80)
print("üìù CUSTOMER INFORMATION FORM")
print("="*80)
print("\nüí° Tips:")
print("  ‚Ä¢ Press Enter to use default values (shown in brackets)")
print("  ‚Ä¢ Type '?' at any question to see detailed explanation")
print("  ‚Ä¢ All values will be validated automatically")
print("\nLet's start!\n")

# Collect all values
values = {}
for cfg in FEATURE_CONFIG:
    field_value = ask_value(cfg, context=values)
    values[cfg["field"]] = field_value

print("\n" + "="*80)
print("‚úÖ All information collected!")
print("="*80)


üìù CUSTOMER INFORMATION FORM

üí° Tips:
  ‚Ä¢ Press Enter to use default values (shown in brackets)
  ‚Ä¢ Type '?' at any question to see detailed explanation
  ‚Ä¢ All values will be validated automatically

Let's start!


‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
üéÇ Age
‚ùì How old is the customer?
‚úÖ Confirmed: 18.0

‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
üíº Work Experience
‚ùì How many years has the customer been working?
‚úÖ Confirmed: 0.0

‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
üí∞ Annual Income
‚ùì What is the customer's annual income?
‚úÖ Confirmed: $20

In [13]:
# Prepare data for prediction
print("\nüîÑ Preparing data for prediction...")

# Create DataFrame with collected values
sample = pd.DataFrame([values])

# Fill missing features with 0 (XGBoost will handle missing features)
sample = sample.reindex(columns=xgb_model.feature_names_in_, fill_value=0)

print(f"‚úÖ Data prepared: {sample.shape[1]} features")
print("\nüìä Summary of input data:")
print("‚îÄ"*60)
for cfg in FEATURE_CONFIG[:7]:  # Show first 7 features
    field = cfg["field"]
    if field in values:
        display_format = cfg.get("display_format", "{}")
        if cfg.get("is_binary"):
            display_value = "YES" if values[field] == 1 else "NO"
        else:
            display_value = display_format.format(values[field])
        print(f"{cfg['label']}: {display_value}")


üîÑ Preparing data for prediction...
‚úÖ Data prepared: 64 features

üìä Summary of input data:
‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
üéÇ Age: 18.0
üíº Work Experience: 0.0
üí∞ Annual Income: $20,000
üè¶ Requested Loan Amount: $40,000
üìä Loan-to-Income Ratio: 2.0
üí≥ Credit Usage Rate: 0%
‚è∞ Average Days Late: 0.0


In [14]:
# Make prediction
print("\nü§ñ Running XGBoost model prediction...")

# Get probability
prob = float(xgb_model.predict_proba(sample)[:, 1][0])

# Make decision
is_risky = prob >= threshold

# Determine risk level
if prob < 0.3:
    risk_level = "VERY LOW"
    risk_emoji = "üü¢"
    risk_color = "GREEN"
elif prob < 0.5:
    risk_level = "LOW"
    risk_emoji = "üü°"
    risk_color = "YELLOW"
elif prob < 0.7:
    risk_level = "MEDIUM"
    risk_emoji = "üü†"
    risk_color = "ORANGE"
elif prob < threshold:
    risk_level = "HIGH"
    risk_emoji = "üî¥"
    risk_color = "RED"
else:
    risk_level = "CRITICAL"
    risk_emoji = "üö®"
    risk_color = "RED"

# Display results
print("\n" + "="*80)
print("üéØ PREDICTION RESULTS")
print("="*80)

print(f"\n{risk_emoji} Default Probability: {prob:.1%}")
print(f"{risk_emoji} Risk Level: {risk_level}")
print(f"üéöÔ∏è  Decision Threshold: {threshold:.1%}")

print("\n" + "‚îÄ"*80)
print("üìã FINAL DECISION")
print("‚îÄ"*80)

if is_risky:
    print("\n‚ùå REJECT APPLICATION")
    print("\n‚ö†Ô∏è  Reasons:")
    print(f"   ‚Ä¢ Default probability ({prob:.1%}) exceeds threshold ({threshold:.1%})")
    print(f"   ‚Ä¢ Risk level: {risk_level}")
    print("\nüí° Recommendations:")
    print("   ‚Ä¢ Request additional collateral")
    print("   ‚Ä¢ Suggest a smaller loan amount")
    print("   ‚Ä¢ Consider a co-signer")
    print("   ‚Ä¢ Offer financial counseling")
else:
    print("\n‚úÖ APPROVE APPLICATION")
    print("\nüéâ Good news! This customer shows low default risk.")
    print("\nüí° Recommendations:")
    if prob < 0.3:
        print("   ‚Ä¢ Standard approval process")
        print("   ‚Ä¢ Competitive interest rate")
        print("   ‚Ä¢ Consider for premium services")
    else:
        print("   ‚Ä¢ Standard approval with monitoring")
        print("   ‚Ä¢ Set up payment reminders")
        print("   ‚Ä¢ Regular credit review")

print("\n" + "="*80)
print("üìä MODEL INFORMATION")
print("="*80)
print(f"Model: XGBoost Classifier")
print(f"Threshold: {threshold:.3f}")
print(f"Expected Precision: 20.17%")
print(f"Expected Recall: 36.56%")
print(f"ROC-AUC Score: 0.7042")
print("="*80 + "\n")


ü§ñ Running XGBoost model prediction...

üéØ PREDICTION RESULTS

üü¢ Default Probability: 12.9%
üü¢ Risk Level: VERY LOW
üéöÔ∏è  Decision Threshold: 86.0%

‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
üìã FINAL DECISION
‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ

‚úÖ APPROVE APPLICATION

üéâ Good news! This customer shows low default risk.

üí° Recommendations:
   ‚Ä¢ Standard approval process
   ‚Ä¢ Competitive interest rate
   ‚Ä¢ Consider for premium services

üìä MODEL INFORMATION
Model: XGBoost Classifier
Threshold: 0.860
Expected Precision: 20.17%
Expected Recall: 36.56%
ROC-AUC Scor

In [12]:
# Optional: Show feature importance for this prediction
print("\nüìà TOP 10 FACTORS INFLUENCING THIS DECISION:")
print("‚îÄ"*60)

# Get feature importance from XGBoost
feature_importance = pd.DataFrame({
    'feature': xgb_model.feature_names_in_,
    'importance': xgb_model.feature_importances_
}).sort_values('importance', ascending=False)

# Map feature names to friendly labels
feature_labels = {cfg['field']: cfg['label'] for cfg in FEATURE_CONFIG}

print("\nMost important features in the model:")
for idx, row in feature_importance.head(10).iterrows():
    feature = row['feature']
    importance = row['importance']
    label = feature_labels.get(feature, feature)
    
    # Get customer's value for this feature
    customer_value = values.get(feature, sample[feature].iloc[0])
    
    print(f"{idx+1}. {label}")
    print(f"   Importance: {importance:.4f} | Your value: {customer_value:.2f}")

print("\n‚úÖ Prediction complete!\n")


üìà TOP 10 FACTORS INFLUENCING THIS DECISION:
‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ

Most important features in the model:
20. üí≥ Credit Usage Rate
   Importance: 4.0000 | Your value: 0.00
13. raw_days_employed
   Importance: 3.0000 | Your value: 0.00
21. active_loans_count
   Importance: 3.0000 | Your value: 0.00
45. dpd_max
   Importance: 3.0000 | Your value: 0.00
30. üí≥ Credit Card Usage
   Importance: 2.0000 | Your value: 42.00
22. closed_loans_count
   Importance: 2.0000 | Your value: 0.00
9. raw_credit_amt
   Importance: 2.0000 | Your value: 0.00
1. üéÇ Age
   Importance: 2.0000 | Your value: 18.00
46. on_time_ratio
   Importance: 2.0000 | Your value: 0.00
11. raw_goods_price
   Importance: 1.0000 | Your value: 0.00

‚úÖ Prediction complete!

