In [2]:
import onnxruntime as ort
import numpy as np
import pandas as pd
import joblib
from lime.lime_tabular import LimeTabularExplainer

# Load the training columns (make sure 'training_columns.pkl' exists in the directory)
training_columns = joblib.load("training_columns.pkl")

# Define a sample input for testing
new_data_default_risk = {
    'AMT_INCOME_TOTAL': 25000,
    'AMT_CREDIT': 8000,
    'AMT_ANNUITY': 2000,
    'AMT_GOODS_PRICE': 5000,
    'DAYS_BIRTH': -16000,
    'DAYS_EMPLOYED': 0,
    'REGION_POPULATION_RELATIVE': 0.04,
    'CNT_FAM_MEMBERS': 4,
    'FLAG_MOBIL': 1,
    'FLAG_EMAIL': 1,
    'FLAG_WORK_PHONE': 0,
    'NAME_INCOME_TYPE_Working': 0,
    'NAME_INCOME_TYPE_Unemployed': 1,
    'NAME_EDUCATION_TYPE_Higher_education': 0,
    'NAME_EDUCATION_TYPE_Secondary_education': 1,
    'NAME_FAMILY_STATUS_Married': 0,
    'NAME_FAMILY_STATUS_Single': 1,
    'NAME_HOUSING_TYPE_House_apartment': 1,
    'NAME_HOUSING_TYPE_With_parents': 0,
    'OCCUPATION_TYPE_Laborers': 1,
    'OCCUPATION_TYPE_Sales_staff': 0
}

# Convert the dictionary to a DataFrame and reindex to match the training columns
test_data = pd.DataFrame([new_data_default_risk])
test_data = test_data.reindex(columns=training_columns, fill_value=0)

# Load your saved preprocessor
best_model = joblib.load("best_model.joblib")
preprocessor = best_model.named_steps['preprocessor']

# Preprocess the test data
test_data_processed = preprocessor.transform(test_data).astype(np.float32)

# Load the ONNX model
onnx_model_path = "models/1/xgb_classifier.onnx"  # Ensure this file path is correct
session = ort.InferenceSession(onnx_model_path)

# Prepare input data in the required format for ONNX (numpy array)
input_name = session.get_inputs()[0].name
test_data_array = test_data_processed

# Run the model and get the output
prediction = session.run(None, {input_name: test_data_array})[0]

# Display the prediction result
predicted_class = "Default" if prediction[0] == 1 else "Non-default"
print(f"Predicted Class: {predicted_class}")
print(f"Prediction Probability: {prediction}")

Predicted Class: Non-default
Prediction Probability: [0]


In [21]:
import pandas as pd
import joblib

# Define a sample input for testing
new_data_default_risk = {
    'AMT_INCOME_TOTAL': 25000,
    'AMT_CREDIT': 8000,
    'AMT_ANNUITY': 2000,
    'AMT_GOODS_PRICE': 5000,
    'DAYS_BIRTH': -16000,
    'DAYS_EMPLOYED': 0,
    'REGION_POPULATION_RELATIVE': 0.04,
    'CNT_FAM_MEMBERS': 4,
    'FLAG_MOBIL': 1,
    'FLAG_EMAIL': 1,
    'FLAG_WORK_PHONE': 0,
    'NAME_INCOME_TYPE_Working': 0,
    'NAME_INCOME_TYPE_Unemployed': 1,
    'NAME_EDUCATION_TYPE_Higher_education': 0,
    'NAME_EDUCATION_TYPE_Secondary_education': 1,
    'NAME_FAMILY_STATUS_Married': 0,
    'NAME_FAMILY_STATUS_Single': 1,
    'NAME_HOUSING_TYPE_House_apartment': 1,
    'NAME_HOUSING_TYPE_With_parents': 0,
    'OCCUPATION_TYPE_Laborers': 1,
    'OCCUPATION_TYPE_Sales_staff': 0
}

# Load the trained pipeline model (make sure 'best_model.joblib' exists in the directory)
best_model = joblib.load("best_model.joblib")

# Load the training data structure for column alignment
training_columns = joblib.load("training_columns.pkl")

# Create a DataFrame from the input data
new_data_df = pd.DataFrame([new_data_default_risk], columns=training_columns)

# Define a function to preprocess and predict new input data
def preprocess_and_predict(new_data, model, X_train_columns):
    """
    Preprocess the input data and predict the class and probabilities.

    Args:
    - new_data (dict): Input data for prediction.
    - model (Pipeline): Trained pipeline containing preprocessor and classifier.
    - X_train_columns (list): Column names used during training.

    Returns:
    - prediction (int): Predicted class (0 or 1).
    - prediction_proba (list): Probabilities for each class.
    """
    # Ensure new_data is a DataFrame and matches the format used during training
    new_data_df = pd.DataFrame([new_data], columns=X_train_columns)

    # Preprocess the input data (e.g., scaling and encoding using the fitted pipeline)
    processed_data = model.named_steps['preprocessor'].transform(new_data_df)

    # Make a prediction using the trained model
    prediction = model.named_steps['classifier'].predict(processed_data)
    prediction_proba = model.named_steps['classifier'].predict_proba(processed_data)

    # Output the prediction and probability
    predicted_class = "Default" if prediction[0] == 1 else "Non-default"
    probability_class_0 = prediction_proba[0][0]
    probability_class_1 = prediction_proba[0][1]

    print(f"Predicted Class: {predicted_class}")
    print(f"Probability of Non-default (class 0): {probability_class_0:.4f}")
    print(f"Probability of Default (class 1): {probability_class_1:.4f}")

    return prediction[0], prediction_proba[0]

# Call the function to predict
predicted_class, prediction_proba = preprocess_and_predict(new_data_default_risk, best_model, training_columns)


Predicted Class: Non-default
Probability of Non-default (class 0): 0.7245
Probability of Default (class 1): 0.2755


In [9]:
def classify_credit_score(probability_class_0):
    """
    Classify the probability of class 0 into credit score categories.

    Args:
    - probability_class_0 (float): Probability of class 0 (Non-default).

    Returns:
    - credit_score (str): The credit score category.
    """
    if 0.9889 <= probability_class_0 <= 1.0:
        return "Excellent"
    elif 0.9811 <= probability_class_0 < 0.9889:
        return "Excellent"
    elif 0.9729 <= probability_class_0 < 0.9811:
        return "Good"
    elif 0.9633 <= probability_class_0 < 0.9729:
        return "Good"
    elif 0.9513 <= probability_class_0 < 0.9633:
        return "Good"
    elif 0.9364 <= probability_class_0 < 0.9513:
        return "Fair"
    elif 0.9154 <= probability_class_0 < 0.9364:
        return "Fair"
    elif 0.8818 <= probability_class_0 < 0.9154:
        return "Poor"
    elif 0.8158 <= probability_class_0 < 0.8818:
        return "Very Poor"
    elif 0.1435 <= probability_class_0 < 0.8158:
        return "Very Poor"
    else:
        return "Uncategorized"

# Example: Predict and classify the credit score
predicted_class, prediction_proba = preprocess_and_predict(new_data_default_risk, best_model, training_columns)

# Extract the probability for class 0
probability_class_0 = prediction_proba[0]

# Classify the credit score
credit_score = classify_credit_score(probability_class_0)

# Output the credit score classification
print(f"Credit Score Category: {credit_score}")

Predicted Class: Non-default
Probability of Non-default (class 0): 0.7186
Probability of Default (class 1): 0.2814
Credit Score Category: Very Poor


In [10]:
import pandas as pd
import joblib

# Define a sample input for testing
new_data_default_risk = {
    'AMT_INCOME_TOTAL': 150000,            # Higher income
    'AMT_CREDIT': 200000,                  # Lower credit amount
    'AMT_ANNUITY': 10000,                  # Lower annuity relative to income
    'AMT_GOODS_PRICE': 180000,             # Lower goods price relative to income
    'DAYS_BIRTH': -12000,                  # Middle-aged
    'DAYS_EMPLOYED': -3000,                # Long-term employment
    'REGION_POPULATION_RELATIVE': 0.02,    # Less densely populated area
    'CNT_FAM_MEMBERS': 2,                  # Smaller family size
    'FLAG_MOBIL': 1,                       # Owns a mobile phone
    'FLAG_EMAIL': 1,                       # Has an email
    'FLAG_WORK_PHONE': 1,                  # Has a work phone
    # Encoded categorical features based on a lower-risk profile
    'NAME_INCOME_TYPE_Working': 1,
    'NAME_INCOME_TYPE_Unemployed': 0,
    'NAME_EDUCATION_TYPE_Higher_education': 1,
    'NAME_EDUCATION_TYPE_Secondary_education': 0,
    'NAME_FAMILY_STATUS_Married': 1,
    'NAME_FAMILY_STATUS_Single': 0,
    'NAME_HOUSING_TYPE_House_apartment': 1,
    'NAME_HOUSING_TYPE_With_parents': 0,
    'OCCUPATION_TYPE_Laborers': 0,
    'OCCUPATION_TYPE_Sales_staff': 1,
}

# Load the trained pipeline model (make sure 'best_model.joblib' exists in the directory)
best_model = joblib.load("best_model.joblib")

# Load the training data structure for column alignment
training_columns = joblib.load("training_columns.pkl")

# Create a DataFrame from the input data
new_data_df = pd.DataFrame([new_data_default_risk], columns=training_columns)

# Define a function to preprocess and predict new input data
def preprocess_and_predict(new_data, model, X_train_columns):
    """
    Preprocess the input data and predict the class and probabilities.

    Args:
    - new_data (dict): Input data for prediction.
    - model (Pipeline): Trained pipeline containing preprocessor and classifier.
    - X_train_columns (list): Column names used during training.

    Returns:
    - prediction (int): Predicted class (0 or 1).
    - prediction_proba (list): Probabilities for each class.
    """
    # Ensure new_data is a DataFrame and matches the format used during training
    new_data_df = pd.DataFrame([new_data], columns=X_train_columns)

    # Preprocess the input data (e.g., scaling and encoding using the fitted pipeline)
    processed_data = model.named_steps['preprocessor'].transform(new_data_df)

    # Make a prediction using the trained model
    prediction = model.named_steps['classifier'].predict(processed_data)
    prediction_proba = model.named_steps['classifier'].predict_proba(processed_data)

    # Output the prediction and probability
    predicted_class = "Default" if prediction[0] == 1 else "Non-default"
    probability_class_0 = prediction_proba[0][0]
    probability_class_1 = prediction_proba[0][1]

    print(f"Predicted Class: {predicted_class}")
    print(f"Probability of Non-default (class 0): {probability_class_0:.4f}")
    print(f"Probability of Default (class 1): {probability_class_1:.4f}")

    return prediction[0], prediction_proba[0]

# Call the function to predict
predicted_class, prediction_proba = preprocess_and_predict(new_data_default_risk, best_model, training_columns)


Predicted Class: Non-default
Probability of Non-default (class 0): 0.9497
Probability of Default (class 1): 0.0503


In [11]:
# Example: Predict and classify the credit score
predicted_class, prediction_proba = preprocess_and_predict(new_data_default_risk, best_model, training_columns)

# Extract the probability for class 0
probability_class_0 = prediction_proba[0]

# Classify the credit score
credit_score = classify_credit_score(probability_class_0)

# Output the credit score classification
print(f"Credit Score Category: {credit_score}")

Predicted Class: Non-default
Probability of Non-default (class 0): 0.9497
Probability of Default (class 1): 0.0503
Credit Score Category: Fair


This one is for defaulters

In [20]:
import onnxruntime as ort
import numpy as np
import pandas as pd
import joblib
from lime.lime_tabular import LimeTabularExplainer

# Load the training columns (make sure 'training_columns.pkl' exists in the directory)
training_columns = joblib.load("training_columns.pkl")

# Define a default-risk scenario input for testing
new_data_default_risk = {
    'AMT_INCOME_TOTAL': 10000,  # Extremely low income
    'AMT_CREDIT': 200000,  # Extremely high credit amount
    'AMT_ANNUITY': 180000,  # Very high annuity amount
    'AMT_GOODS_PRICE': 135000,  # Very high goods price
    'DAYS_BIRTH': -29200,  # Age: ~82 years
    'DAYS_EMPLOYED': 0,  # Unemployed
    'REGION_POPULATION_RELATIVE': 0.01,  # Extremely sparsely populated region
    'CNT_FAM_MEMBERS': 8,  # Very large family size
    'FLAG_MOBIL': 1,  # Has a mobile phone
    'FLAG_EMAIL': 0,  # No email
    'FLAG_WORK_PHONE': 0,  # No work phone
    'NAME_INCOME_TYPE_Working': 1,  # Not Working
    'NAME_INCOME_TYPE_Unemployed': 0,  # Unemployed
    'NAME_EDUCATION_TYPE_Higher_education': 0,  # No higher education
    'NAME_EDUCATION_TYPE_Secondary_education': 1,  # Secondary education
    'NAME_FAMILY_STATUS_Married': 1,  # Not married
    'NAME_FAMILY_STATUS_Single': 0,  # Single
    'NAME_HOUSING_TYPE_House_apartment': 0,  # Doesn't own a house/apartment
    'NAME_HOUSING_TYPE_With_parents': 1,  # Lives with parents
    'OCCUPATION_TYPE_Laborers': 1,  # Laborer
    'OCCUPATION_TYPE_Sales_staff': 0  # Not Sales staff
}

# Convert the dictionary to a DataFrame and reindex to match the training columns
test_data = pd.DataFrame([new_data_default_risk])
test_data = test_data.reindex(columns=training_columns, fill_value=0)

# Load your saved preprocessor
best_model = joblib.load("best_model.joblib")
preprocessor = best_model.named_steps['preprocessor']

# Preprocess the test data
test_data_processed = preprocessor.transform(test_data).astype(np.float32)

# Load the ONNX model
onnx_model_path = "models/1/xgb_classifier.onnx"  # Ensure this file path is correct
session = ort.InferenceSession(onnx_model_path)

# Prepare input data in the required format for ONNX (numpy array)
input_name = session.get_inputs()[0].name
test_data_array = test_data_processed

# Run the model and get the output
prediction = session.run(None, {input_name: test_data_array})[0]

# Display the prediction result
predicted_class = "Default" if prediction[0] == 1 else "Non-default"
print(f"Predicted Class: {predicted_class}")
print(f"Prediction Probability: {prediction}")


Predicted Class: Default
Prediction Probability: [1]


In [22]:
import pandas as pd
import joblib

# Define a sample input for testing
new_data_default_risk = {
    'AMT_INCOME_TOTAL': 10000,  # Extremely low income
    'AMT_CREDIT': 200000,  # Extremely high credit amount
    'AMT_ANNUITY': 180000,  # Very high annuity amount
    'AMT_GOODS_PRICE': 135000,  # Very high goods price
    'DAYS_BIRTH': -29200,  # Age: ~82 years
    'DAYS_EMPLOYED': 0,  # Unemployed
    'REGION_POPULATION_RELATIVE': 0.01,  # Extremely sparsely populated region
    'CNT_FAM_MEMBERS': 8,  # Very large family size
    'FLAG_MOBIL': 1,  # Has a mobile phone
    'FLAG_EMAIL': 0,  # No email
    'FLAG_WORK_PHONE': 0,  # No work phone
    'NAME_INCOME_TYPE_Working': 1,  # Not Working
    'NAME_INCOME_TYPE_Unemployed': 0,  # Unemployed
    'NAME_EDUCATION_TYPE_Higher_education': 0,  # No higher education
    'NAME_EDUCATION_TYPE_Secondary_education': 1,  # Secondary education
    'NAME_FAMILY_STATUS_Married': 1,  # Not married
    'NAME_FAMILY_STATUS_Single': 0,  # Single
    'NAME_HOUSING_TYPE_House_apartment': 0,  # Doesn't own a house/apartment
    'NAME_HOUSING_TYPE_With_parents': 1,  # Lives with parents
    'OCCUPATION_TYPE_Laborers': 1,  # Laborer
    'OCCUPATION_TYPE_Sales_staff': 0  # Not Sales staff
}

# Load the trained pipeline model (make sure 'best_model.joblib' exists in the directory)
best_model = joblib.load("best_model.joblib")

# Load the training data structure for column alignment
training_columns = joblib.load("training_columns.pkl")

# Create a DataFrame from the input data
new_data_df = pd.DataFrame([new_data_default_risk], columns=training_columns)

# Define a function to preprocess and predict new input data
def preprocess_and_predict(new_data, model, X_train_columns):
    """
    Preprocess the input data and predict the class and probabilities.

    Args:
    - new_data (dict): Input data for prediction.
    - model (Pipeline): Trained pipeline containing preprocessor and classifier.
    - X_train_columns (list): Column names used during training.

    Returns:
    - prediction (int): Predicted class (0 or 1).
    - prediction_proba (list): Probabilities for each class.
    """
    # Ensure new_data is a DataFrame and matches the format used during training
    new_data_df = pd.DataFrame([new_data], columns=X_train_columns)

    # Preprocess the input data (e.g., scaling and encoding using the fitted pipeline)
    processed_data = model.named_steps['preprocessor'].transform(new_data_df)

    # Make a prediction using the trained model
    prediction = model.named_steps['classifier'].predict(processed_data)
    prediction_proba = model.named_steps['classifier'].predict_proba(processed_data)

    # Output the prediction and probability
    predicted_class = "Default" if prediction[0] == 1 else "Non-default"
    probability_class_0 = prediction_proba[0][0]
    probability_class_1 = prediction_proba[0][1]

    print(f"Predicted Class: {predicted_class}")
    print(f"Probability of Non-default (class 0): {probability_class_0:.4f}")
    print(f"Probability of Default (class 1): {probability_class_1:.4f}")

    return prediction[0], prediction_proba[0]

# Call the function to predict
predicted_class, prediction_proba = preprocess_and_predict(new_data_default_risk, best_model, training_columns)


Predicted Class: Default
Probability of Non-default (class 0): 0.0597
Probability of Default (class 1): 0.9403


In [2]:
!pip install onnxruntime

Collecting onnxruntime
  Obtaining dependency information for onnxruntime from https://files.pythonhosted.org/packages/11/ac/4120dfb74c8e45cce1c664fc7f7ce010edd587ba67ac41489f7432eb9381/onnxruntime-1.20.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata
  Downloading onnxruntime-1.20.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.5 kB)
Collecting coloredlogs (from onnxruntime)
  Obtaining dependency information for coloredlogs from https://files.pythonhosted.org/packages/a7/06/3d6badcf13db419e25b07041d9c7b4a2c331d3f4e7134445ec5df57714cd/coloredlogs-15.0.1-py2.py3-none-any.whl.metadata
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl.metadata (12 kB)
Collecting flatbuffers (from onnxruntime)
  Obtaining dependency information for flatbuffers from https://files.pythonhosted.org/packages/fb/b4/31c461eef98b96b8ab736d97274548eaf2b2e349bf09e4de3902f7d53084/flatbuffers-24.12.23-py2.py3-none-any.whl.metadata
  Downloading flatbuffers-24.12.23

In [4]:
!pip install lime

Collecting lime
  Downloading lime-0.2.0.1.tar.gz (275 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m275.7/275.7 kB[0m [31m14.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Collecting scikit-image>=0.12 (from lime)
  Obtaining dependency information for scikit-image>=0.12 from https://files.pythonhosted.org/packages/35/e8/67e4bd1c5f6c4cd0f53505ebb9eb15f143d6fed1fb4938b542013fa3ec25/scikit_image-0.25.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata
  Downloading scikit_image-0.25.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (14 kB)
Collecting imageio!=2.35.0,>=2.33 (from scikit-image>=0.12->lime)
  Obtaining dependency information for imageio!=2.35.0,>=2.33 from https://files.pythonhosted.org/packages/5c/f9/f78e7f5ac8077c481bf6b43b8bc736605363034b3d5eb3ce8eb79f53f5f1/imageio-2.36.1-py3-none-any.whl.metadata
  Downloading imageio-2.36.1-py3-none-any.whl.metadata (5.2 kB)
Collecting t