In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import ks_2samp
from IPython.display import display
import os
import sys
import pickle

import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.tools.tools import add_constant
from sklearn.model_selection import train_test_split, cross_val_score, learning_curve
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, roc_auc_score, log_loss, roc_curve, precision_recall_curve, confusion_matrix, classification_report, average_precision_score
from sklearn.impute import SimpleImputer
from imblearn.over_sampling import SMOTE

# binning
try:
    from optbinning import OptimalBinning
except:
    ! pip install optbinning
    from optbinning import OptimalBinning

# silence warnings
import warnings
warnings.filterwarnings('ignore')


from Stored_Functions_and_Params.impute_using_bounds import ImputeUsingBounds
from Stored_Functions_and_Params.data_imputation import DataImputation
from Stored_Functions_and_Params.woe_transformer import WoETransformer
from Stored_Functions_and_Params.model_scorer import ModelScorer


(CVXPY) Feb 12 04:58:09 PM: Encountered unexpected exception importing solver GLOP:
RuntimeError('Unrecognized new version of ortools (9.11.4210). Expected < 9.10.0. Please open a feature request on cvxpy to enable support for this version.')
(CVXPY) Feb 12 04:58:09 PM: Encountered unexpected exception importing solver PDLP:
RuntimeError('Unrecognized new version of ortools (9.11.4210). Expected < 9.10.0. Please open a feature request on cvxpy to enable support for this version.')


In [2]:
# Location of the raw test extract that this notebook scores.
file_path = (
    r'M:/Risk Management/DW/Scorecard/Capacity Model/01_Input_Files/df_test_raw.csv'
)

In [3]:
# Read the test extract into a DataFrame.
# low_memory=False has pandas parse the file in one pass instead of in chunks,
# which avoids mixed-type inference within a column.
df_test = pd.read_csv(
    file_path,
    low_memory=False,
)

## EDA - Reduce Data Frame to Initial Informative Features

In [4]:
# Columns to KEEP when subsetting the test frame; every other column is dropped.
# Order is preserved because the subset step uses this list as the column order.
cols_to_keep = [
    # identifiers, application fields and BITTARGET24MONTHS
    # (presumably the 24-month target flag -- confirm)
    "BIGACCOUNTID",
    "APPLICATIONDATE",
    "SUM_OF_COMBINED_INCOME",
    "PAYMENT",
    "INT_N_EMPS",
    "PTI",
    "LTV",
    "FLTADVANCE",
    "TOTAL_INCOME",
    "FLTDOWNCASH",
    "VEHICLEYEAR",
    "BIGMILEAGE_ODOMETER",
    "DTI",
    "NUMOFDEBTS",
    "BITTARGET24MONTHS",

    # inquiry and address-history attributes (the "LN features" in the notes below)
    "INQUIRYBANKING12MONTH",
    "INQUIRYAUTO12MONTH",
    "INQUIRYCOLLECTIONS12MONTH",
    "ADDRCHANGECOUNT06MONTH",
    "ADDRCHANGECOUNT12MONTH",
    "ADDRCHANGECOUNT24MONTH",
    "ADDRCHANGECOUNT60MONTH",
    "ADDRCURRENTLENGTHOFRES",
    "ADDRCURRENTTIMENEWEST",
    "ADDRCURRENTTIMEOLDEST",
    "ADDRINPUTTIMENEWEST",
    "ADDRINPUTTIMEOLDEST",
    "ADDRINPUTLENGTHOFRES",
    "ADDRPREVIOUSLENGTHOFRES",
    "CONFIRMATIONINPUTADDRESS",
    "ADDRINPUTSUBJECTCOUNT",
    "EVICTIONCOUNT",
    "ADDRSTABILITYINDEX",
    "ADDRONFILECOUNT",

    # attributes added in the 13th-14th model iterations
    "ADDRINPUTMATCHINDEX",
    "ADDRINPUTOWNERSHIPINDEX",
    "ASSETPROP",
]



In [5]:
# Restrict the test frame to the retained columns, in the order listed in cols_to_keep.
# A column missing from the file raises KeyError here.
df_test = df_test.loc[:, cols_to_keep]

#### Impute using bounds function

In [6]:
# Apply the stored bound-based imputation to TOTAL_INCOME.
# NOTE(review): the tuple is presumably (lower bound, upper bound, fill value) --
# confirm against Stored_Functions_and_Params.impute_using_bounds.
bounds = {
    "TOTAL_INCOME": (2150, 15000, 2150),
}
imputer = ImputeUsingBounds(bounds)
df_test = imputer.process(df_test)


Processing column: TOTAL_INCOME


#### Special Value Imputation

In [7]:
# Run the stored DataImputation step over the test frame. Per the cell output it
# replaces special values (e.g. -1) with NaN and then fills the NaNs using a
# per-column strategy. Note: this rebinds `imputer`, previously the
# ImputeUsingBounds instance.
imputer = DataImputation()
df_test = imputer.process(df_test)

Replaced special values [-1] in column 'INQUIRYBANKING12MONTH' with NaN.
Replaced special values [-1] in column 'INQUIRYAUTO12MONTH' with NaN.
Replaced special values [-1] in column 'ADDRCHANGECOUNT24MONTH' with NaN.
Replaced special values [-1] in column 'ADDRCHANGECOUNT60MONTH' with NaN.
Replaced special values [-1] in column 'ADDRINPUTLENGTHOFRES' with NaN.
Replaced special values [-1] in column 'ADDRPREVIOUSLENGTHOFRES' with NaN.
Replaced special values [-1] in column 'CONFIRMATIONINPUTADDRESS' with NaN.
Replaced special values [-1] in column 'ADDRINPUTMATCHINDEX' with NaN.
Replaced special values [-1] in column 'ADDRINPUTOWNERSHIPINDEX' with NaN.
Replaced special values [-1] in column 'ASSETPROP' with NaN.
Imputed 85 NaNs in column 'INQUIRYBANKING12MONTH' using strategy 'constant'.
Imputed 85 NaNs in column 'INQUIRYAUTO12MONTH' using strategy 'constant'.
Imputed 85 NaNs in column 'ADDRCHANGECOUNT24MONTH' using strategy 'max' with value 4.0.
Imputed 85 NaNs in column 'ADDRCHANGECOU

## Optimal Binning, WOE & IV Scores

In [8]:
 list_vars_reduced = [
     'TOTAL_INCOME',
     'ADDRCHANGECOUNT24MONTH',
     'ADDRCHANGECOUNT60MONTH',
     'ADDRINPUTLENGTHOFRES',
     'ADDRPREVIOUSLENGTHOFRES',
     'CONFIRMATIONINPUTADDRESS',

     'ADDRINPUTMATCHINDEX',
     'ADDRINPUTOWNERSHIPINDEX',
     'ASSETPROP'
]

In [9]:
# Restore the binning models that were pickled for reuse.
# NOTE: pickle.load can execute arbitrary code embedded in the file -- only load
# pickles from a trusted, access-controlled location.
with open(r'M:/Risk Management/DW/Scorecard/Capacity Model/Stored_Functions_and_Params/binning_models.pkl', 'rb') as binning_file:
    binning_models = pickle.load(binning_file)

# Wrap the restored binning models in the stored WoE transformer.
woe_transformer = WoETransformer(binning_models)

# For each model variable: WoE-transform the test frame, then print that
# variable's binning table for inspection.
for var in list_vars_reduced:
    print(f"Applying WoE transformation for variable '{var}' on unseen data:")
    df_test = woe_transformer.apply_woe_transform(df_test, var)
    woe_transformer.print_binning_table(var)



Applying WoE transformation for variable 'TOTAL_INCOME' on unseen data:
Binning Table for TOTAL_INCOME:
                       Bin  Count  Count (%)  Non-event  Event  Event rate
0          (-inf, 2183.93)   2493   0.088282       1847    646    0.259126
1       [2183.93, 3468.97)   9189   0.325401       7145   2044    0.222440
2       [3468.97, 3773.05)   2134   0.075569       1668    466    0.218369
3       [3773.05, 4988.94)   7302   0.258579       5878   1424    0.195015
4       [4988.94, 5620.52)   2408   0.085272       1948    460    0.191030
5       [5620.52, 7400.54)   3228   0.114310       2619    609    0.188662
6           [7400.54, inf)   1485   0.052587       1274    211    0.142088
7                  Special      0   0.000000          0      0    0.000000
8                  Missing      0   0.000000          0      0    0.000000
Totals                      28239   1.000000      22379   5860    0.207514
Applying WoE transformation for variable 'ADDRCHANGECOUNT24MONTH' on un

In [10]:
# Restore the v4 model artifact: a dict holding the fitted logistic regression
# and the score-scaling bounds.
# NOTE: pickle.load can execute arbitrary code embedded in the file -- only load
# pickles from a trusted, access-controlled location.
with open(r'M:/Risk Management/DW/Scorecard/Capacity Model/Stored_Functions_and_Params/logreg_model.pkl', 'rb') as model_file:
    model_data = pickle.load(model_file)

# Unpack the pieces needed for scoring.
logreg = model_data['model']                      # the trained model itself
min_score_train = model_data['min_score_train']   # minimum score from training
max_score_train = model_data['max_score_train']   # maximum score from training



In [11]:
# Names of the WoE-encoded model inputs, one per raw variable (must match v4).
features = [f"{var}_woe" for var in list_vars_reduced]

# The WoE transformation has already been applied in the binning cell above, so
# re-running it for every variable is redundant work. Apply it only where the
# WoE column is still missing; this keeps the cell safe to re-run and still
# guarantees the model inputs exist if this cell is executed on its own.
# NOTE(review): assumes apply_woe_transform writes its result to "<var>_woe" --
# confirm against Stored_Functions_and_Params.woe_transformer. If it does not,
# the check is always true and the cell behaves exactly as before.
for var, woe_col in zip(list_vars_reduced, features):
    if woe_col not in df_test.columns:
        df_test = woe_transformer.apply_woe_transform(df_test, var)



In [12]:
# Build the scorer from the trained model and the list of WoE feature names.
scorer = ModelScorer(logreg, features)

# Hand the scorer the min/max scores saved with the model artifact so that
# unseen data is scaled with the training bounds rather than its own.
# NOTE(review): presumably ModelScorer reads these two attributes when scaling --
# confirm against Stored_Functions_and_Params.model_scorer.
scorer.min_score_train = min_score_train
scorer.max_score_train = max_score_train


In [13]:
# # Compute scores for test data
# df_test['Total_Score_Scaled'] = scorer.calculate_scores(df_test)

# # Print first few rows to verify scores
# print("Test Dataset with Scaled and Rounded Total Scores:")
# print(df_test[['Total_Score_Scaled']].head())


In [16]:
# Add the per-feature score columns and the total score columns to the test frame.
df_test = scorer.calculate_feature_scores(df_test)

# Spot-check the first rows: total score, summed feature scores, then one
# score column per model variable.
print("Test Dataset with Scaled and Rounded Feature Scores:")
print(
    df_test[
        [
            'Total_Score_Scaled',
            'Sum_Feature_Scores',
            *(f"{var}_woe_score" for var in list_vars_reduced),
        ]
    ].head()
)


Test Dataset with Scaled and Rounded Feature Scores:
   Total_Score_Scaled  Sum_Feature_Scores  TOTAL_INCOME_woe_score  \
0                 700         6090.002544                      78   
1                 698         6328.311104                      86   
2                 756         6228.023706                      96   
3                 714         6223.910639                     103   
4                 743         6737.427543                      78   

   ADDRCHANGECOUNT24MONTH_woe_score  ADDRCHANGECOUNT60MONTH_woe_score  \
0                                57                                82   
1                                55                                55   
2                                61                                61   
3                                57                                57   
4                                55                                99   

   ADDRINPUTLENGTHOFRES_woe_score  ADDRPREVIOUSLENGTHOFRES_woe_score  \
0                    

In [17]:
# Destination workbook for the scored test data.
excel_output_path = (
    r'M:/Risk Management/DW/Scorecard/Capacity Model/02_Output_Files/test_data_v5_with_scores.xlsx'
)

# Optional CSV export, currently disabled:
# csv_output_path = r'M:/Risk Management/DW/Scorecard/Capacity Model/02_Output_Files/test_data_with_scores.csv'
# df_test.to_csv(csv_output_path, index=False)
# print(f"Test data with scores saved to: {csv_output_path}")

# Write the scored frame to Excel without the DataFrame index, then echo the path.
df_test.to_excel(
    excel_output_path,
    index=False,
)
print(f"Test data with scores saved to: {excel_output_path}")


Test data with scores saved to: M:/Risk Management/DW/Scorecard/Capacity Model/02_Output_Files/test_data_v5_with_scores.xlsx


In [None]:
# Baseline model: 
#     Logistic Regression with regularization parameter of C=0.1
#     Baseline metrics: 
#     AUC: Train = .6493, Valid = .5897, Test = .5746
#     LogLoss: Train = .6551, Valid = .6409, Test = .6289
#     Classification report: poor recall and F2 for defaults, indicating class imbalance
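#     KS: Train = .7623, Valid = .7141, Test = .7455
#     NOTE(review): a KS statistic computed on the model scores cannot exceed the AUC, so these KS values look
#     inconsistent with the AUCs reported above -- confirm how KS was calculated.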


# Steps taken trying to improve model performance:
# 1. Ran multiple model variations applying various feature combinations, iteratively removing features based on low IV scores
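# 2. Added class weight balancing to penalize misclassified positives (defaults) more heavily, along with an L2 penalty
# 3. Lowered the regularization parameter C from 0.1 to 0.05 to mitigate overfitting (in scikit-learn, C is the inverse of
#     regularization strength, so a smaller C means stronger regularization)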
#     Result: Regularization change had negligible impact on AUC (Train: 0.6490, Validation: 0.5904, Test: 0.5743)
#     KS, Log Loss remained stable with only minor fluctuations within expected variance.
#     This suggests regularization changes had no impact on improving model performance and may be due to features
# 4. Added SMOTE to further balance class weights, equalizing num of pos. and neg. samples in training data. 
#     This balanced the class distribution from ~76:24 to 50:50
#     Result: Slight decrease in AUC (Train: 0.6483, Validation: 0.5898, Test: 0.5723). 
#     Decrease in LogLoss (Train: 0.6552, Validation: 0.6405, Test: 0.6303).
#     KS changes showed no meaningful improvement
#     Negligible changes to precision and recall. The decrease in AUC and in precision/recall
#     scores may be due to noise introduced by SMOTE. Persistent poor recall and F1 indicate SMOTE did not create informative (synthetic) samples that 
#     improved predictiveness.
# 5. Threshold tuning: Adjusted the classification threshold to improve recall for defaults. 
#     Result: Recall improved but at the cost of worsening precision. Precision dropped significantly. F1 marginally improved for defaults 
#     due to higher recall, but overall performance remained poor.
#     Threshold tuning appeared to work shifting focus to recall, but the trade off in precision and F1 worsened model predictiveness.

# Consistently low AUC and precision/recall across all data sets indicate weak separability between classes. Model struggles to achieve high recall 
# for defaults, even after threshold tuning and SMOTE. No meaningful changes improved performance, indicating features likely are too weak to predict on.


In [None]:
# Initial feature list:
#     'SUM_OF_COMBINED_INCOME',
#     'INT_N_EMPS',
#     'PTI',
#     'VEHICLEYEAR',
#     'LTV',
#     'FLTADVANCE',
#     'BIGMILEAGE_ODOMETER',
#     'INT_N_EMPS',
#     'TOTAL_INCOME',
#     'FLTDOWNCASH',
#     'DTI',
#     'NUMOFDEBTS',
#     'INQUIRYBANKING12MONTH',
#     'INQUIRYAUTO12MONTH',
#     'ADDRCHANGECOUNT12MONTH',
#     'ADDRCHANGECOUNT24MONTH',
#     'ADDRCHANGECOUNT60MONTH',
#     'ADDRCURRENTLENGTHOFRES',
#     'ADDRCURRENTTIMEOLDEST',
#     'ADDRINPUTLENGTHOFRES',
#     'ADDRPREVIOUSLENGTHOFRES',
#     'ADDRINPUTSUBJECTCOUNT',
#     'ADDRSTABILITYINDEX',
#     'ADDRONFILECOUNT',
#     'CONFIRMATIONINPUTADDRESS',
#     'ADDRINPUTTIMENEWEST',
#     'ADDRINPUTTIMEOLDEST',
#     'ADDRCURRENTTIMENEWEST',
#     'EVICTIONCOUNT',
#     'INQUIRYCOLLECTIONS12MONTH',
#     'ADDRCHANGECOUNT06MONTH'

# 2nd model iteration: Removed several application and collateral features (VEHICLEYEAR, SUM_OF_COMBINED_INCOME, INT_N_EMPS, PTI).
# Decision was made to remove some structural features. 

# 3rd model iteration: Removed LN features (ADDRCURRENTTIMEOLDEST, ADDRCHANGECOUNT12MONTH, CONFIRMATIONINPUTADDRESS, 
#                     ADDRINPUTTIMENEWEST, ADDRINPUTTIMEOLDEST, ADDRCURRENTTIMENEWEST, ADDRINPUTSUBJECTCOUNT, ADDRCHANGECOUNT06MONTH)
# Removed additional features for explainability and simplification. 
# Features removed were deemed either redundant or not informative enough to retain. Model performance was sub-optimal, with AUC in the .60-.63 range. 
# False positive recall ranged from 30-40%

# 4th model iteration: Removed features (BIGMILEAGE_ODOMETER, FLTADVANCE, LTV, ADDRPREVIOUSLENGTHOFRES, INQUIRYCOLLECTIONS12MONTH, EVICTIONCOUNT)
# Additionally removed any remaining structural features per Steve. Performance and predictability dropped. Further iterated through random feature 
# selection and model fitting trying to increase performance. AUC remained .56-.59 on valid and test sets. False positive recall still the same

# 5-6th model iterations: Reran switching out various LN features.
# Added class weight balancing, increased penalization for incorrect classifications, added SMOTE to help with overfitting/class balance. 
# AUC remained in the same range, slightly improved but false positives on the default class stayed the same

# 7-8th model iteration (current) feature list: 
# (TOTAL_INCOME, FLTDOWNCASH, DTI, INQUIRYBANKING12MONTH, ADDRCHANGECOUNT24MONTH, ADDRCHANGECOUNT60MONTH, ADDRCURRENTLENGTHOFRES, ADDRINPUTLENGTHOFRES)
# Continued refitting the model; performance metrics improved slightly (AUC in the low .60s). False positives ranged 25-35%
# Standardized overall score output to match TUX model score range

# 9-11th model iterations: 
# feature list (TOTAL_INCOME, DTI, INQUIRYBANKING12MONTH, ADDRCHANGECOUNT24MONTH, ADDRCHANGECOUNT60MONTH, ADDRINPUTLENGTHOFRES, 
# ADDRPREVIOUSLENGTHOFRES, CONFIRMATIONINPUTADDRESS). 
# Continued training model on tighter penalization, balancing parameters. Reduced regularization for overfitting. Performance metrics did not improve. 
# Changed imputation on Total_Income from mode (3120) to min income value (2150). 
# Done to resolve WOE binning issues for imputed population. Bin created non-linear woe value and small bin range. 
# Result - auc ranged .60-.64, false positive recall remained the same. 

# 12th iteration:
# Feature list TOTAL_INCOME(with new imputation values), DTI_Eng, INQUIRYBANKING12MONTH, ADDRCHANGECOUNT24MONTH, ADDRCHANGECOUNT60MONTH, 
# ADDRINPUTLENGTHOFRES, ADDRPREVIOUSLENGTHOFRES, CONFIRMATIONINPUTADDRESS
# Resampled model features including EVICTIONCOUNT, ADDRCURRENTLENGTHOFRES, INQUIRYAUTO12MONTH, ADDRINPUTSUBJECTCOUNT
# Engineered DTI removing our payment from the tempstatic DTI value in data set. Eng_DTI only looking at debts on cbr that we used
# model performance suffered. AUC .56-.62 range. 
# Refit and calibrated to test possible over-penalization and class balancing. Model performance did not improve 

# 13th-14th Iteration:
# Imputed DTI_Eng outliers to 0.30. Model performance stayed the same as 12th
# Features added: ADDRCURRENTDWELLTYPE, ADDRCURRENTOWNERSHIPINDEX, ADDRINPUTMATCHINDEX, ADDRINPUTOWNERSHIPINDEX, ADDRINPUTPHONECOUNT, ADDRINPUTPROBLEMS,
#          ASSETPROP, SHORTTERMLOANREQUEST12MONTH, SHORTTERMLOANREQUEST24MONTH
# AUC stayed the same through both runs (mid-high .50s). Accuracy and precision recall remained insufficient.




In [None]:
# Build code in v5 to incorporate pickled functions and replicable code for integration

# update model output to include logodds score and adjusted logodds as needed
