## Importing Datasets

In [259]:
# Importing libraries
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [261]:
# Loading the dataset
data = pd.read_csv("dataset/loan_data.csv")
original_data = data.copy()
data["Default"] = data["Default"].astype(bool)

In [262]:
# Analysing the dataset
print(f"DATA PREVIEW: \n{data.head()}\n")

# Handling Missing Values
print(f"MISSING VALUES: \n{data.isnull().sum()}\n")
print(f"DATATYPES: \n{data.dtypes}\n")
print(f"CLASS BALANCE: \n{data["LoanApprovalStatus"].value_counts(normalize = True)}\n")

DATA PREVIEW: 
       LoanID  Age  Income  LoanAmount  CreditScore  MonthsEmployed  \
0  I38PQUQS96   56   85994       50587          520              80   
1  HPSK72WA7R   69   50432      124440          458              15   
2  C1OZ6DPJ8Y   46   84208      129188          451              26   
3  V2KKSFM3UN   32   31713       44799          743               0   
4  EY08JDHTZP   60   20437        9139          633               8   

   NumCreditLines  InterestRate  LoanTerm  DTIRatio    Education  \
0               4         15.23        36      0.44   Bachelor's   
1               1          4.81        60      0.68     Master's   
2               3         21.17        24      0.31     Master's   
3               3          7.07        24      0.23  High School   
4               4          6.51        48      0.73   Bachelor's   

  EmploymentType MaritalStatus HasMortgage HasDependents LoanPurpose  \
0      Full-time      Divorced         Yes           Yes       Other   
1    

## One-Hot Encoding

In [264]:
# Identify categorical columns (excluding numerical ones)
cat_columns = data.select_dtypes(include=['object']).columns.tolist()

# Exclude LoanID from categorical columns before encoding
cat_columns = [col for col in cat_columns if col != "LoanID"]

# Manually add 'Default' as a categorical column
cat_columns.append("Default")

print(f"Categorical columns: {cat_columns}")

Categorical columns: ['Education', 'EmploymentType', 'MaritalStatus', 'HasMortgage', 'HasDependents', 'LoanPurpose', 'HasCoSigner', 'LoanApprovalStatus', 'Default']


In [266]:
# Check if any categorical columns exist before encoding
if not cat_columns:
    print("⚠️ No categorical columns found for encoding!")
else:
    # One-Hot Encoding for categorical features
    encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
    encoded_cat_features = encoder.fit_transform(data[cat_columns])

    # Convert encoded categorical features into a DataFrame
    encoded_df = pd.DataFrame(encoded_cat_features, columns=encoder.get_feature_names_out(cat_columns))

    # Drop original categorical columns and concatenate encoded data
    data = data.drop(columns=cat_columns).reset_index(drop=True)
    data = pd.concat([data, encoded_df], axis=1)

    print(f"✅ Categorical encoding complete! Updated data preview:\n{data.head()}")

✅ Categorical encoding complete! Updated data preview:
       LoanID  Age  Income  LoanAmount  CreditScore  MonthsEmployed  \
0  I38PQUQS96   56   85994       50587          520              80   
1  HPSK72WA7R   69   50432      124440          458              15   
2  C1OZ6DPJ8Y   46   84208      129188          451              26   
3  V2KKSFM3UN   32   31713       44799          743               0   
4  EY08JDHTZP   60   20437        9139          633               8   

   NumCreditLines  InterestRate  LoanTerm  DTIRatio  ...  \
0               4         15.23        36      0.44  ...   
1               1          4.81        60      0.68  ...   
2               3         21.17        24      0.31  ...   
3               3          7.07        24      0.23  ...   
4               4          6.51        48      0.73  ...   

   LoanPurpose_Business  LoanPurpose_Education  LoanPurpose_Home  \
0                   0.0                    0.0               0.0   
1                   0

In [269]:
# Identify numerical columns
num_columns = original_data.select_dtypes(include=['int64', 'float64']).columns.tolist()

# Ensure only existing numerical columns are used for scaling
num_columns = [col for col in num_columns if col in data.columns]

print(f"Numerical Columns before scaling: {num_columns}")



Numerical Columns before scaling: ['Age', 'Income', 'LoanAmount', 'CreditScore', 'MonthsEmployed', 'NumCreditLines', 'InterestRate', 'LoanTerm', 'DTIRatio']


## Splitting Data

In [271]:
from sklearn.model_selection import train_test_split

# 🚀 Step 7: Drop LoanID (if it exists)
if "LoanID" in data.columns:
    data = data.drop(columns=["LoanID"])
    

# 🔹 Extract Target Column
y = data["LoanApprovalStatus_Approved"].astype(int)  # Convert target to numeric
X = data.drop(columns=["LoanApprovalStatus_Approved", "LoanApprovalStatus_Rejected"], errors='ignore')
X = X.drop(columns=["LoanID"], errors='ignore')  # Remove LoanID if it exists

# 🚀 Split data BEFORE applying SMOTE
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp)


print(f"✅ Data Split | Train: {len(X_train)}, Val: {len(X_val)}, Test: {len(X_test)}")


✅ Data Split | Train: 178742, Val: 38302, Test: 38303


In [274]:
print(y.value_counts())


LoanApprovalStatus_Approved
0    173492
1     81855
Name: count, dtype: int64


## SMOTE (Synthetic Minority Over-sampling Technique)

In [278]:
from imblearn.over_sampling import SMOTE

# 🚀 Apply SMOTE only to the training data
smote = SMOTE(sampling_strategy=0.7, random_state=42)  # Adjust ratio if needed
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)

# ✅ Convert back to DataFrame (SMOTE may return NumPy arrays)
X_train_smote = pd.DataFrame(X_train_smote, columns=X_train.columns)
y_train_smote = pd.Series(y_train_smote)  # Ensure target remains a Series


In [279]:
# ✅ Correct Dataset Size Comparison
print(f"Dataset size before SMOTE: {X_train.shape[0]} samples")
print(f"Dataset size after SMOTE: {X_train_smote.shape[0]} samples")
print(f"🔍 New samples added: {X_train_smote.shape[0] - X_train.shape[0]}")

# ✅ Class Distribution Before and After SMOTE
before_smote = pd.DataFrame({
    "Count": y_train.value_counts(),
    "Percentage": y_train.value_counts(normalize=True) * 100
})

after_smote = pd.DataFrame({
    "Count": y_train_smote.value_counts(),
    "Percentage": y_train_smote.value_counts(normalize=True) * 100
})

print("\n\n🔴 Before SMOTE:\n", before_smote)
print("\n✅ After SMOTE:\n", after_smote)


Dataset size before SMOTE: 178742 samples
Dataset size after SMOTE: 206454 samples
🔍 New samples added: 27712


🔴 Before SMOTE:
                               Count  Percentage
LoanApprovalStatus_Approved                    
0                            121444    67.94374
1                             57298    32.05626

✅ After SMOTE:
                               Count  Percentage
LoanApprovalStatus_Approved                    
0                            121444   58.823757
1                             85010   41.176243


## Feature Scaling

In [285]:
from sklearn.preprocessing import StandardScaler

# 🚀 Step 1: Identify Numerical Columns
num_cols = X_train_smote.select_dtypes(include=['int64', 'float64']).columns.tolist()

# 🚀 Step 2: Initialize StandardScaler and Fit on Training Data
scaler = StandardScaler()
scaler.fit(X_train_smote[num_cols])  # ✅ Fit only on training data

# 🚀 Step 3: Create Copies for Scaling
X_train_scaled = X_train_smote.copy()
X_val_scaled = X_val.copy()
X_test_scaled = X_test.copy()

# 🚀 Step 4: Apply Standardization (Only on Numerical Columns)
X_train_scaled[num_cols] = scaler.transform(X_train_smote[num_cols])
X_val_scaled[num_cols] = scaler.transform(X_val[num_cols])
X_test_scaled[num_cols] = scaler.transform(X_test[num_cols])

print("✅ Feature Scaling Applied Successfully!")


✅ Feature Scaling Applied Successfully!


## Train the Model

In [288]:
import xgboost as xgb
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import StratifiedKFold, cross_val_score
import numpy as np
import joblib


# 🚀 Check class balance to adjust `scale_pos_weight`
unique, counts = np.unique(y_train, return_counts=True)
scale_pos_weight = counts[0] / counts[1] if counts[1] > 0 else 1  # Prevent division by zero

# 🚀 Optimized Hyperparameters
best_params = {
    "objective": "binary:logistic",
    "eval_metric": "aucpr",
    "random_state": 42,
    "max_depth": 2,
    "learning_rate": 0.1,
    "subsample": 0.7,
    "colsample_bytree": 0.65,
    "reg_lambda": 100,
    "reg_alpha": 50,
    "gamma": 0.5,
    "n_jobs": 4,
    "scale_pos_weight": scale_pos_weight
}

# 🚀 Train XGBClassifier (Manually Implementing Early Stopping)
xgb_model = xgb.XGBClassifier(**best_params, n_estimators=50)  # ✅ No early stopping argument

best_iteration = 0
best_score = float("-inf")
patience = 20  # Stop if no improvement for 20 rounds
wait = 0

for i in range(1, 51):  # Train up to 50 boosting rounds
    xgb_model.n_estimators = i
    xgb_model.fit(X_train_scaled, y_train_smote)  # Train with the updated estimator count
    
    # Validate the model
    y_val_pred = xgb_model.predict(X_val_scaled)
    score = accuracy_score(y_val, y_val_pred)
    
    # Check improvement
    if score > best_score:
        best_score = score
        best_iteration = i
        wait = 0  # Reset patience counter
    else:
        wait += 1  # Increase patience counter
    
    if wait >= patience:
        print(f"✅ Early stopping triggered at {best_iteration} iterations.")
        break  # Stop training

# Set the final model to best iteration
xgb_model.n_estimators = best_iteration
xgb_model.fit(X_train_scaled, y_train_smote)  # Retrain on best iteration

print(f"✅ Model trained with best iteration: {best_iteration}")


✅ Early stopping triggered at 11 iterations.
✅ Model trained with best iteration: 11


## Evaluate the Model

In [291]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.model_selection import StratifiedKFold, cross_val_score

# 🚀 Make Predictions
y_pred_proba = xgb_model.predict_proba(X_test_scaled)[:, 1]  # Get probability scores
y_pred = (y_pred_proba > 0.5).astype(int)  # Convert to 0/1 labels

# 🚀 Evaluate Model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_pred_proba)  # AUC based on probabilities

print(f"✅ Model Evaluation on Test Data:")
print(f"   Accuracy: {accuracy:.4f}")
print(f"   Precision: {precision:.4f}")
print(f"   Recall: {recall:.4f}")
print(f"   F1 Score: {f1:.4f}")
print(f"   ROC AUC: {roc_auc:.4f}")

# 🚀 Cross-validation (On Original Training Data, Not SMOTE)
cv = StratifiedKFold(n_splits=7, shuffle=True, random_state=42)
cv_scores = cross_val_score(xgb_model, X_train, y_train, cv=cv, scoring='accuracy')

print(f"⚡ Cross-validation Accuracy: {np.mean(cv_scores):.4f}")


✅ Model Evaluation on Test Data:
   Accuracy: 0.9526
   Precision: 0.8966
   Recall: 0.9634
   F1 Score: 0.9288
   ROC AUC: 0.9913
⚡ Cross-validation Accuracy: 0.9659


## Save & Deploy model

In [294]:
import joblib


# Save the trained model
joblib.dump(xgb_model, "loan_model.pkl")
print("✅ Model trained and saved successfully!")

# Save the scaler
joblib.dump(scaler, "scaler.pkl")
print("✅ Scaler saved successfully!")

# Load the model and scaler for verification
model = joblib.load("loan_model.pkl")
scaler = joblib.load("scaler.pkl")

print(model)  # Should print XGBClassifier with correct parameters
print(scaler)  # Should print StandardScaler() with fitted parameters


✅ Model trained and saved successfully!
✅ Scaler saved successfully!
XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=0.65, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric='aucpr', feature_types=None,
              feature_weights=None, gamma=0.5, grow_policy=None,
              importance_type=None, interaction_constraints=None,
              learning_rate=0.1, max_bin=None, max_cat_threshold=None,
              max_cat_to_onehot=None, max_delta_step=None, max_depth=2,
              max_leaves=None, min_child_weight=None, missing=nan,
              monotone_constraints=None, multi_strategy=None, n_estimators=11,
              n_jobs=4, num_parallel_tree=None, ...)
StandardScaler()


In [296]:
import pandas as pd

# Get feature names from the training data
feature_names = X_train_smote.columns  # Ensure these match the original dataset

# Convert sample_data to a DataFrame with correct column names
sample_data_df = pd.DataFrame(sample_data, columns=feature_names)

# Transform the sample data
scaled_sample = scaler.transform(sample_data_df)

print("Scaled Sample:", scaled_sample)


Scaled Sample: [[65.24098568 -1.99613171 -1.79664837 -3.72948067 -1.76052988 -2.22871297
  -2.08055019 -2.17402394 -2.15945819 -0.59282102 -0.59352378 -0.58827461
  -0.58849171 -0.5890156  -0.59328474 -0.58918022 -0.59130568 -0.72212275
  -0.72326244 -0.72501753 -1.02617139 -1.02059339 -1.02533348 -1.02102306
  -0.48887748 -0.48027251 -0.49509993 -0.472367   -0.60267452 -1.02247367
  -1.02426523 -2.84149008 -0.36669351]]


In [298]:
print(type(scaler))  # Should print <class 'sklearn.preprocessing._data.StandardScaler'>


<class 'sklearn.preprocessing._data.StandardScaler'>


In [27]:
# ### Training various models
# from sklearn.linear_model import LogisticRegression
# from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
# from sklearn.tree import DecisionTreeClassifier
# from xgboost import XGBClassifier
# from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# # Define models
# models = {
#     "Logistic Regression": LogisticRegression(max_iter=1000),
#     "Decision Tree": DecisionTreeClassifier(max_depth=10, min_samples_split=50, min_samples_leaf=25),
#     "Random Forest": RandomForestClassifier(n_estimators=100, max_depth=10, min_samples_split=50, min_samples_leaf=25),
#     "AdaBoost": AdaBoostClassifier(),
#     "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric="logloss")
# }

# # Train and evaluate models
# results = {}
# for name, model in models.items():
#     print(f"Training {name}...")
#     model.fit(X_train, y_train)
#     y_pred = model.predict(X_test)
    
#     # Compute metrics
#     accuracy = accuracy_score(y_test, y_pred)
#     precision = precision_score(y_test, y_pred)
#     recall = recall_score(y_test, y_pred)
#     f1 = f1_score(y_test, y_pred)

#     results[name] = {
#         "Accuracy": accuracy,
#         "Precision": precision,
#         "Recall": recall,
#         "F1 Score": f1
#     }

# # Print results
# print("\nModel Performance:")
# for model, metrics in results.items():
#     print(f"\n{model}:")
#     for metric, value in metrics.items():
#         print(f"{metric}: {value:.4f}")


In [35]:
print(f"DATATYPES: \n{data.dtypes}\n")

DATATYPES: 
Age                               int64
Income                            int64
LoanAmount                        int64
CreditScore                       int64
MonthsEmployed                    int64
NumCreditLines                    int64
InterestRate                    float64
LoanTerm                          int64
DTIRatio                        float64
Education_Bachelor's            float64
Education_High School           float64
Education_Master's              float64
Education_PhD                   float64
EmploymentType_Full-time        float64
EmploymentType_Part-time        float64
EmploymentType_Self-employed    float64
EmploymentType_Unemployed       float64
MaritalStatus_Divorced          float64
MaritalStatus_Married           float64
MaritalStatus_Single            float64
HasMortgage_No                  float64
HasMortgage_Yes                 float64
HasDependents_No                float64
HasDependents_Yes               float64
LoanPurpose_Auto            

In [37]:
print(X_train.columns)

Index(['Age', 'Income', 'LoanAmount', 'CreditScore', 'MonthsEmployed',
       'NumCreditLines', 'InterestRate', 'LoanTerm', 'DTIRatio',
       'Education_Bachelor's', 'Education_High School', 'Education_Master's',
       'Education_PhD', 'EmploymentType_Full-time', 'EmploymentType_Part-time',
       'EmploymentType_Self-employed', 'EmploymentType_Unemployed',
       'MaritalStatus_Divorced', 'MaritalStatus_Married',
       'MaritalStatus_Single', 'HasMortgage_No', 'HasMortgage_Yes',
       'HasDependents_No', 'HasDependents_Yes', 'LoanPurpose_Auto',
       'LoanPurpose_Business', 'LoanPurpose_Education', 'LoanPurpose_Home',
       'LoanPurpose_Other', 'HasCoSigner_No', 'HasCoSigner_Yes',
       'Default_False', 'Default_True'],
      dtype='object')


In [197]:
# 📌 Import Required Libraries
import pandas as pd
import numpy as np
import pickle
from sklearn.preprocessing import StandardScaler, OneHotEncoder

# 📌 Load Trained Model
model_filename = "loan_model.pkl"  # Change this if filename differs
with open(model_filename, "rb") as file:
    loan_model = pickle.load(file)

# 📌 Define Original Dataset Columns (Before Encoding)
original_columns = [
    "ApplicantIncome", "LoanAmount", "CreditScore", "DebtToIncomeRatio", "EmploymentYears",
    "Education", "EmploymentType", "MaritalStatus", "HasMortgage", "HasDependents",
    "LoanPurpose", "HasCoSigner", "Default"
]

# 📌 Create Sample Data That Should Be Rejected (Original Column Names)
rejected_loan_sample = pd.DataFrame([{
    "ApplicantIncome": 2000,        # Low income
    "LoanAmount": 300000,              # High loan amount relative to income
    "CreditScore": 20,             # Poor credit score
    "DebtToIncomeRatio": 0.6,       # High debt-to-income ratio
    "EmploymentYears": 0,           # No work experience
    "Education": "High School",     # Low education level
    "EmploymentType": "Unemployed", # No job
    "MaritalStatus": "Single",      # No financial support from spouse
    "HasMortgage": "No",            # No assets as collateral
    "HasDependents": "No",          # No dependents (neutral factor)
    "LoanPurpose": "Business",         # Generic loan purpose
    "HasCoSigner": "No",            # No co-signer to support repayment
    "Default": "True"               # High risk case
}])

print("🚀 Original Sample Loan Application Data (Before Processing):")
display(rejected_loan_sample)

# ----------------- 🔹 DATA PROCESSING 🔹 -----------------
# 📌 Convert Categorical Columns Using One-Hot Encoding
categorical_cols = ["Education", "EmploymentType", "MaritalStatus", "HasMortgage", "HasDependents", "LoanPurpose", "HasCoSigner", "Default"]
encoder = OneHotEncoder(drop="first", sparse_output=False)  # Drop first to avoid dummy variable trap
encoded_features = encoder.fit_transform(rejected_loan_sample[categorical_cols])
encoded_feature_names = encoder.get_feature_names_out(categorical_cols)

# 📌 Convert Encoded Features to DataFrame
encoded_df = pd.DataFrame(encoded_features, columns=encoded_feature_names)

# 📌 Merge Encoded Data with Numeric Features
numeric_cols = ["ApplicantIncome", "LoanAmount", "CreditScore", "DebtToIncomeRatio", "EmploymentYears"]
processed_loan_sample = pd.concat([rejected_loan_sample[numeric_cols].reset_index(drop=True), encoded_df], axis=1)

print("🔹 Processed Sample Data (After Encoding):")
display(processed_loan_sample)

# 📌 Load Feature Names from Training Data (to ensure alignment)
trained_feature_names = [
    'Age', 'Income', 'LoanAmount', 'CreditScore', 'MonthsEmployed', 'NumCreditLines',
    'InterestRate', 'LoanTerm', 'DTIRatio', "Education_Bachelor's", 'Education_High School', 
    "Education_Master's", 'Education_PhD', 'EmploymentType_Full-time', 'EmploymentType_Part-time', 
    'EmploymentType_Self-employed', 'EmploymentType_Unemployed', 'MaritalStatus_Divorced', 
    'MaritalStatus_Married', 'MaritalStatus_Single', 'HasMortgage_No', 'HasMortgage_Yes', 
    'HasDependents_No', 'HasDependents_Yes', 'LoanPurpose_Auto', 'LoanPurpose_Business', 
    'LoanPurpose_Education', 'LoanPurpose_Home', 'LoanPurpose_Other', 'HasCoSigner_No', 
    'HasCoSigner_Yes', 'Default_False', 'Default_True'
]

# 📌 Ensure Column Order Matches Training Data (Fill Missing Columns with 0)
for col in trained_feature_names:
    if col not in processed_loan_sample.columns:
        processed_loan_sample[col] = 0  # Assign 0 to missing columns

# 📌 Reorder Columns to Match Training Data
processed_loan_sample = processed_loan_sample[trained_feature_names]

# ----------------- 🔹 FEATURE SCALING 🔹 -----------------
# 📌 Apply Scaling (Use the same scaler as in training)
scaler = StandardScaler()
processed_loan_sample_scaled = scaler.fit_transform(processed_loan_sample)

print("✅ Feature Scaling Applied!")

# ----------------- 🔹 MODEL PREDICTION 🔹 -----------------
# 📌 Make Prediction
prediction = loan_model.predict(processed_loan_sample_scaled)
loan_status = "Rejected" if prediction[0] == 0 else "Approved"

# 📌 Display Final Prediction
print("\n🔴 Model Prediction: Loan is **{}**".format(loan_status))


🚀 Original Sample Loan Application Data (Before Processing):


Unnamed: 0,ApplicantIncome,LoanAmount,CreditScore,DebtToIncomeRatio,EmploymentYears,Education,EmploymentType,MaritalStatus,HasMortgage,HasDependents,LoanPurpose,HasCoSigner,Default
0,2000,300000,20,0.6,0,High School,Unemployed,Single,No,No,Business,No,True


🔹 Processed Sample Data (After Encoding):


Unnamed: 0,ApplicantIncome,LoanAmount,CreditScore,DebtToIncomeRatio,EmploymentYears
0,2000,300000,20,0.6,0


✅ Feature Scaling Applied!

🔴 Model Prediction: Loan is **Approved**


In [153]:
importances = loan_model.feature_importances_
features = processed_loan_sample.columns
sorted_indices = np.argsort(importances)[::-1]

for idx in sorted_indices:
    print(f"{features[idx]}: {importances[idx]:.4f}")
 # Check if rejected cases are balanced

LoanPurpose_Other: 0.4146
CreditScore: 0.1747
DTIRatio: 0.1411
LoanPurpose_Business: 0.1209
LoanPurpose_Home: 0.0829
LoanPurpose_Auto: 0.0379
LoanPurpose_Education: 0.0251
LoanAmount: 0.0030
LoanTerm: 0.0000
Education_Master's: 0.0000
Education_High School: 0.0000
Education_Bachelor's: 0.0000
Default_True: 0.0000
InterestRate: 0.0000
NumCreditLines: 0.0000
MonthsEmployed: 0.0000
EmploymentType_Full-time: 0.0000
Income: 0.0000
Education_PhD: 0.0000
EmploymentType_Unemployed: 0.0000
EmploymentType_Part-time: 0.0000
EmploymentType_Self-employed: 0.0000
Default_False: 0.0000
MaritalStatus_Divorced: 0.0000
MaritalStatus_Married: 0.0000
MaritalStatus_Single: 0.0000
HasMortgage_No: 0.0000
HasMortgage_Yes: 0.0000
HasDependents_No: 0.0000
HasDependents_Yes: 0.0000
HasCoSigner_No: 0.0000
HasCoSigner_Yes: 0.0000
Age: 0.0000


In [155]:
print(processed_loan_sample_scaled)

[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0.]]


In [157]:
test_case = X_train[y_train == 0].sample(1)  # Pick a known rejected loan
prediction = loan_model.predict(test_case)
print("Expected: Rejected | Predicted:", "Rejected" if prediction[0] == 0 else "Approved")


Expected: Rejected | Predicted: Approved


In [161]:
probability = loan_model.predict_proba(processed_loan_sample_scaled)[0][1]  # Get probability of approval

threshold = 0.9  # Adjust based on data
loan_status = "Rejected" if probability < threshold else "Approved"

print(f"Approval Probability: {probability:.4f}")
print(f"Final Decision: {loan_status}")


Approval Probability: 0.8267
Final Decision: Rejected
