# MODEL 1 : 

In [12]:
import pandas as pd
import joblib

# Load datasets
df_processed = pd.read_csv("df_final_features_v2.csv")   # encoded features
df_raw = pd.read_csv("original_dataset.csv")             # includes loan_id
# NOTE: joblib.load unpickles the file, so only load model artifacts you trust.
model = joblib.load("best_model_model1.pkl")

# Attach loan_id to the encoded features.
# The two files share no join key, so the alignment is purely positional:
# row i of the raw file is assumed to describe row i of the feature file.
if 'loan_id' not in df_processed.columns:
    if 'loan_id' not in df_raw.columns:
        raise KeyError(
            "'loan_id' is missing from both original_dataset.csv and "
            "df_final_features_v2.csv; cannot look loans up by ID."
        )
    # A length mismatch would pad one side with NaN and silently pair
    # loan IDs with the wrong feature rows, so fail loudly instead.
    if len(df_raw) != len(df_processed):
        raise ValueError(
            f"Row count mismatch: raw has {len(df_raw)} rows, "
            f"features have {len(df_processed)}; positional alignment is unsafe."
        )
    df_processed = pd.concat([df_raw[['loan_id']].reset_index(drop=True),
                              df_processed.reset_index(drop=True)], axis=1)
# Otherwise the feature file already carries loan_id. Merging the raw column in
# again would create loan_id_x / loan_id_y and break the lookups below, so the
# frame is used as loaded.

# Verify merge worked
print("‚úÖ Data merged successfully.")
print("Shape after merge:", df_processed.shape)
print("Sample rows:\n", df_processed[['loan_id']].head())

# Extract trained features
trained_features = model.feature_names_in_


‚úÖ Data merged successfully.
Shape after merge: (19999, 97)
Sample rows:
    loan_id
0        1
1        2
2        3
3        4
4        5


In [15]:
# df_processed.columns.to_list()

In [23]:

# ================================
# Step 3: Prediction function
# ================================
def predict_loan_status(loan_id_input):
    """
    Look up a loan by ID in the merged dataset and print its predicted status.

    Reads the module-level ``df_processed`` (feature table with a ``loan_id``
    column), ``model`` (object exposing ``predict`` and, optionally,
    ``predict_proba``) and ``trained_features`` (the columns passed to the model).

    Parameters
    ----------
    loan_id_input : int
        Value compared for equality against ``df_processed['loan_id']``.

    Returns
    -------
    None
        The result is printed; nothing is returned. If no row matches, a
        "not found" message is printed instead.
    """
    match_row = df_processed[df_processed['loan_id'] == loan_id_input]

    if match_row.empty:
        print(f"‚ùå Loan ID {loan_id_input} not found.")
        return

    # Force every feature to numeric; values that cannot be parsed become NaN
    # and are then replaced by 0 so the model always receives numbers.
    X_pred = match_row[trained_features].apply(pd.to_numeric, errors='coerce').fillna(0)

    # If several rows share the ID, only the first prediction is reported.
    y_pred = model.predict(X_pred)[0]
    try:
        # Column 0 is reported as the approval probability (label 0 = approved
        # in label_map below).
        y_prob = model.predict_proba(X_pred)[0][0]
    except Exception:
        # Best effort: models without usable probabilities still print a status.
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit through.
        y_prob = None

    label_map = {0: "approved ‚úÖ", 1: "rejected ‚ùå"}
    label = label_map.get(y_pred, "unknown")

    print(f"\nüîç Prediction for Loan ID {loan_id_input}:")
    print(f"   ‚û§ Status: {label}")
    if y_prob is not None:
        print(f"   ‚û§ Probability (approval): {y_prob:.2%}")
    print("-" * 40)


# ================================
# Step 4: Interactive input loop
# ================================
if __name__ == "__main__":
    # Simple REPL: keep asking for loan IDs until the user types "exit".
    print("\nüîÆ Loan Approval Prediction System")
    print("Type 'exit' anytime to quit.")
    print("----------------------------------")

    while True:
        user_input = input("Enter Loan ID to predict: ").strip()
        if user_input.lower() == "exit":
            print("üëã Exiting prediction system.")
            break
        # isdecimal() rather than isdigit(): isdigit() also accepts characters
        # such as superscript digits ("²") that int() cannot parse, which
        # would crash the loop with ValueError.
        if not user_input.isdecimal():
            print("‚ö†Ô∏è Please enter a valid numeric Loan ID.\n")
            continue

        loan_id_input = int(user_input)
        predict_loan_status(loan_id_input)


üîÆ Loan Approval Prediction System
Type 'exit' anytime to quit.
----------------------------------


Enter Loan ID to predict:  9042272



üîç Prediction for Loan ID 9042272:
   ‚û§ Status: approved ‚úÖ
   ‚û§ Probability (approval): 93.00%
----------------------------------------


Enter Loan ID to predict:  12682421



üîç Prediction for Loan ID 12682421:
   ‚û§ Status: rejected ‚ùå
   ‚û§ Probability (approval): 12.00%
----------------------------------------


Enter Loan ID to predict:  5049994



üîç Prediction for Loan ID 5049994:
   ‚û§ Status: rejected ‚ùå
   ‚û§ Probability (approval): 17.00%
----------------------------------------


Enter Loan ID to predict:  9214105



üîç Prediction for Loan ID 9214105:
   ‚û§ Status: rejected ‚ùå
   ‚û§ Probability (approval): 14.00%
----------------------------------------


Enter Loan ID to predict:  1710



üîç Prediction for Loan ID 1710:
   ‚û§ Status: approved ‚úÖ
   ‚û§ Probability (approval): 91.00%
----------------------------------------


Enter Loan ID to predict:  1351



üîç Prediction for Loan ID 1351:
   ‚û§ Status: rejected ‚ùå
   ‚û§ Probability (approval): 15.00%
----------------------------------------


Enter Loan ID to predict:  1



üîç Prediction for Loan ID 1:
   ‚û§ Status: approved ‚úÖ
   ‚û§ Probability (approval): 90.00%
----------------------------------------


Enter Loan ID to predict:  exit


üëã Exiting prediction system.


# MODEL 2 : 

In [None]:
import pandas as pd

df_raw = pd.read_csv("original_dataset.csv")
df_model2 = pd.read_csv("df_model2_features.csv")

# Keep only the loan_id column of the rows whose loan_status is the string 'approved'
approved_loan_ids = (
    df_raw.loc[df_raw['loan_status'] == 'approved', ['loan_id']]
    .reset_index(drop=True)
)

print("Filtered approved count:", approved_loan_ids.shape)
print("Model2 features count:", df_model2.shape)

# The IDs are attached by position, so the two row counts must agree first
if approved_loan_ids.shape[0] != df_model2.shape[0]:
    print("‚ùå Still mismatch.")
    # Show a sample of each side to help diagnose the difference
    print("Approved IDs head:\n", approved_loan_ids.head())
    print("df_model2 head:\n", df_model2.head())
else:
    # Put loan_id in the first column and persist the result for later cells
    df_model2.insert(0, 'loan_id', approved_loan_ids.values)
    df_model2.to_csv("df_model2_features_with_id.csv", index=False)
    print("‚úÖ loan_id merged successfully.")

Filtered approved count: (12559, 1)
Model2 features count: (12559, 23)
‚úÖ loan_id merged successfully.


In [None]:
import pandas as pd
import numpy as np
import joblib

# -----------------------------
# Load datasets and models
# -----------------------------

# Model 1 dataset & model
df_processed = pd.read_csv("df_final_features_v2.csv")   # Model 1 features (encoded)
df_raw = pd.read_csv("original_dataset.csv")             # raw dataset with loan_id

# NOTE: joblib.load unpickles the file, so only load model artifacts you trust.
model1 = joblib.load("best_model_model1.pkl")

# Model 2 dataset & model
df_model2 = pd.read_csv("df_model2_features_with_id.csv")
model2 = joblib.load("model2_best_regressor.pkl")
features_model2 = joblib.load("model2_feature_names.pkl")  # list of Model 2 feature names

# -----------------------------
# Attach loan_id from the raw dataset to the Model 1 features
# -----------------------------

# The two files share no join key, so the alignment is purely positional:
# row i of the raw file is assumed to describe row i of the feature file.
if 'loan_id' not in df_processed.columns:
    if 'loan_id' not in df_raw.columns:
        raise KeyError(
            "'loan_id' is missing from both original_dataset.csv and "
            "df_final_features_v2.csv; cannot look loans up by ID."
        )
    # A length mismatch would pad one side with NaN and silently pair
    # loan IDs with the wrong feature rows, so fail loudly instead.
    if len(df_raw) != len(df_processed):
        raise ValueError(
            f"Row count mismatch: raw has {len(df_raw)} rows, "
            f"features have {len(df_processed)}; positional alignment is unsafe."
        )
    df_processed = pd.concat([
        df_raw[['loan_id']].reset_index(drop=True),
        df_processed.reset_index(drop=True)
    ], axis=1)
# Otherwise the feature file already carries loan_id. Merging the raw column in
# again would create loan_id_x / loan_id_y and break later lookups, so the
# frame is used as loaded.

print("‚úÖ Data merged successfully.")
print("Data shape:", df_processed.shape)

‚úÖ Data merged successfully.
Data shape: (19999, 97)


In [None]:
# -----------------------------
# Prediction function combining Model 1 and Model 2
# -----------------------------

def predict_loan(loan_id_input):
    """
    Predict loan approval and, if approved, predict the loan amount.

    Reads the module-level ``df_processed`` and ``model1`` for the approval
    step, and ``df_model2``, ``model2`` and ``features_model2`` for the
    amount step. Both tables are searched by their ``loan_id`` column.

    Parameters
    ----------
    loan_id_input : int
        Value compared for equality against the ``loan_id`` columns.

    Returns
    -------
    None
        Everything is printed; nothing is returned. A message is printed when
        the ID is missing from either table.
    """
    # Find row in Model 1 dataset
    row = df_processed[df_processed['loan_id'] == loan_id_input]

    if row.empty:
        print(f"‚ùå Loan ID {loan_id_input} not found.")
        return

    # Force every feature to numeric; unparseable values become NaN, then 0.
    X_model1 = row[model1.feature_names_in_].apply(pd.to_numeric, errors='coerce').fillna(0)

    # Predict approval (0=approved, 1=rejected); only the first matching row is used.
    pred_approval = model1.predict(X_model1)[0]

    try:
        # Column 0 is reported as the approval probability (label 0 = approved).
        prob_approval = model1.predict_proba(X_model1)[0][0]
    except Exception:
        # Best effort: models without usable probabilities still print a status.
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit through.
        prob_approval = None

    label_map = {0: "approved ‚úÖ", 1: "rejected ‚ùå"}
    status_label = label_map.get(pred_approval, "unknown")

    print(f"\nüîç Loan ID {loan_id_input}:")
    print(f"   ‚û§ Status: {status_label}")
    if prob_approval is not None:
        print(f"   ‚û§ (Approval) probability: {prob_approval:.2%}")

    # If approved, predict loan amount using Model 2 features
    if pred_approval == 0:
        row_model2 = df_model2[df_model2['loan_id'] == loan_id_input]

        if row_model2.empty:
            print("‚ùå Loan ID not found in Model 2 dataset.")
            print("   ‚û§ Loan amount prediction: Not available")
            return

        X_model2 = row_model2[features_model2].apply(pd.to_numeric, errors='coerce').fillna(0)
        loan_amount_log_pred = model2.predict(X_model2)[0]
        # expm1 undoes a log1p transform; assumes model2 was trained on
        # log1p(amount) -- confirm against the training notebook.
        loan_amount_pred = np.expm1(loan_amount_log_pred)

        print(f"   ‚û§ Predicted loan amount: ‚Çπ{loan_amount_pred:,.2f}")

    else:
        print("   ‚û§ Loan amount prediction: Not applicable (loan rejected)")

# -----------------------------
# Interactive loop
# -----------------------------

if __name__ == "__main__":
    # Simple REPL: keep asking for loan IDs until the user types "exit".
    print("\nüîÆ Combined Loan Prediction System")
    print("Type 'exit' to quit.")
    print("----------------------------------------")

    while True:
        user_input = input("Enter Loan ID to predict: ").strip()
        if user_input.lower() == "exit":
            print("üëã Exiting prediction system.")
            break
        # isdecimal() rather than isdigit(): isdigit() also accepts characters
        # such as superscript digits ("²") that int() cannot parse, which
        # would crash the loop with ValueError.
        if not user_input.isdecimal():
            print("‚ö†Ô∏è Please enter a valid numeric Loan ID.\n")
            continue

        loan_id = int(user_input)
        predict_loan(loan_id)


üîÆ Combined Loan Prediction System
Type 'exit' to quit.
----------------------------------------


Enter Loan ID to predict:  1



üîç Loan ID 1:
   ‚û§ Status: approved ‚úÖ
   ‚û§ (Approval) probability: 90.00%
   ‚û§ Predicted loan amount: ‚Çπ28,668,154.66


Enter Loan ID to predict:  6



üîç Loan ID 6:
   ‚û§ Status: rejected ‚ùå
   ‚û§ (Approval) probability: 0.00%
   ‚û§ Loan amount prediction: Not applicable (loan rejected)


Enter Loan ID to predict:  75



üîç Loan ID 75:
   ‚û§ Status: rejected ‚ùå
   ‚û§ (Approval) probability: 18.00%
   ‚û§ Loan amount prediction: Not applicable (loan rejected)


Enter Loan ID to predict:  118



üîç Loan ID 118:
   ‚û§ Status: approved ‚úÖ
   ‚û§ (Approval) probability: 99.00%
   ‚û§ Predicted loan amount: ‚Çπ1,639,152.37


Enter Loan ID to predict:  exit


üëã Exiting prediction system.
