In [1]:
# ===============================
# 📌 1) Import Libraries
# ===============================
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score


In [3]:
# ===============================
# 📌 2) Load the Loan Data
# ===============================
# NOTE(review): absolute, machine-specific path -- point DATA_PATH at your own
# copy of the CSV before running this cell on another machine.
DATA_PATH = r'C:\Users\Lenovo\Desktop\jpmorgan\Task 3 and 4_Loan_Data.csv'
df = pd.read_csv(DATA_PATH)

print(df.head())
# DataFrame.info() prints its summary itself and returns None, so wrapping it
# in print() only appended a stray "None" line to the output.
df.info()


   customer_id  credit_lines_outstanding  loan_amt_outstanding  \
0      8153374                         0           5221.545193   
1      7442532                         5           1958.928726   
2      2256073                         0           3363.009259   
3      4885975                         0           4766.648001   
4      4700614                         1           1345.827718   

   total_debt_outstanding       income  years_employed  fico_score  default  
0             3915.471226  78039.38546               5         605        0  
1             8228.752520  26648.43525               2         572        1  
2             2027.830850  65866.71246               4         602        0  
3             2501.730397  74356.88347               5         612        0  
4             1768.826187  23448.32631               6         631        0  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 8 columns):
 #   Column                 

In [5]:
# ===============================
# 📌 3) Prepare Features & Target
# ===============================
# Target: the 'default' column of the loaded frame.
y = df['default']
# Features: every remaining column except the row identifier and the target.
X = df.drop(columns=['customer_id', 'default'])


In [7]:
# ===============================
# 📌 4) Split Data
# ===============================
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [9]:
# ===============================
# 📌 5) Train Model
# ===============================
# Fit a 100-tree random forest on the training split (seeded for repeatability).
# fit() returns the estimator itself, so the chained call binds the fitted model.
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score the held-out split and print the accuracy and the per-class report.
y_pred = model.predict(X_test)
print(
    "Accuracy:",
    accuracy_score(y_test, y_pred),
)
print(
    "\nClassification Report:\n",
    classification_report(y_test, y_pred),
)


Accuracy: 0.9945

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00      1652
           1       0.99      0.98      0.98       348

    accuracy                           0.99      2000
   macro avg       0.99      0.99      0.99      2000
weighted avg       0.99      0.99      0.99      2000



In [11]:
# ===============================
# 📌 6) Function to Estimate PD & Expected Loss
# ===============================
def estimate_default_risk(loan_details, recovery_rate=0.10, clf=None):
    """
    Estimate the probability of default (PD) and the expected loss for one loan.

    Expected loss = PD * exposure * (1 - recovery_rate), where exposure is
    loan_details['loan_amt_outstanding'].

    loan_details: dict with feature names & values. It must contain every
        feature the classifier was fitted on; extra keys are ignored and the
        key order does not matter.
    recovery_rate: fraction of the exposure recovered after a default,
        between 0 and 1 (default 0.10).
    clf: fitted classifier exposing predict_proba(); defaults to the
        notebook-level `model`.

    Returns a dict with 'Probability of Default' (rounded to 4 places) and
    'Expected Loss' (rounded to 2 places).

    Raises KeyError if a required feature is missing from loan_details, and
    ValueError if recovery_rate is outside [0, 1].
    """
    # NOTE(review): this notebook defines estimate_default_risk in more than one
    # cell; whichever definition was executed last is the one in effect.
    if clf is None:
        clf = model
    if not 0.0 <= recovery_rate <= 1.0:
        raise ValueError(f"recovery_rate must be between 0 and 1, got {recovery_rate!r}")

    # Use the feature names recorded on the fitted estimator so the input frame
    # has the training columns in the training order; if the estimator does not
    # expose them, fall back to the dict's own keys.
    feature_names = list(getattr(clf, 'feature_names_in_', loan_details))

    # Fail loudly on a misspelled or missing key instead of passing a
    # mismatched or NaN-filled row to the model.
    missing = [name for name in feature_names if name not in loan_details]
    if missing:
        raise KeyError(f"loan_details is missing required feature(s): {missing}")

    df_input = pd.DataFrame([loan_details], columns=feature_names)

    # Column 1 of predict_proba is taken as the default class -- assumes the
    # classifier's classes are ordered [0, 1]; verify against clf.classes_.
    prob_default = float(clf.predict_proba(df_input)[0][1])
    exposure = loan_details['loan_amt_outstanding']
    expected_loss = prob_default * exposure * (1 - recovery_rate)
    return {
        'Probability of Default': round(prob_default, 4),
        'Expected Loss': round(expected_loss, 2)
    }


In [17]:
# Debug aid: compare the columns the model was trained on with the keys of a
# candidate loan dict, to spot misspelled feature names.
print("Model expects:", list(X.columns))
# `new_loan` is only assigned in a later cell; guard the lookup so that
# Restart & Run All does not stop here with a NameError on a fresh kernel.
if 'new_loan' in globals():
    print("Input keys:", list(new_loan.keys()))


Model expects: ['credit_lines_outstanding', 'loan_amt_outstanding', 'total_debt_outstanding', 'income', 'years_employed', 'fico_score']
Input keys: ['customer_credit_lines_outstanding', 'loan_amt_outstanding', 'total_debt_outstanding', 'income', 'years_employed', 'fico_score']


In [19]:
def estimate_default_risk(loan_details, recovery_rate=0.10, clf=None):
    """
    Estimate the probability of default (PD) and the expected loss for one loan.

    Expected loss = PD * exposure * (1 - recovery_rate), where exposure is
    loan_details['loan_amt_outstanding'].

    loan_details: dict with feature names & values. It must contain every
        feature the classifier was fitted on; extra keys are ignored and the
        key order does not matter.
    recovery_rate: fraction of the exposure recovered after a default,
        between 0 and 1 (default 0.10).
    clf: fitted classifier exposing predict_proba(); defaults to the
        notebook-level `model`.

    Returns a dict with 'Probability of Default' (rounded to 4 places) and
    'Expected Loss' (rounded to 2 places).

    Raises KeyError if a required feature is missing from loan_details, and
    ValueError if recovery_rate is outside [0, 1].
    """
    # NOTE(review): this notebook defines estimate_default_risk in more than one
    # cell; whichever definition was executed last is the one in effect.
    if clf is None:
        clf = model
    if not 0.0 <= recovery_rate <= 1.0:
        raise ValueError(f"recovery_rate must be between 0 and 1, got {recovery_rate!r}")

    # Use the feature names recorded on the fitted estimator (same columns and
    # order as training) rather than a notebook-global frame; if the estimator
    # does not expose them, fall back to the dict's own keys.
    feature_names = list(getattr(clf, 'feature_names_in_', loan_details))

    # pd.DataFrame(..., columns=...) silently fills absent keys with NaN, so
    # check for them up front and fail loudly instead.
    missing = [name for name in feature_names if name not in loan_details]
    if missing:
        raise KeyError(f"loan_details is missing required feature(s): {missing}")

    df_input = pd.DataFrame([loan_details], columns=feature_names)

    # Column 1 of predict_proba is taken as the default class -- assumes the
    # classifier's classes are ordered [0, 1]; verify against clf.classes_.
    prob_default = float(clf.predict_proba(df_input)[0][1])
    exposure = loan_details['loan_amt_outstanding']
    expected_loss = prob_default * exposure * (1 - recovery_rate)

    return {
        'Probability of Default': round(prob_default, 4),
        'Expected Loss': round(expected_loss, 2)
    }


In [21]:
def estimate_default_risk(loan_details, recovery_rate=0.10, clf=None):
    """
    Estimate the probability of default (PD) and the expected loss for one loan.

    Expected loss = PD * exposure * (1 - recovery_rate), where exposure is
    loan_details['loan_amt_outstanding'].

    loan_details: dict with feature names & values. It must contain every
        feature the classifier was fitted on; extra keys are ignored and the
        key order does not matter.
    recovery_rate: fraction of the exposure recovered after a default,
        between 0 and 1 (default 0.10).
    clf: fitted classifier exposing predict_proba(); defaults to the
        notebook-level `model`.

    Returns a dict with 'Probability of Default' (rounded to 4 places) and
    'Expected Loss' (rounded to 2 places).

    Raises KeyError if a required feature is missing from loan_details, and
    ValueError if recovery_rate is outside [0, 1].
    """
    # NOTE(review): this notebook defines estimate_default_risk in more than one
    # cell; whichever definition was executed last is the one in effect.
    if clf is None:
        clf = model
    if not 0.0 <= recovery_rate <= 1.0:
        raise ValueError(f"recovery_rate must be between 0 and 1, got {recovery_rate!r}")

    # Use the feature names recorded on the fitted estimator (same columns and
    # order as training) rather than a notebook-global frame; if the estimator
    # does not expose them, fall back to the dict's own keys.
    feature_names = list(getattr(clf, 'feature_names_in_', loan_details))

    # pd.DataFrame(..., columns=...) silently fills absent keys with NaN, so
    # check for them up front and fail loudly instead.
    missing = [name for name in feature_names if name not in loan_details]
    if missing:
        raise KeyError(f"loan_details is missing required feature(s): {missing}")

    df_input = pd.DataFrame([loan_details], columns=feature_names)

    # Column 1 of predict_proba is taken as the default class -- assumes the
    # classifier's classes are ordered [0, 1]; verify against clf.classes_.
    prob_default = float(clf.predict_proba(df_input)[0][1])
    exposure = loan_details['loan_amt_outstanding']
    expected_loss = prob_default * exposure * (1 - recovery_rate)

    return {
        'Probability of Default': round(prob_default, 4),
        'Expected Loss': round(expected_loss, 2)
    }


In [23]:
# Example loan to score; every key is spelled exactly like a training column.
new_loan = dict(
    credit_lines_outstanding=2,
    loan_amt_outstanding=5000,
    total_debt_outstanding=25000,
    income=75000,
    years_employed=5,
    fico_score=650,
)

# Score the example loan and print the resulting dict.
result = estimate_default_risk(new_loan)
print(result)


{'Probability of Default': 0.14, 'Expected Loss': 630.0}
