In [63]:
#The objective of this study is to perform real credit risk prediction using customer behavior data

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score


In [3]:
df = pd.read_csv(r'C:\Users\Swetha\Downloads\default of credit card clients.csv')


In [4]:
#Separate features (X) and target (y)
X = df.drop("default payment next month", axis=1)
y = df["default payment next month"]


In [5]:
#Split data into training and testing sets


X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [6]:
#Scale the data

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)



In [7]:
#Build and train the model
lr = LogisticRegression(max_iter=1000)
lr.fit(X_train_scaled, y_train)

In [8]:
#Random Forest


rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)

In [20]:
#Predict credit card default
y_pred_lr = lr.predict(X_test_scaled)
y_pred_rf = rf.predict(X_test)


In [10]:
#Evaluate the model


def evaluate(y_test, y_pred, model_name):
    print(model_name)
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print("Precision:", precision_score(y_test, y_pred))
    print("Recall:", recall_score(y_test, y_pred))
    print("F1 Score:", f1_score(y_test, y_pred))
    print("-"*30)

evaluate(y_test, y_pred_lr, "Logistic Regression")
evaluate(y_test, y_pred_rf, "Random Forest")



Logistic Regression
Accuracy: 0.8098333333333333
Precision: 0.6928251121076233
Recall: 0.23533891850723535
F1 Score: 0.35133598635588403
------------------------------
Random Forest
Accuracy: 0.8136666666666666
Precision: 0.6312247644683715
Recall: 0.3571972581873572
F1 Score: 0.45622568093385213
------------------------------


In [11]:
y_prob_lr = lr.predict_proba(X_test_scaled)[:,1]
y_prob_rf = rf.predict_proba(X_test)[:,1]

print("ROC-AUC Logistic Regression:", roc_auc_score(y_test, y_prob_lr))
print("ROC-AUC Random Forest:", roc_auc_score(y_test, y_prob_rf))


ROC-AUC Logistic Regression: 0.7270556485659562
ROC-AUC Random Forest: 0.7602945939011357


In [12]:
#by using random forest, The model can predict credit risk using customer behavior data, but it is better at identifying safe customers than risky ones.

In [13]:
# Predict default for test data
#Each value is the model’s prediction----0 = safe customer----1 = risky customer
y_pred = rf.predict(X_test)
y_pred[:10]


array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [21]:
X = df.drop(["ID", "default payment next month"], axis=1)
y = df["default payment next month"]


In [22]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Train Random Forest model
rf = RandomForestClassifier(
    n_estimators=100,
    random_state=42
)
rf.fit(X_train, y_train)

In [23]:
# Predict default for test data
# 0 = safe customer, 1 = risky customer
y_pred = rf.predict(X_test)

# Show first 10 predictions
y_pred[:10]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [27]:
#Predict for ONE new customer (real prediction)
new_customer = [[200000, 2, 2, 1, 35,
                 0, 0, 0, 0, 0, 0,
                 5000, 4800, 4700, 4600, 4500, 4400,
                 2000, 2000, 2000, 2000, 2000, 2000]]

# Predict
prediction = rf.predict(new_customer)

prediction




array([0])

In [28]:
prediction = rf.predict(new_customer)
if prediction[0] == 1:
    print("Customer is predicted to be HIGH CREDIT RISK (may default).")
else:
    print("Customer is predicted to be LOW CREDIT RISK (likely to pay).")


Customer is predicted to be LOW CREDIT RISK (likely to pay).




In [26]:
#FINAL INFERENCE:
#The model shows that the customer has low credit risk, which means the customer is likely to pay the credit card bill on time.