In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.preprocessing import LabelEncoder

# Read the processed training and test splits from disk.
# NOTE(review): the training file name suggests it was oversampled with SMOTE
# while the test file was not — confirm against the preprocessing step.
train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../data/processed/train_set_SMOTEd.csv",
        "../data/processed/test_set.csv",
    )
)

### RF Model

In [2]:
# Feature columns: every column of the training frame except the row
# identifier ('id') and the target ('credit_status').
feature_columns = [col for col in train_df.columns if col not in ["credit_status", "id"]]

# Encode the target with ONE encoder, fitted on the training labels only, and
# reuse it for the test labels. Fitting a second, independent encoder on the
# test labels gives a different label -> integer mapping whenever the test
# split does not contain exactly the same set of classes, which would silently
# corrupt every metric computed from y_test. With a shared encoder, a label
# that was never seen in training raises a ValueError instead.
label_encoder = LabelEncoder()

# Split features and target
X_train = train_df[feature_columns]
y_train = label_encoder.fit_transform(train_df["credit_status"])
X_test = test_df[feature_columns]
y_test = label_encoder.transform(test_df["credit_status"])

# Building the Random Forest model
# random_state is fixed so that re-running the notebook reproduces the model.
# NOTE(review): the training file name suggests the classes were already
# rebalanced with SMOTE; if so, class_weight='balanced' has little additional
# effect — confirm whether both are intended.
rf = RandomForestClassifier(random_state=42, class_weight='balanced')
rf.fit(X_train, y_train);  # trailing ';' hides the estimator repr in the cell output

### Evaluation

In [3]:
# Score the held-out test features with the fitted forest.
y_pred = rf.predict(X_test)

# Pair each customer id with its predicted (integer-encoded) credit status.
results_df = test_df[["id"]].assign(predicted_credit_status=y_pred)

# Preview the first few predictions.
print(results_df.head())

# Report each metric in turn: heading first, then the value computed from the
# encoded test labels and the predictions.
metric_sections = (
    ("Accuracy:", accuracy_score),
    ("\nConfusion Matrix:\n", confusion_matrix),
    ("\nClassification Report:\n", classification_report),
)
for heading, metric_fn in metric_sections:
    print(heading, metric_fn(y_test, y_pred))

        id  predicted_credit_status
0  5052720                        1
1  5087861                        1
2  5068206                        1
3  5137255                        1
4  5023163                        1
Accuracy: 0.8769884805266045

Confusion Matrix:
 [[ 225  416]
 [ 481 6170]]

Classification Report:
               precision    recall  f1-score   support

           0       0.32      0.35      0.33       641
           1       0.94      0.93      0.93      6651

    accuracy                           0.88      7292
   macro avg       0.63      0.64      0.63      7292
weighted avg       0.88      0.88      0.88      7292

