-----------
Machine Learning Model (Random Forest Classifier)
----------

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [2]:
def machine_learning_credit_risk(features, labels, test_size=0.2, n_estimators=100, random_state=42):
    """
    Train a Random Forest classifier to predict default and score it on a hold-out set.

    Args:
        features: Input data, one row per observation (array-like accepted by scikit-learn).
        labels: Outcomes of default (1 = default, 0 = no default), one per row of `features`.
        test_size: Size of the hold-out set used to test the model; forwarded
            unchanged to `train_test_split`.
        n_estimators: Number of trees in the forest (defaults to 100).
        random_state: Seed shared by the train/test split and the forest so that
            repeated runs give the same result (defaults to 42).

    Returns:
        A tuple `(accuracy, model)`:
            accuracy: Accuracy of the fitted model on the hold-out test data.
            model: The fitted `RandomForestClassifier`, returned so callers can
                predict on new observations later.
    """
    # Hold out part of the data so accuracy is measured on rows the model has not seen.
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=test_size, random_state=random_state
    )

    # Fit the forest on the training portion only.
    model = RandomForestClassifier(n_estimators=n_estimators, random_state=random_state)
    model.fit(X_train, y_train)

    # Score the predictions for the held-out rows against their true labels.
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    return accuracy, model

In [3]:
# Example usage: build a small synthetic dataset (random values, not real loans).
np.random.seed(42)  # fixed seed so the dummy data is reproducible
n_obs = 100

# The columns are drawn in this exact order so the seeded random stream
# produces the same values on every run.
df = pd.DataFrame(
    {
        'credit_score': np.random.randint(low=300, high=800, size=n_obs),
        'debt_ratio': np.random.uniform(low=0, high=1, size=n_obs),
        'interest_rate': np.random.uniform(low=0.02, high=0.1, size=n_obs),
        'default_status': np.random.randint(low=0, high=2, size=n_obs),
    }
)

In [10]:
# Show the generated dummy data (pandas elides the middle rows in the printed output).
print(df)

    credit_score  debt_ratio  interest_rate  default_status
0            402    0.894827       0.059156               1
1            735    0.597900       0.098852               0
2            648    0.921874       0.039364               1
3            570    0.088493       0.073771               1
4            406    0.195983       0.080930               0
..           ...         ...            ...             ...
95           701    0.051479       0.097090               1
96           517    0.278646       0.088241               1
97           343    0.908266       0.043556               1
98           461    0.239562       0.050808               0
99           501    0.144895       0.088091               0

[100 rows x 4 columns]


In [4]:
# Separate the predictor columns from the target column and convert both to NumPy arrays.
features = df.loc[:, ['credit_score', 'debt_ratio', 'interest_rate']].to_numpy()
labels = df.loc[:, 'default_status'].to_numpy()

In [5]:
# Inspect the feature matrix; columns are credit_score, debt_ratio, interest_rate (in that order).
print(features)

[[4.02000000e+02 8.94827350e-01 5.91562208e-02]
 [7.35000000e+02 5.97899979e-01 9.88520363e-02]
 [6.48000000e+02 9.21874235e-01 3.93644217e-02]
 [5.70000000e+02 8.84925021e-02 7.37708438e-02]
 [4.06000000e+02 1.95982862e-01 8.09295692e-02]
 [3.71000000e+02 4.52272889e-02 3.90110035e-02]
 [4.88000000e+02 3.25330331e-01 7.82573079e-02]
 [3.20000000e+02 3.88677290e-01 4.94226506e-02]
 [4.02000000e+02 2.71349032e-01 7.05844664e-02]
 [4.21000000e+02 8.28737509e-01 7.06823769e-02]
 [7.66000000e+02 3.56753327e-01 6.28619747e-02]
 [5.14000000e+02 2.80934510e-01 2.72231816e-02]
 [6.30000000e+02 5.42696083e-01 8.68241996e-02]
 [7.58000000e+02 1.40924225e-01 4.56624052e-02]
 [3.87000000e+02 8.02196981e-01 3.49214808e-02]
 [6.72000000e+02 7.45506437e-02 2.32620113e-02]
 [3.99000000e+02 9.86886937e-01 6.72714355e-02]
 [6.59000000e+02 7.72244769e-01 7.42051489e-02]
 [4.51000000e+02 1.98715682e-01 2.13270263e-02]
 [4.30000000e+02 5.52211712e-03 6.09674447e-02]
 [4.49000000e+02 8.15461428e-01 3.811966

In [6]:
# Inspect the label vector (1 = default, 0 = no default).
print(labels)

[1 0 1 1 0 1 0 1 1 0 1 1 1 1 1 0 1 0 1 0 0 1 1 1 0 1 0 0 0 1 1 0 0 1 0 1 0
 1 0 1 0 1 1 0 0 1 0 1 0 1 1 1 1 1 1 0 0 0 1 1 1 1 0 0 1 0 0 1 0 1 0 1 1 1
 1 0 1 1 1 0 1 0 1 0 0 1 0 1 1 1 0 0 1 1 0 1 1 1 0 0]


In [7]:
# Run machine learning model
# The function returns (accuracy, model); the fitted model is kept so it can be
# used for predictions on new observations afterwards.
# NOTE(review): default_status in the dummy data is drawn at random, independently
# of the features, so this accuracy reflects chance on a small hold-out set rather
# than real predictive power.
accuracy, model = machine_learning_credit_risk(features, labels)
print(f"Machine Learning Model Accuracy: {accuracy:.4f}")

Machine Learning Model Accuracy: 0.6500


In [8]:
# Predicting new observations:
# Each row must follow the training column order: credit_score, debt_ratio, interest_rate.
new_obs = np.array([[650, 0.3, 0.05], [400, 0.7, 0.09]])
# One predicted label per row of new_obs (1 = default, 0 = no default).
predicted_defaults = model.predict(new_obs)
print(f"Machine Learning Model Predicted Defaults: {predicted_defaults}")

Machine Learning Model Predicted Defaults: [1 0]
