# Balanced Random Forest Classifier

In [None]:
# Initial imports.

import numpy as np
import pandas as pd
from pathlib import Path
from collections import Counter
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import confusion_matrix
from imblearn.metrics import classification_report_imbalanced

In [None]:
# Load the dataset from CSV into a DataFrame and preview its first rows.
# NOTE(review): the path below is absolute and specific to one machine; it
# has to be adjusted before this cell can run anywhere else.

file_path = Path("C:/Users/emili/Class/Patient_Survival_Prediction/Resources/database_erd.csv")
df_psp = pd.read_csv(file_path)
df_psp.head()

In [None]:
# Create our features

# Convert the string (categorical) columns to numbers via one-hot encoding,
# drop the target column 'hospital_death', then assign the result to X.
# The data was loaded into `df_psp`, so that is the frame used here.
categorical_columns = [
    'ethnicity', 'gender', 'hospital_admit_source', 'icu_admit_source',
    'icu_stay_type', 'icu_type', 'apache_3j_bodysystem', 'apache_2_bodysystem',
]
X = pd.get_dummies(df_psp, columns=categorical_columns).drop('hospital_death', axis=1)

# Create our target
y = df_psp['hospital_death']
X.head()


In [None]:
# Show summary statistics for the feature matrix
X.describe()

In [None]:
# Check the balance of our target values (number of rows per class in y)
y.value_counts()

In [None]:
# Split features and target into training and test sets.
# Stratifying on y keeps the class proportions the same in both splits;
# the fixed random_state makes the split reproducible.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=1,
)

In [None]:
# Print the class counts of the training labels, then of the test labels.
for split_labels in (y_train, y_test):
    print(Counter(split_labels))

In [None]:
# Train a BalancedRandomForestClassifier on the training split.
# Per the imbalanced-learn documentation, this estimator under-samples each
# bootstrap sample to balance the classes, so no separate resampling step is
# applied to X_train / y_train here.
from imblearn.ensemble import BalancedRandomForestClassifier

# Instantiate with 100 trees and a fixed seed for reproducibility
brfc = BalancedRandomForestClassifier(n_estimators=100, random_state=1)

# Fit on the training data
brfc.fit(X_train, y_train)

In [None]:
# Predict on the test set and calculate the balanced accuracy score
y_pred = brfc.predict(X_test)
balanced_accuracy_score(y_test, y_pred)

In [None]:
# Display the confusion matrix
# Calculating the confusion matrix for the test-set predictions. The class
# order is pinned to brfc.classes_ so the row/column labels built below are
# guaranteed to line up with the matrix cells.
class_labels = brfc.classes_
cm = confusion_matrix(y_test, y_pred, labels=class_labels)

# Create a DataFrame from the confusion matrix.
# Rows are the actual classes, columns are the predicted classes.
cm_df = pd.DataFrame(
    cm,
    index=[f"Actual {label}" for label in class_labels],
    columns=[f"Predicted {label}" for label in class_labels],
)
cm_df


In [None]:
# Print the imbalanced classification report for the test-set predictions
print(classification_report_imbalanced(y_test, y_pred))

In [None]:
# Rank the features from most to least important according to the fitted
# forest. Each entry is an (importance, column name) pair, and sorting the
# pairs in reverse order puts the largest importance first.
features_rank = sorted(zip(brfc.feature_importances_, X.columns), reverse=True)
for importance, column_name in features_rank:
    print(f"{column_name}: ({importance})")

In [None]:
# If needed, we can also run an Easy Ensemble Classifier or other types of models to compare against the balanced random forest classifier.