In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification


X, y = make_classification(
    n_samples=10000,
    n_features=20,
    n_informative=2,
    n_redundant=10,
    n_clusters_per_class=1,
    weights=[0.9, 0.1],  # 90% of class 0, 10% of class 1
    flip_y=0,
    random_state=42,
)

# Convert to DataFrame
df = pd.DataFrame(X, columns=[f"feature_{i}" for i in range(X.shape[1])])
df["target"] = y

In [2]:
df.head()

Unnamed: 0,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,...,feature_11,feature_12,feature_13,feature_14,feature_15,feature_16,feature_17,feature_18,feature_19,target
0,0.477908,-0.9131,-0.142257,-0.888393,0.101232,0.557124,0.603857,0.392627,-0.068788,-0.421507,...,-0.151552,0.489513,-0.584593,-0.225217,0.715035,-0.084127,0.041213,0.194508,-0.142022,0
1,-0.307475,-0.097225,0.93632,0.642466,1.165698,-0.399368,-0.304983,-0.322777,0.028461,0.188398,...,0.213513,-0.299423,0.474325,-0.138453,-0.783926,0.101268,-0.066895,-0.021799,0.481598,0
2,1.047926,1.403379,-0.48894,-1.913669,-0.082735,1.2018,1.364556,2.394906,-0.158484,-0.964356,...,-0.276121,1.080888,-1.234284,-1.635954,0.075388,-0.161633,0.07081,0.476562,0.863912,0
3,-0.249407,-0.431734,1.639818,0.195003,1.070736,-0.135673,-0.631617,0.082602,0.095749,0.533669,...,-0.36047,-0.314264,-0.067048,0.000762,0.578895,-0.134721,0.131493,-0.49308,0.648938,0
4,0.049466,0.530635,-2.463684,-0.006239,0.735609,0.008183,0.163489,1.181896,-0.026218,-0.143726,...,0.124572,0.06943,0.058234,-0.280637,0.215532,0.04829,-0.044555,0.145079,-0.18269,0


In [4]:
from imblearn.ensemble import BalancedRandomForestClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

In [5]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)

In [6]:
rf = RandomForestClassifier(n_estimators=300)
balanced_rf = BalancedRandomForestClassifier(n_estimators=300)

In [7]:
rf_model = rf.fit(X_train, y_train)
balanced_rf_model = balanced_rf.fit(X_train, y_train)

In [9]:
pred = rf.predict(X_test)
balanced_pred = balanced_rf.predict(X_test)

In [11]:
from sklearn.metrics import classification_report

In [12]:
print(classification_report(pred, y_test))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      2992
           1       0.96      1.00      0.98       308

    accuracy                           1.00      3300
   macro avg       0.98      1.00      0.99      3300
weighted avg       1.00      1.00      1.00      3300



In [13]:
print(classification_report(balanced_pred, y_test))

              precision    recall  f1-score   support

           0       0.99      1.00      1.00      2969
           1       0.97      0.94      0.96       331

    accuracy                           0.99      3300
   macro avg       0.98      0.97      0.98      3300
weighted avg       0.99      0.99      0.99      3300

