In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from imblearn.over_sampling import SMOTE, RandomOverSampler

# Load data
df = pd.read_csv("dataset.csv")
X = df.drop(columns=["Unnamed: 0", "label"])
y = df["label"]

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Meta-learning to choose oversampling strategy
# For example, using a decision-making criterion like class distribution

if y_train.value_counts()[1] < 10:
    oversampler = SMOTE()
else:
    oversampler = RandomOverSampler()

# Apply oversampling
X_res, y_res = oversampler.fit_resample(X_train, y_train)

# Train Random Forest Classifier
rf = RandomForestClassifier()
rf.fit(X_res, y_res)

# Predictions
y_pred = rf.predict(X_test)

# Evaluation
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.99      0.99      0.99     67005
           1       0.99      1.00      1.00       461
           2       1.00      1.00      1.00     11189
           3       0.90      0.92      0.91      6870

    accuracy                           0.99     85525
   macro avg       0.97      0.98      0.97     85525
weighted avg       0.99      0.99      0.99     85525

