In [2]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV

# Load data
X_train = np.load('/kaggle/input/classifier-data/X_train.npy')
y_train_raw = np.load('/kaggle/input/classifier-data/y_train.npy')
X_test = np.load('/kaggle/input/classifier-data/X_test.npy')
y_test_raw = np.load('/kaggle/input/classifier-data/y_test.npy')

# Convert y to binary: MI (1) if any label is 1, else non-MI (0)
y_train = (y_train_raw.sum(axis=1) > 0).astype(int)
y_test = (y_test_raw.sum(axis=1) > 0).astype(int)

# (Optional but recommended) Standardize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define parameter grid
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'class_weight': [{0: 1, 1: w} for w in [2, 5, 10]]
}

# Run Grid Search
grid = GridSearchCV(
    RandomForestClassifier(random_state=42),
    param_grid,
    cv=5,
    scoring='f1',
    n_jobs=-1,
    verbose=2
)
grid.fit(X_train_scaled, y_train)

# Get best model
best_rf = grid.best_estimator_

from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix

y_pred = best_rf.predict(X_test_scaled)

print("Best Parameters:", grid.best_params_)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))


Fitting 5 folds for each of 324 candidates, totalling 1620 fits
[CV] END class_weight={0: 1, 1: 2}, max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.3s
[CV] END class_weight={0: 1, 1: 2}, max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.3s
[CV] END class_weight={0: 1, 1: 2}, max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=200; total time=   0.6s
[CV] END class_weight={0: 1, 1: 2}, max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=300; total time=   0.9s
[CV] END class_weight={0: 1, 1: 2}, max_depth=None, min_samples_leaf=1, min_samples_split=5, n_estimators=100; total time=   0.3s
[CV] END class_weight={0: 1, 1: 2}, max_depth=None, min_samples_leaf=1, min_samples_split=5, n_estimators=100; total time=   0.3s
[CV] END class_weight={0: 1, 1: 2}, max_depth=None, min_samples_leaf=1, min_samples_split=5, n_estimators=200; total time=   0.6s
[CV] END class_weight={0: 