In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.metrics import accuracy_score, classification_report

# features & labels

In [None]:
# Feature matrix: every column of df except the two essay columns and the
# target. The target vector is the 'grade' column.
X = df.drop(columns=['essay', 'cleaned_essay', 'grade'])
y = df['grade']

In [None]:
# Hold out 20% of the rows as a test set; the seed is fixed so the split is
# the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Base estimator handed to the hyperparameter search; random_state is fixed so
# repeated runs build the same forests.
rf = RandomForestClassifier(random_state=42)


# parameter grid for Randomized Search

In [None]:
# Search space sampled by RandomizedSearchCV. Each key is a
# RandomForestClassifier constructor argument and each list holds the
# candidate values for it.
param_dist = {
    'n_estimators': [100, 200, 500, 1000],
    'max_depth': [10, 20, 30, None],  # None = no depth limit
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    # 'auto' is deliberately not listed: it was deprecated in scikit-learn 1.1
    # and removed in 1.3, where it makes every candidate that samples it fail
    # to fit. For classifiers it was only an alias for 'sqrt', so dropping it
    # does not remove any distinct model from the search.
    'max_features': ['sqrt', 'log2'],
}

In [None]:
# Draw 50 candidate configurations from param_dist and score each one with
# 5-fold cross-validation on the training split. n_jobs=-1 runs the fits in
# parallel on all available cores; random_state fixes which candidates are drawn.
random_search = RandomizedSearchCV(
    estimator=rf,
    param_distributions=param_dist,
    n_iter=50,
    cv=5,
    n_jobs=-1,
    random_state=42,
)
random_search.fit(X_train, y_train)

# best model

In [None]:
# Estimator the search kept for its best-scoring candidate. With
# RandomizedSearchCV's default refit=True (not overridden above) this model
# has been refit on the full training split.
best_rf = random_search.best_estimator_


# Predict on test data using the best model


In [None]:
# Predicted labels for the held-out test split; these feed the accuracy and
# classification report printed afterwards.
y_pred = best_rf.predict(X_test)


In [None]:
# Summarise the search outcome and test-set performance. Each metric is
# computed right before it is printed, so the order of evaluation and output
# is the same as calling the metric inline.
print("Best Parameters:", random_search.best_params_)

test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)

test_report = classification_report(y_test, y_pred)
print("Classification Report:\n", test_report)