In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder

df = pd.read_csv('7.csv')

X = df.drop('species', axis=1)
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df['species'])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

param_grid = {
    'n_estimators': [50, 100, 150],
    'max_depth': [None, 10, 20],
    'max_features': ['sqrt', 'log2'],
    'min_samples_split': [2, 5],
}

rf = RandomForestClassifier(random_state=42)

grid_search = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    scoring='f1_weighted',
    cv=3,
    n_jobs=-1,
    verbose=1
)

grid_search.fit(X_train, y_train)

best_rf = grid_search.best_estimator_
y_pred = best_rf.predict(X_test)

print("\n Best Hyperparameters:", grid_search.best_params_)
print(f"\n Accuracy: {accuracy_score(y_test, y_pred) * 100:.2f}%")
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

Fitting 3 folds for each of 36 candidates, totalling 108 fits

 Best Hyperparameters: {'max_depth': 10, 'max_features': 'sqrt', 'min_samples_split': 5, 'n_estimators': 100}

 Accuracy: 92.89%

Classification Report:
                        precision    recall  f1-score   support

    Anabas testudineus       1.00      1.00      1.00       160
     Coilia dussumieri       1.00      1.00      1.00       125
Otolithoides biauritus       0.66      0.79      0.72       141
     Otolithoides pama       1.00      1.00      1.00       129
     Pethia conchonius       1.00      1.00      1.00       132
  Polynemus paradiseus       1.00      1.00      1.00       159
  Puntius lateristriga       1.00      1.00      1.00       105
        Setipinna taty       0.75      0.61      0.67       146
 Sillaginopsis panijus       1.00      1.00      1.00       127

              accuracy                           0.93      1224
             macro avg       0.93      0.93      0.93      1224
          weig