In [None]:
import numpy as np
import pandas as pd
# Seed NumPy's global random state once, up front, so that a fresh
# "Restart Kernel -> Run All" starts from the same random state every time.
np.random.seed(42)

from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.datasets import load_wine

In [None]:
# Load the wine dataset bundle, then pull the feature matrix and the integer
# target vector out of it in one step.
dataset = load_wine()
x, y = dataset.data, dataset.target
print(f"target names: {dataset.target_names}")

In [None]:
# Tabular view of the features. Indexing `target_names` with the integer
# targets attaches the class name of every row as a "label" column.
df = pd.DataFrame(x, columns=dataset.feature_names).assign(
    label=dataset.target_names[y]
)
df.head(20)

In [None]:
# Hold out 30% of the samples for the final evaluation.
# - random_state pins the shuffle, so re-running this cell (or running cells
#   out of order) always yields the same split instead of depending on the
#   current state of NumPy's global RNG. 42 matches the notebook-level seed.
# - stratify=y keeps the class proportions equal in both partitions, which
#   matters for a small multi-class dataset.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)
print(f"x_train.shape = {x_train.shape}, x_test.shape = {x_test.shape}")

In [None]:
# Hyper-parameter search for a random forest, then selection of the winner.
criterion = "gini"

# An explicit random_state makes every candidate forest deterministic. The
# search below runs with n_jobs=-1, and the notebook-level np.random.seed(42)
# should not be relied on to carry over into parallel workers.
rf = RandomForestClassifier(criterion=criterion, random_state=42)

# Grid size: 5 depths x 2 feature rules x 4 forest sizes x 3 split thresholds
# = 120 candidates, each evaluated with 10-fold CV -> 1200 fits (plus one
# final refit). Expect this cell to take a while.
cv = GridSearchCV(rf, param_grid={
    "max_depth": np.arange(4, 9, dtype=int),
    "max_features": ["sqrt", "log2"],
    "n_estimators": np.arange(100, 200, step=25, dtype=int),
    "min_samples_split": np.arange(2, 5, dtype=int),
}, cv=10, n_jobs=-1)

cv.fit(x_train, y_train)

# Expose the winning settings as individual names for later inspection.
n_estimators = cv.best_params_["n_estimators"]
max_depth = cv.best_params_["max_depth"]
max_features = cv.best_params_["max_features"]
min_samples_split = cv.best_params_["min_samples_split"]

# GridSearchCV refits the best configuration on the whole training set by
# default (refit=True), so the fitted winner is already available. Reusing it
# avoids a redundant extra fit and guarantees the evaluated model is exactly
# the one the search selected.
best_rf = cv.best_estimator_

KeyboardInterrupt: 

In [None]:
# Score the selected forest on the held-out split and report the result
# as a percentage.
score = best_rf.score(X=x_test, y=y_test)
print(f"accuracy: {score*100:.4f}%")