#### Dataset Preparation

In [102]:
import numpy as np


np.random.seed(42)

In [103]:
import pandas as pd
from sklearn.datasets import load_wine
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

In [104]:
dataset = load_wine()
x = dataset.data
y = dataset.target

In [105]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

#### CART Classifier: GridSearchCV

In [106]:
parameters = {
    "criterion": ["gini", "entropy"],
    "max_depth": [None, 2, 4, 8, 10],
    "min_samples_split": [1, 2, 4],
    "min_samples_leaf": [1, 2],
    "max_features": ["sqrt", "log2"],
}

clf = DecisionTreeClassifier()
grid_cv = GridSearchCV(clf, parameters, cv=10, n_jobs=-1)
_ = grid_cv.fit(x_train, y_train)

In [107]:
print(f"Parameters of best model: {grid_cv.best_params_}")
print(f"Score of best model: {grid_cv.best_score_}")

Parameters of best model: {'criterion': 'entropy', 'max_depth': 10, 'max_features': 'log2', 'min_samples_leaf': 1, 'min_samples_split': 2}
Score of best model: 0.9589743589743589


#### Cart Classifier: Train Best Model

In [108]:
clf = DecisionTreeClassifier(**grid_cv.best_params_)
clf.fit(x_train, y_train)
score = clf.score(x_test, y_test)

print(f"Accuracy: {score}")

Accuracy: 0.8703703703703703


#### RandomForest Classifier: GridSearchCV

In [109]:
parameters = {
    "criterion": ["gini", "entropy"],
    "max_depth": [None, 2, 5, 10],
    "min_samples_split": [1, 2],
    "min_samples_leaf": [1, 2],
    "max_features": ["sqrt", "log2"],
    "n_estimators": [5, 10, 15],
}

clf = RandomForestClassifier()
grid_cv = GridSearchCV(clf, parameters, cv=3, n_jobs=-1)
_ = grid_cv.fit(x_train, y_train)

In [110]:
print(f"Parameters of best model: {grid_cv.best_params_}")
print(f"Score of best model: {grid_cv.best_score_}")

Parameters of best model: {'criterion': 'gini', 'max_depth': 10, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'min_samples_split': 1, 'n_estimators': 15}
Score of best model: 0.983739837398374


#### RandomForest Classifier: Train Best Model

In [111]:
clf = RandomForestClassifier(**grid_cv.best_params_)
clf.fit(x_train, y_train)
score = clf.score(x_test, y_test)

print(f"Accuracy: {score}")

Accuracy: 0.9814814814814815
