In [2]:
import numpy as np
np.random.seed(42)
import pandas as pd
from sklearn.datasets import load_wine
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier

In [3]:
dataset = load_wine()
x, y = dataset.data, dataset.target

In [4]:
df = pd.DataFrame(x, columns = dataset.feature_names)
df["y"] = y
df.head()

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline,y
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0,0
1,13.2,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050.0,0
2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185.0,0
3,14.37,1.95,2.5,16.8,113.0,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480.0,0
4,13.24,2.59,2.87,21.0,118.0,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735.0,0


## Cart Classifier

In [5]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.3)

In [6]:
parameters = {
    'criterion': ['gini', 'entropy'],
    'max_depth': [None, 2, 4, 8, 10]
}

clf = DecisionTreeClassifier()
grid_cv = GridSearchCV(clf, parameters, cv = 10, n_jobs = -1)
grid_cv.fit(x_train, y_train)

GridSearchCV(cv=10, estimator=DecisionTreeClassifier(), n_jobs=-1,
             param_grid={'criterion': ['gini', 'entropy'],
                         'max_depth': [None, 2, 4, 8, 10]})

In [7]:
print(f"Parameters of best model: {grid_cv.best_params_}")
print(f"Score of best model: {grid_cv.best_score_}")

Parameters of best model: {'criterion': 'gini', 'max_depth': None}
Score of best model: 0.9448717948717948


## CART Classifier Train best model


In [8]:
clf = DecisionTreeClassifier(
    criterion='gini', 
    max_depth=4)
clf.fit(x_train, y_train)
score = clf.score(x_test, y_test)
print(f"Accuracy: {score}")

Accuracy: 0.9444444444444444


### RandomForrest Classifier: Grid Search

In [9]:
parameters = {
    'n_estimators': [10, 20, 40, 80, 160],
    'criterion': ['gini', 'entropy'],
    'max_depth': [None, 2, 4, 8, 10]
}
np.random.seed(42)

clf = RandomForestClassifier()
grid_cv = GridSearchCV(clf, parameters, cv = 10, n_jobs = -1)
grid_cv.fit(x_train, y_train)

GridSearchCV(cv=10, estimator=RandomForestClassifier(), n_jobs=-1,
             param_grid={'criterion': ['gini', 'entropy'],
                         'max_depth': [None, 2, 4, 8, 10],
                         'n_estimators': [10, 20, 40, 80, 160]})

In [10]:
print(f"Parameters of best model: {grid_cv.best_params_}")
print(f"Score of best model: {grid_cv.best_score_}")

Parameters of best model: {'criterion': 'entropy', 'max_depth': None, 'n_estimators': 20}
Score of best model: 0.9839743589743589


In [11]:
# Train
clf = RandomForestClassifier(n_estimators = 20, criterion='gini', max_depth = 4)
clf.fit(x_train, y_train)
print(f"Accuracy: {clf.score(x_test, y_test)}")

Accuracy: 1.0


## Gradient Boosting

In [13]:
parameters = {
    'loss': ['deviance', 'exponential'],
    'n_estimators': [10, 20, 40],
    'criterion': ['mse', 'mae'],
    'max_depth': [None, 2, 4, 8, 10]
}
np.random.seed(42)

clf = GradientBoostingClassifier()
grid_cv = GridSearchCV(clf, parameters, cv = 10, n_jobs = -1)
grid_cv.fit(x_train, y_train)

 0.91282051 0.90448718 0.91282051 0.90448718 0.91282051 0.91282051
 0.90448718 0.90448718 0.91282051        nan        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
 0.91153846 0.91987179 0.91987179 0.91346154 0.92115385 0.92884615
 0.89679487 0.92884615 0.91987179 0.92884615 0.90384615 0.92051282
 0.91987179 0.90320513 0.93589744        nan        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan]


GridSearchCV(cv=10, estimator=GradientBoostingClassifier(), n_jobs=-1,
             param_grid={'criterion': ['mse', 'mae'],
                         'loss': ['deviance', 'exponential'],
                         'max_depth': [None, 2, 4, 8, 10],
                         'n_estimators': [10, 20, 40]})

In [16]:
print(f"Parameters of best model: {grid_cv.best_params_}")
print(f"Score of best model: {grid_cv.best_score_}")

Parameters of best model: {'criterion': 'mse', 'loss': 'deviance', 'max_depth': 2, 'n_estimators': 40}
Score of best model: 0.9435897435897436


In [17]:
# Train
clf = GradientBoostingClassifier(loss = 'deviance', n_estimators = 10, criterion='mae', max_depth = 8)
clf.fit(x_train, y_train)
print(f"Accuracy: {clf.score(x_test, y_test)}")

Accuracy: 0.9629629629629629


