In [1]:
import pandas as pd
import numpy as np

red = pd.read_csv('winequality-red.csv', sep=";")
white = pd.read_csv('winequality-white.csv', sep=";")

red['color'] = 0
white['color'] = 1
df = pd.concat([red, white], ignore_index=True)

In [2]:
target = np.where(df['quality'] <= 5, 'low',
        np. where(df['quality'] <= 7, 'medium', 'hight'))
df.drop(labels='quality', inplace=True, axis=1)
pd.Series(target).value_counts(sort = True)

medium    3915
low       2384
hight      198
dtype: int64

In [3]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(df, target, test_size = 0.3, random_state = 88)

In [4]:
pd.Series(y_test).value_counts(sort = True)

medium    1177
low        724
hight       49
dtype: int64

In [5]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

In [6]:
ada = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=7, max_features=5), 
    n_estimators=200, algorithm="SAMME", learning_rate=0.4, random_state=88
)

In [7]:
ada.fit(X_train, y_train)

AdaBoostClassifier(algorithm='SAMME',
          base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=7,
            max_features=5, max_leaf_nodes=None, min_impurity_decrease=0.0,
            min_impurity_split=None, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            presort=False, random_state=None, splitter='best'),
          learning_rate=0.4, n_estimators=200, random_state=88)

In [9]:
from sklearn.metrics import accuracy_score, confusion_matrix
y_pred = ada.predict(X_test)
print(confusion_matrix(y_test, y_pred))
print(accuracy_score(y_test, y_pred))

[[  17    0   32]
 [   0  508  216]
 [   1  175 1001]]
0.7825641025641026


In [10]:
ada_cv = AdaBoostClassifier(
    DecisionTreeClassifier(), 
    algorithm="SAMME.R", learning_rate = 0.4
)

In [11]:
from sklearn.model_selection import GridSearchCV
siatka_parametrow_1 = {'base_estimator__max_features': [3, 5, 7],
                     'base_estimator__max_depth': [3, 5, 7],
                     'n_estimators': [190, 200, 210]}
poszukiwanie_1 = GridSearchCV(ada_cv, siatka_parametrow_1, cv = 5, n_jobs=-1, scoring='accuracy', return_train_score=True)
poszukiwanie_1.fit(X_train, y_train)

GridSearchCV(cv=5, error_score='raise-deprecating',
       estimator=AdaBoostClassifier(algorithm='SAMME.R',
          base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best'),
          learning_rate=0.4, n_estimators=50, random_state=None),
       fit_params=None, iid='warn', n_jobs=-1,
       param_grid={'base_estimator__max_features': [3, 5, 7], 'base_estimator__max_depth': [3, 5, 7], 'n_estimators': [190, 200, 210]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring='accuracy', verbose=0)

In [12]:
poszukiwanie_1.best_params_

{'base_estimator__max_depth': 7,
 'base_estimator__max_features': 7,
 'n_estimators': 190}