In [10]:
import pandas as pd
import numpy as np
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn import preprocessing

In [3]:
def is_tasty(quality, threshold=7):
    """Label a single wine as tasty (1) or not tasty (0).

    Args:
        quality: Numeric quality score of one wine.
        threshold: Lowest score that still counts as tasty. Defaults to 7,
            so existing one-argument calls (e.g. via ``Series.apply``)
            behave as before.

    Returns:
        int: 1 when ``quality >= threshold``, otherwise 0.
    """
    return 1 if quality >= threshold else 0

In [11]:
# Columns used as model inputs.
feature_columns = [
    "fixed acidity",
    "volatile acidity",
    "citric acid",
    "residual sugar",
    "chlorides",
    "free sulfur dioxide",
    "total sulfur dioxide",
    "density",
    "pH",
    "sulphates",
    "alcohol",
]

# Load the comma-separated wine data and select the input columns.
data = pd.read_csv("winequality-red.csv", sep=",")
features = data[feature_columns]

# Derive the binary target from the quality score using is_tasty.
data['tasty'] = data['quality'].apply(is_tasty)
targets = data['tasty']

print(features)

      fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
0               7.4             0.700         0.00             1.9      0.076   
1               7.8             0.880         0.00             2.6      0.098   
2               7.8             0.760         0.04             2.3      0.092   
3              11.2             0.280         0.56             1.9      0.075   
4               7.4             0.700         0.00             1.9      0.076   
...             ...               ...          ...             ...        ...   
1594            6.2             0.600         0.08             2.0      0.090   
1595            5.9             0.550         0.10             2.2      0.062   
1596            6.3             0.510         0.13             2.3      0.076   
1597            5.9             0.645         0.12             2.0      0.075   
1598            6.0             0.310         0.47             3.6      0.067   

      free sulfur dioxide  

In [25]:
# convert to array format for algorithm
# `features` is already two-dimensional, so no hard-coded column count is needed.
X = np.array(features)
Y = np.array(targets).reshape(-1, 1)

# Split the arrays built above (not the raw DataFrame) so the preprocessing
# below is what the model is trained on. The split happens before scaling so
# the held-out rows cannot influence the scaler; a fixed seed keeps the split
# repeatable. The target is flattened to 1-D for use as `y`.
feature_train, feature_test, target_train, target_test = train_test_split(
    X, Y.ravel(), test_size=0.3, random_state=42
)

# normalize
# Learn min/max from the training rows only, then apply the same mapping to the
# test rows (test values may therefore fall slightly outside [0, 1]).
scaler = preprocessing.MinMaxScaler()
feature_train = scaler.fit_transform(feature_train)
feature_test = scaler.transform(feature_test)

In [26]:
# Hyper-parameter grid: every combination of the values below is evaluated
# (3 x 4 = 12 candidates).
param_dist = {
    'n_estimators': [10, 50, 200],
    'learning_rate': [0.01, 0.05, 0.3, 1]
}

# Fixed seed so repeated runs of the search produce the same fitted models.
estimator = AdaBoostClassifier(random_state=42)

# cv=10: each candidate is scored with 10-fold cross-validation.
grid_search = GridSearchCV(estimator=estimator, param_grid=param_dist, cv=10)

In [27]:
# Run the grid search on the training split.
grid_search.fit(X=feature_train, y=target_train)

In [28]:
# Predict on the held-out split with the fitted grid search.
predictions = grid_search.predict(feature_test)

# Compare predictions with the true labels: confusion matrix first, then accuracy.
test_confusion = confusion_matrix(target_test, predictions)
test_accuracy = accuracy_score(target_test, predictions)
print(test_confusion, test_accuracy, sep="\n")

[[398  20]
 [ 37  25]]
0.88125
