In [17]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV

import gboost
import utilities as utils

In [5]:
# Load both wine-quality CSVs (semicolon-delimited) and tag every row with a
# binary `color` feature: 0 = white, 1 = red.
data_white = pd.read_csv("data/winequality-white.csv", delimiter=';')
data_white.insert(0, 'color', 0)
data_red = pd.read_csv("data/winequality-red.csv", delimiter=';')
data_red.insert(0, 'color', 1)

# DataFrame.append was removed in pandas 2.0; pd.concat stacks the two frames
# the same way (white rows first, original row labels kept).
data_all = pd.concat([data_white, data_red])

# Separate the target column from the features without mutating in place, so
# re-running this cell always starts from the freshly loaded frames.
y_all = data_all["quality"]
X_all = data_all.drop(columns="quality")

# Fixed seed so the 70/30 train/test split is reproducible across runs.
state = 2
test_size = 0.3
X_train, X_test, y_train, y_test = train_test_split(X_all, y_all, test_size=test_size, random_state=state)

In [40]:
# Hyperparameter grid. `np.int` was removed in NumPy 1.24; the builtin `int`
# is the drop-in replacement and yields the same integer dtype.
n_estimators = list(np.linspace(50, 150, 3, dtype=int))  # 3 values: 50, 100, 150
learning_rates = list(np.linspace(0.05, 0.6, 12))  # 12 values, step 0.05
max_depths = list(np.linspace(2, 6, 5, dtype=int))  # 5 values: 2..6
print(n_estimators, learning_rates, max_depths)

random_seed = 7

# Exhaustive search over the grid. The nested lists are indexed as
# scores_3d[i_n_estimators][i_learning_rate][i_max_depth].
scores_3d = []
for n_est in n_estimators:
    scores_2d = []
    for lr in learning_rates:
        scores_1d = []
        for md in max_depths:
            # NOTE(review): positional order (learning rate, number of
            # estimators, max depth) is assumed from the variable names --
            # confirm against gboost.GradientBoostingClassifier.
            gb = gboost.GradientBoostingClassifier(lr, n_est, md)
            # Only the second element of the returned triple is kept;
            # presumably the mean cross-validation score -- verify against
            # utilities.kfold.
            _, score_mean, _ = utils.kfold(gb, X_train, y_train, random_seed)
            scores_1d.append(score_mean)
            print(n_est, lr, md, score_mean)
        scores_2d.append(scores_1d)
    scores_3d.append(scores_2d)

# Persist the score grid so later cells can reload it without re-running the
# search; np.save appends the ".npy" extension to the file name.
np.save("scores_params", scores_3d)
            

[50, 100, 150] [0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.39999999999999997, 0.44999999999999996, 0.49999999999999994, 0.5499999999999999, 0.6] [2, 3, 4, 5, 6]
50 0.05 2 0.548485751536599
50 0.05 3 0.5524529707580554
50 0.05 4 0.5667457965763051
50 0.05 5 0.5806082053539681
50 0.05 6 0.5913842090113277
50 0.1 2 0.5482771465822314
50 0.1 3 0.5614642984134509
50 0.1 4 0.5717963392539663
50 0.1 5 0.592697472019506
50 0.1 6 0.6003958753111296
50 0.15 2 0.55640325775919
50 0.15 3 0.5612472273489223
50 0.15 4 0.5746710916202442
50 0.15 5 0.5880868284258115
50 0.15 6 0.6116188895849913
50 0.2 2 0.5546538207555157
50 0.2 3 0.5632374405255761
50 0.2 4 0.5792898626796932
50 0.2 5 0.5898454088284597
50 0.2 6 0.6054413383226942
50 0.25 2 0.5515680929240252
50 0.25 3 0.571362197124909
50 0.25 4 0.57465720719958
50 0.25 5 0.5898376200071115
50 0.25 6 0.6122755210890805
50 0.3 2 0.5592688667264938
50 0.3 3 0.5601432465839246
50 0.3 4 0.5621290573832947
50 0.3 5 0.581045056638277
50 0.3 6 0.60281786010

In [48]:
# Reload the score grid written by the grid-search cell instead of
# recomputing it, then show it in full.
scores_path = "scores_params.npy"
sc = np.load(scores_path)
print(sc)

[[[0.54848575 0.55245297 0.5667458  0.58060821 0.59138421]
  [0.54827715 0.5614643  0.57179634 0.59269747 0.60039588]
  [0.55640326 0.56124723 0.57467109 0.58808683 0.61161889]
  [0.55465382 0.56323744 0.57928986 0.58984541 0.60544134]
  [0.55156809 0.5713622  0.57465721 0.58983762 0.61227552]
  [0.55926887 0.56014325 0.56212906 0.58104506 0.60281786]
  [0.54959854 0.55354408 0.57202729 0.58477726 0.58675697]
  [0.55816252 0.55772905 0.55751198 0.58478742 0.58521546]
  [0.54233055 0.54586024 0.56014866 0.56763034 0.57092636]
  [0.53947883 0.5401358  0.55068288 0.5473855  0.56587142]
  [0.54101966 0.52628626 0.54277451 0.54541188 0.54079683]
  [0.53485701 0.52914103 0.52826123 0.54365262 0.54387037]]

 [[0.55157385 0.56718434 0.57312484 0.59138116 0.60897238]
  [0.56235392 0.57290879 0.58674647 0.60148055 0.61710527]
  [0.56784368 0.57708258 0.58763711 0.60061836 0.62283174]
  [0.56279449 0.56894868 0.5907269  0.62261095 0.61007128]
  [0.5583823  0.57707818 0.58500076 0.60742478 0.61623

In [47]:
# Position(s) of the highest score in the grid, one index array per axis of
# `sc`; every tied maximum is reported.
best_score = sc.max()
np.nonzero(sc == best_score)

(array([2]), array([1]), array([4]))