In [1]:
import pandas as pd
import random

from google.colab import drive
drive.mount('/content/drive')

train = pd.read_parquet("/content/drive/MyDrive/Colab Notebooks/TP3/parquet/train.parquet")
test = pd.read_parquet("/content/drive/MyDrive/Colab Notebooks/TP3/parquet/test.parquet")

random.seed(666)

Mounted at /content/drive


In [2]:
train.at[18473, 'genre'] = "Children’s Music" #Arreglo una fila bug (género distinto por un caracter al resto).

In [3]:
#SETS DE ENTRENAMIENTO Y VALIDACIÓN

artistas = list(train.artist.unique())
artistas_train = random.sample(artistas, int(len(artistas) * 0.8)) #Divido los unique de artistas en 80% para train.
artistas_validation = list(set(artistas) - set(artistas_train)) #Divido los unique de artistas en 20% para train.

train_set = train[ train['artist'].isin(artistas_train) ] #Genero el set de entrenamiento.
validation_set = train[ train['artist'].isin(artistas_validation) ] #Genero el set de validación

In [4]:
#ELIMINO FILAS DE VALIDATION CON GÉNEROS QUE NO ESTÁN EN TRAIN

generos_train = list(train_set.genre.unique())
validation_set = validation_set[validation_set.genre.isin(generos_train)]

In [5]:
#DROPEO COLUMNAS QUE LEAKEAN INFORMACIÓN

train_set.drop(['artist', 'track_name', 'did', 'a_genres'], axis = 1, inplace = True)
validation_set.drop(['artist', 'track_name', 'did', 'a_genres'], axis = 1, inplace = True)
test.drop(['artist', 'track_name', 'did', 'a_genres'], axis = 1, inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


In [6]:
#Tengo que llenar language (uso el más común) y s-label (promedio)
lenguaje_mas_comun = train_set['language'].value_counts().index[0]
s_label_promedio = train_set['s-label'].mean()

train_set['language'].fillna(lenguaje_mas_comun, inplace = True)
train_set['s-label'].fillna(s_label_promedio, inplace = True)
train_set['lyric'].fillna('', inplace = True)

validation_set['language'].fillna(lenguaje_mas_comun, inplace = True)
validation_set['s-label'].fillna(s_label_promedio, inplace = True)
validation_set['lyric'].fillna('', inplace = True)

test['language'].fillna(lenguaje_mas_comun, inplace = True)
test['s-label'].fillna(s_label_promedio, inplace = True)
test['lyric'].fillna('', inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return self._update_inplace(result)


# **ENCODING DE VARIABLES CATEGÓRICAS**
#####Time Signature --> Mean Encoding

In [7]:
def MeanEncoding(df, matriz_probas_ts_genero):
  df = pd.merge(df, matriz_probas_ts_genero, on = 'time_signature', how = 'outer')
  y_df = df.loc[:, df.columns == 'genre']
  x_df = df.reset_index().drop(['index', 'genre', 'time_signature'],axis=1)
  return x_df, y_df

ts_por_genero = train_set.groupby(['genre', 'time_signature']).count()[ ['lyric'] ].rename(columns = {'lyric': 'count'}).reset_index(level=['time_signature'])
total_genero = ts_por_genero.groupby(['genre']).sum()

probas_ts_genero = pd.merge(ts_por_genero, total_genero, on = 'genre', how = 'outer')
probas_ts_genero['probas'] = probas_ts_genero['count_x'] / probas_ts_genero['count_y']
probas_ts_genero = probas_ts_genero[ ['time_signature', 'probas'] ].reset_index()

matriz_probas_ts_genero = probas_ts_genero.pivot_table(probas_ts_genero, index = 'time_signature', columns = 'genre').fillna(0)
matriz_probas_ts_genero = matriz_probas_ts_genero['probas'].add_prefix('ts_')

x_validation_set, y_validation_set = MeanEncoding(validation_set, matriz_probas_ts_genero)
x_test_set, y_test_set = MeanEncoding(test, matriz_probas_ts_genero)
x_train_set, y_train_set = MeanEncoding(train_set, matriz_probas_ts_genero)

#####Mode, Key, Language --> One Hot Encoding

In [8]:
from sklearn.preprocessing import OneHotEncoder

def OneHotEncoding(df, rango, features, **kwargs):
  ohe = OneHotEncoder(**kwargs)
  train = df[0]
  valid = df[1]
  test = df[2]
  
  encoded_train = ohe.fit_transform(train[features]).todense().astype(int)
  encoded_valid = ohe.transform(valid[features]).todense().astype(int)
  encoded_test = ohe.transform(test[features]).todense().astype(int)

  cols = dict(zip([x for x in range(rango)], ohe.get_feature_names_out()))

  train = train.reset_index().drop(['index'] + features,axis=1).join(pd.DataFrame(encoded_train).rename(columns = cols))
  valid = valid.reset_index().drop(['index'] + features,axis=1).join(pd.DataFrame(encoded_valid).rename(columns = cols))
  test = test.reset_index().drop(['index'] + features,axis=1).join(pd.DataFrame(encoded_test).rename(columns = cols))

  return train, valid, test
  

In [9]:
#MODE
x_train_set, x_validation_set, x_test_set = OneHotEncoding([x_train_set, x_validation_set, x_test_set], 1, ['mode'], drop = 'first', handle_unknown = 'ignore')

#KEY
x_train_set, x_validation_set, x_test_set = OneHotEncoding([x_train_set, x_validation_set, x_test_set], 12, ['key'], handle_unknown = 'ignore')

#LANGUAGE
x_train_set, x_validation_set, x_test_set = OneHotEncoding([x_train_set, x_validation_set, x_test_set], 13, ['language'], handle_unknown = 'ignore')

#####Lyric --> TFIDF / Count Vectorizer

In [10]:
from sklearn.feature_extraction.text import CountVectorizer#, TfidfVectorizer
from nltk import word_tokenize
from nltk.corpus import stopwords
import nltk
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('stopwords')

#Agrego features por largo de lyric y cantidad de palabras.
x_train_set['lyric_len'] = x_train_set['lyric'].map(lambda x: len(x))
x_train_set['lyric_count'] = x_train_set['lyric'].map(lambda x: len(word_tokenize(x)))

x_validation_set['lyric_len'] = x_validation_set['lyric'].map(lambda x: len(x))
x_validation_set['lyric_count'] = x_validation_set['lyric'].map(lambda x: len(word_tokenize(x)))

x_test_set['lyric_len'] = x_test_set['lyric'].map(lambda x: len(x))
x_test_set['lyric_count'] = x_test_set['lyric'].map(lambda x: len(word_tokenize(x)))

#CountVectorizer
stopwords = set(stopwords.words('english'))
countVec = CountVectorizer(lowercase=True, stop_words=stopwords, max_features=10)
matriz = countVec.fit_transform(x_train_set['lyric'])
palabras_mas_frecuentes = pd.DataFrame(matriz.toarray(), columns=countVec.get_feature_names()).sum().index.tolist()

for palabra in palabras_mas_frecuentes:
  x_train_set["lyric_"+palabra] = x_train_set["lyric"].apply(lambda x: x.count(palabra))
  x_validation_set["lyric_"+palabra] = x_validation_set["lyric"].apply(lambda x: x.count(palabra))
  x_test_set["lyric_"+palabra] = x_test_set["lyric"].apply(lambda x: x.count(palabra))

#Dropeo categoría lyric.
x_train_set = x_train_set.reset_index().drop(['index', 'lyric'],axis=1)
x_validation_set = x_validation_set.reset_index().drop(['index', 'lyric'],axis=1)
x_test_set = x_test_set.reset_index().drop(['index', 'lyric'],axis=1)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


# **ENTRENAMIENTO DEL MODELO Y MÉTRICAS**
### Búsqueda de hiper-parámetros
### Score para validación y test

In [None]:
#El score con el set de validación es 0.5017044612420335
#El score con el set de pruebas es 0.494583615436696

In [12]:
import xgboost as xgb
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import top_k_accuracy_score

modelo = xgb.XGBClassifier(random_state = 666)
hiper_parametros = {'n_estimators': [25, 50, 100, 150, 200], 'max_depth': [2, 3, 4, 5, 6], 'learning_rate': [0.05, 0.1, 0.15, 0.2, 0.25], 'subsample': [0.25, 0.5, 0.75, 1, 1.25],
                    'gamma': [0, 0.5, 0.75, 1, 2], 'objective': ['reg:logistic', 'binary:logistic'], 'colsample_bytree': [0.1, 0.25, 0.5, 0.75, 1]}

random_search = RandomizedSearchCV(modelo, hiper_parametros, random_state = 666, n_iter = 30, cv = 3, verbose = 10)
search = random_search.fit(x_train_set, y_train_set)

Fitting 3 folds for each of 30 candidates, totalling 90 fits
[CV 1/3; 1/30] START colsample_bytree=0.75, gamma=1, learning_rate=0.1, max_depth=2, n_estimators=50, objective=reg:logistic, subsample=1.25


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 1/3; 1/30] END colsample_bytree=0.75, gamma=1, learning_rate=0.1, max_depth=2, n_estimators=50, objective=reg:logistic, subsample=1.25;, score=nan total time=   0.4s
[CV 2/3; 1/30] START colsample_bytree=0.75, gamma=1, learning_rate=0.1, max_depth=2, n_estimators=50, objective=reg:logistic, subsample=1.25


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 2/3; 1/30] END colsample_bytree=0.75, gamma=1, learning_rate=0.1, max_depth=2, n_estimators=50, objective=reg:logistic, subsample=1.25;, score=nan total time=   0.3s
[CV 3/3; 1/30] START colsample_bytree=0.75, gamma=1, learning_rate=0.1, max_depth=2, n_estimators=50, objective=reg:logistic, subsample=1.25


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 3/3; 1/30] END colsample_bytree=0.75, gamma=1, learning_rate=0.1, max_depth=2, n_estimators=50, objective=reg:logistic, subsample=1.25;, score=nan total time=   0.3s
[CV 1/3; 2/30] START colsample_bytree=0.25, gamma=1, learning_rate=0.05, max_depth=4, n_estimators=50, objective=reg:logistic, subsample=1.25


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 1/3; 2/30] END colsample_bytree=0.25, gamma=1, learning_rate=0.05, max_depth=4, n_estimators=50, objective=reg:logistic, subsample=1.25;, score=nan total time=   0.3s
[CV 2/3; 2/30] START colsample_bytree=0.25, gamma=1, learning_rate=0.05, max_depth=4, n_estimators=50, objective=reg:logistic, subsample=1.25


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 2/3; 2/30] END colsample_bytree=0.25, gamma=1, learning_rate=0.05, max_depth=4, n_estimators=50, objective=reg:logistic, subsample=1.25;, score=nan total time=   0.4s
[CV 3/3; 2/30] START colsample_bytree=0.25, gamma=1, learning_rate=0.05, max_depth=4, n_estimators=50, objective=reg:logistic, subsample=1.25


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 3/3; 2/30] END colsample_bytree=0.25, gamma=1, learning_rate=0.05, max_depth=4, n_estimators=50, objective=reg:logistic, subsample=1.25;, score=nan total time=   0.3s
[CV 1/3; 3/30] START colsample_bytree=0.25, gamma=0.5, learning_rate=0.05, max_depth=3, n_estimators=200, objective=binary:logistic, subsample=0.75


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 1/3; 3/30] END colsample_bytree=0.25, gamma=0.5, learning_rate=0.05, max_depth=3, n_estimators=200, objective=binary:logistic, subsample=0.75;, score=0.408 total time= 1.4min
[CV 2/3; 3/30] START colsample_bytree=0.25, gamma=0.5, learning_rate=0.05, max_depth=3, n_estimators=200, objective=binary:logistic, subsample=0.75


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 2/3; 3/30] END colsample_bytree=0.25, gamma=0.5, learning_rate=0.05, max_depth=3, n_estimators=200, objective=binary:logistic, subsample=0.75;, score=0.342 total time= 1.4min
[CV 3/3; 3/30] START colsample_bytree=0.25, gamma=0.5, learning_rate=0.05, max_depth=3, n_estimators=200, objective=binary:logistic, subsample=0.75


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 3/3; 3/30] END colsample_bytree=0.25, gamma=0.5, learning_rate=0.05, max_depth=3, n_estimators=200, objective=binary:logistic, subsample=0.75;, score=0.419 total time= 1.3min
[CV 1/3; 4/30] START colsample_bytree=1, gamma=0.5, learning_rate=0.1, max_depth=2, n_estimators=100, objective=binary:logistic, subsample=0.5


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 1/3; 4/30] END colsample_bytree=1, gamma=0.5, learning_rate=0.1, max_depth=2, n_estimators=100, objective=binary:logistic, subsample=0.5;, score=0.383 total time= 1.2min
[CV 2/3; 4/30] START colsample_bytree=1, gamma=0.5, learning_rate=0.1, max_depth=2, n_estimators=100, objective=binary:logistic, subsample=0.5


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 2/3; 4/30] END colsample_bytree=1, gamma=0.5, learning_rate=0.1, max_depth=2, n_estimators=100, objective=binary:logistic, subsample=0.5;, score=0.339 total time= 1.2min
[CV 3/3; 4/30] START colsample_bytree=1, gamma=0.5, learning_rate=0.1, max_depth=2, n_estimators=100, objective=binary:logistic, subsample=0.5


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 3/3; 4/30] END colsample_bytree=1, gamma=0.5, learning_rate=0.1, max_depth=2, n_estimators=100, objective=binary:logistic, subsample=0.5;, score=0.392 total time= 1.2min
[CV 1/3; 5/30] START colsample_bytree=0.1, gamma=0.75, learning_rate=0.1, max_depth=4, n_estimators=100, objective=binary:logistic, subsample=1


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 1/3; 5/30] END colsample_bytree=0.1, gamma=0.75, learning_rate=0.1, max_depth=4, n_estimators=100, objective=binary:logistic, subsample=1;, score=0.401 total time=  29.5s
[CV 2/3; 5/30] START colsample_bytree=0.1, gamma=0.75, learning_rate=0.1, max_depth=4, n_estimators=100, objective=binary:logistic, subsample=1


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 2/3; 5/30] END colsample_bytree=0.1, gamma=0.75, learning_rate=0.1, max_depth=4, n_estimators=100, objective=binary:logistic, subsample=1;, score=0.323 total time=  29.5s
[CV 3/3; 5/30] START colsample_bytree=0.1, gamma=0.75, learning_rate=0.1, max_depth=4, n_estimators=100, objective=binary:logistic, subsample=1


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 3/3; 5/30] END colsample_bytree=0.1, gamma=0.75, learning_rate=0.1, max_depth=4, n_estimators=100, objective=binary:logistic, subsample=1;, score=0.405 total time=  29.0s
[CV 1/3; 6/30] START colsample_bytree=0.25, gamma=0.5, learning_rate=0.2, max_depth=2, n_estimators=50, objective=reg:logistic, subsample=0.75


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 1/3; 6/30] END colsample_bytree=0.25, gamma=0.5, learning_rate=0.2, max_depth=2, n_estimators=50, objective=reg:logistic, subsample=0.75;, score=0.374 total time=  15.8s
[CV 2/3; 6/30] START colsample_bytree=0.25, gamma=0.5, learning_rate=0.2, max_depth=2, n_estimators=50, objective=reg:logistic, subsample=0.75


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 2/3; 6/30] END colsample_bytree=0.25, gamma=0.5, learning_rate=0.2, max_depth=2, n_estimators=50, objective=reg:logistic, subsample=0.75;, score=0.332 total time=  17.0s
[CV 3/3; 6/30] START colsample_bytree=0.25, gamma=0.5, learning_rate=0.2, max_depth=2, n_estimators=50, objective=reg:logistic, subsample=0.75


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 3/3; 6/30] END colsample_bytree=0.25, gamma=0.5, learning_rate=0.2, max_depth=2, n_estimators=50, objective=reg:logistic, subsample=0.75;, score=0.381 total time=  15.8s
[CV 1/3; 7/30] START colsample_bytree=1, gamma=0.5, learning_rate=0.1, max_depth=3, n_estimators=50, objective=binary:logistic, subsample=1.25
[CV 1/3; 7/30] END colsample_bytree=1, gamma=0.5, learning_rate=0.1, max_depth=3, n_estimators=50, objective=binary:logistic, subsample=1.25;, score=nan total time=   0.2s
[CV 2/3; 7/30] START colsample_bytree=1, gamma=0.5, learning_rate=0.1, max_depth=3, n_estimators=50, objective=binary:logistic, subsample=1.25


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 2/3; 7/30] END colsample_bytree=1, gamma=0.5, learning_rate=0.1, max_depth=3, n_estimators=50, objective=binary:logistic, subsample=1.25;, score=nan total time=   0.2s
[CV 3/3; 7/30] START colsample_bytree=1, gamma=0.5, learning_rate=0.1, max_depth=3, n_estimators=50, objective=binary:logistic, subsample=1.25
[CV 3/3; 7/30] END colsample_bytree=1, gamma=0.5, learning_rate=0.1, max_depth=3, n_estimators=50, objective=binary:logistic, subsample=1.25;, score=nan total time=   0.2s
[CV 1/3; 8/30] START colsample_bytree=0.25, gamma=1, learning_rate=0.15, max_depth=5, n_estimators=25, objective=reg:logistic, subsample=1.25


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 1/3; 8/30] END colsample_bytree=0.25, gamma=1, learning_rate=0.15, max_depth=5, n_estimators=25, objective=reg:logistic, subsample=1.25;, score=nan total time=   0.2s
[CV 2/3; 8/30] START colsample_bytree=0.25, gamma=1, learning_rate=0.15, max_depth=5, n_estimators=25, objective=reg:logistic, subsample=1.25
[CV 2/3; 8/30] END colsample_bytree=0.25, gamma=1, learning_rate=0.15, max_depth=5, n_estimators=25, objective=reg:logistic, subsample=1.25;, score=nan total time=   0.2s
[CV 3/3; 8/30] START colsample_bytree=0.25, gamma=1, learning_rate=0.15, max_depth=5, n_estimators=25, objective=reg:logistic, subsample=1.25


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 3/3; 8/30] END colsample_bytree=0.25, gamma=1, learning_rate=0.15, max_depth=5, n_estimators=25, objective=reg:logistic, subsample=1.25;, score=nan total time=   0.2s
[CV 1/3; 9/30] START colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=2, n_estimators=200, objective=reg:logistic, subsample=0.75


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 1/3; 9/30] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=2, n_estimators=200, objective=reg:logistic, subsample=0.75;, score=0.376 total time= 1.4min
[CV 2/3; 9/30] START colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=2, n_estimators=200, objective=reg:logistic, subsample=0.75


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 2/3; 9/30] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=2, n_estimators=200, objective=reg:logistic, subsample=0.75;, score=0.339 total time= 1.4min
[CV 3/3; 9/30] START colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=2, n_estimators=200, objective=reg:logistic, subsample=0.75


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 3/3; 9/30] END colsample_bytree=0.5, gamma=1, learning_rate=0.05, max_depth=2, n_estimators=200, objective=reg:logistic, subsample=0.75;, score=0.393 total time= 1.5min
[CV 1/3; 10/30] START colsample_bytree=0.1, gamma=0.5, learning_rate=0.05, max_depth=6, n_estimators=50, objective=binary:logistic, subsample=1.25
[CV 1/3; 10/30] END colsample_bytree=0.1, gamma=0.5, learning_rate=0.05, max_depth=6, n_estimators=50, objective=binary:logistic, subsample=1.25;, score=nan total time=   0.2s
[CV 2/3; 10/30] START colsample_bytree=0.1, gamma=0.5, learning_rate=0.05, max_depth=6, n_estimators=50, objective=binary:logistic, subsample=1.25


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 2/3; 10/30] END colsample_bytree=0.1, gamma=0.5, learning_rate=0.05, max_depth=6, n_estimators=50, objective=binary:logistic, subsample=1.25;, score=nan total time=   0.2s
[CV 3/3; 10/30] START colsample_bytree=0.1, gamma=0.5, learning_rate=0.05, max_depth=6, n_estimators=50, objective=binary:logistic, subsample=1.25
[CV 3/3; 10/30] END colsample_bytree=0.1, gamma=0.5, learning_rate=0.05, max_depth=6, n_estimators=50, objective=binary:logistic, subsample=1.25;, score=nan total time=   0.2s
[CV 1/3; 11/30] START colsample_bytree=0.1, gamma=0, learning_rate=0.25, max_depth=4, n_estimators=50, objective=binary:logistic, subsample=0.25


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 1/3; 11/30] END colsample_bytree=0.1, gamma=0, learning_rate=0.25, max_depth=4, n_estimators=50, objective=binary:logistic, subsample=0.25;, score=0.354 total time=  15.4s
[CV 2/3; 11/30] START colsample_bytree=0.1, gamma=0, learning_rate=0.25, max_depth=4, n_estimators=50, objective=binary:logistic, subsample=0.25


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 2/3; 11/30] END colsample_bytree=0.1, gamma=0, learning_rate=0.25, max_depth=4, n_estimators=50, objective=binary:logistic, subsample=0.25;, score=0.305 total time=  15.1s
[CV 3/3; 11/30] START colsample_bytree=0.1, gamma=0, learning_rate=0.25, max_depth=4, n_estimators=50, objective=binary:logistic, subsample=0.25


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 3/3; 11/30] END colsample_bytree=0.1, gamma=0, learning_rate=0.25, max_depth=4, n_estimators=50, objective=binary:logistic, subsample=0.25;, score=0.394 total time=  14.9s
[CV 1/3; 12/30] START colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=4, n_estimators=25, objective=binary:logistic, subsample=0.5


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 1/3; 12/30] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=4, n_estimators=25, objective=binary:logistic, subsample=0.5;, score=0.377 total time=  19.5s
[CV 2/3; 12/30] START colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=4, n_estimators=25, objective=binary:logistic, subsample=0.5


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 2/3; 12/30] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=4, n_estimators=25, objective=binary:logistic, subsample=0.5;, score=0.330 total time=  19.5s
[CV 3/3; 12/30] START colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=4, n_estimators=25, objective=binary:logistic, subsample=0.5


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 3/3; 12/30] END colsample_bytree=0.5, gamma=1, learning_rate=0.1, max_depth=4, n_estimators=25, objective=binary:logistic, subsample=0.5;, score=0.407 total time=  19.5s
[CV 1/3; 13/30] START colsample_bytree=1, gamma=0, learning_rate=0.05, max_depth=4, n_estimators=150, objective=binary:logistic, subsample=1.25
[CV 1/3; 13/30] END colsample_bytree=1, gamma=0, learning_rate=0.05, max_depth=4, n_estimators=150, objective=binary:logistic, subsample=1.25;, score=nan total time=   0.2s
[CV 2/3; 13/30] START colsample_bytree=1, gamma=0, learning_rate=0.05, max_depth=4, n_estimators=150, objective=binary:logistic, subsample=1.25


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 2/3; 13/30] END colsample_bytree=1, gamma=0, learning_rate=0.05, max_depth=4, n_estimators=150, objective=binary:logistic, subsample=1.25;, score=nan total time=   0.2s
[CV 3/3; 13/30] START colsample_bytree=1, gamma=0, learning_rate=0.05, max_depth=4, n_estimators=150, objective=binary:logistic, subsample=1.25
[CV 3/3; 13/30] END colsample_bytree=1, gamma=0, learning_rate=0.05, max_depth=4, n_estimators=150, objective=binary:logistic, subsample=1.25;, score=nan total time=   0.2s
[CV 1/3; 14/30] START colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=4, n_estimators=100, objective=reg:logistic, subsample=0.75


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 1/3; 14/30] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=4, n_estimators=100, objective=reg:logistic, subsample=0.75;, score=0.422 total time= 1.2min
[CV 2/3; 14/30] START colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=4, n_estimators=100, objective=reg:logistic, subsample=0.75


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 2/3; 14/30] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=4, n_estimators=100, objective=reg:logistic, subsample=0.75;, score=0.353 total time= 1.2min
[CV 3/3; 14/30] START colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=4, n_estimators=100, objective=reg:logistic, subsample=0.75


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 3/3; 14/30] END colsample_bytree=0.5, gamma=0, learning_rate=0.2, max_depth=4, n_estimators=100, objective=reg:logistic, subsample=0.75;, score=0.451 total time= 1.2min
[CV 1/3; 15/30] START colsample_bytree=0.25, gamma=1, learning_rate=0.1, max_depth=4, n_estimators=200, objective=binary:logistic, subsample=0.5


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 1/3; 15/30] END colsample_bytree=0.25, gamma=1, learning_rate=0.1, max_depth=4, n_estimators=200, objective=binary:logistic, subsample=0.5;, score=0.413 total time= 1.7min
[CV 2/3; 15/30] START colsample_bytree=0.25, gamma=1, learning_rate=0.1, max_depth=4, n_estimators=200, objective=binary:logistic, subsample=0.5


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 2/3; 15/30] END colsample_bytree=0.25, gamma=1, learning_rate=0.1, max_depth=4, n_estimators=200, objective=binary:logistic, subsample=0.5;, score=0.345 total time= 1.7min
[CV 3/3; 15/30] START colsample_bytree=0.25, gamma=1, learning_rate=0.1, max_depth=4, n_estimators=200, objective=binary:logistic, subsample=0.5


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 3/3; 15/30] END colsample_bytree=0.25, gamma=1, learning_rate=0.1, max_depth=4, n_estimators=200, objective=binary:logistic, subsample=0.5;, score=0.450 total time= 1.7min
[CV 1/3; 16/30] START colsample_bytree=1, gamma=0.5, learning_rate=0.25, max_depth=5, n_estimators=25, objective=binary:logistic, subsample=0.5


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 1/3; 16/30] END colsample_bytree=1, gamma=0.5, learning_rate=0.25, max_depth=5, n_estimators=25, objective=binary:logistic, subsample=0.5;, score=0.398 total time=  38.4s
[CV 2/3; 16/30] START colsample_bytree=1, gamma=0.5, learning_rate=0.25, max_depth=5, n_estimators=25, objective=binary:logistic, subsample=0.5


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 2/3; 16/30] END colsample_bytree=1, gamma=0.5, learning_rate=0.25, max_depth=5, n_estimators=25, objective=binary:logistic, subsample=0.5;, score=0.336 total time=  38.4s
[CV 3/3; 16/30] START colsample_bytree=1, gamma=0.5, learning_rate=0.25, max_depth=5, n_estimators=25, objective=binary:logistic, subsample=0.5


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 3/3; 16/30] END colsample_bytree=1, gamma=0.5, learning_rate=0.25, max_depth=5, n_estimators=25, objective=binary:logistic, subsample=0.5;, score=0.432 total time=  38.5s
[CV 1/3; 17/30] START colsample_bytree=0.75, gamma=0, learning_rate=0.1, max_depth=5, n_estimators=50, objective=binary:logistic, subsample=1.25
[CV 1/3; 17/30] END colsample_bytree=0.75, gamma=0, learning_rate=0.1, max_depth=5, n_estimators=50, objective=binary:logistic, subsample=1.25;, score=nan total time=   0.2s
[CV 2/3; 17/30] START colsample_bytree=0.75, gamma=0, learning_rate=0.1, max_depth=5, n_estimators=50, objective=binary:logistic, subsample=1.25


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 2/3; 17/30] END colsample_bytree=0.75, gamma=0, learning_rate=0.1, max_depth=5, n_estimators=50, objective=binary:logistic, subsample=1.25;, score=nan total time=   0.2s
[CV 3/3; 17/30] START colsample_bytree=0.75, gamma=0, learning_rate=0.1, max_depth=5, n_estimators=50, objective=binary:logistic, subsample=1.25
[CV 3/3; 17/30] END colsample_bytree=0.75, gamma=0, learning_rate=0.1, max_depth=5, n_estimators=50, objective=binary:logistic, subsample=1.25;, score=nan total time=   0.2s
[CV 1/3; 18/30] START colsample_bytree=0.25, gamma=0.75, learning_rate=0.05, max_depth=2, n_estimators=50, objective=binary:logistic, subsample=0.75


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 1/3; 18/30] END colsample_bytree=0.25, gamma=0.75, learning_rate=0.05, max_depth=2, n_estimators=50, objective=binary:logistic, subsample=0.75;, score=0.341 total time=  15.8s
[CV 2/3; 18/30] START colsample_bytree=0.25, gamma=0.75, learning_rate=0.05, max_depth=2, n_estimators=50, objective=binary:logistic, subsample=0.75


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 2/3; 18/30] END colsample_bytree=0.25, gamma=0.75, learning_rate=0.05, max_depth=2, n_estimators=50, objective=binary:logistic, subsample=0.75;, score=0.310 total time=  17.0s
[CV 3/3; 18/30] START colsample_bytree=0.25, gamma=0.75, learning_rate=0.05, max_depth=2, n_estimators=50, objective=binary:logistic, subsample=0.75


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 3/3; 18/30] END colsample_bytree=0.25, gamma=0.75, learning_rate=0.05, max_depth=2, n_estimators=50, objective=binary:logistic, subsample=0.75;, score=0.350 total time=  15.9s
[CV 1/3; 19/30] START colsample_bytree=0.25, gamma=1, learning_rate=0.15, max_depth=4, n_estimators=200, objective=reg:logistic, subsample=0.25


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 1/3; 19/30] END colsample_bytree=0.25, gamma=1, learning_rate=0.15, max_depth=4, n_estimators=200, objective=reg:logistic, subsample=0.25;, score=0.404 total time= 1.4min
[CV 2/3; 19/30] START colsample_bytree=0.25, gamma=1, learning_rate=0.15, max_depth=4, n_estimators=200, objective=reg:logistic, subsample=0.25


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 2/3; 19/30] END colsample_bytree=0.25, gamma=1, learning_rate=0.15, max_depth=4, n_estimators=200, objective=reg:logistic, subsample=0.25;, score=0.338 total time= 1.4min
[CV 3/3; 19/30] START colsample_bytree=0.25, gamma=1, learning_rate=0.15, max_depth=4, n_estimators=200, objective=reg:logistic, subsample=0.25


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 3/3; 19/30] END colsample_bytree=0.25, gamma=1, learning_rate=0.15, max_depth=4, n_estimators=200, objective=reg:logistic, subsample=0.25;, score=0.439 total time= 1.4min
[CV 1/3; 20/30] START colsample_bytree=1, gamma=0.75, learning_rate=0.05, max_depth=5, n_estimators=200, objective=reg:logistic, subsample=1.25
[CV 1/3; 20/30] END colsample_bytree=1, gamma=0.75, learning_rate=0.05, max_depth=5, n_estimators=200, objective=reg:logistic, subsample=1.25;, score=nan total time=   0.2s
[CV 2/3; 20/30] START colsample_bytree=1, gamma=0.75, learning_rate=0.05, max_depth=5, n_estimators=200, objective=reg:logistic, subsample=1.25


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 2/3; 20/30] END colsample_bytree=1, gamma=0.75, learning_rate=0.05, max_depth=5, n_estimators=200, objective=reg:logistic, subsample=1.25;, score=nan total time=   0.2s
[CV 3/3; 20/30] START colsample_bytree=1, gamma=0.75, learning_rate=0.05, max_depth=5, n_estimators=200, objective=reg:logistic, subsample=1.25
[CV 3/3; 20/30] END colsample_bytree=1, gamma=0.75, learning_rate=0.05, max_depth=5, n_estimators=200, objective=reg:logistic, subsample=1.25;, score=nan total time=   0.2s
[CV 1/3; 21/30] START colsample_bytree=0.75, gamma=0.5, learning_rate=0.2, max_depth=6, n_estimators=50, objective=binary:logistic, subsample=1


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 1/3; 21/30] END colsample_bytree=0.75, gamma=0.5, learning_rate=0.2, max_depth=6, n_estimators=50, objective=binary:logistic, subsample=1;, score=0.411 total time=  55.9s
[CV 2/3; 21/30] START colsample_bytree=0.75, gamma=0.5, learning_rate=0.2, max_depth=6, n_estimators=50, objective=binary:logistic, subsample=1


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 2/3; 21/30] END colsample_bytree=0.75, gamma=0.5, learning_rate=0.2, max_depth=6, n_estimators=50, objective=binary:logistic, subsample=1;, score=0.348 total time=  55.8s
[CV 3/3; 21/30] START colsample_bytree=0.75, gamma=0.5, learning_rate=0.2, max_depth=6, n_estimators=50, objective=binary:logistic, subsample=1


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 3/3; 21/30] END colsample_bytree=0.75, gamma=0.5, learning_rate=0.2, max_depth=6, n_estimators=50, objective=binary:logistic, subsample=1;, score=0.458 total time=  56.4s
[CV 1/3; 22/30] START colsample_bytree=1, gamma=0, learning_rate=0.1, max_depth=4, n_estimators=150, objective=reg:logistic, subsample=1


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 1/3; 22/30] END colsample_bytree=1, gamma=0, learning_rate=0.1, max_depth=4, n_estimators=150, objective=reg:logistic, subsample=1;, score=0.424 total time= 2.4min
[CV 2/3; 22/30] START colsample_bytree=1, gamma=0, learning_rate=0.1, max_depth=4, n_estimators=150, objective=reg:logistic, subsample=1


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 2/3; 22/30] END colsample_bytree=1, gamma=0, learning_rate=0.1, max_depth=4, n_estimators=150, objective=reg:logistic, subsample=1;, score=0.350 total time= 2.4min
[CV 3/3; 22/30] START colsample_bytree=1, gamma=0, learning_rate=0.1, max_depth=4, n_estimators=150, objective=reg:logistic, subsample=1


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 3/3; 22/30] END colsample_bytree=1, gamma=0, learning_rate=0.1, max_depth=4, n_estimators=150, objective=reg:logistic, subsample=1;, score=0.445 total time= 2.4min
[CV 1/3; 23/30] START colsample_bytree=0.25, gamma=2, learning_rate=0.05, max_depth=3, n_estimators=150, objective=reg:logistic, subsample=1


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 1/3; 23/30] END colsample_bytree=0.25, gamma=2, learning_rate=0.05, max_depth=3, n_estimators=150, objective=reg:logistic, subsample=1;, score=0.400 total time=  51.5s
[CV 2/3; 23/30] START colsample_bytree=0.25, gamma=2, learning_rate=0.05, max_depth=3, n_estimators=150, objective=reg:logistic, subsample=1


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 2/3; 23/30] END colsample_bytree=0.25, gamma=2, learning_rate=0.05, max_depth=3, n_estimators=150, objective=reg:logistic, subsample=1;, score=0.342 total time=  51.7s
[CV 3/3; 23/30] START colsample_bytree=0.25, gamma=2, learning_rate=0.05, max_depth=3, n_estimators=150, objective=reg:logistic, subsample=1


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 3/3; 23/30] END colsample_bytree=0.25, gamma=2, learning_rate=0.05, max_depth=3, n_estimators=150, objective=reg:logistic, subsample=1;, score=0.410 total time=  52.8s
[CV 1/3; 24/30] START colsample_bytree=0.75, gamma=2, learning_rate=0.15, max_depth=2, n_estimators=100, objective=reg:logistic, subsample=1.25
[CV 1/3; 24/30] END colsample_bytree=0.75, gamma=2, learning_rate=0.15, max_depth=2, n_estimators=100, objective=reg:logistic, subsample=1.25;, score=nan total time=   0.2s
[CV 2/3; 24/30] START colsample_bytree=0.75, gamma=2, learning_rate=0.15, max_depth=2, n_estimators=100, objective=reg:logistic, subsample=1.25


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 2/3; 24/30] END colsample_bytree=0.75, gamma=2, learning_rate=0.15, max_depth=2, n_estimators=100, objective=reg:logistic, subsample=1.25;, score=nan total time=   0.2s
[CV 3/3; 24/30] START colsample_bytree=0.75, gamma=2, learning_rate=0.15, max_depth=2, n_estimators=100, objective=reg:logistic, subsample=1.25
[CV 3/3; 24/30] END colsample_bytree=0.75, gamma=2, learning_rate=0.15, max_depth=2, n_estimators=100, objective=reg:logistic, subsample=1.25;, score=nan total time=   0.2s
[CV 1/3; 25/30] START colsample_bytree=0.1, gamma=0, learning_rate=0.05, max_depth=6, n_estimators=25, objective=reg:logistic, subsample=1.25


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 1/3; 25/30] END colsample_bytree=0.1, gamma=0, learning_rate=0.05, max_depth=6, n_estimators=25, objective=reg:logistic, subsample=1.25;, score=nan total time=   0.2s
[CV 2/3; 25/30] START colsample_bytree=0.1, gamma=0, learning_rate=0.05, max_depth=6, n_estimators=25, objective=reg:logistic, subsample=1.25
[CV 2/3; 25/30] END colsample_bytree=0.1, gamma=0, learning_rate=0.05, max_depth=6, n_estimators=25, objective=reg:logistic, subsample=1.25;, score=nan total time=   0.2s
[CV 3/3; 25/30] START colsample_bytree=0.1, gamma=0, learning_rate=0.05, max_depth=6, n_estimators=25, objective=reg:logistic, subsample=1.25


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 3/3; 25/30] END colsample_bytree=0.1, gamma=0, learning_rate=0.05, max_depth=6, n_estimators=25, objective=reg:logistic, subsample=1.25;, score=nan total time=   0.2s
[CV 1/3; 26/30] START colsample_bytree=0.1, gamma=1, learning_rate=0.1, max_depth=2, n_estimators=200, objective=binary:logistic, subsample=0.75


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 1/3; 26/30] END colsample_bytree=0.1, gamma=1, learning_rate=0.1, max_depth=2, n_estimators=200, objective=binary:logistic, subsample=0.75;, score=0.389 total time=  48.6s
[CV 2/3; 26/30] START colsample_bytree=0.1, gamma=1, learning_rate=0.1, max_depth=2, n_estimators=200, objective=binary:logistic, subsample=0.75


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 2/3; 26/30] END colsample_bytree=0.1, gamma=1, learning_rate=0.1, max_depth=2, n_estimators=200, objective=binary:logistic, subsample=0.75;, score=0.333 total time=  48.7s
[CV 3/3; 26/30] START colsample_bytree=0.1, gamma=1, learning_rate=0.1, max_depth=2, n_estimators=200, objective=binary:logistic, subsample=0.75


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 3/3; 26/30] END colsample_bytree=0.1, gamma=1, learning_rate=0.1, max_depth=2, n_estimators=200, objective=binary:logistic, subsample=0.75;, score=0.380 total time=  48.6s
[CV 1/3; 27/30] START colsample_bytree=0.5, gamma=2, learning_rate=0.25, max_depth=3, n_estimators=100, objective=reg:logistic, subsample=1


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 1/3; 27/30] END colsample_bytree=0.5, gamma=2, learning_rate=0.25, max_depth=3, n_estimators=100, objective=reg:logistic, subsample=1;, score=0.417 total time=  48.5s
[CV 2/3; 27/30] START colsample_bytree=0.5, gamma=2, learning_rate=0.25, max_depth=3, n_estimators=100, objective=reg:logistic, subsample=1


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 2/3; 27/30] END colsample_bytree=0.5, gamma=2, learning_rate=0.25, max_depth=3, n_estimators=100, objective=reg:logistic, subsample=1;, score=0.345 total time=  47.3s
[CV 3/3; 27/30] START colsample_bytree=0.5, gamma=2, learning_rate=0.25, max_depth=3, n_estimators=100, objective=reg:logistic, subsample=1


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 3/3; 27/30] END colsample_bytree=0.5, gamma=2, learning_rate=0.25, max_depth=3, n_estimators=100, objective=reg:logistic, subsample=1;, score=0.435 total time=  47.0s
[CV 1/3; 28/30] START colsample_bytree=0.25, gamma=0.75, learning_rate=0.1, max_depth=3, n_estimators=100, objective=binary:logistic, subsample=0.75


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 1/3; 28/30] END colsample_bytree=0.25, gamma=0.75, learning_rate=0.1, max_depth=3, n_estimators=100, objective=binary:logistic, subsample=0.75;, score=0.403 total time=  39.5s
[CV 2/3; 28/30] START colsample_bytree=0.25, gamma=0.75, learning_rate=0.1, max_depth=3, n_estimators=100, objective=binary:logistic, subsample=0.75


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 2/3; 28/30] END colsample_bytree=0.25, gamma=0.75, learning_rate=0.1, max_depth=3, n_estimators=100, objective=binary:logistic, subsample=0.75;, score=0.340 total time=  39.5s
[CV 3/3; 28/30] START colsample_bytree=0.25, gamma=0.75, learning_rate=0.1, max_depth=3, n_estimators=100, objective=binary:logistic, subsample=0.75


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 3/3; 28/30] END colsample_bytree=0.25, gamma=0.75, learning_rate=0.1, max_depth=3, n_estimators=100, objective=binary:logistic, subsample=0.75;, score=0.417 total time=  40.6s
[CV 1/3; 29/30] START colsample_bytree=0.5, gamma=2, learning_rate=0.25, max_depth=6, n_estimators=25, objective=binary:logistic, subsample=1


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 1/3; 29/30] END colsample_bytree=0.5, gamma=2, learning_rate=0.25, max_depth=6, n_estimators=25, objective=binary:logistic, subsample=1;, score=0.406 total time=  21.4s
[CV 2/3; 29/30] START colsample_bytree=0.5, gamma=2, learning_rate=0.25, max_depth=6, n_estimators=25, objective=binary:logistic, subsample=1


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 2/3; 29/30] END colsample_bytree=0.5, gamma=2, learning_rate=0.25, max_depth=6, n_estimators=25, objective=binary:logistic, subsample=1;, score=0.346 total time=  21.5s
[CV 3/3; 29/30] START colsample_bytree=0.5, gamma=2, learning_rate=0.25, max_depth=6, n_estimators=25, objective=binary:logistic, subsample=1


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 3/3; 29/30] END colsample_bytree=0.5, gamma=2, learning_rate=0.25, max_depth=6, n_estimators=25, objective=binary:logistic, subsample=1;, score=0.438 total time=  21.3s
[CV 1/3; 30/30] START colsample_bytree=1, gamma=0, learning_rate=0.15, max_depth=6, n_estimators=150, objective=binary:logistic, subsample=1.25
[CV 1/3; 30/30] END colsample_bytree=1, gamma=0, learning_rate=0.15, max_depth=6, n_estimators=150, objective=binary:logistic, subsample=1.25;, score=nan total time=   0.2s
[CV 2/3; 30/30] START colsample_bytree=1, gamma=0, learning_rate=0.15, max_depth=6, n_estimators=150, objective=binary:logistic, subsample=1.25


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[CV 2/3; 30/30] END colsample_bytree=1, gamma=0, learning_rate=0.15, max_depth=6, n_estimators=150, objective=binary:logistic, subsample=1.25;, score=nan total time=   0.2s
[CV 3/3; 30/30] START colsample_bytree=1, gamma=0, learning_rate=0.15, max_depth=6, n_estimators=150, objective=binary:logistic, subsample=1.25
[CV 3/3; 30/30] END colsample_bytree=1, gamma=0, learning_rate=0.15, max_depth=6, n_estimators=150, objective=binary:logistic, subsample=1.25;, score=nan total time=   0.2s


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
33 fits failed out of a total of 90.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
33 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.7/dist-packages/xgboost/sklearn.py", line 732, in fit
    callbacks=callbacks)
  File "/usr/local/lib/python3.7/dist-packages/xgboost/training.py", line 216, in train
    xgb_model=xgb_model, callbacks=callbacks)
  File "/usr/local/lib/python3.7/dist-packages/xgboost/training.py", line 74, in _train_internal
    bst.updat

In [23]:
search.best_params_

{'subsample': 0.75,
 'objective': 'reg:logistic',
 'n_estimators': 100,
 'max_depth': 4,
 'learning_rate': 0.2,
 'gamma': 0,
 'colsample_bytree': 0.5}

In [24]:
modelo_tuneado = random_search.best_estimator_

proba_preds_valid = modelo_tuneado.predict_proba(x_validation_set)
top_k_accuracy_valid = top_k_accuracy_score(y_validation_set, proba_preds_valid, k = 2, labels = modelo_tuneado.classes_)
print("El score con el set de validación es", top_k_accuracy_valid)

El score con el set de validación es 0.5114865866310953
