# CatBoostClasiffier

En este notebook, se modela el entrenamiento y testing de un modelo `CatBoostClasiffier` sobre el dataset obtenido del preprocesado, que incluye la obtención de dummies. Gracias al buen desempeño de CatBoost con variables categóricas, numéricas (binarias) y booleanas, se analizará el desempeño de una versión simple del modelo. Además, se analizará la importancia de las categorías utilizadas por si resultara relevante para el entrenamiento de otros modelos y la posibilidad de añadir variables categóricas que representen el efecto agrupado de varias variables. 

In [1]:
import pandas as pd
from catboost import CatBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
import datetime
import pandas as pd
from catboost import Pool

In [2]:
# Cargar los datos
df = pd.read_csv("../../processed/data_simp_preprocess_v1.csv")

Extraemos las etiquetas, descartamos las variables que carecen de valor predictivo (`id`) y las que están representadas como dummies. 

In [3]:
drop_cols = ["id","statement","subject","label","speaker","party_affiliation","party_affiliation_category_map"]

In [4]:
df_label = df["label"].astype(int)
df = df.drop(columns=drop_cols)
df.head(5)

Unnamed: 0,party_affiliation_uni,economy,health-care,taxes,federal-budget,education,jobs,state-budget,candidates-biography,elections,...,state_info_vermont,state_info_virginia,state_info_virginia.1,state_info_washington,"state_info_washington, d.c.","state_info_washington, d.c..1",state_info_west virginia,state_info_wisconsin,state_info_wisconsin.1,state_info_wyoming
0,republican,0,0,0,0,0,0,0,0,0,...,False,False,False,False,False,False,False,False,False,False
1,democrat,0,1,0,0,0,0,0,0,0,...,False,False,False,False,False,False,False,False,False,False
2,democrat,0,0,1,0,0,0,0,0,0,...,False,False,False,False,True,False,False,False,False,False
3,none,0,0,0,0,0,0,0,1,0,...,False,False,False,False,False,False,False,False,False,False
4,republican,0,1,0,0,0,0,1,0,0,...,False,False,False,False,False,False,False,True,False,False


In [6]:
import pandas as pd


# Cargar el DataFrame
df_clean = df.copy()

# Identificar columnas con espacio al final
columns_with_space = [col for col in df_clean.columns if col.endswith(" ")]

# Fusionar lógicamente con su versión sin espacio (tipo OR)
for col in columns_with_space:
    base_col = col.rstrip()
    if base_col in df_clean.columns:
        # Asumimos valores 0/1 o booleanos; hacemos un OR lógico
        df_clean[base_col] = df_clean[base_col].astype(int) | df_clean[col].astype(int)
        df_clean.drop(columns=[col], inplace=True)  # Eliminar la columna con espacio

df_clean

Unnamed: 0,party_affiliation_uni,economy,health-care,taxes,federal-budget,education,jobs,state-budget,candidates-biography,elections,...,state_info_united states,state_info_unknown,state_info_utah,state_info_vermont,state_info_virginia,state_info_washington,"state_info_washington, d.c.",state_info_west virginia,state_info_wisconsin,state_info_wyoming
0,republican,0,0,0,0,0,0,0,0,0,...,False,False,False,False,0,False,0,False,0,False
1,democrat,0,1,0,0,0,0,0,0,0,...,False,False,False,False,0,False,0,False,0,False
2,democrat,0,0,1,0,0,0,0,0,0,...,False,False,False,False,0,False,1,False,0,False
3,none,0,0,0,0,0,0,0,1,0,...,False,False,False,False,0,False,0,False,0,False
4,republican,0,1,0,0,0,0,1,0,0,...,False,False,False,False,0,False,0,False,1,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8945,democrat,0,0,0,0,0,0,0,0,0,...,False,False,False,False,0,False,0,False,0,False
8946,republican,0,0,0,0,0,0,0,0,0,...,False,False,False,False,0,False,0,False,0,False
8947,republican,0,1,0,0,0,0,0,0,0,...,False,False,False,False,0,False,0,False,0,False
8948,republican,0,0,0,0,0,0,0,0,1,...,False,False,False,False,0,False,0,False,0,False


In [9]:

# Sumar los valores 1 en ambas columnas
sum_base = df["state_info_california"].sum()
sum_with_space = df["state_info_california "].sum()

# Imprimir resultados
print("Suma en 'state_info_colorado':", sum_base)
print("Suma en 'state_info_colorado ':", sum_with_space)
print("Suma resultado: ",df_clean["state_info_california"].sum())

Suma en 'state_info_colorado': 121
Suma en 'state_info_colorado ': 1
Suma resultado:  122


## Modelo Catboost Simple

Inicialmente, entrenamos el modelo con todas las columnas del dataset. 

In [10]:
# Definir la columna objetivo
X = df.copy()
y = df_label

# Detectar columnas categóricas
cat_features = X.select_dtypes(include=['object']).columns.tolist()
cat_features

['party_affiliation_uni']

In [11]:
# Dividir en entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Inicializar y entrenar el modelo
model = CatBoostClassifier(
    iterations=500,
    learning_rate=0.03,
    depth=8,
    cat_features=cat_features,
    auto_class_weights='Balanced',
    verbose=100,
    random_state=42
)
model.fit(X_train, y_train)

0:	learn: 0.6921110	total: 72.2ms	remaining: 36s
100:	learn: 0.6544493	total: 1.88s	remaining: 7.42s
200:	learn: 0.6368934	total: 3.42s	remaining: 5.08s
300:	learn: 0.6218705	total: 4.86s	remaining: 3.21s
400:	learn: 0.5995653	total: 6.25s	remaining: 1.54s
499:	learn: 0.5796372	total: 7.61s	remaining: 0us


<catboost.core.CatBoostClassifier at 0x12c528fd0>

In [12]:
# Evaluar el modelo
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.587709497206704
Classification Report:
               precision    recall  f1-score   support

           0       0.44      0.60      0.51       631
           1       0.73      0.58      0.65      1159

    accuracy                           0.59      1790
   macro avg       0.58      0.59      0.58      1790
weighted avg       0.62      0.59      0.60      1790



- **Modelo CatBoost Simple** con `auto_class_weights`: `macro-avg=0,59`, `precision[0]=45` y `precision[1]=0,73`. *Sol*: Reducir número de variables para ver si el modelo mantiene la generalización (equivalente a eliminar cardinalidad y ruido)
- **Modelo CatBoost Simple** sin `party_affiliation`: `macro-avg`baja un 1%, bajando en la misma medida la precisión de la clase minoritaria. 
- **Modelo CatBoost Simple** sin `party_affiliation` ni `party_affiliation_uni`: `macro-avg`baja un 1%, bajando en la misma medida la precisión de la clase mayoritaria.

En todos los casos, el modelo presenta un peor desempeño en la detección de la clase minoritaria.

## Ajuste de Hiperparámetros con RandomizedSearch

Buscamos los mejores hiperparámetros para el modelo simplificado inicial

In [13]:
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.model_selection import StratifiedKFold
import numpy as np


# Modelo base
base_model = CatBoostClassifier(
    auto_class_weights='Balanced',
    verbose=100,
    random_state=42,
    early_stopping_rounds = 30
)

# Hiperparámetros a explorar
param_dist = {
    'depth': [4, 6],
    'learning_rate': np.linspace(0.01, 0.05, 5),
    'iterations': [500, 800, 1000],
    'l2_leaf_reg': [5, 10, 20],
    'min_data_in_leaf': [10, 20, 30],
    'bagging_temperature': [0.5, 1, 1.5],
    'random_strength': [0.5, 1, 1.5]
}


stratified_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Búsqueda aleatoria
search = RandomizedSearchCV(
    estimator=base_model,
    param_distributions=param_dist,
    n_iter=20,
    scoring='f1_macro',
    cv=stratified_cv,
    verbose=3,
    n_jobs=-1
)


In [14]:
# Iniciamos la busqueda
search.fit(X_train, y_train, cat_features=cat_features)

Fitting 5 folds for each of 20 candidates, totalling 100 fits
0:	learn: 0.6924137	total: 7.19ms	remaining: 5.75s
100:	learn: 0.6725352	total: 700ms	remaining: 4.85s
200:	learn: 0.6654832	total: 1.27s	remaining: 3.78s
300:	learn: 0.6596149	total: 1.78s	remaining: 2.95s
400:	learn: 0.6522298	total: 2.33s	remaining: 2.31s
500:	learn: 0.6461697	total: 2.88s	remaining: 1.72s
600:	learn: 0.6402853	total: 3.41s	remaining: 1.13s
700:	learn: 0.6354575	total: 4.17s	remaining: 590ms
799:	learn: 0.6309718	total: 4.75s	remaining: 0us


In [15]:
# Evaluación final en test
best_model = search.best_estimator_
y_pred = best_model.predict(X_test)
print("Best Params:", search.best_params_)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Best Params: {'random_strength': 1, 'min_data_in_leaf': 30, 'learning_rate': np.float64(0.03), 'l2_leaf_reg': 20, 'iterations': 800, 'depth': 4, 'bagging_temperature': 0.5}
Accuracy: 0.5888268156424581
Classification Report:
               precision    recall  f1-score   support

           0       0.44      0.61      0.51       631
           1       0.73      0.58      0.65      1159

    accuracy                           0.59      1790
   macro avg       0.58      0.59      0.58      1790
weighted avg       0.63      0.59      0.60      1790



#### Subida a Kaggle 

In [16]:
cat_features

['party_affiliation_uni']

In [17]:
# Cargar los datos de test
df_test = pd.read_csv("../../data/processed/test_simp_preprocess_v1.csv")

# Guardar id
test_ids = df_test["id"]

In [18]:
# Eliminar las mismas columnas que en df
drop_cols_test =  drop_cols.copy()
print(drop_cols_test)

drop_cols_test.remove("label")
drop_cols_test.remove("party_affiliation")
print(drop_cols_test)

df_test = df_test.drop(columns=drop_cols_test)
df_test.head(2)

['id', 'statement', 'subject', 'label', 'speaker', 'party_affiliation', 'party_affiliation_category_map']
['id', 'statement', 'subject', 'speaker', 'party_affiliation_category_map']


Unnamed: 0,party_affiliation_uni,economy,health-care,taxes,federal-budget,education,jobs,state-budget,candidates-biography,elections,...,party_affiliation_columnist,party_affiliation_democrat,party_affiliation_independent,party_affiliation_journalist,party_affiliation_libertarian,party_affiliation_newsmaker,party_affiliation_none,party_affiliation_organization,party_affiliation_other,party_affiliation_republican
0,democrat,0,0,0,0,0,0,0,0,0,...,False,True,False,False,False,False,False,False,False,False
1,republican,0,0,0,0,0,0,0,0,1,...,False,False,False,False,False,False,False,False,False,True


In [19]:
# Ajuste de columnas (por dummies)
missing_cols = set(df.columns) - set(df_test.columns)
for col in missing_cols:
    df_test[col] = 0  # Rellenar columnas faltantes con 0

# Avisar si faltan columnas por transparencia
if missing_cols:
    print(f"Columnas faltantes añadidas en test: {len(missing_cols)}")

Columnas faltantes añadidas en test: 69


In [20]:
cat_features_test = df_test.select_dtypes(include=['object']).columns.tolist()
cat_features_test
df_test = df_test.drop(columns=['speaker_job_grouped'])

In [21]:
# Crear el Pool
test_pool = Pool(df_test, cat_features=cat_features)
y_pred_test = best_model.predict(test_pool)

# Guardar predicciones
current_date = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
output = pd.DataFrame({
    "id": test_ids,
    "label": y_pred_test.astype(int)
})
filename = f"../3_summision/CatBoost_Simple{current_date}.csv"
output.to_csv(filename, index=False)
print(f"Predicciones guardadas en {filename}")

Predicciones guardadas en ../3_summision/CatBoost_Simple2025-05-18_06-53-30.csv


#### Resultados:
1. Dataset con todas las categorías procesado con CatBoost Simple + Balanceo de clases + Búsqueda de Hiperparámetros: **0.55996**
2. Con Randomized más robusto y StratifiedFolds sube a **0.56244**. Aumenta la detección de la clase minoritaria y la diferencia entre el resultado de Kaggle y el notebook baja considerablemente, indicando que el modelo está controlando bien el overfitting. 

## Ajuste de umbral

In [22]:
import numpy as np
from sklearn.metrics import f1_score, accuracy_score, classification_report

# Obtener probabilidades de clase 1
y_probs = best_model.predict_proba(X_test)[:, 1]

# Probar distintos umbrales
thresholds = np.arange(0.1, 0.91, 0.01)
f1_scores = []

for t in thresholds:
    y_pred_thresh = (y_probs >= t).astype(int)
    score = f1_score(y_test, y_pred_thresh, average='macro')
    f1_scores.append(score)

# Mejor umbral
best_threshold = thresholds[np.argmax(f1_scores)]
best_f1 = max(f1_scores)

# Evaluar con el mejor umbral
y_pred_opt = (y_probs >= best_threshold).astype(int)

print(f"Mejor umbral: {best_threshold:.2f} con F1 macro: {best_f1:.4f}")
print("Best Params:", search.best_params_)
print("Accuracy:", accuracy_score(y_test, y_pred_opt))
print("Classification Report:\n", classification_report(y_test, y_pred_opt))


Mejor umbral: 0.44 con F1 macro: 0.5929
Best Params: {'random_strength': 1, 'min_data_in_leaf': 30, 'learning_rate': np.float64(0.03), 'l2_leaf_reg': 20, 'iterations': 800, 'depth': 4, 'bagging_temperature': 0.5}
Accuracy: 0.6441340782122905
Classification Report:
               precision    recall  f1-score   support

           0       0.49      0.41      0.45       631
           1       0.71      0.77      0.74      1159

    accuracy                           0.64      1790
   macro avg       0.60      0.59      0.59      1790
weighted avg       0.63      0.64      0.64      1790



#### Subida a Kaggle

In [23]:
# Cargar los datos de test
df_test = pd.read_csv("../../data/processed/test_simp_preprocess_v1.csv")

# Guardar id
test_ids = df_test["id"]

In [24]:
# Crear el Pool
y_probs_test = best_model.predict_proba(test_pool)[:, 1]
# Evaluar con el mejor umbral
y_pred_test = (y_probs_test >= best_threshold).astype(int)

# Guardar predicciones
current_date = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
output = pd.DataFrame({
    "id": test_ids,
    "label": y_pred_test.astype(int)
})
filename = f"../3_summision/CatBoost_Simple_Threshold_{current_date}.csv"
output.to_csv(filename, index=False)
print(f"Predicciones guardadas en {filename}")

Predicciones guardadas en ../3_summision/CatBoost_Simple_Threshold_2025-05-18_06-53-30.csv


#### Resultados:
1. Dataset con todas las categorías procesado con CatBoost Simple + Balanceo de clases + Búsqueda de Hiperparámetros + Umbral: **0.56970**. Subida de un 1%.

## Importancia de variables

In [25]:
# Obtener importancia de características
feature_importances = best_model.get_feature_importance(prettified=True)
feature_importances.columns = ['feature', 'importance']
feature_importances = feature_importances.sort_values(by='importance', ascending=False)
feature_importances

Unnamed: 0,feature,importance
0,party_affiliation_uni,20.620395
1,health-care,6.002707
2,state_info_non-define,2.830360
3,economy,2.747409
4,jobs,2.718907
...,...,...
198,state_info_utah,0.000000
199,state_info_virginia,0.000000
200,state_info_washington,0.000000
201,state_info_west virginia,0.000000


Vemos que `party_affiliation_uni` contiene información muy relevante para el modelo, quizás tomando demasiado protagonismo. Se considera, para iteraciones futuras, quitar esta variable y ver si 'party_affiliation_category_map` toma importancia suficiente, sin perder el efecto de la afiliación política en el modelo. 

In [26]:
# Filtramos las más relevantes
top_n = 100
top_features = feature_importances['feature'].head(top_n).tolist()

X_train_top = X_train[top_features]
X_test_top = X_test[top_features]
cat_features_top = X_train_top.select_dtypes(include=['object']).columns.tolist()
cat_features_top

['party_affiliation_uni']

In [27]:
# Buscamos el modelo con mejores hiperparámetros
search_top = RandomizedSearchCV(
    estimator=base_model,
    param_distributions=param_dist,
    n_iter=20,
    scoring='f1_macro',
    cv=3,
    verbose=3,
    n_jobs=-1
)
search_top.fit(X_train_top, y_train, cat_features=cat_features_top)
best_model_top = search_top.best_estimator_

params_top = best_model_top.get_params()

# Evaluar el modelo
y_pred_top = best_model_top.predict(X_test_top)
print("Accuracy:", accuracy_score(y_test, y_pred_top))
print("Classification Report:\n", classification_report(y_test, y_pred_top))

Fitting 3 folds for each of 20 candidates, totalling 60 fits
0:	learn: 0.6928244	total: 69.3ms	remaining: 55.4s
100:	learn: 0.6773621	total: 1.42s	remaining: 9.8s
200:	learn: 0.6705077	total: 2.79s	remaining: 8.33s
300:	learn: 0.6663054	total: 4.34s	remaining: 7.19s
400:	learn: 0.6630064	total: 6.93s	remaining: 6.9s
500:	learn: 0.6601724	total: 8.28s	remaining: 4.94s
600:	learn: 0.6576788	total: 9.65s	remaining: 3.2s
700:	learn: 0.6552565	total: 10.8s	remaining: 1.53s
799:	learn: 0.6531166	total: 12.1s	remaining: 0us
[CV 3/5] END bagging_temperature=1, depth=4, iterations=800, l2_leaf_reg=20, learning_rate=0.01, min_data_in_leaf=10, random_strength=0.5;, score=0.570 total time=  12.5s
0:	learn: 0.6920676	total: 14.9ms	remaining: 7.44s
100:	learn: 0.6616491	total: 1.43s	remaining: 5.63s
200:	learn: 0.6485942	total: 4.02s	remaining: 5.98s
300:	learn: 0.6321153	total: 6.02s	remaining: 3.98s
400:	learn: 0.6190999	total: 8.43s	remaining: 2.08s
499:	learn: 0.6082233	total: 9.95s	remaining: 0

El resultado se mantiene, pero aumenta la detección de la clase 0. 
#### Subida a Kaggle

In [28]:
# Cargar los datos de test
df_test = pd.read_csv("../../data/processed/test_simp_preprocess_v1.csv")

# Guardar id
test_ids = df_test["id"]


In [29]:
# Eliminar las mismas columnas que en df
drop_cols_test =  drop_cols.copy()
print(drop_cols_test)

drop_cols_test.remove("label")
drop_cols_test.remove("party_affiliation")
print(drop_cols_test)

df_test = df_test.drop(columns=drop_cols_test)
df_test.head(2)

['id', 'statement', 'subject', 'label', 'speaker', 'party_affiliation', 'party_affiliation_category_map']
['id', 'statement', 'subject', 'speaker', 'party_affiliation_category_map']


Unnamed: 0,party_affiliation_uni,economy,health-care,taxes,federal-budget,education,jobs,state-budget,candidates-biography,elections,...,party_affiliation_columnist,party_affiliation_democrat,party_affiliation_independent,party_affiliation_journalist,party_affiliation_libertarian,party_affiliation_newsmaker,party_affiliation_none,party_affiliation_organization,party_affiliation_other,party_affiliation_republican
0,democrat,0,0,0,0,0,0,0,0,0,...,False,True,False,False,False,False,False,False,False,False
1,republican,0,0,0,0,0,0,0,0,1,...,False,False,False,False,False,False,False,False,False,True


In [30]:
# Ajuste de columnas (por dummies)
missing_cols = set(df.columns) - set(df_test.columns)
for col in missing_cols:
    df_test[col] = 0  # Rellenar columnas faltantes con 0

# Avisar si faltan columnas por transparencia
if missing_cols:
    print(f"Columnas faltantes añadidas en test: {len(missing_cols)}")

Columnas faltantes añadidas en test: 69


In [31]:
cat_features_test = df_test.select_dtypes(include=['object']).columns.tolist()
cat_features_test
df_test = df_test.drop(columns=['speaker_job_grouped'])

In [32]:
# Crear el Pool
test_pool = Pool(df_test, cat_features=cat_features)
y_pred_top_test = best_model_top.predict(test_pool)

# Guardar predicciones
current_date = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
output = pd.DataFrame({
    "id": test_ids,
    "label": y_pred_top_test.astype(int)
})
filename = f"../3_summision/CatBoost_Simple_Importancia{current_date}.csv"
output.to_csv(filename, index=False)
print(f"Predicciones guardadas en {filename}")

Predicciones guardadas en ../3_summision/CatBoost_Simple_Importancia2025-05-18_06-56-27.csv


#### Resultados:
1. Dataset con TOP100 categorías procesado con CatBoost Simple + Balanceo de clases + Búsqueda de Hiperparámetros: **0.56539**. Sube aproximadamente un 1% más que con todas las categorías: no solo generaliza mejor sino que además categorías por debajo del 100 introducen ruido.


### Ajuste de umbral

In [33]:
# Obtener probabilidades de clase 1
y_probs = model_top.predict_proba(X_test_top)[:, 1]

# Probar distintos umbrales
thresholds = np.arange(0.1, 0.91, 0.01)
f1_scores = []

for t in thresholds:
    y_pred_thresh = (y_probs >= t).astype(int)
    score = f1_score(y_test, y_pred_thresh, average='macro')
    f1_scores.append(score)

# Mejor umbral
best_threshold = thresholds[np.argmax(f1_scores)]
best_f1 = max(f1_scores)

# Evaluar con el mejor umbral
y_pred_opt = (y_probs >= best_threshold).astype(int)

print(f"Mejor umbral: {best_threshold:.2f} con F1 macro: {best_f1:.4f}")
print("Best Params:", search.best_params_)
print("Accuracy:", accuracy_score(y_test, y_pred_opt))
print("Classification Report:\n", classification_report(y_test, y_pred_opt))



NameError: name 'model_top' is not defined

In [None]:
#### Subir a Kaggle

In [None]:
# Cargar los datos de test
df_test = pd.read_csv("../../data/processed/test_simp_preprocess_v1.csv")

# Guardar id
test_ids = df_test["id"]

In [None]:
# Eliminar las mismas columnas que en df
drop_cols_test =  drop_cols.copy()
print(drop_cols_test)

drop_cols_test.remove("label")
drop_cols_test.remove("party_affiliation")
print(drop_cols_test)

df_test = df_test.drop(columns=drop_cols_test)
df_test.head(2)

In [None]:
# Ajuste de columnas (por dummies)
missing_cols = set(df.columns) - set(df_test.columns)
for col in missing_cols:
    df_test[col] = 0  # Rellenar columnas faltantes con 0

# Avisar si faltan columnas por transparencia
if missing_cols:
    print(f"Columnas faltantes añadidas en test: {len(missing_cols)}")

In [None]:
cat_features_test = df_test.select_dtypes(include=['object']).columns.tolist()
cat_features_test
df_test = df_test.drop(columns=['speaker_job_grouped'])

In [None]:
# Crear el Pool
test_pool = Pool(df_test, cat_features=cat_features)
y_probs = model_top.predict_proba(test_pool)[:, 1]

# Evaluar con el mejor umbral
y_pred_test = (y_probs >= best_threshold).astype(int)

# Guardar predicciones
current_date = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
output = pd.DataFrame({
    "id": test_ids,
    "label": y_pred_test.astype(int)
})
filename = f"../3_summision/CatBoost_Simple_top_Threshold{current_date}.csv"
output.to_csv(filename, index=False)
print(f"Predicciones guardadas en {filename}")

100:	learn: 0.6690486	total: 1.4s	remaining: 12.4s
200:	learn: 0.6575937	total: 3.36s	remaining: 13.4s
300:	learn: 0.6498900	total: 6.04s	remaining: 14s
400:	learn: 0.6420668	total: 7.53s	remaining: 11.2s
500:	learn: 0.6316050	total: 8.96s	remaining: 8.92s
600:	learn: 0.6183887	total: 10.3s	remaining: 6.86s
700:	learn: 0.6070118	total: 12.3s	remaining: 5.24s
800:	learn: 0.5971294	total: 14.2s	remaining: 3.53s
900:	learn: 0.5883428	total: 15.9s	remaining: 1.74s
999:	learn: 0.5805241	total: 17.4s	remaining: 0us
[CV 2/3] END bagging_temperature=1, depth=6, iterations=1000, l2_leaf_reg=5, learning_rate=0.02, min_data_in_leaf=10, random_strength=1.5;, score=0.586 total time=  17.7s
0:	learn: 0.6921768	total: 14.3ms	remaining: 7.14s
100:	learn: 0.6607534	total: 1.23s	remaining: 4.87s
200:	learn: 0.6469041	total: 2.46s	remaining: 3.66s
300:	learn: 0.6336347	total: 3.73s	remaining: 2.47s
400:	learn: 0.6168583	total: 4.93s	remaining: 1.22s
499:	learn: 0.6036150	total: 6.1s	remaining: 0us
[CV 3/