# Modelo: Boosting

---

## Resultados dashboard

- Prueba N° 2: 
    - Recall: 0.793
    - Accuracy: 0.761
<br>
<br>
- Prueba N° 3: 
    - Recall: 0.794
    - Accuracy: 0.758


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import GridSearchCV
import xgboost as xgb
from models.utils import transform_df, save_results

#### Se cargan los datos y se realizan las transformaciones

In [None]:
# Load both CSV files through the project's shared `transform_df` helper so
# the train and test frames go through the same preprocessing entry point.
TRAIN_CSV_PATH = "datasets/hospitalizaciones_train.csv"
TEST_CSV_PATH = "datasets/hospitalizaciones_test.csv"

df_train = transform_df(TRAIN_CSV_PATH)
# NOTE(review): `test=True` presumably tells the helper that the label column
# is absent -- confirm against models.utils.transform_df.
df_test = transform_df(TEST_CSV_PATH, test=True)

#### Se extraen los valores de train y test

In [None]:
# Split the training frame into the label column and the remaining feature
# columns, and expose everything as plain NumPy arrays.
target_column = "stay_labed"

# Labels are kept as a single-column 2-D array of shape (n_samples, 1).
y_train = df_train[target_column].values.reshape(-1, 1)
x_train = df_train.drop(columns=[target_column]).values

# The test frame is used as-is: every column is treated as a feature.
x_test = df_test.values

#### Se definen los parámetros a optimizar

In [None]:
# Hyper-parameter grid explored by the grid search. Every key is an
# XGBClassifier constructor argument and every value lists its candidates.
#
# NOTE(review): "gpu_hist" needs a GPU-enabled XGBoost build and is deprecated
# in XGBoost >= 2.0 -- confirm it is usable in the target environment.
# NOTE(review): the "gblinear" booster does not grow trees, so the tree-only
# settings (tree_method, max_depth, subsample, grow_policy) presumably have no
# effect for it and those combinations are redundant fits -- verify.
test_params = dict(
    # 100, 110, ..., 150 boosting rounds.
    n_estimators=np.arange(100, 160, 10),
    # 0.5 to 0.9 in steps of 0.1 (the stop value 1 is excluded).
    learning_rate=np.arange(0.5, 1, 0.1),
    booster=["gbtree", "gblinear", "dart"],
    tree_method=["exact", "approx", "hist", "gpu_hist"],
    subsample=[0.9],
    # Depths 8 through 19.
    max_depth=np.arange(8, 20, 1),
    grow_policy=["depthwise"],
)

#### Se realiza la búsqueda de los parámetros óptimos

In [None]:
# Exhaustive search over `test_params` with 5-fold cross-validation.
# Recall and accuracy are both recorded for every candidate; the final model
# is selected and refitted according to recall.
grid_serch_boosting_classifier = GridSearchCV(
    xgb.XGBClassifier(),
    test_params,
    scoring=["recall", "accuracy"],
    refit="recall",
    cv=5,
    n_jobs=-1,  # run the fits in parallel on all available cores
    verbose=3,
)
grid_serch_boosting_classifier.fit(x_train, y_train)

#### Se asignan los mejores parámetros

In [None]:
# Hyper-parameter combination selected by the grid search; because the search
# was configured with refit="recall", this is the candidate with the best
# cross-validated recall.
best_params = grid_serch_boosting_classifier.best_params_

#### Se instancia el modelo con los parámetros encontrados y se realiza la predicción

In [None]:
# Build a fresh classifier from the best hyper-parameters found by the grid
# search.
boosting_classifier = xgb.XGBClassifier(**best_params)

# A newly constructed estimator has no trained booster, so it must be fitted
# before predicting; calling predict() on it directly raises a not-fitted
# error. `ravel()` passes the labels as the 1-D vector the scikit-learn API
# expects (y_train is stored as a single-column 2-D array).
boosting_classifier.fit(x_train, y_train.ravel())

# Predict the labels for the held-out test features.
y_pred = boosting_classifier.predict(x_test)

#### Se guardan los resultados obtenidos en la predicción

In [None]:
# Wrap the raw prediction array in a DataFrame (rebinding `y_pred`) and hand
# it to the project's `save_results` helper for persistence.
y_pred = pd.DataFrame(data=y_pred)
save_results(y_pred)