In [5]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Importación de los Datos

In [6]:
from utils import import_data, validation_train, full_train
from sklearn.ensemble import ExtraTreesClassifier, StackingClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from loguru import logger

# Single seed reused by every estimator defined below and handed to
# validation_train, so the whole notebook is driven by one value.
RANDOM_STATE = 42

# Label names passed to validation_train / full_train.
# NOTE(review): presumably these are the target column names in df_train and
# df_test -- confirm against utils.import_data.
LABELS = [
    "Odio",
    "Mujeres",
    "Comunidad LGBTQ+",
    "Comunidades Migrantes",
    "Pueblos Originarios",
]

# import_data (from the local utils package) returns the train frame, the test
# frame and the stopword collection used by the training helpers.
df_train, df_test, stopwords = import_data()


[32m2023-06-14 19:38:55.666[0m | [1mINFO    [0m | [36mutils.utilities[0m:[36mimport_data[0m:[36m12[0m - [1mDatos de Entrenamiento y Test cargados correctamente...[0m
[32m2023-06-14 19:38:55.667[0m | [1mINFO    [0m | [36mutils.utilities[0m:[36mimport_data[0m:[36m13[0m - [1mTrain: (2256, 9), Test: (2291, 9)[0m


## Definición de Modelos

In [7]:
# --- Base learners -----------------------------------------------------------
# Every estimator is seeded with RANDOM_STATE so results are reproducible.
# The tree ensembles use 500 estimators each and all available cores/threads.
et = ExtraTreesClassifier(n_estimators=500, n_jobs=-1, random_state=RANDOM_STATE)
cb = CatBoostClassifier(
    n_estimators=500, thread_count=-1, random_state=RANDOM_STATE, verbose=False
)
xgb = XGBClassifier(n_estimators=500, n_jobs=-1, random_state=RANDOM_STATE)
lr = LogisticRegression(random_state=RANDOM_STATE)
# Two hidden layers (64 -> 32 units); alpha=0.1 is the L2 penalty strength.
mlp = MLPClassifier(
    hidden_layer_sizes=(64, 32),
    activation="relu",
    solver="adam",
    random_state=RANDOM_STATE,
    alpha=0.1,
)

# (name, estimator) pairs in the format StackingClassifier expects.
estimators = [("et", et), ("cb", cb), ("xgb", xgb), ("lr", lr), ("mlp", mlp)]

# --- Stacked model -----------------------------------------------------------
# A logistic-regression meta-learner is trained on the base learners'
# cross-validated predictions (cv=3 internal folds). It is seeded from
# RANDOM_STATE instead of a hard-coded 42 so that changing the seed in the
# config cell affects the whole stack consistently.
hate_stack = StackingClassifier(
    estimators=estimators,
    final_estimator=LogisticRegression(random_state=RANDOM_STATE),
    cv=3,
)


## Esquema de Validación

In [8]:
# Run the validation scheme implemented by utils.validation_train on the
# stacked model. The helper returns a dict of aggregated metrics keyed as
# "mean_<metric>" / "sd_<metric>".
# NOTE(review): verbose=True presumably triggers the per-fold score lines seen
# in the cell output -- confirm in utils.
dict_results = validation_train(
    df_train, hate_stack, LABELS, stopwords, random_state=RANDOM_STATE, verbose=True
)

print("Stacking Results:")
# One (display label, key suffix) pair per metric; mean and SD are printed for
# each, in the same order as before, instead of twelve hand-written print calls.
for label, suffix in (
    ("Training Score", "train_score"),
    ("Validation Score", "val_score"),
    ("Precision Train Score", "precision_train_score"),
    ("Precision Validation Score", "precision_val_score"),
    ("Recall Train Score", "recall_train_score"),
    ("Recall Validation Score", "recall_val_score"),
):
    print(f"Mean {label}: {dict_results['mean_' + suffix]}")
    print(f"SD {label}: {dict_results['sd_' + suffix]}")


Train Score fold 1: 0.9992974418134202
Validation Score fold 1: 0.7690925562468738
--------------------------------------------
Train Score fold 2: 0.9986517262769286
Validation Score fold 2: 0.8049551199392759
--------------------------------------------
Train Score fold 3: 0.9994154555206065
Validation Score fold 3: 0.806837224787222
--------------------------------------------
Train Score fold 4: 0.9997538158542589
Validation Score fold 4: 0.796186917671002
--------------------------------------------
Train Score fold 5: 1.0
Validation Score fold 5: 0.8177478834597189
--------------------------------------------
Stacking Results:
Mean Training Score: 0.9994236878930428
SD Training Score: 0.0004589228032538586
Mean Validation Score: 0.7989639404208184
SD Validation Score: 0.016436123646685594
Mean Precision Train Score: 0.9994346702453265
SD Precision Train Score: 0.0006287698287160303
Mean Precision Validation Score: 0.8790885981248543
SD Precision Validation Score: 0.00622393580622

## Full Train

In [9]:
# utils.full_train receives both the train and the test frame together with
# the stacked model; the returned dict exposes the test-set metrics below.
# NOTE(review): presumably it fits on all of df_train and scores on df_test --
# confirm in utils.
dict_results = full_train(df_train, df_test, hate_stack, LABELS, stopwords)

print("Stacking Results:")
for title, key in (
    ("Test Score", "test_score"),
    ("Test Precision", "test_precision"),
    ("Test Recall", "test_recall"),
):
    print(f"{title}: {dict_results[key]}")


Stacking Results:
Test Score: 0.8175373271622297
Test Precision: 0.7838088936814132
Test Recall: 0.8610952188880378
