# Optimizacion de hiperparametros

In [71]:

import sys

sys.path.append("../")

import pandas as pd
import numpy as np
from hyperopt import fmin, hp, tpe, space_eval, STATUS_OK, Trials
from sklearn.metrics import roc_auc_score
from typing import Dict, Callable
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from libs import configs
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer


In [69]:
data

Unnamed: 0,index,label,title,filename,title_length,news
0,n0,F,Hacer gargaras con agua y sal elimina el coron...,../data/data_fake_news/fake/F - Hacer gargaras...,52,Beber mucha agua y hacer gárgaras con agua cal...
1,n1,F,Helicópteros no rociarán desinfectante contra ...,../data/data_fake_news/fake/F - Helicópteros n...,68,Helicópteros no rociarán desinfectante contra ...
2,n2,F,Nostradamus predijo el COVID19 y lo describió ...,../data/data_fake_news/fake/F - Nostradamus pr...,59,Nostradamus predijo el COVID19 y lo describió ...
3,n3,F,Nostradamus predijo el COVID19 y lo describió ...,../data/data_fake_news/fake/F - Nostradamus pr...,60,Nostradamus predijo el COVID19 y lo describió ...
4,n4,F,Sostener la respiración por 10 segundos no ayu...,../data/data_fake_news/fake/F - Sostener la re...,85,Sostener la respiración por 10 segundos no ayu...
...,...,...,...,...,...,...
528,n528,V,"_Unidos para seguir cuidándote_, la nueva estr...",../data/data_fake_news/true/V-_Unidos para seg...,68,"'Unidos para seguir cuidándote', la nueva estr..."
529,n529,V,_Vacuna contra covid en EE. UU. no estará disp...,../data/data_fake_news/true/V-_Vacuna contra c...,68,'Vacuna contra covid en EE. UU. no estará disp...
530,n530,V,"‘Datos del covid tienen rezagos, no falseamien...",../data/data_fake_news/true/V-‘Datos del covid...,69,"‘Datos del covid tienen rezagos, no falseamien..."
531,n531,V,‘Este virus muestra cuán vulnerables somos’_Pe...,../data/data_fake_news/true/V-‘Este virus mues...,60,‘Este virus muestra cuán vulnerables somos’: P...


In [43]:

def optimize_auc(search_space: Dict, evals: int) -> Callable:
    """decorador para Optimizar hiperametros de cualquier modelo de ML
       
    """
    def _objective_wrapper(objective: Callable) -> Callable:
        def wrapper(*args, **kwargs) -> Callable:
            trials = Trials()
            return fmin(
                fn = objective,
                space = search_space,
                algo = tpe.suggest,
                max_evals = evals,
                trials = trials
            )
        return wrapper
    return _objective_wrapper

In [72]:
data = pd.read_csv(configs.PREPROCESSED_DATA, sep="\t")
data["fake"] = np.where(data["label"] == "F", 1, 0)
X_train, X_test, y_train, y_test = train_test_split(data['news'], data['fake'], random_state = 0)

count_vect = CountVectorizer()
X_train_counts = count_vect.fit_transform(X_train)
tfidf_transformer = TfidfTransformer()
X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)
test_data_transformed = tfidf_transformer.transform(count_vect.transform(X_test)).toarray()

In [86]:
SPACE = {
    "n_estimators": hp.quniform("n_estimators", 50, 100, 10)
}

def optimize_models(space: Dict) -> Dict:

    @optimize_auc(search_space=space, evals=10)
    def train_predict_xgboost(search_space: Dict) -> Dict:

        model = XGBClassifier(n_estimators=int(search_space["n_estimators"]))
        model.fit(X_train_tfidf, y_train)
        y_score = model.predict_proba(test_data_transformed)[:,1]
        ROC_SCORE = roc_auc_score(y_test, y_score)
        print(f"ROC-AUC Score: {ROC_SCORE}")
        return {
            "loss": -ROC_SCORE, 
            "status": STATUS_OK
        }
    
    return train_predict_xgboost(SPACE)
    

In [87]:
optimize_model(space=SPACE)

ROC-AUC Score: 0.8587053571428571                                                                                                                                                                                  
ROC-AUC Score: 0.8587053571428571                                                                                                                                                                                  
ROC-AUC Score: 0.8752232142857144                                                                                                                                                                                  
ROC-AUC Score: 0.8752232142857144                                                                                                                                                                                  
ROC-AUC Score: 0.8587053571428571                                                                                                                       

{'n_estimators': 60.0}

In [59]:
hp.quniform("test", 1, 10,2)

<hyperopt.pyll.base.Apply at 0x7fea24bd16f0>

In [84]:
best_

{'n_estimators': 60.0}