In [121]:
# Standard library
import copy
import json
import os
import pickle
import threading as th

# Third-party
import keyboard
from hyperopt import hp, tpe, fmin, Trials, STATUS_OK, space_eval
from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier
# NOTE(review): `sklearn.ensemble.forest` is a private module path that newer
# scikit-learn releases no longer ship (TODO confirm the installed version); the
# public import just above binds the same name, so this line looks redundant.
from sklearn.ensemble.forest import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import make_scorer
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_validate
from sklearn.neighbors import KNeighborsClassifier

In [194]:
class HyperOptimizer(object):
    """Resumable hyperopt (TPE) search over a space of models and their settings.

    Every call to :meth:`run_trials` loads the pickled trials object from
    ``trial_file`` (when it can be read), asks ``fmin`` for more evaluations and
    pickles the trials again, so a search can be stopped and picked up later.
    :meth:`optimize` repeats that until the ESC key is pressed.

    Parameters
    ----------
    search_space : dict
        hyperopt search space. Must contain the key ``"models_spaces"``; each
        sampled value under that key is a dict whose ``"model"`` entry is a
        callable that builds the estimator and whose other entries are passed
        to that callable as keyword arguments.
    X, y
        Data handed to ``cross_validate``.
    scoring
        Forwarded to ``cross_validate(scoring=...)``.
    trial_file : str
        File name (inside ``trial_folder``) of the pickled trials.
    trial_step : int
        Extra evaluations per run once saved trials were loaded
        (1 = save after every evaluation).
    trial_initial_step : int
        Evaluations of the very first run, when nothing could be loaded.
    debug : bool
        Print a summary of the best configuration after each run.
    n_split : int
        Number of shuffled ``KFold`` splits.
    score_multiplier : number
        Factor applied to the mean test score to obtain the loss minimised by
        ``fmin`` (e.g. ``-1`` for a score that should be maximised).
    preprocess_steps : callable or None
        ``preprocess_steps(params, X) -> X``, applied before cross-validation.
    trial_folder : str or None
        Folder holding the trial file; ``None`` means ``DEFAULT_TRIAL_FOLDER``.
    random_state : int
        Seed of the ``KFold`` shuffle.
    """

    # Folder used when no ``trial_folder`` is given (the historical hard-coded value).
    DEFAULT_TRIAL_FOLDER = "/home/thierno/Downloads/hp_trials"

    def __init__(self, search_space, X, y, scoring, trial_file, trial_step=1, trial_initial_step=1,
                 debug=False, n_split=3, score_multiplier=1, preprocess_steps=None,
                 trial_folder=None, random_state=54):
        assert isinstance(search_space,dict), "searc_space need to be a dict"
        assert "models_spaces" in search_space, "model or models need to be associated with the key models_spaces"
        self.search_space = search_space
        self.keep_going = True
        self.scoring = scoring
        self.trial_step = trial_step
        self.trial_initial_step = trial_initial_step
        self.preprocess_steps = preprocess_steps
        self.score_multiplier = score_multiplier
        self.debug = debug
        self.trial_folder = self.DEFAULT_TRIAL_FOLDER if trial_folder is None else trial_folder
        self.trial_file = trial_file
        self.trial_file_path = os.path.join(self.trial_folder, self.trial_file)
        self.X = X
        self.y = y
        self.cv_inner = KFold(
            n_splits=n_split,
            shuffle=True,
            random_state=random_state,
        )
        self.best_params = None

    @staticmethod
    def _model_name(model):
        """Return a JSON-friendly dotted name for ``model`` (normally a class)."""
        if isinstance(model, type):
            if model.__module__ == "builtins":
                return model.__qualname__
            return "{}.{}".format(model.__module__, model.__qualname__)
        # Not a class (e.g. a factory function): fall back to its string form.
        return str(model)

    def get_acc_status(self, model, X_, y):
        """Cross-validate ``model`` and return the result dict handed back to ``fmin``.

        Returns a dict with ``loss`` (``score_multiplier`` times the mean test
        score), ``loss_std`` (standard deviation of the test scores) and
        ``status``.
        """
        cv_results = cross_validate(
            model,
            X_,
            y,
            cv=self.cv_inner,
            scoring=self.scoring,
            n_jobs=-1,
        )
        test_scores = cv_results['test_score']
        return {
            'loss': self.score_multiplier * test_scores.mean(),
            'loss_std': test_scores.std(),
            'status': STATUS_OK,
        }

    def obj_fnc(self, params):
        """Objective minimised by ``fmin``: build the sampled model and score it.

        ``params`` is one sample of ``search_space``.
        """
        X_train_ = self.X
        if self.preprocess_steps:
            # NOTE(review): preprocessing sees all of X before the folds are
            # made, so anything it fits also sees the validation rows.
            X_train_ = self.preprocess_steps(params, self.X[:])

        # Everything except the 'model' entry is a constructor keyword argument.
        model_space = params['models_spaces']
        parameters = {key: value for key, value in model_space.items() if key != 'model'}
        model = model_space['model'](**parameters)

        return self.get_acc_status(model, X_train_, self.y)

    def _load_trials(self):
        """Return ``(trials, max_evals)``, resuming from the trial file when possible."""
        try:
            # NOTE: unpickling can execute arbitrary code; only point
            # trial_file at files written by this class.
            with open(self.trial_file_path, "rb") as f:
                hypopt_trials = pickle.load(f)
            n_done = len(hypopt_trials.trials)
        except Exception:
            # Best effort: a missing or unreadable file simply starts a new
            # search. KeyboardInterrupt/SystemExit are no longer swallowed.
            print("Unable to load previous trials...")
            return Trials(), self.trial_initial_step

        print("Found saved Trials! Loading...")
        print("Rerunning from {} trials.".format(n_done))
        return hypopt_trials, n_done + self.trial_step

    def run_trials(self):
        """Run one batch of evaluations and persist the trials to ``trial_file_path``.

        Sets ``self.best_params`` to the best configuration found so far,
        expressed as actual search-space values.
        """
        os.makedirs(self.trial_folder, exist_ok=True)
        hypopt_trials, max_evals = self._load_trials()

        # Optimization across the search space
        self.best_params = fmin(
            self.obj_fnc,
            space=self.search_space,
            algo=tpe.suggest,
            max_evals=max_evals,
            trials=hypopt_trials
        )

        # Save the trials object so the next run can resume from here.
        with open(self.trial_file_path, mode="wb") as f:
            pickle.dump(hypopt_trials, f)

        # Convert fmin's result into the corresponding search-space values.
        self.best_params = space_eval(self.search_space, self.best_params)

        if self.debug:
            print(
                "\n----------------------",
                "\nAlgo:", self.best_params['models_spaces']['model'],
                "\nLoss:", hypopt_trials.best_trial['result']['loss'],
                # 'preprocessing_steps' is optional in the search space.
                "\nPreprocessing:", self.best_params.get('preprocessing_steps'),
                "\nModel params:", self.best_params['models_spaces'],
            )

    def save_best_params(self):
        """Write ``self.best_params`` next to the trial file.

        Two files are produced, named after the trial file without its
        extension:

        * ``<name>_best_params.txt``: JSON of the whole best configuration,
          with the model replaced by its dotted name;
        * ``<name>_model_best_params.pickle``: pickled keyword arguments of the
          best model (the ``'model'`` entry removed).

        Raises
        ------
        RuntimeError
            If no best parameters are available yet.
        """
        if self.best_params is None:
            raise RuntimeError("No best parameters to save; call run_trials() first.")

        # splitext only strips the file extension; splitting on the first "."
        # would truncate any path whose folder names contain a dot.
        base_path = os.path.splitext(self.trial_file_path)[0]

        readable = copy.deepcopy(self.best_params)
        readable["models_spaces"]["model"] = self._model_name(readable["models_spaces"]["model"])
        payload = json.dumps(readable)  # serialise first so a failure leaves no empty file

        model_kwargs = copy.deepcopy(self.best_params["models_spaces"])
        model_kwargs.pop('model', None)

        with open(base_path + "_best_params.txt", mode="w") as f:
            f.write(payload)
        with open(base_path + "_model_best_params.pickle", mode="wb") as f:
            pickle.dump(model_kwargs, f)

    def key_capture_thread(self):
        """Block until ESC is pressed, then ask the optimisation loop to stop."""
        keyboard.wait('esc')
        self.keep_going = False
        print('\nInterrupting… Please wait until shut down and the saving of the current trial state.')

    def optimize(self):
        """Run trials in a loop until ESC is pressed, then save the best parameters."""
        self.keep_going = True
        th.Thread(target=self.key_capture_thread, args=(), name='key_capture_thread', daemon=True).start()
        while self.keep_going:
            print("\nExecuting... Press 'ESC' key to interrupt.")
            self.run_trials()

        # The loop only ends after an interrupt; skip saving if no run completed.
        if self.best_params is not None:
            self.save_best_params()

        print('\nSuccessfully interrupted! The optimization can be restarted with the same state using the saved file')

In [195]:
# Synthetic classification data set used to try out the optimizer.
X, y = datasets.make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=5,
    n_redundant=2,
    random_state=0,
)

In [196]:
# Search space handed to HyperOptimizer: one entry for the preprocessing step
# and one for the model together with its hyper-parameters.
space = {}
models_spaces = {}

# Preprocessing: hyperopt first picks ONE of the three steps, then whether that
# step is switched on. hyperopt labels (first argument of hp.*) must stay
# unique across the whole space.
space['preprocessing_steps'] = hp.choice(
    "preprocessing",
    [
        {'scale':         hp.choice('scale', [True, False])},
        {'normalize':     hp.choice('normalize', [True, False])},
        {'robust_scaler': hp.choice('robust_scaler', [True, False])},
    ]
)

### RANDOM FOREST
# 'model' is the estimator class; every other key is a constructor argument.
models_spaces['rf'] = {
    'model':        RandomForestClassifier,
    'max_depth':    hp.choice('rf_max_depth', range(1,20)),
    'max_features': hp.choice('rf_max_features', range(1,3)),
    'n_estimators': hp.choice('rf_n_estimators', range(10,50)),
    'criterion':    hp.choice('rf_criterion', ["gini", "entropy"]),
}

### LOGISTIC REGRESSION
models_spaces['logit'] = {
    'model':          LogisticRegression,
    'warm_start' :    hp.choice('logit_warm_start', [True, False]),
    'fit_intercept' : hp.choice('logit_fit_intercept', [True, False]),
    'tol' :           hp.uniform('logit_tol', 0.00001, 0.0001),
    'C' :             hp.uniform('logit_C', 0.05, 3),
    'solver' :        hp.choice('logit_solver', ['newton-cg', 'lbfgs', 'liblinear']),
    'max_iter' :      hp.choice('logit_max_iter', range(100,1000)),
    'multi_class' :   'auto',
    'class_weight' :  'balanced',
}

# One model family is sampled per trial, in the order the families were added.
space['models_spaces'] = hp.choice('models_spaces', list(models_spaces.values()))

In [161]:
def preprocess_steps(params, X_):
    """Apply the preprocessing switched on in ``params['preprocessing_steps']``.

    The recognised flags are checked in a fixed order -- ``normalize``,
    ``scale``, ``robust_scaler`` -- and each flag that is present and equal to
    ``True`` runs the matching scikit-learn transformer's ``fit_transform`` on
    the data. Returns the transformed data, or ``X_`` itself when no flag is on.
    """
    from sklearn.preprocessing import Normalizer,StandardScaler, RobustScaler

    # (flag name, transformer class) pairs, in application order.
    available_steps = (
        ('normalize', Normalizer),
        ('scale', StandardScaler),
        ('robust_scaler', RobustScaler),
    )

    for flag, transformer_cls in available_steps:
        requested = params['preprocessing_steps']
        if flag in requested and requested[flag] == True:
            X_ = transformer_cls().fit_transform(X_)

    return X_

In [197]:
# score_multiplier=-1: fmin minimises the loss, while accuracy should be maximised.
optimizer = HyperOptimizer(
    search_space=space,
    X=X,
    y=y,
    scoring="accuracy",
    trial_file="xxxx.hyperopt",
    trial_step=1,
    trial_initial_step=1,
    debug=False,
    n_split=3,
    score_multiplier=-1,
    preprocess_steps=preprocess_steps,
)

In [198]:
# Runs batches of trials in a loop until ESC is pressed, then saves the best parameters.
optimizer.optimize()


Executing... Press 'ESC' key to interrupt.
Unable to load previous trials...
100%|██████████| 1/1 [00:00<00:00,  6.98it/s, best loss: -0.9370358382334429]

Executing... Press 'ESC' key to interrupt.
Found saved Trials! Loading...
Rerunning from 1 trials.
100%|██████████| 1/1 [00:00<00:00,  9.10it/s, best loss: -0.9370358382334429]

Executing... Press 'ESC' key to interrupt.
Found saved Trials! Loading...
Rerunning from 2 trials.
100%|██████████| 1/1 [00:00<00:00, 12.91it/s, best loss: -0.9370358382334429]

Executing... Press 'ESC' key to interrupt.
Found saved Trials! Loading...
Rerunning from 3 trials.
100%|██████████| 1/1 [00:00<00:00, 21.97it/s, best loss: -0.9370358382334429]

Executing... Press 'ESC' key to interrupt.
Found saved Trials! Loading...
Rerunning from 4 trials.

Interrupting… Please wait until shut down and the saving of the current trial state.
100%|██████████| 1/1 [00:00<00:00, 24.17it/s, best loss: -0.9370358382334429]

Successfully interrupted! The optimization ca