In [None]:
import numpy as np
from collections import OrderedDict

from hyperopt import fmin, tpe
from hyperopt.pyll.stochastic import sample

# Database
from hyperopt import Trials

from hyperopt import STATUS_OK
from timeit import default_timer as timer

from itertools import product

In [None]:
%run utils.ipynb

In [None]:
class Hyperopt():
    """Hyper-parameter search driver around a project model wrapper.

    Supports two search modes (see ``run``): hyperopt's ``fmin`` over
    ``model.hyperopt_scope`` and an exhaustive grid search over
    ``model.grid_scope``.

    The wrapped ``model`` must expose the attributes ``model_type``,
    ``hyperopt_scope``, ``grid_scope`` and ``model``, and the methods
    ``get_target_metric()``, ``init_model(params)``,
    ``fit(X, y, eval_set)`` and ``get_attributes(model)``.
    """

    def __init__(self, model, objective=None):
        """Store the model wrapper and reset all search state.

        Args:
            model: project model wrapper (see class docstring).
            objective: optional ready-made objective ``f(params) -> dict``.
                When given, ``run`` uses it instead of building one with
                ``get_objective_fn``.
        """
        self._model_type     = model.model_type
        self._hyperopt_scope = model.hyperopt_scope
        self._grid_scope     = model.grid_scope
        self._model          = model

        self.objective_fn    = objective
        self.hp_trials       = Trials()
        self.results         = []
        self.iteration       = 0
        # Cache of objective results keyed by a string built from the params,
        # so a parameter set that is proposed twice is not re-evaluated.
        self.history         = {}

    @staticmethod
    def _identity_transformer(ids, x, y, retrain=False):
        """Default data transformer: return the inputs unchanged."""
        return ids, x, y

    @staticmethod
    def _select_best(results):
        """Return ``(loss, params)`` of the result dict with the lowest loss.

        Entries without a usable ``'loss'`` (missing, ``None`` or NaN) are
        skipped. ``params`` is ``None`` when the winning entry carries no
        ``'params'`` key. Returns ``(None, None)`` when no entry qualifies.
        """
        best_loss = None
        best_params = None
        for entry in results:
            loss = entry.get("loss")
            # ``loss != loss`` is only true for NaN.
            if loss is None or loss != loss:
                continue
            if best_loss is None or loss < best_loss:
                best_loss = loss
                best_params = entry.get("params")
        return best_loss, best_params

    def run(self, key, data, target, n_cv=0, data_transformer=None, eval_set=None, algo=tpe.suggest, max_evals=10, random_seed=0, verbose=1):
        """Run the search and return the best ``(loss, params)`` pair.

        Args:
            key, data, target: ids, features and targets, forwarded to
                ``get_objective_fn``.
            n_cv: number of CV folds; ``0`` fits once on the full data.
            data_transformer: optional callable
                ``(ids, X, y, retrain=...) -> (ids, X, y)``; ``None`` means
                no transformation.
            eval_set: optional mapping ``name -> (ids, X, y)`` of extra
                evaluation sets.
            algo: a hyperopt suggest function, or the string ``'grid'`` for
                an exhaustive search over ``model.grid_scope``.
            max_evals: evaluation budget for ``fmin`` (ignored for grid).
            random_seed: seed for the ``fmin`` random state.
            verbose: currently unused; kept for interface compatibility.

        Returns:
            ``(best_loss, best_params)`` for the lowest loss seen, since the
            loss is what ``fmin`` minimizes. ``(None, None)`` if no result
            carries a usable loss.
        """
        if self.objective_fn is None:
            objective_fn = self.get_objective_fn(key, data, target, n_cv, data_transformer, eval_set)
        else:
            objective_fn = self.objective_fn

        if algo == 'grid':
            grid = self._grid_scope
            names = list(grid.keys())
            # Cartesian product of all candidate values, one dict per combo.
            for combo in product(*grid.values()):
                self.results.append(objective_fn(dict(zip(names, combo))))
        else:
            # NOTE(review): recent hyperopt releases may expect a
            # numpy Generator for ``rstate`` -- confirm against the installed
            # version.
            fmin(
                fn=objective_fn,
                space=self._hyperopt_scope,
                algo=algo,
                max_evals=max_evals,
                trials=self.hp_trials,
                rstate=np.random.RandomState(random_seed)
            )
            self.results = self.hp_trials.results

        return self._select_best(self.results)

    def get_objective_fn(
        self,
        key,
        data,
        target,
        n_cv=0,
        data_transformer=None,
        eval_set=None
    ):
        """Build the objective closure evaluated by ``run``.

        Args:
            key, data, target: ids, features and targets. With ``n_cv > 0``
                they are sliced as ``arr[a:b, :]``, so they are presumably
                2-D numpy arrays -- TODO confirm.
            n_cv: ``0`` fits once on all data; ``> 0`` evaluates over the
                folds returned by ``get_ts_cv_folds`` (a helper defined
                outside this class).
            data_transformer: callable
                ``(ids, X, y, retrain=bool) -> (ids, X, y)``; ``None`` uses
                an identity transform.
            eval_set: optional mapping ``name -> (ids, X, y)``.

        Returns:
            ``objective(params) -> dict`` with keys ``iteration``, ``loss``
            (mean of the per-fit target metric on the ``'CV'`` row),
            ``loss_variance`` (its standard deviation), ``eval_time``,
            ``status``, ``params`` and ``attributes``.

        Raises:
            ValueError: if ``n_cv`` is negative.
        """
        if n_cv < 0:
            raise ValueError("n_cv must be >= 0, got %r" % (n_cv,))

        if n_cv != 0:
            self.cv_folds = get_ts_cv_folds(key.shape[0], n_folds=n_cv, frac_min_tr=0.6, max_tr='full')

        self.eval_set = OrderedDict() if eval_set is None else eval_set

        if data_transformer is None:
            data_transformer = self._identity_transformer
        self.data_transformer = data_transformer

        def transformed_eval_sets():
            # Extra evaluation sets go through the transformer without
            # retraining it.
            return {
                name: self.data_transformer(parts[0], parts[1], parts[2], retrain=False)
                for name, parts in self.eval_set.items()
            }

        def objective(params):
            # keep track of evals
            self.iteration += 1
            print("Hyperopt Iteration : " + str(self.iteration))

            # Return the cached result when this exact parameter set has
            # already been evaluated.
            params_key = "__".join([str(k)+'_'+str(v) for k, v in params.items()])
            if params_key in self.history:
                return self.history[params_key]

            target_metric = self._model.get_target_metric()

            start = timer()

            print(params)

            cvs = []
            attributes = []

            if n_cv == 0:
                # Single fit on the full data set. The score is read from the
                # 'CV' row of the fit result, so ``eval_set`` presumably has
                # to provide a 'CV' entry in this mode -- TODO confirm.
                _, X, y = self.data_transformer(key, data, target, retrain=True)
                fit_eval_set = transformed_eval_sets()

                self._model.init_model(params)
                result = self._model.fit(X, y, fit_eval_set)
                cvs.append(result.loc['CV'][target_metric])
                attributes.append(self._model.get_attributes(self._model.model))

                print(result)
            else:
                # Each fold holds (train_start, train_end, cv_start, cv_end)
                # row bounds, as consumed by the slices below.
                for fold, cv_fold in enumerate(self.cv_folds):
                    cv_start = timer()

                    TR_id = key[cv_fold[0]:cv_fold[1], :]
                    TR_X = data[cv_fold[0]:cv_fold[1], :]
                    TR_y = target[cv_fold[0]:cv_fold[1], :]
                    TR_id, TR_X, TR_y = self.data_transformer(TR_id, TR_X, TR_y, retrain=True)

                    fit_eval_set = transformed_eval_sets()
                    fit_eval_set['TR'] = (TR_id, TR_X, TR_y)

                    CV_id = key[cv_fold[2]:cv_fold[3], :]
                    CV_X = data[cv_fold[2]:cv_fold[3], :]
                    CV_y = target[cv_fold[2]:cv_fold[3], :]
                    CV_id, CV_X, CV_y = self.data_transformer(CV_id, CV_X, CV_y, retrain=False)
                    fit_eval_set['CV'] = (CV_id, CV_X, CV_y)

                    self._model.init_model(params)
                    result = self._model.fit(TR_X, TR_y, fit_eval_set)

                    cvs.append(result.loc['CV'][target_metric])
                    attributes.append(self._model.get_attributes(self._model.model))
                    print('CV_fold : ', fold, timer() - cv_start)
                    print(result)

            run_time = timer() - start

            ret = {
                'iteration': self.iteration,
                'loss': np.mean(cvs),
                'loss_variance': np.std(cvs),
                'eval_time': run_time,
                'status': STATUS_OK,

                'params': params,
                'attributes': attributes
            }
            self.history[params_key] = ret
            return ret

        return objective