In [18]:
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.base import BaseEstimator

In [None]:
class MyCobra(BaseEstimator):
    """COBRA-style aggregation of several regression models.

    For a query point, every model votes for the reference points whose
    stored prediction lies within ``epsilon`` of that model's prediction for
    the query.  Reference points collecting at least ``alpha`` votes are
    retained and the prediction is the mean of their targets.

    This class only ever calls ``predict`` on the supplied models; it never
    fits them, so they must already be fitted when handed in.
    Single-output regression is assumed (predictions are flattened).

    Parameters
    ----------
    random_state : int, RandomState instance or None
        Seed used when ``prep_data`` is asked to shuffle.
    epsilon : float or None
        Closeness threshold between predictions.  Must be set (here or via
        ``set_epsilon``) before predicting.
    models : dict
        Mapping ``name -> model``; each model must expose ``predict``.
    frac : float
        Fraction of the data placed in the first part (``X_k_``/``y_k_``)
        by the default split; the remainder becomes the reference part
        (``X_l_``/``y_l_``).

    Raises
    ------
    ValueError
        If ``models`` is missing or empty.
    """

    def __init__(self, random_state=None, epsilon=None, models=None, frac=0.5):
        if not models:
            raise ValueError("Please provide the models to be used")
        self.random_state = random_state
        self.epsilon = epsilon
        # Stored under the constructor-argument name (same object, no copy)
        # so that BaseEstimator.get_params() / sklearn.base.clone() work,
        # which GridSearchCV in set_epsilon relies on.
        self.models = models
        # Historical alias used by the rest of the class.
        self.estimators_ = models
        self.frac = frac

    def fit(self, X, y, default=True, X_k=None, X_l=None, y_k=None, y_l=None):
        """Store the data, split it and cache the models' reference predictions.

        Parameters
        ----------
        X, y : array-like
            Samples and targets.
        default : bool
            When True the data is split by ``prep_data`` using ``frac``.
        X_k, X_l, y_k, y_l : array-like or None
            Caller-provided split, honoured when ``default`` is False and
            both ``X_l`` and ``y_l`` are given; otherwise ``prep_data``
            computes the split.

        Returns
        -------
        self
        """
        self.X_ = X
        self.y_ = y
        self.X_k_ = X_k
        self.X_l_ = X_l
        self.y_k_ = y_k
        self.y_l_ = y_l

        # Pick up models replaced through set_params(models=...).
        self.estimators_ = self.models

        if default or X_l is None or y_l is None:
            self.prep_data()
        self.get_models_predictions()

        return self

    def set_epsilon(self, grid_points=50, X_eps=None, y_eps=None):
        """Choose ``epsilon`` by cross-validated grid search.

        The grid runs linearly from the smallest gap between consecutive
        sorted reference predictions to their full range.

        Parameters
        ----------
        grid_points : int
            Number of candidate epsilon values.
        X_eps, y_eps : array-like
            Data used for the search.  Required; pass them by keyword
            because ``grid_points`` comes first in the signature.

        Raises
        ------
        ValueError
            If ``X_eps``/``y_eps`` are missing or fewer than two reference
            predictions are available.
        """
        if X_eps is None or y_eps is None:
            raise ValueError("X_eps and y_eps are required to tune epsilon")

        self.X_ = X_eps
        self.y_ = y_eps
        self.prep_data()
        self.get_models_predictions()

        ordered = np.sort(self.all_preds_)
        if ordered.size < 2:
            raise ValueError(
                "At least two reference predictions are needed to build the epsilon grid"
            )
        emin = np.diff(ordered).min()
        emax = ordered[-1] - ordered[0]
        erange = np.linspace(emin, emax, grid_points)

        tuned_parameters = [{'epsilon': erange}]
        clf = GridSearchCV(self, tuned_parameters, scoring="neg_mean_squared_error")
        clf.fit(X_eps, y_eps)
        # The models are supplied by the caller and are deliberately kept:
        # clearing them here would leave nothing to predict with.
        self.epsilon = clf.best_params_["epsilon"]

    def pred(self, X, alpha, info=False):
        """Aggregate the models' opinions for one or several query points.

        Parameters
        ----------
        X : array-like
            A single sample (1-D) or a batch of samples (2-D).
        alpha : int
            Minimum number of models that must consider a reference point
            close to the query for it to be retained.
        info : bool
            Accepted for interface compatibility; currently unused.

        Returns
        -------
        float or ndarray
            A scalar for 1-D input, otherwise one value per row.  Queries
            for which no reference point is retained get 0.

        Raises
        ------
        ValueError
            If ``epsilon`` has not been set.
        """
        if self.epsilon is None:
            raise ValueError(
                "epsilon is not set; pass it to the constructor or call set_epsilon"
            )

        single = np.ndim(X) == 1
        if single:
            X = np.asarray(X).reshape(1, -1)

        targets = np.asarray(self.y_l_, dtype=float)

        # votes[i, j] = number of models placing reference point j within
        # epsilon of their prediction for query i.
        votes = 0
        for name, model in self.estimators_.items():
            query = np.asarray(model.predict(X), dtype=float).reshape(-1, 1)
            reference = np.asarray(self.model_preds_[name], dtype=float).reshape(1, -1)
            votes = votes + (np.abs(reference - query) <= self.epsilon)

        chosen = votes >= alpha
        counts = chosen.sum(axis=1)
        # Mask before summing so targets of unselected points never leak in.
        totals = np.where(chosen, targets, 0.0).sum(axis=1)

        result = np.zeros(counts.shape[0])
        np.divide(totals, counts, out=result, where=counts > 0)
        return result[0] if single else result

    def predict(self, X, alpha=None, info=False):
        """Predict for ``X``; by default every model must agree.

        Parameters
        ----------
        X : array-like
            A single sample (1-D) or a batch of samples (2-D).
        alpha : int or None
            Required number of agreeing models; defaults to the number of
            models.
        info : bool
            Accepted for interface compatibility; currently unused.

        Returns
        -------
        float or ndarray
            See ``pred``.
        """
        if alpha is None:
            alpha = len(self.estimators_)
        return self.pred(X, alpha=alpha)

    def prep_data(self, k=None, l=None, shuffle_data=False):
        """Split ``X_``/``y_`` into a first part and a reference part.

        Parameters
        ----------
        k : int or None
            Size of the first part (``X_k_``/``y_k_``).
        l : int or None
            Size of the reference part (``X_l_``/``y_l_``), which starts
            right after the first ``k`` samples.
        shuffle_data : bool
            Shuffle the data (seeded by ``random_state``) before splitting.

        When neither size is given, ``k`` is ``int(n * frac)`` and the
        reference part is everything that remains.  When only one is given
        the other one is the remainder.

        Returns
        -------
        self
        """
        if shuffle_data:
            # Local import: the name is not available at module level.
            from sklearn.utils import shuffle

            self.X_, self.y_ = shuffle(self.X_, self.y_, random_state=self.random_state)

        n_samples = len(self.X_)

        if k is None and l is None:
            k = int(n_samples * self.frac)
            l = n_samples - k
        elif l is None:
            l = n_samples - k
        elif k is None:
            k = n_samples - l

        # ``l`` is a size, so the reference slice ends at k + l.
        self.X_k_ = self.X_[:k]
        self.X_l_ = self.X_[k:k + l]
        self.y_k_ = self.y_[:k]
        self.y_l_ = self.y_[k:k + l]

        return self

    def get_models_predictions(self):
        """Cache every model's predictions on the reference part ``X_l_``.

        Sets ``model_preds_`` (``name -> predictions``), its alias
        ``machine_predictions_`` and ``all_preds_`` (all predictions
        flattened into one float array).

        Raises
        ------
        ValueError
            If the reference part is empty.

        Returns
        -------
        self
        """
        if len(self.y_l_) == 0:
            raise ValueError("The reference part of the data is empty; adjust frac or the split")

        self.model_preds_ = {
            name: model.predict(self.X_l_)
            for name, model in self.estimators_.items()
        }
        # Same dict under the other name used for it.
        self.machine_predictions_ = self.model_preds_
        self.all_preds_ = np.concatenate(
            [np.ravel(np.asarray(p, dtype=float)) for p in self.model_preds_.values()]
        )
        return self