In [30]:
from sklearn.base import BaseEstimator
from sklearn.metrics import accuracy_score, mean_absolute_error
from sklearn.metrics import accuracy_score, roc_auc_score, precision_score, mean_absolute_error
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV



class ModelOptimization(BaseEstimator):
    """Rank candidate models, then tune the hyperparameters of the best ones.

    ``fit`` works in two stages:

    1. Every model in ``models`` is fitted on ``(X, y)`` and scored on that
       same data with the metric named by ``scoring``.
    2. The ``top_n`` highest-scoring models are each passed to a
       cross-validated hyperparameter search (grid, randomized or Bayesian)
       using the parameter grid at the same position in ``param_grids``.

    Parameters
    ----------
    models : sequence of estimators
        Candidate models. Each must provide ``fit`` and ``predict``.
    param_grids : sequence
        ``param_grids[i]`` is the search space handed to the search object
        for ``models[i]``.
    scoring : {'accuracy', 'roc_auc', 'precision', 'mae'}
        Metric used for both the ranking and the hyperparameter search.
    cv : int, default=5
        Forwarded as ``cv`` to the search object.
    top_n : int, default=3
        Number of top-ranked models that get a hyperparameter search.
    n_inter : int, default=10
        Forwarded as ``n_iter`` to the randomized and Bayesian searches.
        (The parameter name is kept as-is for backward compatibility; it is
        also readable and writable through the ``n_iter`` property.)
    search_method : {'grid', 'random', 'bayesian'}, default='grid'
        Which search strategy to use in stage 2.

    Attributes
    ----------
    best_models : list
        ``best_estimator_`` of each search, in ranking order. Empty until
        ``fit`` has been called.
    best_model_names_ : list of str
        Display name of each entry in ``best_models`` (set by ``fit``).
    best_scores_ : dict
        Maps each display name to the search's ``best_score_`` (set by
        ``fit``).
    """

    _SUPPORTED_SCORING = ('accuracy', 'roc_auc', 'precision', 'mae')
    _SUPPORTED_SEARCH = ('grid', 'random', 'bayesian')

    def __init__(self, models, param_grids, scoring, cv=5, top_n=3, n_inter=10, search_method='grid'):
        self.models = models
        self.param_grids = param_grids
        self.scoring = scoring
        self.cv = cv
        self.top_n = top_n
        self.best_models = []
        self.search_method = search_method
        # Stored under the constructor-argument name: BaseEstimator.get_params
        # looks attributes up by the names in the __init__ signature.
        self.n_inter = n_inter

    @property
    def n_iter(self):
        """Alias for ``n_inter``, kept for code that reads ``n_iter``."""
        return self.n_inter

    @n_iter.setter
    def n_iter(self, value):
        self.n_inter = value

    @staticmethod
    def _unique_labels(models):
        """Return one display name per model, suffixing repeated class names."""
        seen = {}
        labels = []
        for model in models:
            base = type(model).__name__
            seen[base] = seen.get(base, 0) + 1
            labels.append(base if seen[base] == 1 else f"{base}_{seen[base]}")
        return labels

    @staticmethod
    def _binary_decision_values(model, X):
        """Return continuous scores for the positive class when available."""
        if hasattr(model, 'predict_proba'):
            return model.predict_proba(X)[:, 1]
        if hasattr(model, 'decision_function'):
            return model.decision_function(X)
        # Last resort: hard labels, as a model may expose nothing else.
        return model.predict(X)

    def _ranking_score(self, model, X, y, multiclass):
        """Score an already-fitted model on (X, y); higher is always better."""
        if self.scoring == 'roc_auc':
            if multiclass:
                return roc_auc_score(y, model.predict_proba(X), multi_class='ovr', average='macro')
            return roc_auc_score(y, self._binary_decision_values(model, X))

        y_pred = model.predict(X)
        if self.scoring == 'accuracy':
            return accuracy_score(y, y_pred)
        if self.scoring == 'precision':
            return precision_score(y, y_pred, average='macro')
        if self.scoring == 'mae':
            # MAE is an error, so negate it to keep "higher is better".
            return -mean_absolute_error(y, y_pred)
        raise ValueError(f"Invalid scoring metric: {self.scoring}")

    def _search_scoring(self, multiclass):
        """Translate ``self.scoring`` into a scikit-learn scorer name.

        The names are chosen so the search optimizes the same quantity that
        the ranking step computes.
        """
        if self.scoring == 'mae':
            return 'neg_mean_absolute_error'
        if self.scoring == 'precision':
            return 'precision_macro'
        if self.scoring == 'roc_auc' and multiclass:
            return 'roc_auc_ovr'
        return self.scoring

    def _build_search(self, model, param_grid, scoring):
        """Create the (unfitted) search object selected by ``search_method``."""
        if self.search_method == 'grid':
            return GridSearchCV(model, param_grid, scoring=scoring, cv=self.cv, n_jobs=-1)
        if self.search_method == 'random':
            return RandomizedSearchCV(model, param_grid, scoring=scoring, cv=self.cv, n_jobs=-1, n_iter=self.n_inter)
        if self.search_method == 'bayesian':
            # NOTE(review): BayesSearchCV is not imported in the visible part
            # of this file (presumably scikit-optimize's) -- confirm it is in
            # scope before using search_method='bayesian'.
            return BayesSearchCV(model, param_grid, scoring=scoring, cv=self.cv, n_iter=self.n_inter, n_jobs=-1)
        raise ValueError("Invalid search method. Supported options: 'grid', 'random', 'bayesian'")

    def fit(self, X, y):
        """Rank all models on (X, y) and tune the ``top_n`` best ones.

        Parameters
        ----------
        X, y
            Training data and targets, passed unchanged to each model's
            ``fit`` and to the search objects.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``scoring`` or ``search_method`` is not supported, or if there
            are fewer parameter grids than models.
        """
        # Validate the configuration before any (potentially slow) fitting.
        if self.scoring not in self._SUPPORTED_SCORING:
            raise ValueError(f"Invalid scoring metric: {self.scoring}")
        if self.search_method not in self._SUPPORTED_SEARCH:
            raise ValueError("Invalid search method. Supported options: 'grid', 'random', 'bayesian'")
        if len(self.param_grids) < len(self.models):
            raise ValueError(
                f"Expected one parameter grid per model: got {len(self.models)} "
                f"models but {len(self.param_grids)} parameter grids"
            )

        # Start from a clean slate so repeated fit() calls do not accumulate.
        self.best_models = []
        self.best_model_names_ = []
        self.best_scores_ = {}

        # Only inspect the labels when the metric needs the class count.
        multiclass = self.scoring == 'roc_auc' and len(set(y)) > 2
        labels = self._unique_labels(self.models)

        # Stage 1: fit every candidate and score it on the training data.
        # NOTE(review): these are in-sample scores, so the ranking can favor
        # models that overfit -- consider a cross-validated score here.
        ranking_scores = []
        for i, model in enumerate(self.models):
            print(f"Fitting model {i+1}/{len(self.models)}: {type(model).__name__}")
            model.fit(X, y)
            ranking_scores.append(self._ranking_score(model, X, y, multiclass))

        # Select the "top N" models by index so that two candidates of the
        # same class keep their own score and their own parameter grid.
        ranked = sorted(
            range(len(ranking_scores)),
            key=lambda idx: ranking_scores[idx],
            reverse=True,
        )[:self.top_n]

        print("\nModel ranking based on scores:")
        for rank, idx in enumerate(ranked, 1):
            print(f"Rank {rank}: {labels[idx]} - Score: {ranking_scores[idx]:.4f}")
        print('\n\n')

        # Stage 2: hyperparameter search on the top N models only.
        search_scoring = self._search_scoring(multiclass)
        for idx in ranked:
            label = labels[idx]
            print(f"Optimizing hyperparameters for model {label}")

            search = self._build_search(self.models[idx], self.param_grids[idx], search_scoring)
            search.fit(X, y)

            print(f"Best score for {label}: {search.best_score_:.4f}")
            print(f"Params: {search.best_params_}\n\n")

            self.best_scores_[label] = search.best_score_
            self.best_model_names_.append(label)
            self.best_models.append(search.best_estimator_)

        return self

    def predict(self, X):
        """Predict with every tuned model.

        Returns
        -------
        dict
            Maps each tuned model's name (its class name, with a numeric
            suffix when several tuned models share a class) to the output of
            its ``predict(X)``. Empty if ``fit`` has not been called.
        """
        names = getattr(self, 'best_model_names_', None)
        if names is None or len(names) != len(self.best_models):
            # best_models was populated outside fit(); derive names from it.
            names = self._unique_labels(self.best_models)

        return {name: model.predict(X) for name, model in zip(names, self.best_models)}