In [30]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, mean_absolute_error
from sklearn.base import BaseEstimator
from sklearn.metrics import accuracy_score, roc_auc_score, precision_score, mean_absolute_error



class ModelOptimization(BaseEstimator):
    def __init__(self, models, param_grids, scoring, cv=5, top_n=3, n_inter=10, search_method = 'grid'):
        self.models = models
        self.param_grids = param_grids
        self.scoring = scoring
        self.cv = cv
        self.top_n = top_n
        self.best_models = []
        self.search_method = search_method
        self.n_iter = n_inter

    def fit(self, X, y):
        all_scores = {}
        for i, model in enumerate(self.models):
            print(f"Fitting model {i+1}/{len(self.models)}: {type(model).__name__}")
            model.fit(X, y)
            y_pred = model.predict(X)

            if self.scoring == 'accuracy':
                score = accuracy_score(y, y_pred)
            elif self.scoring == 'roc_auc':
                if len(set(y)) > 2:  # Multi-class classification
                    score = roc_auc_score(y, model.predict_proba(X), multi_class='ovr', average='macro')
                else:  # Binary classification
                    score = roc_auc_score(y, y_pred)
            elif self.scoring == 'precision':
                score = precision_score(y, y_pred, average='macro')
            elif self.scoring == 'mae':
                score = -mean_absolute_error(y, y_pred)  # Minimize the MAE, so we add a negative sign
            else:
                raise ValueError(f"Invalid scoring metric: {self.scoring}")

            all_scores[type(model).__name__] = score

        # Select the "top N" models based on the scores
        top_models = sorted(all_scores.items(), key=lambda x: x[1], reverse=True)[:self.top_n]
        top_model_names = [model[0] for model in top_models]

        print("\nModel ranking based on scores:")
        for rank, (model_name, score) in enumerate(top_models, 1):
            print(f"Rank {rank}: {model_name} - Score: {score:.4f}")
        print('\n\n')
        # Perform GridSearchCV only on the top N models
        best_scores = {}
        for model_name in top_model_names:
            model_index = [i for i, model in enumerate(self.models) if type(model).__name__ == model_name][0]
            model = self.models[model_index]
            print(f"Optimizing hyperparameters for model {model_name}")
            
            if self.search_method == 'grid':
                search = GridSearchCV(model, self.param_grids[model_index], scoring=self.scoring, cv=self.cv, n_jobs=-1)
            elif self.search_method == 'random':
                search = RandomizedSearchCV(model, self.param_grids[model_index], scoring=self.scoring, cv=self.cv, n_jobs=-1, n_iter=self.n_iter)
            elif self.search_method == 'bayesian':
                search = BayesSearchCV(model, self.param_grids[model_index], scoring=self.scoring, cv=self.cv, n_iter=self.n_iter, n_jobs=-1)
            else:
                raise ValueError("Invalid search method. Supported options: 'grid', 'random', 'bayesian'")
                
                
            search.fit(X, y)
            best_model = search.best_estimator_
            best_score = search.best_score_

            print(f"Best score for {model_name}: {best_score:.4f}")
            print(f"Params: {search.best_params_}\n\n")

            best_scores[model_name] = best_score
            self.best_models.append(best_model)
            
        return self

    def predict(self, X):
        predictions = {}
        for model in self.best_models:
            model_name = type(model).__name__
            y_pred = model.predict(X)
            predictions[model_name] = y_pred
            
        return predictions

In [31]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier



X, y = make_classification(n_samples=1000, n_features=10, random_state=42)

# Define models and parameter grids
models = [LogisticRegression(), RandomForestClassifier(), GradientBoostingClassifier()]

param_grids = [
    {'C': [0.1, 1, 10]},  # Parameter grid for LogisticRegression
    {'max_depth': [None, 5, 10], 'n_estimators': [50, 100, 150]},  # Parameter grid for RandomForestClassifier
    {'learning_rate': [0.1, 0.01, 0.001], 'n_estimators': [50, 100, 150]}  # Parameter grid for GradientBoostingClassifier
]

# Example: Use the ModelOptimization class to optimize models with roc_auc scoring
scoring = 'accuracy'
model_comparison = ModelOptimization(models, param_grids, scoring, cv=5, top_n=3)
model_comparison.fit(X, y)

# Make predictions using the best models
X_test, y_test = make_classification(n_samples=100, n_features=10, random_state=42)
predictions = model_comparison.predict(X_test)

# Print the predictions for each model
for model_name, y_pred in predictions.items():
    print(f"Predictions for {model_name}:\n{y_pred}\n")
    

Fitting model 1/3: LogisticRegression
Fitting model 2/3: RandomForestClassifier
Fitting model 3/3: GradientBoostingClassifier

Model ranking based on scores:
Rank 1: RandomForestClassifier - Score: 1.0000
Rank 2: GradientBoostingClassifier - Score: 0.9770
Rank 3: LogisticRegression - Score: 0.8590



Optimizing hyperparameters for model RandomForestClassifier
Best score for RandomForestClassifier: 0.9110
Params: {'max_depth': 10, 'n_estimators': 150}


Optimizing hyperparameters for model GradientBoostingClassifier
Best score for GradientBoostingClassifier: 0.9070
Params: {'learning_rate': 0.01, 'n_estimators': 150}


Optimizing hyperparameters for model LogisticRegression
Best score for LogisticRegression: 0.8580
Params: {'C': 0.1}


Predictions for RandomForestClassifier:
[1 1 1 1 1 1 1 0 1 0 1 1 1 1 1 1 1 0 1 1 1 1 0 1 1 1 0 1 1 1 1 1 0 1 1 0 1
 1 0 0 0 0 1 1 1 0 1 0 0 1 0 0 1 1 1 1 0 1 1 0 1 1 0 0 1 1 1 1 1 0 1 0 0 1
 1 1 1 1 0 1 1 1 0 0 1 1 1 1 0 0 0 1 1 1 1 1 1 0 0 1]

Prediction