In [13]:
# example of ensemble pruning for classification
from numpy import mean
from numpy import std
from sklearn.datasets import make_moons
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.ensemble import VotingClassifier
import matplotlib.pyplot as plt
import numpy as np  
import os   
import joblib
from tensorflow.keras.models import load_model

# get the dataset
def get_dataset():
	X, y = make_moons(n_samples=300, noise=0.2, random_state=42)
	return X, y

# Charger les modèles depuis le dossier "models"
def get_models():
    """Load models from the 'models' directory"""
    models = {}
    models_folder = os.path.join(os.getcwd(), "models")
    for file in os.listdir(models_folder):
        model_name = file.split(".")[0]
        file_extension = file.split(".")[-1]
        if file_extension == "pkl":
            models[model_name] = joblib.load(os.path.join(models_folder, file))
            print(f"Imported sklearn model: {model_name}")
        elif file_extension == "keras":
            models[model_name] = load_model(os.path.join(models_folder, file))
            print(f"Imported keras model: {model_name}")
    return models

# evaluate a list of models
def evaluate_ensemble(models, X, y):
	# check for no models
	if len(models.items()) == 0:
		return 0.0
	# create the ensemble
	ensemble = VotingClassifier(estimators=models.items(), voting='soft')
	# define the evaluation procedure
	cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
	# evaluate the ensemble
	scores = cross_val_score(ensemble, X, y, scoring='accuracy', cv=cv, n_jobs=-1)
	# return mean score
	return mean(scores)

def calculate_Q_statistic(predictions1, predictions2):
    """Calculate Q-statistic between two classifiers' predictions"""
    N11 = sum((predictions1 == 1) & (predictions2 == 1))
    N00 = sum((predictions1 == 0) & (predictions2 == 0))
    N10 = sum((predictions1 == 1) & (predictions2 == 0))
    N01 = sum((predictions1 == 0) & (predictions2 == 1))
    
    Q = (N11 * N00 - N10 * N01) / (N11 * N00 + N10 * N01 + 1e-10)
    return Q

def get_predictions(model, X, y):
    """Get binary predictions from a model"""
    if hasattr(model, 'predict_proba'):
        y_pred = model.predict_proba(X)[:,1] > 0.5
    else:
        y_pred = model.predict(X)
    return y_pred

def mean_Q_statistic(models, X, y):
    """Calculate mean Q-statistic across all pairs of models"""
    n_models = len(models)
    if n_models < 2:
        return 0.0
    
    Q_values = []
    predictions = [get_predictions(model, X, y) for model in models.values()]
    
    for i in range(n_models):
        for j in range(i+1, n_models):
            Q = calculate_Q_statistic(predictions[i], predictions[j])
            Q_values.append(Q)
            
    return np.mean(Q_values)

def prune_round(models_in, X, y):
    """Perform a single round of pruning based on Q-statistic diversity"""
    baseline_acc = evaluate_ensemble(models_in, X, y)
    baseline_Q = mean_Q_statistic(models_in, X, y)
    best_score = baseline_acc
    removed = None
    # Try removing each model and evaluate both accuracy and diversity
    for m in models_in:
        dup = models_in.copy()
        dup.remove(m)
        
        # Calculate new accuracy and Q-statistic
        new_acc = evaluate_ensemble(dup, X, y)
        new_Q = mean_Q_statistic(dup, X, y)
        
        # Accept removal if accuracy doesn't decrease significantly (within 1%)
        # and diversity improves (lower Q-statistic)
        if new_acc >= best_score * 0.99 and new_Q < baseline_Q:
            best_score = new_acc
            removed = m
            baseline_Q = new_Q
            
    return best_score, removed, baseline_Q

# prune an ensemble from scratch
def prune_ensemble(models, X, y):
    scores = []
    Q_stats = []
    best_score = 0.0
    m_length = len(models)-1
    iterations = 0
    # prune ensemble until no further improvement or max iterations reached
    while iterations < m_length:
        # remove one model from the ensemble
        score, removed, stat_Q = prune_round(models, X, y)
        scores.append(score)
        Q_stats.append(stat_Q)
        # check for no improvement
        if removed is None:
            print('>no further improvement')
            break
        # keep track of best score
        best_score = score
        # remove model from the list
        models.remove(removed)
        # report results along the way
        print('>%.3f (removed: %s)' % (score, removed[0]))
        iterations += 1
    return best_score, models, scores, Q_stats

# define dataset
X, y = get_dataset()
# get the models to evaluate
models = get_models()
# convert models dict to list of tuples for VotingClassifier
models = [(name, model) for name, model in models.items()]
score, model_list, scores, Q_stats = prune_ensemble(models, X, y)
# Plot scores and Q_stats as time series
plt.figure(figsize=(10, 6))
plt.plot(scores, label='Accuracy Scores', linestyle='-', linewidth=2, color='red')
plt.plot(Q_stats, label='Q-Statistics', linestyle='-', linewidth=2, color='blue')
plt.xlabel('Iteration')
plt.ylabel('Value')
plt.title('Accuracy Scores and Q-Statistics Over Iterations')
plt.legend()
plt.grid(True)
plt.show()
names = ','.join([n for n, _ in model_list])
print('Models: %s' % names)
print('Final Mean Accuracy: %.3f' % score)

Imported keras model: cnn_model
Imported sklearn model: decision_tree
Imported sklearn model: kernel_svc
Imported sklearn model: linear_svc
Imported sklearn model: random_forest


AttributeError: 'list' object has no attribute 'items'