In [1]:
import numpy as np
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification

# The Evidence Halving Algorithm (EH)
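The `EH` class implements a learning algorithm over a pool of `SGDClassifier` experts plus one perfect expert, whose prediction always equals the true label. Initially, it assigns equal mass to each expert. In each round it aggregates (coarsens) the experts' outputs into the total mass supporting label 1 and the total mass supporting label 0; it predicts a label only when that label carries all of the mass, and abstains otherwise. If the coarsened prediction results in an abstention, the experts' masses are updated according to their prediction accuracy using the Dempster-Shafer rule: a refined mass of 1 is assigned to the set of experts who were correct, so those who were incorrect receive a mass of 0 and the masses of the remaining experts are renormalized. The true label of each round is chosen adversarially, as the opposite of the majority vote of the `SGDClassifier` experts. The goal is to aggregate predictions in adversarial settings and minimize errors through dynamic mass updates.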

In [2]:
class EH:
    """Evidence Halving (EH) over a pool of SGDClassifier experts plus a perfect expert.

    Every expert starts with the same mass. In each round of ``run`` the
    experts vote on one test instance, an adversarial true label is generated
    (the opposite of the majority vote of the non-perfect experts), and the
    votes are coarsened into the mass backing label 1 and the mass backing
    label 0. When neither label carries the whole mass the algorithm abstains
    and drops every expert that voted wrongly, renormalising the rest.

    Labels are assumed to be binary and encoded as 0/1.

    Attributes:
        total_abstentions: Number of abstentions so far (cumulative across
            calls to ``run``).
        num_experts: Pool size *including* the perfect expert.
        random_state: Seed (or ``np.random.RandomState``) used when training
            the experts; ``None`` leaves scikit-learn on NumPy's global RNG.
        experts_info: Maps expert key (``0..n-1`` and ``'perfect'``) to a dict
            with the keys ``'model'``, ``'mass'`` and ``'predictions'``.
    """

    # Key under which the perfect expert is stored in ``experts_info``.
    _PERFECT_KEY = 'perfect'
    # Absolute tolerance used when checking whether one label carries all of
    # the mass; sums of normalised floats are not guaranteed to be exactly 1.
    _MASS_TOL = 1e-9

    def __init__(self, num_experts, random_state=None):
        """
        Args:
            num_experts: Number of SGDClassifier experts (the perfect expert
                is added on top of these).
            random_state: ``None`` (default, unseeded as before), an int seed,
                or an ``np.random.RandomState`` used for the training
                sub-samples and the SGD fits.

        Raises:
            ValueError: If ``num_experts`` is negative.
        """
        if num_experts < 0:
            raise ValueError(f"num_experts must be non-negative, got {num_experts}")
        self.total_abstentions = 0
        self.num_experts = num_experts + 1
        self.random_state = random_state
        self.experts_info = {}  # Dictionary to store expert information

    def _make_rng(self):
        """Returns the object to hand to scikit-learn's ``random_state`` arguments."""
        seed = self.random_state
        if seed is None or isinstance(seed, np.random.RandomState):
            return seed
        # One shared generator: every expert gets a different, yet
        # reproducible, sub-sample and SGD shuffle.
        return np.random.RandomState(seed)

    @classmethod
    def _coarsened_prediction(cls, mass_1, mass_0):
        """Returns 1 or 0 when that label carries all of the mass, else 'Abstention'."""
        if np.isclose(mass_1, 1.0, rtol=0.0, atol=cls._MASS_TOL):
            return 1
        if np.isclose(mass_0, 1.0, rtol=0.0, atol=cls._MASS_TOL):
            return 0
        return 'Abstention'

    def initialize_experts(self, X_train, y_train):
        """
        Initializes the experts as SGDClassifier models and a perfect expert.
        Each SGDClassifier is trained on a random half of the training data.
        Any previously stored pool is discarded first.
        """
        rng = self._make_rng()
        initial_mass = 1.0 / self.num_experts

        # Start from an empty pool so a second call cannot inherit experts,
        # masses or predictions from an earlier one.
        self.experts_info = {}

        for i in range(self.num_experts - 1):
            X_subtrain, _, y_subtrain, _ = train_test_split(
                X_train, y_train, test_size=0.5, random_state=rng
            )
            clf = SGDClassifier(max_iter=1000, tol=1e-3, random_state=rng)
            clf.fit(X_subtrain, y_subtrain)

            # Store model information, initial mass, and empty predictions list for each expert
            self.experts_info[i] = {
                'model': clf,
                'mass': initial_mass,
                'predictions': []
            }

        # Add the perfect expert (it has no model; run() fills in its votes)
        self.experts_info[self._PERFECT_KEY] = {
            'model': None,
            'mass': initial_mass,
            'predictions': []
        }

        print(f"Initialization: {self.num_experts} experts with masses {[info['mass'] for info in self.experts_info.values()]}")

    def coarsening(self, t):
        """
        Aggregates the round-``t`` predictions of the experts in the pool.
        Returns the coarsened masses ``(mass_1, mass_0)`` backing label 1 and
        label 0, as Python floats.
        """
        experts_predicted_1 = set()
        experts_predicted_0 = set()
        masses_for_1 = []
        masses_for_0 = []

        # Single pass: record who voted for what and the positive mass behind each label
        for key, info in self.experts_info.items():
            vote = info['predictions'][t]
            if vote == 1:
                experts_predicted_1.add(key)
                if info['mass'] > 0:
                    masses_for_1.append(info['mass'])
            elif vote == 0:
                experts_predicted_0.add(key)
                if info['mass'] > 0:
                    masses_for_0.append(info['mass'])

        print(f"Experts predicting 1: {experts_predicted_1}")
        print(f"Experts predicting 0: {experts_predicted_0}")

        # Compute the coarsened mass function; float() keeps NumPy scalars out
        # of the returned values and of the printed trace.
        mass_1 = float(np.sum(masses_for_1))
        mass_0 = float(np.sum(masses_for_0))

        print(f"Coarsened masses: mass_1: {mass_1}, mass_0: {mass_0}")

        return mass_1, mass_0

    def update_masses(self, t, y_true):
        """
        Updates the experts' masses based on their round-``t`` predictions.
        Experts whose prediction differs from ``y_true`` get mass 0 and are
        removed; the masses of the remaining experts are renormalised.
        Returns True if exactly one expert remains (inside ``run`` this is
        always the perfect expert, which is never wrong), otherwise False.
        """
        survivors = {}
        for key, info in self.experts_info.items():
            if info['mass'] > 0 and info['predictions'][t] == y_true:
                survivors[key] = info
            else:
                # Set the masses of incorrect experts to 0
                info['mass'] = 0
        self.experts_info = survivors

        # Normalize the masses of the remaining experts (stored as Python
        # floats so the list below prints the same under every NumPy version)
        total_mass = float(np.sum([info['mass'] for info in survivors.values()]))
        for info in survivors.values():
            info['mass'] = float(info['mass'] / total_mass)

        print(f"Updated masses: {[info['mass'] for info in self.experts_info.values()]}")

        # Check if only one expert remains
        return len(self.experts_info) == 1

    def run(self, X_test):
        """
        Runs the algorithm on the test data, one instance per round, until the
        data is exhausted or only the perfect expert remains.

        Args:
            X_test: 2-D array-like of test instances (one row per round).

        Raises:
            ValueError: If the pool has no perfect expert, i.e.
                ``initialize_experts`` has not been called.
        """
        X_test = np.asarray(X_test)
        perfect = self._PERFECT_KEY

        if perfect not in self.experts_info:
            raise ValueError("No experts available: call initialize_experts() before run().")

        # Predictions are indexed by the round number t of *this* run, so
        # votes left over from a previous run must not be reused.
        for info in self.experts_info.values():
            info['predictions'] = []

        for t in range(len(X_test)):

            # Nothing left to aggregate (or to build an adversarial label from)
            if len(self.experts_info) == 1:
                print("Algorithm terminated: only the perfect expert remains.")
                break

            print(f"\nRound: {t + 1}")

            # Collect predictions from each expert (except for the perfect expert)
            sample = X_test[t].reshape(1, -1)
            for key, info in self.experts_info.items():
                if key != perfect:
                    info['predictions'].append(int(info['model'].predict(sample)[0]))
                else:
                    # Placeholder until the true label of this round is known
                    info['predictions'].append(None)

            predictions_list = [info['predictions'][t] for info in self.experts_info.values()]
            print(f"Experts predictions for round {t + 1} without perfect expert: {predictions_list}")

            # Generate the adversarial label: the opposite of the majority vote
            # (bincount's argmax resolves ties in favour of label 0)
            votes = [p for p in predictions_list if p is not None]
            y_true = 1 - int(np.bincount(votes).argmax())
            print(f"Adversarial true label for round {t + 1}: {y_true}")

            # Perfect expert prediction (= true label)
            self.experts_info[perfect]['predictions'][t] = y_true

            # Update predictions_list with the perfect expert's prediction
            predictions_list = [p if p is not None else y_true for p in predictions_list]
            print(f"Experts predictions for round {t + 1} with perfect expert: {predictions_list}")

            # Coarsening
            print("\nCoarsening:")
            mass_1, mass_0 = self.coarsening(t)

            # Determine the coarsened prediction
            coarsened_pred = self._coarsened_prediction(mass_1, mass_0)
            print(f"Coarsened prediction: {coarsened_pred}")

            # If Abstention occurs, update the masses
            if coarsened_pred == 'Abstention':
                self.total_abstentions += 1
                print(f"Update total abstentions: {self.total_abstentions}")
                print("\nMass updating:")
                stop = self.update_masses(t, y_true)
            else:
                stop = False

            if stop:
                print("Algorithm terminated: only the perfect expert remains.")
                break

        ########################################################################
        print()
        print("\nFinal Results:")
        print(f"\nTotal number of instances: {len(X_test)}")
        print(f"Remaining experts: {self.experts_info}")
        print(f"Total number of abstentions: {self.total_abstentions}")

In [3]:
# Test the algorithm
SEED = 42

# EH, as constructed here, leaves scikit-learn's random_state at None when it
# sub-samples the training data and fits its SGDClassifier experts, so those
# calls draw from NumPy's global generator. Seeding it makes the expert pool,
# and therefore the printed trace, reproducible on Restart & Run All.
np.random.seed(SEED)

X, y = make_classification(n_samples=100, n_features=20, n_informative=15, random_state=SEED)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=SEED)

num_experts = 3  # (consider +1 for the perfect_expert)

eh = EH(num_experts=num_experts)
eh.initialize_experts(X_train, y_train)
eh.run(X_test)

Initialization: 4 experts with masses [0.25, 0.25, 0.25, 0.25]

Round: 1
Experts predictions for round 1 without perfect expert: [1, 0, 1, None]
Adversarial true label for round 1: 0
Experts predictions for round 1 with perfect expert: [1, 0, 1, 0]

Coarsening:
Experts predicting 1: {0, 2}
Experts predicting 0: {1, 'perfect'}
Coarsened masses: mass_1: 0.5, mass_0: 0.5
Coarsened prediction: Abstention
Update total abstentions: 1

Mass updating:
Updated masses: [0.5, 0.5]

Round: 2
Experts predictions for round 2 without perfect expert: [0, None]
Adversarial true label for round 2: 1
Experts predictions for round 2 with perfect expert: [0, 1]

Coarsening:
Experts predicting 1: {'perfect'}
Experts predicting 0: {1}
Coarsened masses: mass_1: 0.5, mass_0: 0.5
Coarsened prediction: Abstention
Update total abstentions: 2

Mass updating:
Updated masses: [1.0]
Algorithm terminated: only the perfect expert remains.


Final Results:

Total number of instances: 30
Remaining experts: {'perfect': {'