In [1]:
import numpy as np
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
import itertools
from itertools import chain, combinations

# The Evidence Weighted Majority Algorithm (EWM)
The EWM class implements a learning algorithm with multiple SGDClassifier experts.
Initially, it assigns equal mass to each expert. It makes aggregated predictions based on the experts' outputs.
If the coarsened prediction results in an Abstention or an Error, the masses of the incorrect experts are reduced by a factor of beta, and the residual mass is allocated to the set of remaining experts so that the masses still sum to 1.
No updates are made when all experts agree, as the situation is uninformative.
The goal is to aggregate predictions in adversarial settings and minimize errors through dynamic mass updates.

In [2]:
class EWM:
    """
    Evidence Weighted Majority (EWM) aggregator over binary experts.

    A mass function is kept over the power set of expert indices. Each expert
    starts with mass 1/num_experts on its singleton. After an abstention or an
    error, the singleton masses of the experts that predicted wrongly are
    multiplied by ``beta`` and the mass that was taken away is placed on the
    set of currently active experts, so the masses keep summing to 1.
    """

    def __init__(self, num_experts, beta):
        """
        Args:
            num_experts: Number of experts to manage.
            beta: Multiplicative penalty applied to the mass of a wrong expert
                (expected to satisfy 0 <= beta < 1; not validated here).
        """
        self.total_abstentions = 0
        self.total_errors = 0
        self.num_experts = num_experts
        self.beta = beta
        self.experts_info = {}  # Dictionary to store expert information
        self.experts = list(range(self.num_experts))

        # Dictionary to store masses allocated on the subsets of experts.
        # NOTE: this enumerates the full power set (2**num_experts entries).
        self.mass_function: dict[tuple[int, ...], float] = {
            tuple(sorted(subset)): 0.0
            for r in range(len(self.experts) + 1)
            for subset in itertools.combinations(sorted(self.experts), r)
        }

        # Assign masses to individual experts
        for i in range(len(self.experts)):
            self.mass_function[(i,)] = 1.0 / self.num_experts

        print(f"Initialized power set and masses for {self.num_experts} experts: {self.mass_function}")

    def initialize_experts(self, X_train, y_train, random_state=None):
        """
        Initializes experts as SGDClassifier models.
        Each expert is trained on a random half of the training data.

        Args:
            X_train: Training features.
            y_train: Training labels.
            random_state: Optional integer seed. When given, expert ``i`` uses
                ``random_state + i`` for both its data split and its SGD fit,
                which makes the experts reproducible while keeping them
                different from one another. ``None`` (the default) keeps the
                original unseeded behaviour.
        """

        for i in range(self.num_experts):
            seed = None if random_state is None else random_state + i
            X_subtrain, _, y_subtrain, _ = train_test_split(
                X_train, y_train, test_size=0.5, random_state=seed
            )
            clf = SGDClassifier(max_iter=1000, tol=1e-3, random_state=seed)
            clf.fit(X_subtrain, y_subtrain)

            # Store model information, initial mass, and empty predictions list for each expert
            self.experts_info[i] = {
                'model': clf,
                'mass': 1.0 / self.num_experts,
                'predictions': []
            }

        print(f"Initialized {self.num_experts} experts: {self.experts_info}")

    def coarsening(self, t):
        """
        Aggregates the predictions of active experts and calculates the coarsened prediction.

        Every subset with positive mass contributes its mass to label 1, to
        label 0, or to the uncertain outcome "01", depending on whether its
        active members predicted only 1, only 0, or both at round ``t``.

        Returns:
            Tuple ``(mass_0, mass_1, mass_01)`` -- note the order: 0 first.
        """

        # Identify experts predicting 1 and 0 at time t
        experts_predicted_1 = {i for i, info in self.experts_info.items() if info['predictions'][t] == 1}
        experts_predicted_0 = {i for i, info in self.experts_info.items() if info['predictions'][t] == 0}

        print(f"Experts predicting 1: {experts_predicted_1}")
        print(f"Experts predicting 0: {experts_predicted_0}")

        mass_1, mass_0, mass_01 = 0, 0, 0

        # Compute the coarsened mass function
        for subset, mass in self.mass_function.items():
            if mass > 0:

                intersects_with_1 = experts_predicted_1.intersection(subset)
                intersects_with_0 = experts_predicted_0.intersection(subset)

                if intersects_with_1 and intersects_with_0:
                    print(f"Subset {subset} intersects with both: {intersects_with_1} and {intersects_with_0}")
                    mass_01 += mass
                elif intersects_with_1:
                    print(f"Subset {subset} intersects only with experts predicting 1: {intersects_with_1}")
                    mass_1 += mass
                elif intersects_with_0:
                    print(f"Subset {subset} intersects only with experts predicting 0: {intersects_with_0}")
                    mass_0 += mass

        print(f"Mass 1: {mass_1}, Mass 0: {mass_0}, Mass 01: {mass_01} ")

        return mass_0, mass_1, mass_01

    def update_masses(self, t, y_true):
        """
        Updates the experts' masses based on their predictions at round ``t``.

        Wrong experts have their singleton mass multiplied by ``beta``; the
        residual ``1 - sum(singleton masses)`` is placed on the set of experts
        that were active when the update started. Experts with zero mass are
        then removed.

        Returns True if only one expert remains, otherwise returns False.
        """

        # Penalize experts who made an incorrect prediction
        for i, info in self.experts_info.items():
            if info['predictions'][t] != y_true:
                info['mass'] *= self.beta  # Update the mass in experts info dictionary
                self.mass_function[(i,)] *= self.beta

        # Drop the residual left by earlier updates. Without this, once an
        # expert has been removed the old (larger) active set keeps its mass
        # while a new residual is written to the smaller set, and the mass
        # function sums to more than 1.
        for subset in self.mass_function:
            if len(subset) != 1:
                self.mass_function[subset] = 0.0

        # Assign the residual mass to the remaining active experts' set
        total_mass = sum(info['mass'] for info in self.experts_info.values())
        remaining_experts = tuple(sorted(self.experts_info.keys()))
        # Clamp so floating-point rounding can never yield a negative mass.
        m_active_experts = max(0.0, 1 - total_mass)
        # With fewer than two active experts there is no composite set to hold
        # the residual; writing it would overwrite the expert's own singleton
        # mass (or put mass on the empty set), so skip the assignment.
        if len(remaining_experts) > 1:
            self.mass_function[remaining_experts] = m_active_experts

        print(f"Updated power set and masses for {self.num_experts} experts: {self.mass_function}")
        print(f"Updated experts' masses: {[info['mass'] for info in self.experts_info.values()]}")
        print(f"m_active_experts: {m_active_experts}")

        total_mass = sum(self.mass_function.values())
        print(f"Total mass after update: {total_mass}")

        # Remove experts with zero mass
        self.experts_info = {i: info for i, info in self.experts_info.items() if info['mass'] > 0}

        # Check if only one expert remains
        if len(self.experts_info) == 1:
            print("Only one expert remaining. Stopping the algorithm.")
            return True

        return False

    def run(self, X_test):
        """
        Runs the algorithm on test data, one instance per round.

        The "true" label of each round is generated adversarially as the
        opposite of the experts' majority vote. Masses are updated after an
        abstention, or after an error on which the experts did not all agree.
        The loop stops early when a single expert remains. Totals of
        abstentions and errors are accumulated on the instance and printed.

        Args:
            X_test: Indexable collection of feature rows; each row must
                support ``.reshape(1, -1)`` (e.g. a 2-D NumPy array).
        """

        # Rounds are indexed from 0 on every call, so the per-expert prediction
        # history must start empty; otherwise a second call would read the
        # predictions recorded by the previous one.
        for info in self.experts_info.values():
            info['predictions'] = []

        t = 0

        while t < len(X_test):

            print(f"\nRound: {t + 1}")

            # Add the prediction at the current index t for each expert
            for i in self.experts_info:
                prediction = self.experts_info[i]['model'].predict(X_test[t].reshape(1, -1))[0]
                self.experts_info[i]['predictions'].append(prediction)

            predictions_list = [self.experts_info[i]['predictions'][t] for i in self.experts_info]
            print(f"Expert predictions for round {t + 1}: {predictions_list}")

            # Check if all the predictions are the same
            all_equal = all(pred == predictions_list[0] for pred in predictions_list)

            # Generate the adversarial label: the opposite of the majority vote
            y_true = 1 - (np.bincount(predictions_list).argmax())
            print(f"Adversarial true label for round {t + 1}: {y_true}")

            # Coarsening
            print("\nCoarsening:")
            mass_0, mass_1, mass_01 = self.coarsening(t)

            # Determine the aggregated prediction. Ties are resolved in the
            # order 0, then 1, then Abstention.
            top_mass = max(mass_0, mass_1, mass_01)
            if top_mass == mass_0:
                coarsened_pred = 0
            elif top_mass == mass_1:
                coarsened_pred = 1
            else:
                coarsened_pred = 'Abstention'
            print(f"Coarsened prediction for round {t + 1}: {coarsened_pred}")

            # If Abstention occurs, update the masses
            if coarsened_pred == 'Abstention':
                self.total_abstentions += 1
                print(f"Update total abstentions: {self.total_abstentions}")
                print("\nMass updating:")
                if self.update_masses(t, y_true):
                    break
            elif coarsened_pred != y_true:
                print("The aggregated prediction is different from the true label.")
                self.total_errors += 1
                print(f"Update total errors: {self.total_errors}")

                if all_equal:
                    # If experts agree completely, is not informative -> skip mass updating
                    print("All experts agree completely. Skipping mass updating.")
                else:
                    # If Errors occurs, update the masses
                    print("\nMass updating:")
                    if self.update_masses(t, y_true):
                        break
            else:
                print("The aggregated prediction is the same as the true label. No update is needed.")

            t += 1

        ########################################################################
        print()
        print("\nFinal Results:")
        print(f"\nTotal number of instances: {len(X_test)}")
        print(f"Remaining experts: {self.experts_info}")
        print(f"Total number of abstentions: {self.total_abstentions}")
        print(f"Total number of errors: {self.total_errors}")

In [3]:
# Test the algorithm
SEED = 42

# Expert subsampling and SGD training inside initialize_experts are not given
# an explicit random_state, so they draw from NumPy's global RNG. Seeding it
# makes a "Restart & Run All" reproduce the same experts and the same output.
np.random.seed(SEED)

X, y = make_classification(n_samples=100, n_features=20, n_informative=15, random_state=SEED)
# y_test is unused below: run() generates its own adversarial labels.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=SEED)

num_experts = 3
beta = 0.50

ewm = EWM(num_experts=num_experts, beta=beta)
ewm.initialize_experts(X_train, y_train)
ewm.run(X_test)

Initialized power set and masses for 3 experts: {(): 0.0, (0,): 0.3333333333333333, (1,): 0.3333333333333333, (2,): 0.3333333333333333, (0, 1): 0.0, (0, 2): 0.0, (1, 2): 0.0, (0, 1, 2): 0.0}
Initialized 3 experts: {0: {'model': SGDClassifier(), 'mass': 0.3333333333333333, 'predictions': []}, 1: {'model': SGDClassifier(), 'mass': 0.3333333333333333, 'predictions': []}, 2: {'model': SGDClassifier(), 'mass': 0.3333333333333333, 'predictions': []}}

Round: 1
Expert predictions for round 1: [0, 1, 1]
Adversarial true label for round 1: 0

Coarsening:
Experts predicting 1: {1, 2}
Experts predicting 0: {0}
Subset (0,) intersects only with experts predicting 0: {0}
Subset (1,) intersects only with experts predicting 1: {1}
Subset (2,) intersects only with experts predicting 1: {2}
Mass 1: 0.6666666666666666, Mass 0: 0.3333333333333333, Mass 01: 0 
Coarsened prediction for round 1: 1
The aggregated prediction is different from the true label.
Update total errors: 1

Mass updating:
Updated power