In [1]:
import numpy as np
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
import itertools
from itertools import chain, combinations

# The Evidence Consistent Algorithm (EC)
The EC class implements a learning algorithm using multiple SGDClassifier experts and a perfect expert. It initializes masses for subsets of experts, with the initial mass set to 1 for the set of all experts. It makes aggregated predictions based on the experts' outputs. If the coarsened prediction results in an abstention, the masses allocated to subsets of experts are updated according to the experts' prediction accuracy using the Dempster-Shafer rule (a refined mass of 1 is assigned to the set of experts who were correct, while those who were incorrect receive mass of 0). The goal is to aggregate predictions in adversarial settings and minimize errors through dynamic mass updates.








In [2]:
class EC:
    def __init__(self, num_experts):
        self.total_abstentions = 0
        self.num_experts = num_experts
        self.experts_info = {}  # Dictionary to store expert information
        self.perfect_expert = -1  # Use -1 for the perfect expert

        all_experts = list(range(self.num_experts)) + [self.perfect_expert]

        print(f"All experts: {all_experts}")

        # Dictionary to store masses allocated on the subsets of experts
        self.mass_function: dict[tuple[int, ...], float] = {
            tuple(sorted(subset)): 0.0
            for r in range(len(all_experts) + 1)
            for subset in itertools.combinations(sorted(all_experts), r)
        }

        # Set the mass of the full set (including perfect expert) to 1.0
        self.mass_function[tuple(sorted(all_experts))] = 1.0

        print(f"Initialized power set and masses for {self.num_experts + 1} experts (including perfect expert with ID -1): {self.mass_function}")

    def initialize_experts(self, X_train, y_train):
        """
        Initialize experts as SGDClassifier models and a perfect expert.
        Each expert is trained on a subset of the training data.
        """

        for i in range(self.num_experts):
            X_subtrain, _, y_subtrain, _ = train_test_split(X_train, y_train, test_size=0.5)
            clf = SGDClassifier(max_iter=1000, tol=1e-3)
            clf.fit(X_subtrain, y_subtrain)

            # Store model information and empty predictions list for each expert
            self.experts_info[i] = {
                'model': clf,
                'predictions': []
            }

        # Add the perfect expert
        self.experts_info[self.perfect_expert] = {
            'model': None,
            'predictions': []
        }

        print(f"Initialized {self.num_experts + 1} experts: {self.experts_info}")

    def coarsening(self, t):
        """
        Aggregates the predictions of active experts and calculates the coarsened prediction.
        Returns the coarsened masses for predictions of 1, 0 or 01 in case of uncertainty.
        Returns the sets of experts predicting 1 and 0 at time t.
        """

        # Identify experts predicting 1 and 0 at time t
        experts_predicted_1 = {i for i, info in self.experts_info.items() if info['predictions'][t] == 1}
        experts_predicted_0 = {i for i, info in self.experts_info.items() if info['predictions'][t] == 0}

        print(f"Experts predicting 1: {experts_predicted_1}")
        print(f"Experts predicting 0: {experts_predicted_0}")

        mass_1, mass_0, mass_01 = 0, 0, 0

        # Compute the coarsened mass function
        for subset, mass in self.mass_function.items():
            if mass > 0:
                intersects_with_1 = experts_predicted_1.intersection(subset)
                intersects_with_0 = experts_predicted_0.intersection(subset)

                if intersects_with_1 and intersects_with_0:
                    print(f"Subset {subset} intersects with both: {intersects_with_1} and {intersects_with_0}")
                    mass_01 += mass
                elif intersects_with_1:
                    print(f"Subset {subset} intersects only with experts predicting 1: {intersects_with_1}")
                    mass_1 += mass
                elif intersects_with_0:
                    print(f"Subset {subset} intersects only with experts predicting 0: {intersects_with_0}")
                    mass_0 += mass

        print(f"Coarsened masses: mass_1: {mass_1}, mass_0: {mass_0}, mass_01: {mass_01}")

        return mass_0, mass_1, mass_01, experts_predicted_1, experts_predicted_0

    def update_masses(self, t, y_true, experts_predicted_1, experts_predicted_0):
        """
        Updates the experts' masses based on their predictions.
        Experts with zero mass are removed.
        Returns True if only the perfect expert remains, otherwise returns False.
        """

        # Set the refined mass function for correct set (1) and incorrect set (0)
        refined_mass = {}
        if y_true == 1:
            refined_mass[frozenset(experts_predicted_1)] = 1
            refined_mass[frozenset(experts_predicted_0)] = 0
        else:
            refined_mass[frozenset(experts_predicted_1)] = 0
            refined_mass[frozenset(experts_predicted_0)] = 1

        for subset, mass in refined_mass.items():
            print(f"Refined subset: {subset}, Refined mass: {mass}")

        combined_mass = {}

        K=0

        # Combine the current mass with the refined mass and compute the degree of conflict
        for subset, mass in self.mass_function.items():
            if mass > 0:
                print(f"\nConsidering subset {subset} with mass {mass}")
                for ref_subset, ref_mass in refined_mass.items():
                    intersection = frozenset(subset).intersection(ref_subset)
                    if intersection:
                        intersect_tuple = tuple(sorted(intersection))
                        print(f"Intersection between {subset} and {ref_subset}: {intersect_tuple}")
                        combined_mass[intersect_tuple] = combined_mass.get(intersect_tuple, 0) + (mass * ref_mass)
                        print(f"Updated combined mass for {intersect_tuple}: {combined_mass[intersect_tuple]}")
                    else:
                        print(f"No intersection between {subset} and {ref_subset}")
                        K += mass * ref_mass

        print(f"K conflict degree: {K}")

        # Normalization factor
        D = 1 - K

        print(f"1 - K: {D}")

        # Normalize the combined mass
        if K > 0:
            for subset in combined_mass:
                combined_mass[subset] /= D
                print(f"Normalized combined mass for {subset}: {combined_mass[subset]}")


        # Update the mass function with the new combined mass
        self.mass_function = {tuple(sorted(subset)): combined_mass.get(tuple(sorted(subset)), 0) for subset in self.mass_function}
        print(f"Updated mass function: {self.mass_function}")

        # Identify remaining experts
        remaining_experts = set()
        for subset, mass in self.mass_function.items():
            if mass > 0:
                remaining_experts.update(subset)

        print(f"Remaining experts: {remaining_experts}")

        # Update the experts' info dictionary
        self.experts_info = {expert: info for expert, info in self.experts_info.items() if expert in remaining_experts}
        print(f"Updated experts_info: {self.experts_info}")

        # Check if only one expert remains
        if len(remaining_experts) == 1:
            return True

        return False

    def run(self, X_test):
        """
        Runs the algorithm on the test data.
        """
        t = 0

        while t < len(X_test):

            print(f"\nRound: {t+1}")

            # Collect predictions from each expert (except for the perfect expert)
            for i in self.experts_info:
                if i != self.perfect_expert:
                    prediction = self.experts_info[i]['model'].predict(X_test[t].reshape(1, -1))[0]
                    self.experts_info[i]['predictions'].append(prediction)
                else:
                    self.experts_info[i]['predictions'].append(None)

            predictions_list = [self.experts_info[i]['predictions'][t] for i in self.experts_info]
            print(f"Expert predictions for round {t+1} without perfect expert: {predictions_list}")

            # Generate the adversarial label
            y_true = 1 - (np.bincount([p for p in predictions_list if p is not None]).argmax())
            print(f"Adversarial true label for round {t+1}: {y_true}")

            # Pefect expert prediction (= true label)
            self.experts_info[self.perfect_expert]['predictions'][t] = y_true

            # Update predictions_list with the perfect expert's prediction
            predictions_list = [p if p is not None else y_true for p in predictions_list]
            print(f"Experts predictions for round {t+1} with perfect expert: {predictions_list}")

            # Coarsening
            print("\nCoarsening:")
            mass_0, mass_1, mass_01, experts_predicted_1, experts_predicted_0 = self.coarsening(t)

            # Determine the coarsened prediction
            if mass_1 == 1:
                print(f"Coarsened prediction: 1")
                coarsened_pred = 1
            elif mass_0 == 1:
                print(f"Coarsened prediction: 0")
                coarsened_pred = 0
            else:
                print(f"Coarsened prediction: Abstention")
                coarsened_pred = 'Abstention'

            # If Abstention occurs, update the masses
            if coarsened_pred == 'Abstention':
              self.total_abstentions += 1
              print(f"Update total abstentions: {self.total_abstentions}")
              print("\nMass updating:")
              stop = self.update_masses(t, y_true, experts_predicted_1, experts_predicted_0)
            else:
              stop = False

            if stop:
              print("Algorithm terminated: only the perfect expert remains.")
              break

            t += 1

        ########################################################################
        print()
        print("\nFinal Results:")
        print(f"\nTotal number of instances: {len(X_test)}")
        print(f"Remaining experts: {self.experts_info}")
        print(f"Total number of abstentions: {self.total_abstentions}")

In [3]:
# Test the algorithm
X, y = make_classification(n_samples=100, n_features=20, n_informative=15, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

num_experts = 8

ec = EC(num_experts=num_experts)
ec.initialize_experts(X_train, y_train)
ec.run(X_test)

All experts: [0, 1, 2, 3, 4, 5, 6, 7, -1]
Initialized power set and masses for 9 experts (including perfect expert with ID -1): {(): 0.0, (-1,): 0.0, (0,): 0.0, (1,): 0.0, (2,): 0.0, (3,): 0.0, (4,): 0.0, (5,): 0.0, (6,): 0.0, (7,): 0.0, (-1, 0): 0.0, (-1, 1): 0.0, (-1, 2): 0.0, (-1, 3): 0.0, (-1, 4): 0.0, (-1, 5): 0.0, (-1, 6): 0.0, (-1, 7): 0.0, (0, 1): 0.0, (0, 2): 0.0, (0, 3): 0.0, (0, 4): 0.0, (0, 5): 0.0, (0, 6): 0.0, (0, 7): 0.0, (1, 2): 0.0, (1, 3): 0.0, (1, 4): 0.0, (1, 5): 0.0, (1, 6): 0.0, (1, 7): 0.0, (2, 3): 0.0, (2, 4): 0.0, (2, 5): 0.0, (2, 6): 0.0, (2, 7): 0.0, (3, 4): 0.0, (3, 5): 0.0, (3, 6): 0.0, (3, 7): 0.0, (4, 5): 0.0, (4, 6): 0.0, (4, 7): 0.0, (5, 6): 0.0, (5, 7): 0.0, (6, 7): 0.0, (-1, 0, 1): 0.0, (-1, 0, 2): 0.0, (-1, 0, 3): 0.0, (-1, 0, 4): 0.0, (-1, 0, 5): 0.0, (-1, 0, 6): 0.0, (-1, 0, 7): 0.0, (-1, 1, 2): 0.0, (-1, 1, 3): 0.0, (-1, 1, 4): 0.0, (-1, 1, 5): 0.0, (-1, 1, 6): 0.0, (-1, 1, 7): 0.0, (-1, 2, 3): 0.0, (-1, 2, 4): 0.0, (-1, 2, 5): 0.0, (-1, 2, 6): 0.