In [2]:
import os
from exp_framework.Ensemble import Ensemble, PretrainedEnsemble, StudentExpertEnsemble
from exp_framework.delegation import (
    DelegationMechanism,
    UCBDelegationMechanism,
    ProbaSlopeDelegationMechanism,
    RestrictedMaxGurusDelegationMechanism,
    StudentExpertDelegationMechanism,
)
from exp_framework.learning import Net
from exp_framework.experiment import (
    Experiment,
    calculate_avg_std_test_accs,
    calculate_avg_std_train_accs,
    calculate_avg_std_test_accs_per_trial,
)
from avalanche.training.supervised import Naive
from matplotlib import pyplot as plt
from exp_framework.data_utils import Data
from avalanche.benchmarks.classic import RotatedMNIST, SplitMNIST
import numpy as np
import matplotlib as mpl
import seaborn as sns
from itertools import product
import pandas as pd
import torch.optim as optim
from torch.nn import CrossEntropyLoss

from avalanche.training.plugins import (
    CWRStarPlugin,
    ReplayPlugin,
    EWCPlugin,
    TrainGeneratorAfterExpPlugin,
    LwFPlugin,
    SynapticIntelligencePlugin,
)
from exp_framework.MinibatchEvalAccuracy import MinibatchEvalAccuracy
from avalanche.training.plugins import EvaluationPlugin
from avalanche.evaluation.metrics import accuracy_metrics

from avalanche.training import EWC

  from .autonotebook import tqdm as notebook_tqdm


### learning the mapping $\mathcal{X} \rightarrow \mathcal{G}$ (i.e. $\mathcal{X} \rightarrow \mathcal{Y}\times\mathcal{C}$)

### Set up global experiment settings

In [3]:
batch_size = 128
window_size = 50
num_trials = 10
n_voters = 30

#### Create Delegation Mechanisms

### Create Delegation Mechanisms and Ensembles

For simplicity, only explore full ensemble and variants of ProbaSlopeDelegationMechanism since they can be created programmatically.

In [4]:
def get_ensembles_dict(lo_num_gurus=[1, 3, 5, 7, 9, 11]):
    NOOP_del_mech = DelegationMechanism(batch_size=batch_size, window_size=window_size)
    NOOP_del_mech2 = DelegationMechanism(batch_size=batch_size, window_size=window_size)

    probability_functions = [
        "random_better",
        "probabilistic_better",
        "probabilistic_weighted",
        "max_diversity",
    ]
    score_functions = [
        "accuracy_score",
        # "balanced_accuracy_score",
        # "f1_score",
        # "precision_score",
        # "recall_score",
        # "top_k_accuracy_score",
        # "roc_auc_score",
        # "log_loss_score",
        # "max_diversity",
    ]
    # probability_functions = ["max_diversity"]
    # score_functions = ["accuracy_score"]

    del_mechs = {"full-ensemble": NOOP_del_mech}
    for prob_func, score_func in product(probability_functions, score_functions):
        for num_gurus in lo_num_gurus:
            dm = ProbaSlopeDelegationMechanism(
                batch_size=batch_size,
                window_size=window_size,
                max_active=num_gurus,
                probability_function=prob_func,
                score_method=score_func,
            )
            del_mechs[f"{prob_func}-{score_func}-num_gurus-{num_gurus}"] = dm

    ensembles_dict = {
        dm_name: Ensemble(
            training_epochs=1,
            n_voters=n_voters,
            delegation_mechanism=dm,
            name=dm_name,
            input_dim=28 * 28,
            output_dim=10,
            width=16,
        )
        for dm_name, dm in del_mechs.items()
    }
    ensembles_dict["single_Net"] = Ensemble(
        training_epochs=1,
        n_voters=1,
        delegation_mechanism=NOOP_del_mech2,
        name="single_Net",
        input_dim=28 * 28,
        output_dim=10,
        width=512,
    )
    return ensembles_dict

#### Create Avalanche Strategies to Compare Against

In [5]:
def initialize_strategies_to_evaluate():
    plugins_to_evaluate = {
        "LwF": LwFPlugin(),
        "EWC": EWCPlugin(ewc_lambda=0.001),
        "SynapticIntelligence": SynapticIntelligencePlugin(si_lambda=0.5),
        # "Replay": ReplayPlugin(mem_size=100),
    }

    strategies_to_evaluate = {}
    for name, pte in plugins_to_evaluate.items():
        model = Net(input_dim=28 * 28, output_dim=10, width=512)
        optimize = optim.Adam(model.parameters(), lr=0.001)

        mb_eval = MinibatchEvalAccuracy()
        evp = EvaluationPlugin(
            accuracy_metrics(minibatch=True, epoch=True, experience=True, stream=True),
            mb_eval,
        )
        cl_strategy = Naive(
            model=model,
            optimizer=optimize,
            criterion=CrossEntropyLoss(),
            train_mb_size=batch_size,
            train_epochs=1,
            eval_mb_size=batch_size,
            # plugins=[pte, evp],
            plugins=[pte, evp, mb_eval],
        )
        # cl_strategy = EWC(
        #     model=model,
        #     optimizer=optimize,
        #     criterion=CrossEntropyLoss(),
        #     ewc_lambda=0.001,
        #     train_mb_size=batch_size,
        #     train_epochs=1,
        #     eval_mb_size=batch_size,
        # )
        strategies_to_evaluate[name] = (cl_strategy, evp)

    return strategies_to_evaluate

# Run Experiment

### Train Ensemble - single active voter

In [6]:
# Train ensembles - single guru

data = SplitMNIST(n_experiences=5, fixed_class_order=list(range(10)))


ensembles_dict = get_ensembles_dict()

exp = Experiment(
    n_trials=num_trials,
    ensembles=list(ensembles_dict.values()),
    benchmark=data,
    strategies_to_evaluate=initialize_strategies_to_evaluate,
)
_ = exp.run()

  0%|          | 0/10 [00:00<?, ?it/s]

Starting trial  0




-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 83.04it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0699
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9869
-- >> End of training phase << --
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 82.25it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0582
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9934
-- >> End of training phase << --
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 70.91it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0725
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9871
-- >> End of training phase << --
-- >> Start of training phase << --
100%|██████████| 95/95 [00:01<00:00, 74.39it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 1.2080
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.8839
-- >> End of training phase << --
-- >> Start of train

 10%|█         | 1/10 [06:38<59:46, 398.47s/it]


> Eval on experience 4 (Task 0) from test stream ended.
	Loss_Exp/eval_phase/test_stream/Task000/Exp004 = 0.0987
	Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp004 = 0.9702
-- >> End of eval phase << --
	Loss_Stream/eval_phase/test_stream/Task000 = 8.8110
	Top1_Acc_Stream/eval_phase/test_stream/Task000 = 0.1924
Starting trial  1
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 84.96it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0690
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9874
-- >> End of training phase << --
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 82.66it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0774
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9863
-- >> End of training phase << --
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 72.91it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0753
	Top1_Acc_Epoch/train_ph

 20%|██        | 2/10 [13:23<53:37, 402.18s/it]

Starting trial  2
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 85.18it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0697
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9901
-- >> End of training phase << --
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 87.74it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0585
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9907
-- >> End of training phase << --
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 76.52it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0756
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9874
-- >> End of training phase << --
-- >> Start of training phase << --
100%|██████████| 95/95 [00:01<00:00, 73.66it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 1.0622
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.8787
-- >> End of training phase << --
--

 30%|███       | 3/10 [20:05<46:56, 402.37s/it]


> Eval on experience 4 (Task 0) from test stream ended.
	Loss_Exp/eval_phase/test_stream/Task000/Exp004 = 0.0905
	Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp004 = 0.9662
-- >> End of eval phase << --
	Loss_Stream/eval_phase/test_stream/Task000 = 12.6260
	Top1_Acc_Stream/eval_phase/test_stream/Task000 = 0.1916
Starting trial  3
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 87.28it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0663
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9863
-- >> End of training phase << --
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 88.51it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0811
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9867
-- >> End of training phase << --
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 76.22it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0713
	Top1_Acc_Epoch/train_p

 40%|████      | 4/10 [26:46<40:11, 401.89s/it]

Starting trial  4
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 85.44it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0697
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9878
-- >> End of training phase << --
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 84.24it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0728
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9847
-- >> End of training phase << --
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 72.35it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0771
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9828
-- >> End of training phase << --
-- >> Start of training phase << --
100%|██████████| 95/95 [00:01<00:00, 75.80it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 1.0835
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.8844
-- >> End of training phase << --
--

 50%|█████     | 5/10 [33:32<33:36, 403.22s/it]


> Eval on experience 4 (Task 0) from test stream ended.
	Loss_Exp/eval_phase/test_stream/Task000/Exp004 = 0.0820
	Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp004 = 0.9728
-- >> End of eval phase << --
	Loss_Stream/eval_phase/test_stream/Task000 = 10.1299
	Top1_Acc_Stream/eval_phase/test_stream/Task000 = 0.1929
Starting trial  5
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 85.01it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0619
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9888
-- >> End of training phase << --
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 85.74it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0793
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9863
-- >> End of training phase << --
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 71.62it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0620
	Top1_Acc_Epoch/train_p

 60%|██████    | 6/10 [40:09<26:44, 401.11s/it]


> Eval on experience 4 (Task 0) from test stream ended.
	Loss_Exp/eval_phase/test_stream/Task000/Exp004 = 0.0916
	Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp004 = 0.9667
-- >> End of eval phase << --
	Loss_Stream/eval_phase/test_stream/Task000 = 9.3871
	Top1_Acc_Stream/eval_phase/test_stream/Task000 = 0.1917
Starting trial  6
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 86.14it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0813
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9870
-- >> End of training phase << --
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 87.61it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0750
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9874
-- >> End of training phase << --
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 76.50it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0681
	Top1_Acc_Epoch/train_ph

 70%|███████   | 7/10 [46:31<19:44, 394.93s/it]


> Eval on experience 4 (Task 0) from test stream ended.
	Loss_Exp/eval_phase/test_stream/Task000/Exp004 = 0.0755
	Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp004 = 0.9713
-- >> End of eval phase << --
	Loss_Stream/eval_phase/test_stream/Task000 = 10.7059
	Top1_Acc_Stream/eval_phase/test_stream/Task000 = 0.1926
Starting trial  7
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 81.43it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0696
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9872
-- >> End of training phase << --
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 84.80it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0680
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9876
-- >> End of training phase << --
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 71.91it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0675
	Top1_Acc_Epoch/train_p

 80%|████████  | 8/10 [53:14<13:14, 397.43s/it]


> Eval on experience 4 (Task 0) from test stream ended.
	Loss_Exp/eval_phase/test_stream/Task000/Exp004 = 0.0611
	Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp004 = 0.9783
-- >> End of eval phase << --
	Loss_Stream/eval_phase/test_stream/Task000 = 10.6143
	Top1_Acc_Stream/eval_phase/test_stream/Task000 = 0.1940
Starting trial  8
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 87.36it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0779
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9865
-- >> End of training phase << --
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 86.16it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0666
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9882
-- >> End of training phase << --
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 78.73it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0719
	Top1_Acc_Epoch/train_p

 90%|█████████ | 9/10 [59:51<06:37, 397.25s/it]


> Eval on experience 4 (Task 0) from test stream ended.
	Loss_Exp/eval_phase/test_stream/Task000/Exp004 = 0.0704
	Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp004 = 0.9758
-- >> End of eval phase << --
	Loss_Stream/eval_phase/test_stream/Task000 = 11.6677
	Top1_Acc_Stream/eval_phase/test_stream/Task000 = 0.1935
Starting trial  9
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 85.53it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0743
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9882
-- >> End of training phase << --
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 82.54it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0673
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9873
-- >> End of training phase << --
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 71.87it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0814
	Top1_Acc_Epoch/train_p

100%|██████████| 10/10 [1:06:38<00:00, 399.87s/it]


> Eval on experience 4 (Task 0) from test stream ended.
	Loss_Exp/eval_phase/test_stream/Task000/Exp004 = 0.0777
	Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp004 = 0.9723
-- >> End of eval phase << --
	Loss_Stream/eval_phase/test_stream/Task000 = 9.4308
	Top1_Acc_Stream/eval_phase/test_stream/Task000 = 0.1928





### Save and Print Results

In [7]:
batch_metrics = exp.get_aggregate_batch_metrics()
dfs = []
for ens, metric_dict in batch_metrics.items():
    df = pd.DataFrame.from_dict(metric_dict, orient="index")
    df["ensemble_name"] = ens
    dfs.append(df)
single_active_df = pd.concat(dfs)
col_order = [len(single_active_df.columns) - 1] + list(
    range(len(single_active_df.columns) - 1)
)
single_active_df = single_active_df[single_active_df.columns[col_order]]
file_prefix = f"many_v_class_incremental-trials={num_trials}-batch_size={batch_size}_window_size={window_size}-feb10"
path = "results"

if not os.path.exists(path):
    os.mkdir(path)

filepath = f"{path}/{file_prefix}.csv"
single_active_df.to_csv(filepath)

In [8]:
# Print results - single guru

print(f"Results for mechanisms")

# Collect and print train accuracies - aggregate and by batch
train_results_dict = dict()
for ens_name, ensemble in ensembles_dict.items():
    train_acc, train_acc_std = calculate_avg_std_train_accs(exp, ens_name, num_trials)
    train_results_dict[ens_name] = (train_acc, train_acc_std)

for strat_name, (strat, eval_plugin) in initialize_strategies_to_evaluate().items():
    train_acc, train_acc_std = calculate_avg_std_train_accs(exp, strat_name, num_trials)
    train_results_dict[strat_name] = (train_acc, train_acc_std)

for ens_name, (train_acc, train_acc_std) in train_results_dict.items():
    print(
        f"Mean train acc for {ens_name}: {round(np.mean(train_acc), 3)}+-{round(np.mean(train_acc_std), 3)}"
    )
# for ens_name, (train_acc, train_acc_std) in train_results_dict.items():
#     print(f"All train accs for {ens_name}: {train_acc}")

print("--------------")

# Collect and print test accuracies
# results_dict = dict()
# for ens_name, ensemble in ensembles_dict.items():
#     test_acc, test_acc_std = calculate_avg_std_test_accs(exp, ens_name, num_trials)
#     results_dict[ens_name] = (test_acc, test_acc_std)

# for strat_name, (strat, eval_plugin) in initialize_strategies_to_evaluate().items():
#     test_acc, test_acc_std = calculate_avg_std_test_accs(exp, strat_name, num_trials)
#     results_dict[strat_name] = (test_acc, test_acc_std)


# for ens_name, (test_acc, test_acc_std) in results_dict.items():
#     print(
#         f"Mean test acc for {ens_name}: {round(np.mean(test_acc), 3)}+-{round(np.mean(test_acc_std), 3)}"
# )

print("--------------")

results_dict = dict()

for ens_name, ensemble in ensembles_dict.items():
    test_acc, _ = calculate_avg_std_test_accs_per_trial(exp, ens_name, num_trials)
    print(
        f"Mean test acc for {ens_name}: {round(np.mean(test_acc), 3)}+-{round(np.std(test_acc), 3)}"
    )
    results_dict[ens_name] = (np.mean(test_acc), np.std(test_acc))

for strat_name, (strat, eval_plugin) in initialize_strategies_to_evaluate().items():
    test_acc, _ = calculate_avg_std_test_accs_per_trial(exp, strat_name, num_trials)
    print(
        f"Mean test acc for {strat_name}: {round(np.mean(test_acc), 3)}+-{round(np.std(test_acc), 3)}"
    )
    results_dict[strat_name] = (np.mean(test_acc), np.std(test_acc))

Results for mechanisms
Mean train acc for full-ensemble: 0.793+-0.029
Mean train acc for random_better-accuracy_score-num_gurus-1: 0.75+-0.111
Mean train acc for random_better-accuracy_score-num_gurus-3: 0.8+-0.071
Mean train acc for random_better-accuracy_score-num_gurus-5: 0.821+-0.049
Mean train acc for random_better-accuracy_score-num_gurus-7: 0.828+-0.044
Mean train acc for random_better-accuracy_score-num_gurus-9: 0.818+-0.041
Mean train acc for random_better-accuracy_score-num_gurus-11: 0.82+-0.036
Mean train acc for probabilistic_better-accuracy_score-num_gurus-1: 0.776+-0.103
Mean train acc for probabilistic_better-accuracy_score-num_gurus-3: 0.83+-0.056
Mean train acc for probabilistic_better-accuracy_score-num_gurus-5: 0.841+-0.039
Mean train acc for probabilistic_better-accuracy_score-num_gurus-7: 0.832+-0.045
Mean train acc for probabilistic_better-accuracy_score-num_gurus-9: 0.837+-0.035
Mean train acc for probabilistic_better-accuracy_score-num_gurus-11: 0.837+-0.033
Mea



In [9]:
# print(results_dict)
# make data frame with three columns: name, mean, std
df = pd.DataFrame.from_dict(results_dict, orient="index", columns=["mean", "std"])
df = df.reset_index()
df = df.rename(columns={"index": "name"})
df = df.sort_values(by="mean", ascending=False, ignore_index=True)
# write to csv in results/keepers/class_inc_avgs.csv
df.to_csv("results/keepers/many_v_class_inc_avgs.csv")

In [11]:
exp.batch_metric_values["max_diversity-f1_score-num_gurus-1"]

{0: {'batch_train_acc': [1.0,
   1.0,
   0.984375,
   0.9921875,
   1.0,
   1.0,
   0.9921875,
   0.9921875,
   0.9921875,
   1.0,
   1.0,
   0.9921875,
   1.0,
   1.0,
   1.0,
   0.9921875,
   0.984375,
   0.984375,
   0.984375,
   1.0,
   1.0,
   1.0,
   0.9921875,
   0.9921875,
   1.0,
   1.0,
   0.9921875,
   1.0,
   0.9921875,
   0.984375,
   0.984375,
   1.0,
   0.9921875,
   1.0,
   1.0,
   0.9921875,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   0.9921875,
   0.9921875,
   1.0,
   1.0,
   0.9921875,
   1.0,
   1.0,
   1.0,
   0.984375,
   1.0,
   1.0,
   1.0,
   0.9921875,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   0.9921875,
   0.9921875,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   0.9921875,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   0.9921875,
   1.0,
   1.0,
   0.9921875,
   0.9921875,
   0.9921875,
   0.9921875,
   1.0,
   1.0,
   1.0,
   0.9921875,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   0.9921875,
   1.0,
   0.9921875,
   0.9921875,
   1.0,

# Explore Results

(leftover copied code from other file, not adapted for the above code)

In [13]:
print(len(exp.ensembles))
for ens in exp.ensembles:
    print(ens.name)

26
full-ensemble
random_better-accuracy_score-num_gurus-1
random_better-accuracy_score-num_gurus-3
random_better-accuracy_score-num_gurus-5
random_better-accuracy_score-num_gurus-7
random_better-accuracy_score-num_gurus-9
random_better-accuracy_score-num_gurus-11
probabilistic_better-accuracy_score-num_gurus-1
probabilistic_better-accuracy_score-num_gurus-3
probabilistic_better-accuracy_score-num_gurus-5
probabilistic_better-accuracy_score-num_gurus-7
probabilistic_better-accuracy_score-num_gurus-9
probabilistic_better-accuracy_score-num_gurus-11
probabilistic_weighted-accuracy_score-num_gurus-1
probabilistic_weighted-accuracy_score-num_gurus-3
probabilistic_weighted-accuracy_score-num_gurus-5
probabilistic_weighted-accuracy_score-num_gurus-7
probabilistic_weighted-accuracy_score-num_gurus-9
probabilistic_weighted-accuracy_score-num_gurus-11
max_diversity-accuracy_score-num_gurus-1
max_diversity-accuracy_score-num_gurus-3
max_diversity-accuracy_score-num_gurus-5
max_diversity-accuracy_

In [14]:
print(exp.ensembles[-5].name)
batch_accs = []
for voter in exp.ensembles[-5].voters:
    batch_accs.append(voter.batch_accuracies)
# exp.ensembles[-5].voters[0].batch_accuracies

max_diversity-accuracy_score-num_gurus-5


In [15]:
def find_active_streaks(voter_id, trial_num):
    """
    Find active streaks for a specified voter.

    :param voter_id: ID of the voter for which to find active streaks.
    :param batch_metric_values: Dictionary containing the batch metric values.
    :param metric_key: Key to access the relevant metric in batch_metric_values.
    :return: List of active streaks for the specified voter.
    """
    active_batches = []
    active_streak = [None, None]
    voter_active = False

    for i, av in enumerate(
        exp.batch_metric_values["max_diversity-f1_score-num_gurus-1"][trial_num][
            "active_voters-train"
        ]
    ):
        # print(av)
        if voter_id in av:
            if not voter_active:
                # Start a new streak
                active_streak[0] = i
                voter_active = True
                # print("streak started")
            active_streak[1] = i
        else:
            if voter_active:
                # End the current streak
                active_batches.append(active_streak.copy())
                active_streak = [None, None]
                voter_active = False
                # print("streak done")

    # Handle case where the streak continues till the end of the list
    if voter_active:
        active_batches.append(active_streak.copy())

    return active_batches

### Look at activity on last trial

In [16]:
for voter_id in range(n_voters):
    active_streaks = find_active_streaks(voter_id, num_trials - 1)
    print(active_streaks)
    # print(f"Active Streaks for Voter {voter_id}: {active_streaks}")

    plt.figure(figsize=(10, 5))  # Create a new figure for each voter
    plt.plot(batch_accs[voter_id])
    # plt.axvline(x=len_train, color="k", linestyle="--", linewidth=1)

    # Shade the active batches for this voter
    for streak in active_streaks:
        if streak[0] is not None and streak[1] is not None:
            plt.axvspan(streak[0], streak[1], alpha=0.3, color="red")

    # Plot a green vertical line at all train splits
    # for split in train_splits[:-1]:
    #     plt.axvline(x=split, color="g", linestyle="--", linewidth=2)

    plt.title(f"Voter {voter_id} Activity")
    plt.xlabel("Batches")
    plt.ylabel("Accuracy")
    plt.show()  # Display the plot for each voter

KeyError: 'max_diversity-f1_score-num_gurus-1'