In [1]:
import sys
if "/Users/carterblair/waterloo/research/BanditLiquidDem/banditLiquidDem" not in sys.path:
    sys.path.append("/Users/carterblair/waterloo/research/BanditLiquidDem/banditLiquidDem")
print(sys.path)

['/Users/carterblair/waterloo/research/BanditLiquidDem/banditLiquidDem/experiments', '/Users/carterblair/opt/anaconda3/envs/LDE/lib/python310.zip', '/Users/carterblair/opt/anaconda3/envs/LDE/lib/python3.10', '/Users/carterblair/opt/anaconda3/envs/LDE/lib/python3.10/lib-dynload', '', '/Users/carterblair/opt/anaconda3/envs/LDE/lib/python3.10/site-packages', '/Users/carterblair/waterloo/research/BanditLiquidDem/banditLiquidDem']


In [2]:
import os
from exp_framework.Ensemble import Ensemble, PretrainedEnsemble, StudentExpertEnsemble
from exp_framework.delegation import (
    DelegationMechanism,
    UCBDelegationMechanism,
    ProbaSlopeDelegationMechanism,
    RestrictedMaxGurusDelegationMechanism,
    StudentExpertDelegationMechanism,
)
from exp_framework.learning import Net
from exp_framework.experiment import (
    Experiment,
    calculate_avg_std_test_accs,
    calculate_avg_std_train_accs,
    calculate_avg_std_test_accs_per_trial,
)
from avalanche.training.supervised import Naive
from matplotlib import pyplot as plt
from exp_framework.data_utils import Data
from avalanche.benchmarks.classic import RotatedMNIST, SplitMNIST
import numpy as np
import matplotlib as mpl
import seaborn as sns
from itertools import product
import pandas as pd
import torch.optim as optim
from torch.nn import CrossEntropyLoss

from avalanche.training.plugins import (
    CWRStarPlugin,
    ReplayPlugin,
    EWCPlugin,
    TrainGeneratorAfterExpPlugin,
    LwFPlugin,
    SynapticIntelligencePlugin,
)
from exp_framework.MinibatchEvalAccuracy import MinibatchEvalAccuracy
from avalanche.training.plugins import EvaluationPlugin
from avalanche.evaluation.metrics import accuracy_metrics

from avalanche.training import EWC

  from .autonotebook import tqdm as notebook_tqdm


### learning the mapping $\mathcal{X} \rightarrow \mathcal{G}$ (i.e. $\mathcal{X} \rightarrow \mathcal{Y}\times\mathcal{C}$)

### Set up global experiment settings

In [9]:
batch_size = 128
window_size = 50
num_trials = 10
n_voters = 30

ensemble_width = 16

#### Create Delegation Mechanisms

### Create Delegation Mechanisms and Ensembles

For simplicity, only explore full ensemble and variants of ProbaSlopeDelegationMechanism since they can be created programmatically.

In [4]:
def get_ensembles_dict(lo_num_gurus=[1, 3, 5, 7, 9, 11]):
    NOOP_del_mech = DelegationMechanism(batch_size=batch_size, window_size=window_size)
    NOOP_del_mech2 = DelegationMechanism(batch_size=batch_size, window_size=window_size)

    probability_functions = [
        "random_better",
        "probabilistic_better",
        "probabilistic_weighted",
        "max_diversity",
    ]
    score_functions = [
        "accuracy_score",
        # "balanced_accuracy_score",
        # "f1_score",
        # "precision_score",
        # "recall_score",
        # "top_k_accuracy_score",
        # "roc_auc_score",
        # "log_loss_score",
        # "max_diversity",
    ]
    # probability_functions = ["max_diversity"]
    # score_functions = ["accuracy_score"]

    del_mechs = {"full-ensemble": NOOP_del_mech}
    for prob_func, score_func in product(probability_functions, score_functions):
        for num_gurus in lo_num_gurus:
            dm = ProbaSlopeDelegationMechanism(
                batch_size=batch_size,
                window_size=window_size,
                max_active=num_gurus,
                probability_function=prob_func,
                score_method=score_func,
            )
            del_mechs[f"{prob_func}-{score_func}-num_gurus-{num_gurus}"] = dm

    ensembles_dict = {
        dm_name: Ensemble(
            training_epochs=1,
            n_voters=n_voters,
            delegation_mechanism=dm,
            name=dm_name,
            input_dim=28 * 28,
            output_dim=10,
            width=16,
        )
        for dm_name, dm in del_mechs.items()
    }
    ensembles_dict["single_Net"] = Ensemble(
        training_epochs=1,
        n_voters=1,
        delegation_mechanism=NOOP_del_mech2,
        name="single_Net",
        input_dim=28 * 28,
        output_dim=10,
        width=512,
    )
    return ensembles_dict

#### Create Avalanche Strategies to Compare Against

In [5]:
def initialize_strategies_to_evaluate():
    plugins_to_evaluate = {
        "LwF": LwFPlugin(),
        "EWC": EWCPlugin(ewc_lambda=0.001),
        "SynapticIntelligence": SynapticIntelligencePlugin(si_lambda=0.5),
        # "Replay": ReplayPlugin(mem_size=100),
    }

    strategies_to_evaluate = {}
    for name, pte in plugins_to_evaluate.items():
        model = Net(input_dim=28 * 28, output_dim=10, width=512)
        optimize = optim.Adam(model.parameters(), lr=0.001)

        mb_eval = MinibatchEvalAccuracy()
        evp = EvaluationPlugin(
            accuracy_metrics(minibatch=True, epoch=True, experience=True, stream=True),
            mb_eval,
        )
        cl_strategy = Naive(
            model=model,
            optimizer=optimize,
            criterion=CrossEntropyLoss(),
            train_mb_size=batch_size,
            train_epochs=1,
            eval_mb_size=batch_size,
            # plugins=[pte, evp],
            plugins=[pte, evp, mb_eval],
        )
        # cl_strategy = EWC(
        #     model=model,
        #     optimizer=optimize,
        #     criterion=CrossEntropyLoss(),
        #     ewc_lambda=0.001,
        #     train_mb_size=batch_size,
        #     train_epochs=1,
        #     eval_mb_size=batch_size,
        # )
        strategies_to_evaluate[name] = (cl_strategy, evp)

    return strategies_to_evaluate

# Run Experiment

### Train Ensemble - single active voter

In [7]:
# Train ensembles - single guru

# data = SplitMNIST(n_experiences=5, fixed_class_order=list(range(10)))


ensembles_dict = get_ensembles_dict()

exp = Experiment(
    n_trials=num_trials,
    ensembles=list(ensembles_dict.values()),
    # benchmark=data,
    strategies_to_evaluate=initialize_strategies_to_evaluate,
    dataset_name="SplitMNIST",
)
_ = exp.run()

  0%|          | 0/10 [00:00<?, ?it/s]

Starting trial  0




Creating dataset
Training: LwF!
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 63.67it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0725
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9863
-- >> End of training phase << --
Training: EWC!
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 67.53it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0812
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9874
-- >> End of training phase << --
Training: SynapticIntelligence!
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 54.41it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0706
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9875
-- >> End of training phase << --
Training: LwF!
-- >> Start of training phase << --
100%|██████████| 95/95 [00:01<00:00, 54.01it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 1.0963
	Top1_Acc_Epoch/tr

 10%|█         | 1/10 [15:04<2:15:37, 904.21s/it]


> Eval on experience 4 (Task 0) from test stream ended.
	Loss_Exp/eval_phase/test_stream/Task000/Exp004 = 0.0891
	Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp004 = 0.9713
-- >> End of eval phase << --
	Loss_Stream/eval_phase/test_stream/Task000 = 11.7357
	Top1_Acc_Stream/eval_phase/test_stream/Task000 = 0.1926
Starting trial  1
Creating dataset
Training: LwF!
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 61.73it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0891
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9874
-- >> End of training phase << --
Training: EWC!
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 55.64it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0867
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9864
-- >> End of training phase << --
Training: SynapticIntelligence!
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 50.43it/s]
Epoch 0 ended

 20%|██        | 2/10 [29:47<1:58:55, 891.96s/it]


> Eval on experience 4 (Task 0) from test stream ended.
	Loss_Exp/eval_phase/test_stream/Task000/Exp004 = 0.0694
	Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp004 = 0.9733
-- >> End of eval phase << --
	Loss_Stream/eval_phase/test_stream/Task000 = 12.9920
	Top1_Acc_Stream/eval_phase/test_stream/Task000 = 0.1930
Starting trial  2
Creating dataset
Training: LwF!
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 53.88it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0699
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9871
-- >> End of training phase << --
Training: EWC!
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 50.89it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0660
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9905
-- >> End of training phase << --
Training: SynapticIntelligence!
-- >> Start of training phase << --
100%|██████████| 99/99 [00:03<00:00, 30.58it/s]
Epoch 0 ended

 30%|███       | 3/10 [45:06<1:45:29, 904.17s/it]


> Eval on experience 4 (Task 0) from test stream ended.
	Loss_Exp/eval_phase/test_stream/Task000/Exp004 = 0.1287
	Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp004 = 0.9622
-- >> End of eval phase << --
	Loss_Stream/eval_phase/test_stream/Task000 = 8.9496
	Top1_Acc_Stream/eval_phase/test_stream/Task000 = 0.1908
Starting trial  3
Creating dataset
Training: LwF!
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 64.34it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0779
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9862
-- >> End of training phase << --
Training: EWC!
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 60.24it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0977
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9807
-- >> End of training phase << --
Training: SynapticIntelligence!
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 51.53it/s]
Epoch 0 ended.

 40%|████      | 4/10 [1:01:41<1:34:01, 940.23s/it]

Starting trial  4
Creating dataset
Training: LwF!
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 63.62it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0700
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9865
-- >> End of training phase << --
Training: EWC!
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 61.01it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0815
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9779
-- >> End of training phase << --
Training: SynapticIntelligence!
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 53.03it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0616
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9925
-- >> End of training phase << --
Training: LwF!
-- >> Start of training phase << --
100%|██████████| 95/95 [00:01<00:00, 56.79it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 1.6306


 50%|█████     | 5/10 [1:17:55<1:19:21, 952.25s/it]


> Eval on experience 4 (Task 0) from test stream ended.
	Loss_Exp/eval_phase/test_stream/Task000/Exp004 = 0.0834
	Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp004 = 0.9743
-- >> End of eval phase << --
	Loss_Stream/eval_phase/test_stream/Task000 = 10.2393
	Top1_Acc_Stream/eval_phase/test_stream/Task000 = 0.1932
Starting trial  5
Creating dataset
Training: LwF!
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 57.29it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0680
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9883
-- >> End of training phase << --
Training: EWC!
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 60.37it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0732
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9867
-- >> End of training phase << --
Training: SynapticIntelligence!
-- >> Start of training phase << --
100%|██████████| 99/99 [00:02<00:00, 48.86it/s]
Epoch 0 ended

 60%|██████    | 6/10 [1:35:28<1:05:46, 986.69s/it]


> Eval on experience 4 (Task 0) from test stream ended.
	Loss_Exp/eval_phase/test_stream/Task000/Exp004 = 0.1189
	Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp004 = 0.9617
-- >> End of eval phase << --
	Loss_Stream/eval_phase/test_stream/Task000 = 9.3191
	Top1_Acc_Stream/eval_phase/test_stream/Task000 = 0.1907
Starting trial  6
Creating dataset
Training: LwF!
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 64.55it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0721
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9871
-- >> End of training phase << --
Training: EWC!
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 63.16it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0785
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9876
-- >> End of training phase << --
Training: SynapticIntelligence!
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 51.64it/s]
Epoch 0 ended.

 70%|███████   | 7/10 [1:52:40<50:03, 1001.24s/it] 


> Eval on experience 4 (Task 0) from test stream ended.
	Loss_Exp/eval_phase/test_stream/Task000/Exp004 = 0.0845
	Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp004 = 0.9677
-- >> End of eval phase << --
	Loss_Stream/eval_phase/test_stream/Task000 = 10.2181
	Top1_Acc_Stream/eval_phase/test_stream/Task000 = 0.1919
Starting trial  7
Creating dataset
Training: LwF!
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 55.79it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0747
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9873
-- >> End of training phase << --
Training: EWC!
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 55.23it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0770
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9866
-- >> End of training phase << --
Training: SynapticIntelligence!
-- >> Start of training phase << --
100%|██████████| 99/99 [00:02<00:00, 46.34it/s]
Epoch 0 ended

 80%|████████  | 8/10 [2:07:47<32:22, 971.45s/it] 


> Eval on experience 4 (Task 0) from test stream ended.
	Loss_Exp/eval_phase/test_stream/Task000/Exp004 = 0.0882
	Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp004 = 0.9697
-- >> End of eval phase << --
	Loss_Stream/eval_phase/test_stream/Task000 = 10.9085
	Top1_Acc_Stream/eval_phase/test_stream/Task000 = 0.1923
Starting trial  8
Creating dataset
Training: LwF!
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 52.75it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0716
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9868
-- >> End of training phase << --
Training: EWC!
-- >> Start of training phase << --
100%|██████████| 99/99 [00:03<00:00, 32.26it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0772
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9871
-- >> End of training phase << --
Training: SynapticIntelligence!
-- >> Start of training phase << --
100%|██████████| 99/99 [00:03<00:00, 25.24it/s]
Epoch 0 ended

 90%|█████████ | 9/10 [2:22:17<15:39, 939.79s/it]


> Eval on experience 4 (Task 0) from test stream ended.
	Loss_Exp/eval_phase/test_stream/Task000/Exp004 = 0.1254
	Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp004 = 0.9617
-- >> End of eval phase << --
	Loss_Stream/eval_phase/test_stream/Task000 = 10.1598
	Top1_Acc_Stream/eval_phase/test_stream/Task000 = 0.1907
Starting trial  9
Creating dataset
Training: LwF!
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 64.37it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0708
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9876
-- >> End of training phase << --
Training: EWC!
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 64.73it/s]
Epoch 0 ended.
	Loss_Epoch/train_phase/train_stream/Task000 = 0.0775
	Top1_Acc_Epoch/train_phase/train_stream/Task000 = 0.9868
-- >> End of training phase << --
Training: SynapticIntelligence!
-- >> Start of training phase << --
100%|██████████| 99/99 [00:01<00:00, 53.83it/s]
Epoch 0 ended

100%|██████████| 10/10 [2:37:57<00:00, 947.78s/it]


> Eval on experience 4 (Task 0) from test stream ended.
	Loss_Exp/eval_phase/test_stream/Task000/Exp004 = 0.0717
	Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp004 = 0.9758
-- >> End of eval phase << --
	Loss_Stream/eval_phase/test_stream/Task000 = 11.5476
	Top1_Acc_Stream/eval_phase/test_stream/Task000 = 0.1935





### Save and Print Results

In [10]:
exp_metrics = exp.batch_metric_values
num_trials = exp.n_trials

result_dict = {}

for ens in exp_metrics.keys():
    num_contexts = len(exp_metrics[ens][0]["experience_test_acc"])

    ens_values = np.zeros((num_trials, num_contexts))

    for i in range(num_trials):
        for j in range(num_contexts):
            # ith trial, jth context
            ens_values[i, j] = exp_metrics[ens][i]["experience_test_acc"][j]

    # calculate the mean for each context
    experience_mean_values = np.mean(ens_values, axis=0)
    experience_std_values = np.std(ens_values, axis=0)

    # calculate the mean test acc for each trial
    trial_mean_values = np.mean(ens_values, axis=1)

    # calculate the mean and std of the mean test acc for each trial
    avg_test_acc = np.mean(trial_mean_values)
    std_test_acc = np.std(trial_mean_values)

    # in our final df we want to have the following columns:
    # ensemble_name, avg_test_acc, context_1_test_acc, context_2_test_acc, ..., context_n_test_acc
    # we want to set up the dictionary to be able to easily convert to a pandas dataframe
    result_dict[ens] = {
        "avg_test_acc": f"${avg_test_acc*100:.2f} \pm {std_test_acc*100:.2f}$"
    }
    for i in range(num_contexts):
        result_dict[ens][
            f"context_{i+1}_test_acc"
        ] = f"${experience_mean_values[i]*100:.2f} \pm {experience_std_values[i]*100:.2f}$"

table_df = pd.DataFrame(result_dict).T
# add the ensemble name as the first column
table_df["ensemble"] = table_df.index
table_df = table_df.reset_index(drop=True)

# move the ensemble name to the first column
cols = table_df.columns.tolist()
cols = cols[-1:] + cols[:-1]
table_df = table_df[cols]


# sort the rows based on the avg_test_acc
table_df = table_df.sort_values(by="avg_test_acc", ascending=False)
exp_name_suffix = "CAI-experiments-feb16"

file_prefix = f"many_v_class_incremental-trials={num_trials}-batch_size={batch_size}-window_size={window_size}-num_voters={n_voters}-width={ensemble_width}-{exp_name_suffix}"
path = "results"

filepath = f"{path}/{file_prefix}-tabledata-test-feb16.csv"
table_df.to_csv(filepath, index=False)

table_df.head(100)

Unnamed: 0,ensemble,avg_test_acc,context_1_test_acc,context_2_test_acc,context_3_test_acc,context_4_test_acc,context_5_test_acc
22,max_diversity-accuracy_score-num_gurus-7,$42.69 \pm 4.66$,$62.99 \pm 21.32$,$9.76 \pm 23.14$,$0.01 \pm 0.02$,$89.83 \pm 8.79$,$50.84 \pm 25.25$
16,probabilistic_weighted-accuracy_score-num_gurus-7,$41.97 \pm 6.85$,$76.25 \pm 26.43$,$2.29 \pm 6.75$,$0.03 \pm 0.08$,$80.33 \pm 17.92$,$50.96 \pm 22.67$
3,random_better-accuracy_score-num_gurus-5,$41.64 \pm 7.27$,$88.42 \pm 7.03$,$10.73 \pm 20.72$,$7.15 \pm 21.01$,$72.74 \pm 24.01$,$29.14 \pm 18.87$
10,probabilistic_better-accuracy_score-num_gurus-7,$40.52 \pm 3.50$,$82.38 \pm 9.12$,$4.03 \pm 8.91$,$0.00 \pm 0.00$,$91.53 \pm 6.07$,$24.66 \pm 17.29$
9,probabilistic_better-accuracy_score-num_gurus-5,$39.34 \pm 9.29$,$83.54 \pm 7.96$,$6.59 \pm 15.17$,$6.54 \pm 10.62$,$78.04 \pm 32.37$,$21.98 \pm 17.65$
21,max_diversity-accuracy_score-num_gurus-5,$38.25 \pm 6.71$,$86.09 \pm 12.29$,$7.80 \pm 10.06$,$0.31 \pm 0.83$,$70.00 \pm 23.22$,$27.06 \pm 21.98$
4,random_better-accuracy_score-num_gurus-7,$36.75 \pm 6.85$,$46.41 \pm 25.04$,$2.03 \pm 5.33$,$0.01 \pm 0.02$,$96.07 \pm 4.12$,$39.23 \pm 23.33$
23,max_diversity-accuracy_score-num_gurus-9,$36.26 \pm 4.71$,$26.79 \pm 25.35$,$0.00 \pm 0.00$,$0.00 \pm 0.00$,$92.90 \pm 8.02$,$61.59 \pm 17.01$
15,probabilistic_weighted-accuracy_score-num_gurus-5,$35.56 \pm 7.63$,$89.30 \pm 7.27$,$2.54 \pm 6.88$,$0.01 \pm 0.03$,$59.96 \pm 31.14$,$25.98 \pm 18.35$
20,max_diversity-accuracy_score-num_gurus-3,$35.08 \pm 8.63$,$94.36 \pm 2.41$,$4.62 \pm 5.33$,$42.93 \pm 30.92$,$19.69 \pm 12.34$,$13.78 \pm 18.99$


In [11]:
# make a df with mean and std in separate columns using result_dict
exp_metrics = exp.batch_metric_values
num_trials = exp.n_trials

result_dict = {}

for ens in exp_metrics.keys():
    num_contexts = len(exp_metrics[ens][0]["experience_test_acc"])

    ens_values = np.zeros((num_trials, num_contexts))

    for i in range(num_trials):
        for j in range(num_contexts):
            # ith trial, jth context
            ens_values[i, j] = exp_metrics[ens][i]["experience_test_acc"][j]

    # calculate the mean for each context
    experience_mean_values = np.mean(ens_values, axis=0)
    experience_std_values = np.std(ens_values, axis=0)

    # calculate the mean test acc for each trial
    trial_mean_values = np.mean(ens_values, axis=1)

    # calculate the mean and std of the mean test acc for each trial
    avg_test_acc = np.mean(trial_mean_values)
    std_test_acc = np.std(trial_mean_values)

    # in our final df we want to have the following columns:
    # ensemble_name, avg_test_acc, context_1_test_acc, context_2_test_acc, ..., context_n_test_acc
    # we want to set up the dictionary to be able to easily convert to a pandas dataframe
    result_dict[ens] = {
        "avg_test_acc": (avg_test_acc, std_test_acc),
    }
    for i in range(num_contexts):
        result_dict[ens][
            f"context_{i+1}_test_acc"
        ] = (experience_mean_values[i], experience_std_values[i])

viz_df = pd.DataFrame(result_dict).T
# add the ensemble name as the first column
viz_df["ensemble"] = viz_df.index
viz_df = viz_df.reset_index(drop=True)

# move the ensemble name to the first column
cols = viz_df.columns.tolist()
cols = cols[-1:] + cols[:-1]
viz_df = viz_df[cols]


# sort the rows based on the avg_test_acc
viz_df = viz_df.sort_values(by="avg_test_acc", ascending=False)

file_prefix = f"many_v_class_incremental-trials={num_trials}-batch_size={batch_size}-window_size={window_size}-num_voters={n_voters}-width={ensemble_width}-{exp_name_suffix}"
path = "results"

filepath = f"{path}/{file_prefix}-vizdata-test-feb16.csv"
viz_df.to_csv(filepath, index=False)

viz_df.head(100)

Unnamed: 0,ensemble,avg_test_acc,context_1_test_acc,context_2_test_acc,context_3_test_acc,context_4_test_acc,context_5_test_acc
22,max_diversity-accuracy_score-num_gurus-7,"(0.42685280867182096, 0.046591724857778544)","(0.6298733818180421, 0.21322122350562783)","(0.09759781630709767, 0.2314161489931543)","(0.00010416666666666666, 0.00020833333333333332)","(0.898323567584157, 0.0878625756163807)","(0.5083651109831407, 0.2525438559963803)"
16,probabilistic_weighted-accuracy_score-num_gurus-7,"(0.41971254492886184, 0.0684984405783983)","(0.7625205773476731, 0.2642577162003916)","(0.022916399873793125, 0.06745186859265988)","(0.00026041666666666666, 0.0007812499999999998)","(0.8032655656337738, 0.17924192592119692)","(0.5095997651224025, 0.22668790077780585)"
3,random_better-accuracy_score-num_gurus-5,"(0.416369122616344, 0.07270632164832862)","(0.8841541377937092, 0.07025500116672306)","(0.10730660858098418, 0.20716754154190262)","(0.0715218497812748, 0.21006342393610444)","(0.7274280896410346, 0.24014412126233356)","(0.29143492728471754, 0.18872776528179425)"
10,probabilistic_better-accuracy_score-num_gurus-7,"(0.40519822597284527, 0.03499663341029803)","(0.8238380702102885, 0.09120869650725318)","(0.04027119623497129, 0.0890982365810496)","(0.0, 0.0)","(0.9152595292776823, 0.060655886471500255)","(0.24662233414128423, 0.17290631658093217)"
9,probabilistic_better-accuracy_score-num_gurus-5,"(0.39338250183272083, 0.09291525386720795)","(0.8354271839646732, 0.07961377260173243)","(0.06588034722954035, 0.15173229462625515)","(0.06535696138938268, 0.1062283322124734)","(0.7804036461748183, 0.3236834442629436)","(0.2198443704051897, 0.17653178596394184)"
21,max_diversity-accuracy_score-num_gurus-5,"(0.38251666255195754, 0.06709621439858314)","(0.8609038910445044, 0.12293389551149314)","(0.07801613711053505, 0.10058701621114396)","(0.003102134143312772, 0.008317085594861759)","(0.6999866832047701, 0.2321712285539682)","(0.27057446725666523, 0.21979014037341538)"
4,random_better-accuracy_score-num_gurus-7,"(0.36748627999654115, 0.06848024411252156)","(0.4640906215426238, 0.25044897729914994)","(0.020323706476483495, 0.05328522958404879)","(5.208333333333333e-05, 0.00015625)","(0.9606844820082188, 0.0412193774914316)","(0.3922805066220462, 0.23331669007906114)"
23,max_diversity-accuracy_score-num_gurus-9,"(0.362559008657494, 0.04710417207190865)","(0.26785900454749084, 0.2535398012568469)","(0.0, 0.0)","(0.0, 0.0)","(0.9289994679391385, 0.08022095008609775)","(0.6159365708008409, 0.17011574919914177)"
15,probabilistic_weighted-accuracy_score-num_gurus-5,"(0.35557163570460626, 0.07627631068242749)","(0.8929961863686054, 0.0726861622376472)","(0.02540903559420258, 0.06882217916530971)","(0.00010416666666666666, 0.0003125)","(0.5995575877837837, 0.3113734104184359)","(0.2597912021097727, 0.18353036093410768)"
20,max_diversity-accuracy_score-num_gurus-3,"(0.3507551274254973, 0.08633530147014985)","(0.943640941030839, 0.02406090188507261)","(0.04616499096155167, 0.05331551050121002)","(0.42925304859876634, 0.3091933928367468)","(0.1968942355364561, 0.1234055671069808)","(0.13782242099987344, 0.1898954534720106)"


In [7]:
batch_metrics = exp.get_aggregate_batch_metrics()
dfs = []
for ens, metric_dict in batch_metrics.items():
    df = pd.DataFrame.from_dict(metric_dict, orient="index")
    df["ensemble_name"] = ens
    dfs.append(df)
single_active_df = pd.concat(dfs)
col_order = [len(single_active_df.columns) - 1] + list(
    range(len(single_active_df.columns) - 1)
)
single_active_df = single_active_df[single_active_df.columns[col_order]]
file_prefix = f"many_v_class_incremental-trials={num_trials}-batch_size={batch_size}_window_size={window_size}-feb10"
path = "results"

if not os.path.exists(path):
    os.mkdir(path)

filepath = f"{path}/{file_prefix}.csv"
single_active_df.to_csv(filepath)

In [8]:
# Print results - single guru

print(f"Results for mechanisms")

# Collect and print train accuracies - aggregate and by batch
train_results_dict = dict()
for ens_name, ensemble in ensembles_dict.items():
    train_acc, train_acc_std = calculate_avg_std_train_accs(exp, ens_name, num_trials)
    train_results_dict[ens_name] = (train_acc, train_acc_std)

for strat_name, (strat, eval_plugin) in initialize_strategies_to_evaluate().items():
    train_acc, train_acc_std = calculate_avg_std_train_accs(exp, strat_name, num_trials)
    train_results_dict[strat_name] = (train_acc, train_acc_std)

for ens_name, (train_acc, train_acc_std) in train_results_dict.items():
    print(
        f"Mean train acc for {ens_name}: {round(np.mean(train_acc), 3)}+-{round(np.mean(train_acc_std), 3)}"
    )
# for ens_name, (train_acc, train_acc_std) in train_results_dict.items():
#     print(f"All train accs for {ens_name}: {train_acc}")

print("--------------")

# Collect and print test accuracies
# results_dict = dict()
# for ens_name, ensemble in ensembles_dict.items():
#     test_acc, test_acc_std = calculate_avg_std_test_accs(exp, ens_name, num_trials)
#     results_dict[ens_name] = (test_acc, test_acc_std)

# for strat_name, (strat, eval_plugin) in initialize_strategies_to_evaluate().items():
#     test_acc, test_acc_std = calculate_avg_std_test_accs(exp, strat_name, num_trials)
#     results_dict[strat_name] = (test_acc, test_acc_std)


# for ens_name, (test_acc, test_acc_std) in results_dict.items():
#     print(
#         f"Mean test acc for {ens_name}: {round(np.mean(test_acc), 3)}+-{round(np.mean(test_acc_std), 3)}"
# )

print("--------------")

results_dict = dict()

for ens_name, ensemble in ensembles_dict.items():
    test_acc, _ = calculate_avg_std_test_accs_per_trial(exp, ens_name, num_trials)
    print(
        f"Mean test acc for {ens_name}: {round(np.mean(test_acc), 3)}+-{round(np.std(test_acc), 3)}"
    )
    results_dict[ens_name] = (np.mean(test_acc), np.std(test_acc))

for strat_name, (strat, eval_plugin) in initialize_strategies_to_evaluate().items():
    test_acc, _ = calculate_avg_std_test_accs_per_trial(exp, strat_name, num_trials)
    print(
        f"Mean test acc for {strat_name}: {round(np.mean(test_acc), 3)}+-{round(np.std(test_acc), 3)}"
    )
    results_dict[strat_name] = (np.mean(test_acc), np.std(test_acc))

Results for mechanisms
Mean train acc for full-ensemble: 0.793+-0.029
Mean train acc for random_better-accuracy_score-num_gurus-1: 0.75+-0.111
Mean train acc for random_better-accuracy_score-num_gurus-3: 0.8+-0.071
Mean train acc for random_better-accuracy_score-num_gurus-5: 0.821+-0.049
Mean train acc for random_better-accuracy_score-num_gurus-7: 0.828+-0.044
Mean train acc for random_better-accuracy_score-num_gurus-9: 0.818+-0.041
Mean train acc for random_better-accuracy_score-num_gurus-11: 0.82+-0.036
Mean train acc for probabilistic_better-accuracy_score-num_gurus-1: 0.776+-0.103
Mean train acc for probabilistic_better-accuracy_score-num_gurus-3: 0.83+-0.056
Mean train acc for probabilistic_better-accuracy_score-num_gurus-5: 0.841+-0.039
Mean train acc for probabilistic_better-accuracy_score-num_gurus-7: 0.832+-0.045
Mean train acc for probabilistic_better-accuracy_score-num_gurus-9: 0.837+-0.035
Mean train acc for probabilistic_better-accuracy_score-num_gurus-11: 0.837+-0.033
Mea



In [9]:
# print(results_dict)
# make data frame with three columns: name, mean, std
df = pd.DataFrame.from_dict(results_dict, orient="index", columns=["mean", "std"])
df = df.reset_index()
df = df.rename(columns={"index": "name"})
df = df.sort_values(by="mean", ascending=False, ignore_index=True)
# write to csv in results/keepers/class_inc_avgs.csv
df.to_csv("results/keepers/many_v_class_inc_avgs.csv")

In [11]:
exp.batch_metric_values["max_diversity-f1_score-num_gurus-1"]

{0: {'batch_train_acc': [1.0,
   1.0,
   0.984375,
   0.9921875,
   1.0,
   1.0,
   0.9921875,
   0.9921875,
   0.9921875,
   1.0,
   1.0,
   0.9921875,
   1.0,
   1.0,
   1.0,
   0.9921875,
   0.984375,
   0.984375,
   0.984375,
   1.0,
   1.0,
   1.0,
   0.9921875,
   0.9921875,
   1.0,
   1.0,
   0.9921875,
   1.0,
   0.9921875,
   0.984375,
   0.984375,
   1.0,
   0.9921875,
   1.0,
   1.0,
   0.9921875,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   0.9921875,
   0.9921875,
   1.0,
   1.0,
   0.9921875,
   1.0,
   1.0,
   1.0,
   0.984375,
   1.0,
   1.0,
   1.0,
   0.9921875,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   0.9921875,
   0.9921875,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   0.9921875,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   0.9921875,
   1.0,
   1.0,
   0.9921875,
   0.9921875,
   0.9921875,
   0.9921875,
   1.0,
   1.0,
   1.0,
   0.9921875,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   1.0,
   0.9921875,
   1.0,
   0.9921875,
   0.9921875,
   1.0,

# Explore Results

(leftover copied code from other file, not adapted for the above code)

In [13]:
print(len(exp.ensembles))
for ens in exp.ensembles:
    print(ens.name)

26
full-ensemble
random_better-accuracy_score-num_gurus-1
random_better-accuracy_score-num_gurus-3
random_better-accuracy_score-num_gurus-5
random_better-accuracy_score-num_gurus-7
random_better-accuracy_score-num_gurus-9
random_better-accuracy_score-num_gurus-11
probabilistic_better-accuracy_score-num_gurus-1
probabilistic_better-accuracy_score-num_gurus-3
probabilistic_better-accuracy_score-num_gurus-5
probabilistic_better-accuracy_score-num_gurus-7
probabilistic_better-accuracy_score-num_gurus-9
probabilistic_better-accuracy_score-num_gurus-11
probabilistic_weighted-accuracy_score-num_gurus-1
probabilistic_weighted-accuracy_score-num_gurus-3
probabilistic_weighted-accuracy_score-num_gurus-5
probabilistic_weighted-accuracy_score-num_gurus-7
probabilistic_weighted-accuracy_score-num_gurus-9
probabilistic_weighted-accuracy_score-num_gurus-11
max_diversity-accuracy_score-num_gurus-1
max_diversity-accuracy_score-num_gurus-3
max_diversity-accuracy_score-num_gurus-5
max_diversity-accuracy_

In [14]:
print(exp.ensembles[-5].name)
batch_accs = []
for voter in exp.ensembles[-5].voters:
    batch_accs.append(voter.batch_accuracies)
# exp.ensembles[-5].voters[0].batch_accuracies

max_diversity-accuracy_score-num_gurus-5


In [15]:
def find_active_streaks(voter_id, trial_num):
    """
    Find active streaks for a specified voter.

    :param voter_id: ID of the voter for which to find active streaks.
    :param batch_metric_values: Dictionary containing the batch metric values.
    :param metric_key: Key to access the relevant metric in batch_metric_values.
    :return: List of active streaks for the specified voter.
    """
    active_batches = []
    active_streak = [None, None]
    voter_active = False

    for i, av in enumerate(
        exp.batch_metric_values["max_diversity-f1_score-num_gurus-1"][trial_num][
            "active_voters-train"
        ]
    ):
        # print(av)
        if voter_id in av:
            if not voter_active:
                # Start a new streak
                active_streak[0] = i
                voter_active = True
                # print("streak started")
            active_streak[1] = i
        else:
            if voter_active:
                # End the current streak
                active_batches.append(active_streak.copy())
                active_streak = [None, None]
                voter_active = False
                # print("streak done")

    # Handle case where the streak continues till the end of the list
    if voter_active:
        active_batches.append(active_streak.copy())

    return active_batches

### Look at activity on last trial

In [16]:
for voter_id in range(n_voters):
    active_streaks = find_active_streaks(voter_id, num_trials - 1)
    print(active_streaks)
    # print(f"Active Streaks for Voter {voter_id}: {active_streaks}")

    plt.figure(figsize=(10, 5))  # Create a new figure for each voter
    plt.plot(batch_accs[voter_id])
    # plt.axvline(x=len_train, color="k", linestyle="--", linewidth=1)

    # Shade the active batches for this voter
    for streak in active_streaks:
        if streak[0] is not None and streak[1] is not None:
            plt.axvspan(streak[0], streak[1], alpha=0.3, color="red")

    # Plot a green vertical line at all train splits
    # for split in train_splits[:-1]:
    #     plt.axvline(x=split, color="g", linestyle="--", linewidth=2)

    plt.title(f"Voter {voter_id} Activity")
    plt.xlabel("Batches")
    plt.ylabel("Accuracy")
    plt.show()  # Display the plot for each voter

KeyError: 'max_diversity-f1_score-num_gurus-1'