# CIFAR-10

In [26]:
import mlflow
import numpy as np
import pylab as plt
import pandas as pd

# The CIFAR-10 updating runs live in a local, file-based MLflow store.
experiment_id = '473961987545598420'
client = mlflow.tracking.MlflowClient(
    tracking_uri='file:///home/dhuseljic/mlflow/ssal/updating',
)

# Unfiltered search, only used to report how many runs the experiment holds.
# NOTE(review): a single search_runs call returns one page of results (MLflow
# documents a default max_results of 1000), so this count may be capped - confirm.
runs = client.search_runs(experiment_ids=experiment_id)
print(f'Found {len(runs)} experiments')

Found 1000 experiments


In [44]:
# Run parameters that select the CIFAR-10 configuration to evaluate.
# Parameter values are compared as strings in the filter expression.
param_filters = {
    'dataset_name': 'cifar10',
    'model.name': 'laplace',
    'dino_model_name': 'dinov2_vitl14',
    'num_init_samples': '50',
    'num_new_samples': '10',
    'model.num_epochs': '500',
    'update_lmb': '1',
    'update_gamma': '1',
}
query_list = [
    "parameter.{} = '{}'".format(name, value)
    for name, value in param_filters.items()
]

# All conditions must hold at once, so they are joined with `and`.
query = ' and '.join(query_list)
runs = client.search_runs(experiment_ids=experiment_id, filter_string=query)
print(f'Found {len(runs)} experiments')

Found 200 experiments


In [45]:
# Per-run metrics of the base, updated and retrained model: one list per
# (stage, metric) pair. The summary-table cell reads these names.
base_accuracies, updated_accuracies, retrained_accuracies = [], [], []
base_ACEs, updated_ACEs, retrained_ACEs = [], [], []
base_AUPRs, updated_AUPRs, retrained_AUPRs = [], [], []

num_improved = 0
for run in runs:
    metrics = run.data.metrics
    if not metrics:
        continue  # failed run: no metrics were logged

    base_accuracies.append(metrics['base_accuracy'])
    updated_accuracies.append(metrics['updated_accuracy'])
    retrained_accuracies.append(metrics['retrained_accuracy'])

    base_ACEs.append(metrics['base_ACE'])
    updated_ACEs.append(metrics['updated_ACE'])
    retrained_ACEs.append(metrics['retrained_ACE'])

    base_AUPRs.append(metrics['base_AUPR'])
    updated_AUPRs.append(metrics['updated_AUPR'])
    retrained_AUPRs.append(metrics['retrained_AUPR'])

    if updated_accuracies[-1] > base_accuracies[-1]:
        num_improved += 1
    else:
        # Show the runs where updating did not beat the base accuracy.
        print(base_accuracies[-1], updated_accuracies[-1], retrained_accuracies[-1])
        # Retraining test?

    # if np.isnan(updated_ACEs[-1]):
    #    print(run.data.params['random_seed'], base_accuracies[-1], updated_accuracies[-1])

# Failed runs were skipped above, so the ratio is taken over the runs that
# actually contributed metrics rather than over len(runs).
num_evaluated = len(base_accuracies)
print('Improvment {} / {}'.format(num_improved, num_evaluated))


0.8995000123977661 0.8665000200271606 0.9003999829292297
0.9366000294685364 0.9365000128746033 0.935699999332428
0.8824999928474426 0.8824999928474426 0.8822000026702881
0.8693000078201294 0.8691999912261963 0.8682000041007996
0.9028000235557556 0.34470000863075256 0.9168000221252441
Improvment 195 / 200


In [42]:
# Group the collected per-run lists by metric, ordered base / updated / retrained.
collected = {
    'accuracy': (base_accuracies, updated_accuracies, retrained_accuracies),
    'ACE': (base_ACEs, updated_ACEs, retrained_ACEs),
    'AUPR': (base_AUPRs, updated_AUPRs, retrained_AUPRs),
}

# Average every list over the runs: one row per metric after the transpose.
results = {
    metric: [np.mean(values) for values in per_stage]
    for metric, per_stage in collected.items()
}
df = pd.DataFrame(results).T
df.columns = ['base', 'updated', 'retrained']

# Emit the table as LaTeX with three decimals per entry.
latex_table = df.to_latex(float_format="{:.3f}".format)
print(latex_table)

\begin{tabular}{lrrr}
\toprule
 & base & updated & retrained \\
\midrule
accuracy & 0.660 & 0.677 & 0.710 \\
ACE & 0.134 & 0.136 & 0.139 \\
AUPR & 0.657 & 0.666 & 0.689 \\
\bottomrule
\end{tabular}



# SVHN

In [50]:
import mlflow
import numpy as np
import pylab as plt
import pandas as pd

# The SVHN updating runs live in their own local, file-based MLflow store.
experiment_id = '620601089375835831'
client = mlflow.tracking.MlflowClient(
    tracking_uri='file:///home/dhuseljic/mlflow/ssal/updating_svhn',
)

# Unfiltered search, only used to report how many runs the experiment holds.
# NOTE(review): a single search_runs call returns one page of results (MLflow
# documents a default max_results of 1000), so this count may be capped - confirm.
runs = client.search_runs(experiment_ids=experiment_id)
print(f'Found {len(runs)} experiments')

Found 1000 experiments


In [51]:
# Run parameters that select the SVHN configuration to evaluate.
# Parameter values are compared as strings in the filter expression.
param_filters = [
    ('model.name', 'laplace'),
    ('dino_model_name', 'dinov2_vitl14'),
    ('num_init_samples', '500'),
    ('num_new_samples', '100'),
    ('model.num_epochs', '500'),
    ('update_lmb', '1'),
    ('update_gamma', '1'),
]
query_list = [f"parameter.{name} = '{value}'" for name, value in param_filters]

# All conditions must hold at once, so they are joined with `and`.
query = ' and '.join(query_list)
runs = client.search_runs(experiment_ids=experiment_id, filter_string=query)
print(f'Found {len(runs)} experiments')

Found 200 experiments


In [52]:
# Per-run metrics of the base, updated and retrained model: one list per
# (stage, metric) pair. The summary-table cell reads these names.
base_accuracies, updated_accuracies, retrained_accuracies = [], [], []
base_ACEs, updated_ACEs, retrained_ACEs = [], [], []
base_AUPRs, updated_AUPRs, retrained_AUPRs = [], [], []

# Logged metric key -> list that receives its value (insertion order is the
# order in which the keys are read for every run).
metric_sinks = {
    'base_accuracy': base_accuracies,
    'updated_accuracy': updated_accuracies,
    'retrained_accuracy': retrained_accuracies,
    'base_ACE': base_ACEs,
    'updated_ACE': updated_ACEs,
    'retrained_ACE': retrained_ACEs,
    'base_AUPR': base_AUPRs,
    'updated_AUPR': updated_AUPRs,
    'retrained_AUPR': retrained_AUPRs,
}

for run in runs:
    logged = run.data.metrics
    if len(logged) == 0:
        continue  # failed run
    for key, sink in metric_sinks.items():
        sink.append(logged[key])


In [53]:
# Group the collected per-run lists by metric, ordered base / updated / retrained.
collected = {
    'accuracy': (base_accuracies, updated_accuracies, retrained_accuracies),
    'ACE': (base_ACEs, updated_ACEs, retrained_ACEs),
    'AUPR': (base_AUPRs, updated_AUPRs, retrained_AUPRs),
}

# Average every list over the runs: one row per metric after the transpose.
results = {
    metric: [np.mean(values) for values in per_stage]
    for metric, per_stage in collected.items()
}
df = pd.DataFrame(results).T
df.columns = ['base', 'updated', 'retrained']

# Emit the table as LaTeX with three decimals per entry.
latex_table = df.to_latex(float_format="{:.3f}".format)
print(latex_table)

\begin{tabular}{lrrr}
\toprule
 & base & updated & retrained \\
\midrule
accuracy & 0.334 & 0.180 & 0.346 \\
ACE & 0.061 & 0.038 & 0.062 \\
AUPR & 0.953 & 0.412 & 0.950 \\
\bottomrule
\end{tabular}



# CIFAR-100

In [56]:
import mlflow
import numpy as np
import pylab as plt
import pandas as pd

# The CIFAR-100 updating runs live in their own local, file-based MLflow store.
experiment_id = '580146024816781076'
client = mlflow.tracking.MlflowClient(
    tracking_uri='file:///home/dhuseljic/mlflow/ssal/updating_cifar100',
)

# Unfiltered search, only used to report how many runs the experiment holds.
# NOTE(review): a single search_runs call returns one page of results (MLflow
# documents a default max_results of 1000), so this count may be capped - confirm.
runs = client.search_runs(experiment_ids=experiment_id)
print(f'Found {len(runs)} experiments')

Found 399 experiments


In [90]:
# Run parameters that select the CIFAR-100 configuration to evaluate.
# Parameter values are compared as strings in the filter expression.
param_filters = {
    'model.name': 'laplace',
    'dino_model_name': 'dinov2_vitl14',
    'num_init_samples': '500',
    'num_new_samples': '100',
    'model.num_epochs': '500',
    'update_lmb': '1',
    'update_gamma': '1',
}
query_list = [
    "parameter.{} = '{}'".format(name, value)
    for name, value in param_filters.items()
]

# All conditions must hold at once, so they are joined with `and`.
query = ' and '.join(query_list)
runs = client.search_runs(experiment_ids=experiment_id, filter_string=query)
print(f'Found {len(runs)} experiments')

Found 200 experiments


In [94]:
# Per-run metrics of the base, updated and retrained model: one list per
# (stage, metric) pair. The summary-table cell reads these names.
base_accuracies, updated_accuracies, retrained_accuracies = [], [], []
base_ACEs, updated_ACEs, retrained_ACEs = [], [], []
base_AUPRs, updated_AUPRs, retrained_AUPRs = [], [], []

num_improved = 0
for run in runs:
    metrics = run.data.metrics
    if not metrics:
        continue  # failed run: no metrics were logged

    base_accuracies.append(metrics['base_accuracy'])
    updated_accuracies.append(metrics['updated_accuracy'])
    retrained_accuracies.append(metrics['retrained_accuracy'])

    base_ACEs.append(metrics['base_ACE'])
    updated_ACEs.append(metrics['updated_ACE'])
    retrained_ACEs.append(metrics['retrained_ACE'])

    base_AUPRs.append(metrics['base_AUPR'])
    updated_AUPRs.append(metrics['updated_AUPR'])
    retrained_AUPRs.append(metrics['retrained_AUPR'])

    # Count the runs where updating strictly beat the base accuracy.
    if updated_accuracies[-1] > base_accuracies[-1]:
        num_improved += 1

    # if np.isnan(updated_ACEs[-1]):
    #    print(run.data.params['random_seed'], base_accuracies[-1], updated_accuracies[-1])

# Failed runs were skipped above, so the ratio is taken over the runs that
# actually contributed metrics rather than over len(runs).
num_evaluated = len(base_accuracies)
print('Improvment {} / {}'.format(num_improved, num_evaluated))

Improvment 96 / 200


In [92]:
# Group the collected per-run lists by metric, ordered base / updated / retrained.
collected = {
    'accuracy': (base_accuracies, updated_accuracies, retrained_accuracies),
    'ACE': (base_ACEs, updated_ACEs, retrained_ACEs),
    'AUPR': (base_AUPRs, updated_AUPRs, retrained_AUPRs),
}

# Average every list over the runs: one row per metric after the transpose.
# NOTE(review): np.mean propagates NaN; the saved output shows NaN for the
# updated ACE, so presumably at least one run logged a NaN updated_ACE - confirm.
results = {
    metric: [np.mean(values) for values in per_stage]
    for metric, per_stage in collected.items()
}
df = pd.DataFrame(results).T
df.columns = ['base', 'updated', 'retrained']

# Emit the table as LaTeX with three decimals per entry.
latex_table = df.to_latex(float_format="{:.3f}".format)
print(latex_table)

\begin{tabular}{lrrr}
\toprule
 & base & updated & retrained \\
\midrule
accuracy & 0.737 & 0.559 & 0.759 \\
ACE & 0.028 & NaN & 0.029 \\
AUPR & 0.834 & 0.749 & 0.848 \\
\bottomrule
\end{tabular}



# Food101

In [2]:
import mlflow
import numpy as np
import pylab as plt
import pandas as pd

# The Food101 updating runs live in their own local, file-based MLflow store.
experiment_id = '102008380090652354'
client = mlflow.tracking.MlflowClient(
    tracking_uri='file:///home/dhuseljic/mlflow/ssal/updating_food101',
)

# Unfiltered search, only used to report how many runs the experiment holds.
# NOTE(review): a single search_runs call returns one page of results (MLflow
# documents a default max_results of 1000), so this count may be capped - confirm.
runs = client.search_runs(experiment_ids=experiment_id)
print(f'Found {len(runs)} experiments')

Found 598 experiments


In [22]:
# Run parameters that select the Food101 configuration to evaluate.
# Parameter values are compared as strings in the filter expression.
param_filters = [
    ('model.name', 'laplace'),
    ('dino_model_name', 'dinov2_vitl14'),
    ('num_init_samples', '1000'),
    ('num_new_samples', '100'),
    ('model.num_epochs', '500'),
    ('update_lmb', '1'),
    ('update_gamma', '1'),
]
query_list = [f"parameter.{name} = '{value}'" for name, value in param_filters]

# All conditions must hold at once, so they are joined with `and`.
query = ' and '.join(query_list)
runs = client.search_runs(experiment_ids=experiment_id, filter_string=query)
print(f'Found {len(runs)} experiments')

Found 100 experiments


In [23]:
# Per-run metrics of the base, updated and retrained model: one list per
# (stage, metric) pair. The summary-table cell reads these names.
base_accuracies, updated_accuracies, retrained_accuracies = [], [], []
base_ACEs, updated_ACEs, retrained_ACEs = [], [], []
base_AUPRs, updated_AUPRs, retrained_AUPRs = [], [], []

num_improved = 0
for run in runs:
    metrics = run.data.metrics
    if not metrics:
        continue  # failed run: no metrics were logged

    base_accuracies.append(metrics['base_accuracy'])
    updated_accuracies.append(metrics['updated_accuracy'])
    retrained_accuracies.append(metrics['retrained_accuracy'])

    base_ACEs.append(metrics['base_ACE'])
    updated_ACEs.append(metrics['updated_ACE'])
    retrained_ACEs.append(metrics['retrained_ACE'])

    base_AUPRs.append(metrics['base_AUPR'])
    updated_AUPRs.append(metrics['updated_AUPR'])
    retrained_AUPRs.append(metrics['retrained_AUPR'])

    # Count the runs where updating strictly beat the base accuracy.
    if updated_accuracies[-1] > base_accuracies[-1]:
        num_improved += 1

    # if np.isnan(updated_ACEs[-1]):
    #    print(run.data.params['random_seed'], base_accuracies[-1], updated_accuracies[-1])

# Failed runs were skipped above, so the ratio is taken over the runs that
# actually contributed metrics rather than over len(runs).
num_evaluated = len(base_accuracies)
print('Improvment {} / {}'.format(num_improved, num_evaluated))

Improvment 59 / 100


In [24]:
# Group the collected per-run lists by metric, ordered base / updated / retrained.
collected = {
    'accuracy': (base_accuracies, updated_accuracies, retrained_accuracies),
    'ACE': (base_ACEs, updated_ACEs, retrained_ACEs),
    'AUPR': (base_AUPRs, updated_AUPRs, retrained_AUPRs),
}

# Average every list over the runs: one row per metric after the transpose.
results = {
    metric: [np.mean(values) for values in per_stage]
    for metric, per_stage in collected.items()
}
df = pd.DataFrame(results).T
df.columns = ['base', 'updated', 'retrained']

# Emit the table as LaTeX with three decimals per entry.
latex_table = df.to_latex(float_format="{:.3f}".format)
print(latex_table)

\begin{tabular}{lrrr}
\toprule
 & base & updated & retrained \\
\midrule
accuracy & 0.821 & 0.821 & 0.833 \\
ACE & 0.037 & 0.037 & 0.039 \\
AUPR & 1.000 & 1.000 & 1.000 \\
\bottomrule
\end{tabular}



___

In [40]:
# Select every run of this configuration regardless of update_lmb /
# update_gamma; the timing cell splits the runs by those two parameters.
# NOTE(review): `client` and `experiment_id` come from whichever dataset setup
# cell was executed last - confirm the intended experiment before running.
query_list = [
    "parameter.dino_model_name = 'dinov2_vitl14'",
    "parameter.num_init_samples = '10'",
    "parameter.num_new_samples = '10'",
    "parameter.model.num_epochs = '500'",
]
query = ' and '.join(query_list)

# A single search_runs call returns only one page of results, so follow the
# page token until the store reports no further page. Otherwise the timing
# statistics would silently be computed on a truncated set of runs.
runs = []
page_token = None
while True:
    page = client.search_runs(
        experiment_ids=experiment_id,
        filter_string=query,
        page_token=page_token,
    )
    runs.extend(page)
    page_token = page.token
    if not page_token:
        break
print('Found {} experiments.'.format(len(runs)))

Found 1000 experiments.


In [29]:
# Updating / retraining wall-clock metrics, split by update configuration.
updating_times_standard, retraining_times_standard = [], []
updating_times_optimal, retraining_times_optimal = [], []
updating_times_lr, retraining_times_lr = [], []

# (update_lmb, update_gamma) -> (updating list, retraining list).
# Runs with any other combination are ignored.
time_buckets = {
    ('1', '1'): (updating_times_standard, retraining_times_standard),
    ('10', '1'): (updating_times_optimal, retraining_times_optimal),
    ('1', '10'): (updating_times_lr, retraining_times_lr),
}

for run in runs:
    logged = run.data.metrics
    if len(logged) == 0:
        continue  # run without logged metrics
    params = run.data.params
    lmb = params['update_lmb']
    if lmb not in ('1', '10'):
        continue  # no bucket uses this update_lmb; update_gamma is not read
    bucket = time_buckets.get((lmb, params['update_gamma']))
    if bucket is None:
        continue
    updating_times, retraining_times = bucket
    updating_times.append(logged['updated_updating_time'])
    retraining_times.append(logged['retrained_retraining_time'])

In [30]:
# Pair up the updating / retraining time lists of each update configuration.
timings = {
    'standard': (updating_times_standard, retraining_times_standard),
    'optimal': (updating_times_optimal, retraining_times_optimal),
    'lr': (updating_times_lr, retraining_times_lr),
}

# Mean time per configuration; the 'base' column is fixed to 0.
results = {
    name: [0, np.mean(updating), np.mean(retraining)]
    for name, (updating, retraining) in timings.items()
}
df = pd.DataFrame(results).T
df.columns = ['base', 'updated', 'retrained']

markdown_table = df.to_markdown()
print(markdown_table)


|          |   base |   updated |   retrained |
|:---------|-------:|----------:|------------:|
| standard |      0 | 0.0703375 |     3.66768 |
| optimal  |      0 | 0.589755  |     3.47345 |
| lr       |      0 | 0.0709146 |     3.81529 |


In [57]:
# Base-model metrics of the deterministic model, averaged over its runs.
# NOTE(review): `client` and `experiment_id` come from whichever dataset setup
# cell was executed last - confirm the intended experiment before running.
query_list = [
    "parameter.model.name = 'deterministic'",
    "parameter.dino_model_name = 'dinov2_vitl14'",
    "parameter.num_init_samples = '100'",
    "parameter.num_new_samples = '10'",
    "parameter.model.num_epochs = '500'",
    "parameter.update_lmb = '1'",
    "parameter.update_gamma = '1'",
]
query = ' and '.join(query_list)
runs = client.search_runs(experiment_ids=experiment_id, filter_string=query)
print('Found {} experiments.'.format(len(runs)))

# Failed runs log no metrics; skip them (as the other cells do) so that one
# failed run does not abort the whole summary with a KeyError.
finished_metrics = [run.data.metrics for run in runs if run.data.metrics]

# Printed label -> logged metric key, in display order.
reported_metrics = [
    ('ACC', 'base_accuracy'),
    ('ACE', 'base_ACE'),
    ('AUROC', 'base_AUROC'),
    ('AUPR', 'base_AUPR'),
]
for label, key in reported_metrics:
    mean_value = np.mean([metrics[key] for metrics in finished_metrics])
    print('{}: {:.3f}'.format(label, mean_value))

Found 200 experiments.
ACC: 0.936
ACE: 0.038
AUROC: 0.948
AUPR: 0.935
