**Notebook Setup for Experiments**

In [None]:
!git clone https://github.com/XuyangAbert/CALFD
!pip install numpy
!pip install scipy
!pip install torch
!pip install torchvision
!pip install scikit-learn
!pip install tqdm
!pip install ipdb==0.13.9
!pip install openml==0.12.2
!pip install faiss-gpu==1.7.2
!pip install toma==1.1.0
!pip install opencv-python==4.5.5.64
!pip install wilds==2.0.0

Cloning into 'CALFD'...
remote: Enumerating objects: 375, done.[K
remote: Counting objects: 100% (294/294), done.[K
remote: Compressing objects: 100% (162/162), done.[K
remote: Total 375 (delta 192), reused 184 (delta 131), pack-reused 81[K
Receiving objects: 100% (375/375), 191.86 KiB | 4.46 MiB/s, done.
Resolving deltas: 100% (220/220), done.
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch)
  Using cached nvidia_

In [None]:
import sys
# Add the cloned repository's code directory to the module search path. This
# runs before the `utils` / `parameters` imports below (presumably those
# modules live in this directory -- confirm against the repository layout).
sys.path.append("/content/CALFD/deepALplus-master")
import argparse
import numpy as np
import warnings
import torch
from utils import get_dataset, get_net, get_net_lpl, get_net_waal, get_strategy
from pprint import pprint

# Print tensors in full instead of the default summarized form.
torch.set_printoptions(profile='full')

import sys
import os
import re
import random
import math
import datetime

# Wildcard imports: names used later in the notebook but not defined in it
# (e.g. `args_pool`) presumably come from one of these modules -- confirm.
from parameters import *
from utils import *

**Parameters Setup/Specifications**

In [None]:
# Value passed to dataset.initialize_labels() in the experiment cell to build
# the initial labeled pool (presumably the number of initially labeled samples).
initseed = 100 # Use 100 for the CIFAR-10, MNIST, FashionMNIST, and SVHN datasets.
# For the CIFAR-100 and TinyImageNet datasets, set this value to 1000.
NUM_QUERY = 4900 # Labeling budget: the number of samples requested from the query strategy.
# For each dataset, choose NUM_QUERY so that NUM_QUERY + initseed = round(label_ratio * Ns),
# where Ns denotes the number of samples in the training set.
# label_ratio should be set to 1%, 5%, 10%, 15%, and 20%, respectively.
NUM_ROUND = 1 # Number of query rounds; keep this value fixed.
DATA_NAME = "CIFAR10" # Change the dataset name here (used as the key into args_pool).
# Benchmark datasets: CIFAR-10, CIFAR-100, MNIST, FashionMNIST, SVHN, and TinyImageNet.
# NOTE(review): the exact key spelling expected by args_pool is not visible here -- confirm.
# Number of times each experiment is repeated.
iteration = 10

Specify the active learning strategy:
1. "EntropySampling"
2. "CALFD"
4. "LeastConfidence"
5. "MarginSampling"
6. "LossPredictionLoss"
7. "BadgeSampling"
11. "BALDDropout"



Note: CALFD is our approach

In [None]:
# Strategy name handed to get_strategy() in the experiment cell; "CALFD"
# additionally selects the one-time query path there.
ALstrategy = "CALFD"

In [None]:
# Run `iteration` independent trials of the selected active-learning strategy
# on the selected dataset, then report the mean and standard deviation of the
# final test accuracy and macro-F1 across trials.
#
# Reads the notebook-level settings DATA_NAME, ALstrategy, initseed, NUM_QUERY,
# NUM_ROUND and iteration; fills all_acc / all_f1 with one final score per trial.

# device
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

warnings.filterwarnings('ignore')

# Per-trial results. They are reset here so that re-running the cell starts clean.
all_acc = []
all_f1 = []
acq_time = []  # NOTE(review): declared for timing results but never filled in this cell

args_input = {}
dataset_name = DATA_NAME

# WAAL is the only strategy whose train() is called with a model name.
train_kwargs = {'model_name': ALstrategy} if ALstrategy == 'WAAL' else {}
is_ceal = 'CEALSampling' in ALstrategy

# Iterate over a range instead of counting `iteration` down: the setting is
# left untouched, so this cell can be re-run without re-running the
# parameters cell (counting it down to 0 made a second run execute no trials).
for trial in range(iteration):
    # data, network, strategy -- rebuilt from scratch for every trial
    args_task = args_pool[dataset_name]
    dataset = get_dataset(dataset_name, args_task)
    if ALstrategy == 'LossPredictionLoss':
        net = get_net_lpl(dataset_name, args_task, device)
    elif ALstrategy == 'WAAL':
        net = get_net_waal(dataset_name, args_task, device)
    else:
        net = get_net(dataset_name, args_task, device)
    strategy = get_strategy(ALstrategy, dataset, net, args_input, args_task)
    start = datetime.datetime.now()  # NOTE(review): captured but not used afterwards

    # generate initial labeled pool
    dataset.initialize_labels(initseed)

    # Per-round scores; index 0 is never evaluated and stays 0, and only the
    # last entry is kept as the trial's result.
    acc = np.zeros(NUM_ROUND + 1)
    f1 = np.zeros(NUM_ROUND + 1)

    if ALstrategy == 'CALFD':
        # One-time query: train on the initial pool, request the whole budget
        # in a single call, retrain, and evaluate once.
        strategy.train()
        q_idxs = strategy.query(NUM_QUERY * NUM_ROUND)
        strategy.update(q_idxs)
        strategy.train()
        preds = strategy.predict(dataset.get_test_data())
        acc[-1] = dataset.cal_test_acc(preds)
        f1[-1] = dataset.cal_test_f1(preds)
    else:
        # round 0: train on the initial labeled pool
        strategy.train(**train_kwargs)

        # rounds 1..NUM_ROUND: query -> update labels -> retrain -> evaluate
        for rd in range(1, NUM_ROUND + 1):
            if is_ceal:
                # The text after the "CEALSampling" prefix (and one separator
                # character) is forwarded as the query option.
                q_idxs, new_data = strategy.query(NUM_QUERY, rd, option = ALstrategy[13:])
            else:
                q_idxs = strategy.query(NUM_QUERY)

            strategy.update(q_idxs)

            if is_ceal:
                strategy.train(new_data)
            else:
                strategy.train(**train_kwargs)

            preds = strategy.predict(dataset.get_test_data())
            acc[rd] = dataset.cal_test_acc(preds)
            f1[rd] = dataset.cal_test_f1(preds)

    # Keep the final-round score of this trial.
    all_acc.append(acc[-1])
    all_f1.append(f1[-1])
    print("Iteration "+str(trial + 1)+" Finished!")

print("Mean of Acc across ten iterations for "+str(ALstrategy)+" : ", np.mean(all_acc))
print("Standard Deviation of Acc across ten iterations for "+str(ALstrategy)+" : ", np.std(all_acc))
print("Mean of Macro-F1 across ten iterations for "+str(ALstrategy)+" : ", np.mean(all_f1))
print("Standard Deviation of Macro-F1 across ten iterations for "+str(ALstrategy)+" : ", np.std(all_f1))