<a href="https://colab.research.google.com/github/XuyangAbert/DeepALCS/blob/main/deepALplus_alcs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Notebook Setup for Experiments**

In [None]:
!git clone https://github.com/XuyangAbert/DeepALCS
!pip install numpy
!pip install scipy
!pip install torch
!pip install torchvision
!pip install scikit-learn
!pip install tqdm
!pip install ipdb==0.13.9
!pip install openml==0.12.2
!pip install faiss-gpu==1.7.2
!pip install toma==1.1.0
!pip install opencv-python==4.5.5.64
!pip install wilds==2.0.0

Cloning into 'DeepALCS'...
remote: Enumerating objects: 150, done.[K
remote: Counting objects: 100% (69/69), done.[K
remote: Compressing objects: 100% (69/69), done.[K
remote: Total 150 (delta 42), reused 0 (delta 0), pack-reused 81[K
Receiving objects: 100% (150/150), 108.58 KiB | 4.72 MiB/s, done.
Resolving deltas: 100% (70/70), done.
Collecting ipdb==0.13.9
  Downloading ipdb-0.13.9.tar.gz (16 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting jedi>=0.16 (from ipython>=7.17.0->ipdb==0.13.9)
  Downloading jedi-0.19.1-py2.py3-none-any.whl (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m14.7 MB/s[0m eta [36m0:00:00[0m
Building wheels for collected packages: ipdb
  Building wheel for ipdb (setup.py) ... [?25l[?25hdone
  Created wheel for ipdb: filename=ipdb-0.13.9-py3-none-any.whl size=11626 sha256=62af98a00357981bf47c1bdfe18561a7213529ee01a7966a827f058ca7f3d592
  Stored in directory: /root/.cache/pip/wheels/54/02/23

In [None]:
import sys
# Add the deepALplus sources from the cloned repository to the import path.
# Presumably this is where the `utils` and `parameters` modules imported below
# live -- confirm against the repository layout.
sys.path.append("/content/DeepALCS/deepALplus-master")
import argparse
import numpy as np
import warnings
import torch
from utils import get_dataset, get_net, get_net_lpl, get_net_waal, get_strategy
from pprint import pprint

# Select torch's 'full' print profile (per the torch docs, tensors are then
# printed without being summarized).
torch.set_printoptions(profile='full')

import sys
import os
import re
import random
import math
import datetime

# Star imports: later cells use `args_pool`, which is not defined in any visible
# cell, so it presumably comes from one of these modules -- TODO confirm.
from parameters import *
from utils import *

**Parameters Setup/Specifications**

In [None]:
initseed = 100 # Size of the initial labeled pool (passed to dataset.initialize_labels).
# Use 100 for the CIFAR-10, MNIST, FashionMNIST, and SVHN datasets;
# use 1000 for the CIFAR-100 and TinyImageNet datasets.
NUM_QUERY = 4900 # Labeling budget: number of samples requested per query.
# For each dataset, choose NUM_QUERY so that NUM_QUERY + initseed = round(label_ratio * Ns),
# where Ns denotes the number of samples in the training set and
# label_ratio is set to 1%, 5%, 10%, 15%, and 20% in turn.
NUM_ROUND = 1 # Number of query rounds; keep this fixed at 1.
DATA_NAME = "CIFAR10" # Dataset to run on; used as the lookup key into args_pool.
# Benchmark datasets: CIFAR-10, CIFAR-100, MNIST, FashionMNIST, SVHN, and TinyImageNet.
# Number of times each experiment is repeated.
iteration = 10

Specify the active learning strategy:
1. "EntropySampling"
2. "ALCS_Diversity"
3. "LeastConfidence"
4. "MarginSampling"
5. "LossPredictionLoss"
6. "BadgeSampling"
7. "CEALSampling"
8. "VAAL"
9. "WAAL"
10. "BALDDropout"



Note: "ALCS_Diversity" is our proposed approach; it is referred to as "DeepALCS".

In [None]:
# Name of the query strategy to evaluate; the experiment cell passes it to
# get_strategy(...) and branches on it to pick the network and training calls.
ALstrategy = "EntropySampling"

In [None]:
# Experiment driver: run `iteration` independent trials of the strategy named by
# ALstrategy on DATA_NAME, then report mean / standard deviation of the final
# test accuracy and macro-F1 over the trials.

# device
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

warnings.filterwarnings('ignore')

# Per-trial results, in the order the trials are run.
all_acc = []
all_f1 = []
acq_time = []  # kept for compatibility; nothing in this cell fills it

args_input = {}
dataset_name = DATA_NAME

# data, network, strategy
args_task = args_pool[dataset_name]

# Iterate over a copy of the trial count instead of counting `iteration` down to
# zero, so re-running this cell repeats the experiment rather than running none.
num_trials = iteration
for trial in range(num_trials):
    # Reload the dataset for every trial, as the ALCS_Diversity experiment cell
    # does, so each trial starts from a fresh labeled pool.
    # NOTE(review): assumes the dataset object keeps labeled-pool state between
    # initialize_labels() calls -- confirm in the deepALplus data module.
    dataset = get_dataset(dataset_name, args_task)				# load dataset
    if ALstrategy == 'LossPredictionLoss':
        net = get_net_lpl(dataset_name, args_task, device)		# load network
    elif ALstrategy == 'WAAL':
        net = get_net_waal(dataset_name, args_task, device)		# load network
    else:
        net = get_net(dataset_name, args_task, device)			# load network
    strategy = get_strategy(ALstrategy, dataset, net, args_input, args_task)  # load strategy

    # generate initial labeled pool
    dataset.initialize_labels(initseed)

    # Per-round metrics; index 0 is never written and stays 0.
    acc = np.zeros(NUM_ROUND + 1)
    f1 = np.zeros(NUM_ROUND + 1)

    if ALstrategy == 'ALCS_Diversity':
        # One-time query for ALCS-Diversity: request the whole budget at once.
        strategy.train()
        q_idxs = strategy.query(NUM_QUERY * NUM_ROUND)
        strategy.update(q_idxs)
        strategy.train()
        preds = strategy.predict(dataset.get_test_data())
        # Store both metrics in the final slot so this branch reports accuracy
        # and macro-F1 the same way the round-based branch does.
        acc[-1] = dataset.cal_test_acc(preds)
        f1[-1] = dataset.cal_test_f1(preds)
    else:
        # Initial training before any query is made.
        if ALstrategy == 'WAAL':
            strategy.train(model_name = ALstrategy)
        else:
            strategy.train()

        # round 1 to NUM_ROUND
        for rd in range(1, NUM_ROUND+1):
            # query
            if 'CEALSampling' in ALstrategy:
                # Whatever follows the first 13 characters of the strategy name
                # is forwarded as `option` (empty for plain "CEALSampling").
                q_idxs, new_data = strategy.query(NUM_QUERY, rd, option = ALstrategy[13:])
            else:
                q_idxs = strategy.query(NUM_QUERY)

            # update
            strategy.update(q_idxs)

            # train
            if 'CEALSampling' in ALstrategy:
                strategy.train(new_data)
            elif ALstrategy == 'WAAL':
                strategy.train(model_name = ALstrategy)
            else:
                strategy.train()

            # round rd accuracy
            preds = strategy.predict(dataset.get_test_data())
            acc[rd] = dataset.cal_test_acc(preds)
            f1[rd] = dataset.cal_test_f1(preds)

    # Only the metrics after the last round enter the summary.
    all_acc.append(acc[-1])
    all_f1.append(f1[-1])
    # Same countdown label the original loop printed (num_trials - 1 down to 0).
    print("Iteration "+str(num_trials - 1 - trial)+" Finished!")

if all_acc:
    # Report the actual number of trials instead of a hard-coded "ten".
    trials_label = str(len(all_acc))
    print("Mean of Acc across "+trials_label+" iterations for "+str(ALstrategy)+" : ", np.mean(all_acc))
    print("Standard Deviation of Acc across "+trials_label+" iterations for "+str(ALstrategy)+" : ", np.std(all_acc))
    print("Mean of Macro-F1 across "+trials_label+" iterations for "+str(ALstrategy)+" : ", np.mean(all_f1))
    print("Standard Deviation of Macro-F1 across "+trials_label+" iterations for "+str(ALstrategy)+" : ", np.std(all_f1))
else:
    # Without this guard the summary would be the mean/std of empty lists (nan).
    print("No trials were run (iteration = "+str(num_trials)+"); nothing to summarize.")

Files already downloaded and verified
Files already downloaded and verified


 50%|███████████████████████████████▌                               | 15/30 [00:06<00:09,  1.52it/s]

In [None]:
# Switch to the ALCS_Diversity strategy for the next experiment cell.
ALstrategy = "ALCS_Diversity"
# Set the trial count explicitly so the next experiment runs a full set of
# trials regardless of the value earlier cells left in `iteration`.
iteration = 10

In [None]:
# Experiment driver: run `iteration` independent trials of the strategy named by
# ALstrategy on DATA_NAME, then report mean / standard deviation of the final
# test accuracy and macro-F1 over the trials.

# device
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

warnings.filterwarnings('ignore')

# Per-trial results, in the order the trials are run.
all_acc = []
all_f1 = []
acq_time = []  # kept for compatibility; nothing in this cell fills it

args_input = {}
dataset_name = DATA_NAME

# Iterate over a copy of the trial count instead of counting `iteration` down to
# zero, so re-running this cell repeats the experiment rather than running none.
num_trials = iteration
for trial in range(num_trials):
    # data, network, strategy -- rebuilt from scratch for every trial
    args_task = args_pool[dataset_name]
    dataset = get_dataset(dataset_name, args_task)				# load dataset
    if ALstrategy == 'LossPredictionLoss':
        net = get_net_lpl(dataset_name, args_task, device)		# load network
    elif ALstrategy == 'WAAL':
        net = get_net_waal(dataset_name, args_task, device)		# load network
    else:
        net = get_net(dataset_name, args_task, device)			# load network
    strategy = get_strategy(ALstrategy, dataset, net, args_input, args_task)  # load strategy

    # generate initial labeled pool
    dataset.initialize_labels(initseed)

    # Per-round metrics; index 0 is never written and stays 0.
    acc = np.zeros(NUM_ROUND + 1)
    f1 = np.zeros(NUM_ROUND + 1)

    # The one-shot branch is keyed on 'ALCS_Diversity'. It was previously keyed
    # on 'ALCS_Diversity2', which no listed strategy matches, so the branch
    # could never run. With NUM_ROUND = 1 it issues the same train / query /
    # update / train sequence as the round-based branch.
    if ALstrategy == 'ALCS_Diversity':
        # One-time query for ALCS-Diversity: request the whole budget at once.
        strategy.train()
        q_idxs = strategy.query(NUM_QUERY * NUM_ROUND)
        strategy.update(q_idxs)
        strategy.train()
        preds = strategy.predict(dataset.get_test_data())
        # Store both metrics in the final slot so this branch reports accuracy
        # and macro-F1 the same way the round-based branch does.
        acc[-1] = dataset.cal_test_acc(preds)
        f1[-1] = dataset.cal_test_f1(preds)
    else:
        # Initial training before any query is made.
        if ALstrategy == 'WAAL':
            strategy.train(model_name = ALstrategy)
        else:
            strategy.train()

        # round 1 to NUM_ROUND
        for rd in range(1, NUM_ROUND+1):
            # query
            if 'CEALSampling' in ALstrategy:
                # Whatever follows the first 13 characters of the strategy name
                # is forwarded as `option` (empty for plain "CEALSampling").
                q_idxs, new_data = strategy.query(NUM_QUERY, rd, option = ALstrategy[13:])
            else:
                q_idxs = strategy.query(NUM_QUERY)

            # update
            strategy.update(q_idxs)

            # train
            if 'CEALSampling' in ALstrategy:
                strategy.train(new_data)
            elif ALstrategy == 'WAAL':
                strategy.train(model_name = ALstrategy)
            else:
                strategy.train()

            # round rd accuracy
            preds = strategy.predict(dataset.get_test_data())
            acc[rd] = dataset.cal_test_acc(preds)
            f1[rd] = dataset.cal_test_f1(preds)

    # Only the metrics after the last round enter the summary.
    all_acc.append(acc[-1])
    all_f1.append(f1[-1])
    # Same countdown label the original loop printed (num_trials - 1 down to 0).
    print("Iteration "+str(num_trials - 1 - trial)+" Finished!")

if all_acc:
    # Report the actual number of trials instead of a hard-coded "ten".
    trials_label = str(len(all_acc))
    print("Mean of Acc across "+trials_label+" iterations for "+str(ALstrategy)+" : ", np.mean(all_acc))
    print("Standard Deviation of Acc across "+trials_label+" iterations for "+str(ALstrategy)+" : ", np.std(all_acc))
    print("Mean of Macro-F1 across "+trials_label+" iterations for "+str(ALstrategy)+" : ", np.mean(all_f1))
    print("Standard Deviation of Macro-F1 across "+trials_label+" iterations for "+str(ALstrategy)+" : ", np.std(all_f1))
else:
    # Without this guard the summary would be the mean/std of empty lists (nan).
    print("No trials were run (iteration = "+str(num_trials)+"); nothing to summarize.")

9
Files already downloaded and verified
Files already downloaded and verified
<query_strategies.alcs_diversity.ALCS_Diversity object at 0x7f8206cda770>


100%|███████████████████████████████████████████████████████████████| 30/30 [00:09<00:00,  3.30it/s]

Round 1





fps-clustering stage finish!


100%|███████████████████████████████████████████████████████████████| 30/30 [01:01<00:00,  2.06s/it]


Iteration 9 Finished!
8
Files already downloaded and verified
Files already downloaded and verified
<query_strategies.alcs_diversity.ALCS_Diversity object at 0x7f8206c60ee0>


100%|███████████████████████████████████████████████████████████████| 30/30 [00:09<00:00,  3.24it/s]

Round 1





fps-clustering stage finish!


100%|███████████████████████████████████████████████████████████████| 30/30 [00:55<00:00,  1.86s/it]


Iteration 8 Finished!
7
Files already downloaded and verified
Files already downloaded and verified
<query_strategies.alcs_diversity.ALCS_Diversity object at 0x7f82006f1c00>


100%|███████████████████████████████████████████████████████████████| 30/30 [00:09<00:00,  3.23it/s]

Round 1





fps-clustering stage finish!


100%|███████████████████████████████████████████████████████████████| 30/30 [00:58<00:00,  1.94s/it]


Iteration 7 Finished!
6
Files already downloaded and verified
Files already downloaded and verified
<query_strategies.alcs_diversity.ALCS_Diversity object at 0x7f820e1474f0>


100%|███████████████████████████████████████████████████████████████| 30/30 [00:08<00:00,  3.37it/s]

Round 1





fps-clustering stage finish!


100%|███████████████████████████████████████████████████████████████| 30/30 [00:51<00:00,  1.72s/it]


Iteration 6 Finished!
5
Files already downloaded and verified
Files already downloaded and verified
<query_strategies.alcs_diversity.ALCS_Diversity object at 0x7f820459be20>


100%|███████████████████████████████████████████████████████████████| 30/30 [00:09<00:00,  3.30it/s]

Round 1





fps-clustering stage finish!


100%|███████████████████████████████████████████████████████████████| 30/30 [01:06<00:00,  2.23s/it]


Iteration 5 Finished!
4
Files already downloaded and verified
Files already downloaded and verified
<query_strategies.alcs_diversity.ALCS_Diversity object at 0x7f820e1474f0>


100%|███████████████████████████████████████████████████████████████| 30/30 [00:09<00:00,  3.16it/s]

Round 1





fps-clustering stage finish!


100%|███████████████████████████████████████████████████████████████| 30/30 [01:01<00:00,  2.05s/it]


Iteration 4 Finished!
3
Files already downloaded and verified
Files already downloaded and verified
<query_strategies.alcs_diversity.ALCS_Diversity object at 0x7f8204eb7340>


100%|███████████████████████████████████████████████████████████████| 30/30 [00:08<00:00,  3.50it/s]

Round 1





fps-clustering stage finish!


100%|███████████████████████████████████████████████████████████████| 30/30 [00:45<00:00,  1.53s/it]


Iteration 3 Finished!
2
Files already downloaded and verified
Files already downloaded and verified
<query_strategies.alcs_diversity.ALCS_Diversity object at 0x7f820e144460>


100%|███████████████████████████████████████████████████████████████| 30/30 [00:09<00:00,  3.28it/s]

Round 1





fps-clustering stage finish!


100%|███████████████████████████████████████████████████████████████| 30/30 [01:09<00:00,  2.32s/it]


Iteration 2 Finished!
1
Files already downloaded and verified
Files already downloaded and verified
<query_strategies.alcs_diversity.ALCS_Diversity object at 0x7f820048e9e0>


100%|███████████████████████████████████████████████████████████████| 30/30 [00:10<00:00,  2.98it/s]

Round 1





fps-clustering stage finish!


100%|███████████████████████████████████████████████████████████████| 30/30 [01:08<00:00,  2.28s/it]


Iteration 1 Finished!
0
Files already downloaded and verified
Files already downloaded and verified
<query_strategies.alcs_diversity.ALCS_Diversity object at 0x7f82003072b0>


100%|███████████████████████████████████████████████████████████████| 30/30 [00:09<00:00,  3.31it/s]

Round 1





fps-clustering stage finish!


100%|███████████████████████████████████████████████████████████████| 30/30 [01:06<00:00,  2.22s/it]


Iteration 0 Finished!
Mean of Acc across ten iterations for ALCS_Diversity :  0.62935
Standard Deviation of Acc across ten iterations for ALCS_Diversity :  0.022643818140940818
Mean of Macro-F1 across ten iterations for ALCS_Diversity :  0.6281847444023478
Standard Deviation of Macro-F1 across ten iterations for ALCS_Diversity :  0.024867157483555797
