In [None]:
import random
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
from dataset import CIFAR10Dataset

### Parameters.

In [None]:
# Classifier parameters.
# Number of target classes; also used as the starting value of every episode's budget counter.
CLASSIFIER_NUMBER_OF_CLASSES = 10
# Forwarded to the environment as `epochs`.
CLASSIFIER_NUMBER_OF_EPOCHS = 50
# Classifier learning rate (same value as the lr given to the SGD optimizer).
CLASSIFIER_LEARNING_RATE = 0.01
# Forwarded to the environment as `classifier_batch_size`.
CLASSIFIER_BATCH_SIZE = 64

# Parameters for both agents.

# Forwarded to ReplayBuffer as `buffer_size`. Note this is a float literal (50000.0);
# presumably ReplayBuffer converts it to an int -- TODO confirm.
REPLAY_BUFFER_SIZE = 5e4
# Forwarded to ReplayBuffer as `prior_exp`. (The spelling of the name is kept because other cells use it.)
PRIOROTIZED_REPLAY_EXPONENT = 3

# Size of the minibatches sampled from the replay buffer; also forwarded to the DQN.
BATCH_SIZE = 32
# Forwarded to the DQN as `learning_rate`.
LEARNING_RATE = 1e-3
# Forwarded to the DQN as `target_copy_factor`.
TARGET_COPY_FACTOR = 0.01
# Placeholder only: overwritten at the end of the warm-start cell before the DQN is built.
BIAS_INITIALIZATION = 0

# BatchAgent's parameters.

DIRNAME = './batch_agent/' # The resulting batch_agent of this experiment will be written in a file.

# Number of random-action episodes used to pre-fill the replay buffer.
WARM_START_EPISODES_BATCH_AGENT = 5
# Number of agent updates performed after the warm start and after every training epoch.
NN_UPDATES_PER_EPOCHS_BATCH_AGENT = 50

# Outer/inner loop sizes of the agent training phase.
TRAINING_EPOCHS_BATCH_AGENT = 5
TRAINING_EPISODES_PER_EPOCH_BATCH_AGENT = 5

In [None]:
import os
import shutil

cwd = os.getcwd()

# Start from a clean slate: remove artefacts that previous runs may have left
# in the working directory. Missing directories are skipped and removal errors
# are ignored on purpose.
stale_suffixes = ('/__pycache__', '/wandb', '/batch_agent', '/libact', '/AL_results', '/checkpoints', '/summaries', '/data')
for directory in (cwd + suffix for suffix in stale_suffixes):
    if not os.path.exists(directory):
        continue
    shutil.rmtree(directory, ignore_errors=True)

In [None]:
from torch.utils.data import DataLoader
from torchvision import transforms

# Where the dataset is stored, and the four split sizes handed to CIFAR10Dataset.
# The sizes add up to 50,000. (How exactly the splits are carved out is decided
# inside CIFAR10Dataset -- see dataset.py.)
ROOT_DIR = './data'
STATE_DATA = 5000
WARM_START_DATA = 5000
AGENT_DATA = 30000
TEST_METHODS_DATA = 10000

# Only convert images to tensors; no mean/std normalization is applied here.
transform = transforms.Compose([
    transforms.ToTensor(),
])

# Create dataset instance.
cifar10_dataset = CIFAR10Dataset(ROOT_DIR, STATE_DATA, WARM_START_DATA, AGENT_DATA, TEST_METHODS_DATA, transform=transform)

# Use the configured classifier batch size instead of a second hard-coded 64 so
# the loaders stay in sync with the parameters cell.
train_loader = DataLoader(cifar10_dataset, batch_size=CLASSIFIER_BATCH_SIZE, shuffle=True)
test_loader = DataLoader(cifar10_dataset.test_data, batch_size=CLASSIFIER_BATCH_SIZE, shuffle=False)

In [None]:
# Report the size of every split exposed by the dataset object.
split_reports = (
    ("Warm-start data are {}.", "warm_start_data"),
    ("State data are {}.", "state_data"),
    ("Agent data are {}.", "agent_data"),
    ("Test-methods data are {}.", "test_methods_data"),
    ("Test data are {}.", "test_data"),
)
for message, split_name in split_reports:
    print(message.format(len(getattr(cifar10_dataset, split_name))))

In [None]:
# Define the classifier architecture.
class CNNClassifier(nn.Module):
    """ResNet-18 image classifier adapted to 3x32x32 inputs.

    The pretrained backbone is frozen; only the replaced first convolution and
    the replaced final fully connected layer are created with trainable
    parameters.

    Args:
        num_classes: Width of the output layer. Defaults to 10, which is the
            value that used to be hard-coded.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # NOTE(review): `pretrained=` is deprecated in recent torchvision
        # releases in favour of `weights=`; kept for compatibility with the
        # installed version -- confirm before upgrading torchvision.
        self.resnet18 = models.resnet18(pretrained=True)

        # Freeze every pretrained parameter. The layers assigned below are
        # created afterwards, so their parameters remain trainable.
        for param in self.resnet18.parameters():
            param.requires_grad = False

        # Modify the layers to handle smaller input sizes: a 3x3 / stride-1
        # stem and no max pooling, so the input is not downsampled early.
        self.resnet18.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.resnet18.maxpool = nn.Identity()  # Remove the max pooling layer

        # Replace the classification head with one sized for `num_classes`.
        num_ftrs = self.resnet18.fc.in_features
        self.resnet18.fc = nn.Linear(num_ftrs, num_classes)

    def forward(self, x):
        """Reshape `x` to (-1, 3, 32, 32) and return the backbone's output."""
        x = x.reshape(-1, 3, 32, 32)
        return self.resnet18(x)

# Pick the first GPU when one is available, otherwise fall back to the CPU,
# and move a freshly constructed classifier onto that device.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
classifier = CNNClassifier()
classifier.to(device)

# Define the loss function and optimizer. The learning rate comes from the
# parameters cell instead of a duplicated literal (same value, 0.01).
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(classifier.parameters(), lr=CLASSIFIER_LEARNING_RATE)

In [None]:
# Precision target passed to the environment at construction and on every reset.
TARGET_PRECISION = 0.0

In [None]:
from batch_envs import LalEnvFirstAccuracy

# Build the environment around the dataset and the classifier defined above.
batch_env = LalEnvFirstAccuracy(
    cifar10_dataset,
    classifier,
    epochs=CLASSIFIER_NUMBER_OF_EPOCHS,
    classifier_batch_size=CLASSIFIER_BATCH_SIZE,
    target_precision=TARGET_PRECISION,
)

In [None]:
from batch_helpers import ReplayBuffer

# Buffer that collects the transitions of the warm-start and training episodes.
# NOTE(review): REPLAY_BUFFER_SIZE is a float (5e4); presumably ReplayBuffer
# casts it to int -- confirm in batch_helpers.
replay_buffer = ReplayBuffer(
    buffer_size=REPLAY_BUFFER_SIZE,
    prior_exp=PRIOROTIZED_REPLAY_EXPONENT,
)

In [None]:
torch.cuda.empty_cache()  # Release cached, currently unused GPU memory before the warm-start episodes begin.

In [None]:
# WARM-START EPISODES.
#
# Run a number of episodes with uniformly random batch sizes and random sample
# selections, store every transition in the replay buffer, and record the final
# accuracy, precision and total budget of each episode.

import torch
import numpy as np

# Bookkeeping collected across all warm-start episodes.
episode_number = 1
batches = []
episode_losses = []
episode_durations = []
episode_scores = []
episode_precisions = []

for _ in range(WARM_START_EPISODES_BATCH_AGENT):
    print("Episode {}.".format(episode_number))

    # Start a fresh episode; a target budget of 1.0 is requested for the warm start.
    state, next_action, indicies_unknown, reward = batch_env.reset(code_state="Warm-Start", target_precision=TARGET_PRECISION, target_budget=1.0)
    episode_duration = CLASSIFIER_NUMBER_OF_CLASSES
    done = False

    # Keep acting until the environment reports a terminal state.
    while not done:
        # Random batch size between 1 and the number of remaining actions (inclusive).
        batch = torch.randint(1, batch_env.n_actions + 1, (1,)).item()
        batches.append(batch)

        # Draw `batch` distinct action indices uniformly at random.
        candidate_indices = range(0, batch_env.n_actions)
        batch_actions_indices = torch.tensor(np.random.choice(candidate_indices, batch, replace=False))

        next_state, next_action, indicies_unknown, reward, done = batch_env.step(batch_actions_indices)

        # An empty next-action list is replaced by a single dummy entry before storing.
        if next_action == []:
            next_action.append(np.array([0]))

        # The batch size itself is what gets stored as the action.
        replay_buffer.store_transition(state, batch, reward, next_state, next_action, done)

        episode_duration += batch
        state = next_state

    # Record the last accuracy / precision of the episode and its total budget.
    episode_scores.append(batch_env.return_episode_qualities()[-1])
    episode_precisions.append(batch_env.return_episode_precisions()[-1])
    episode_durations.append(episode_duration)
    episode_number += 1

    torch.cuda.empty_cache()  # Clear unused memory after each episode.

# The bias initialization is minus half of the mean warm-start episode duration.
av_episode_duration = np.mean(episode_durations)
BIAS_INITIALIZATION = - av_episode_duration / 2

In [None]:
import torch
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import interp1d


def _plot_smoothed_series(ax, values, color, title, ylabel, legend_label=None, num_points=500):
    """Draw one per-episode metric on `ax` as markers plus an interpolated curve.

    Args:
        ax: Matplotlib axes to draw on.
        values: One numeric value per episode (list or tensor).
        color: Matplotlib colour used for both the markers and the curve.
        title: Panel title (left aligned).
        ylabel: Label of the y axis.
        legend_label: Optional legend text attached to the markers.
        num_points: Number of samples used to draw the interpolated curve.
    """
    ypoints = torch.as_tensor(values, dtype=torch.float64).detach().cpu().numpy()
    xpoints = np.arange(len(ypoints))
    ax.plot(xpoints, ypoints, 'o', color=color, label=legend_label)

    # interp1d needs at least (order + 1) points, so lower the order for very
    # short series and skip the curve entirely when fewer than two points exist.
    if len(ypoints) >= 2:
        kind = ('linear', 'quadratic', 'cubic')[min(len(ypoints), 4) - 2]
        xnew = np.linspace(xpoints.min(), xpoints.max(), num_points)
        ax.plot(xnew, interp1d(xpoints, ypoints, kind=kind)(xnew), color=color)

    ax.set_title(title, loc='left')
    ax.set_xlabel("Episodes")
    ax.set_ylabel(ylabel)
    if legend_label is not None:
        ax.legend()


def _max_legend(prefix, values):
    """Return `prefix` followed by the (truncated) maximum of `values`, or None if empty."""
    if len(values) == 0:
        return None
    return prefix + str(float(max(values)))[:4]


unlabeled_pool_size = len(cifar10_dataset.warm_start_data)
budget_title = "Budget per episode. *Size of unlabeled data: " + str(unlabeled_pool_size)

# One figure with four stacked panels. (Previously every panel opened its own
# figure, and the accuracy and precision panels both asked for grid slot 3.)
fig, axes = plt.subplots(4, 1, figsize=(20, 16))

# Total budget per episode, as an absolute number of samples.
_plot_smoothed_series(axes[0], episode_durations, 'm', budget_title, "Budget size (number of samples)")

# Total budget per episode, relative to the size of the unlabeled pool.
_plot_smoothed_series(
    axes[1],
    [x / unlabeled_pool_size for x in episode_durations],
    'k',
    budget_title,
    "Budget size (fraction of the UD)",
)

# Final achieved accuracy per episode.
_plot_smoothed_series(
    axes[2],
    episode_scores,
    'c',
    "Final achieved accuracy per episode",
    "ACC",
    legend_label=_max_legend("Maximum ACC: ", episode_scores),
)

# Final achieved precision per episode.
_plot_smoothed_series(
    axes[3],
    episode_precisions,
    'y',
    "Final achieved precision per episode",
    "Precision",
    legend_label=_max_legend("Maximum precision: ", episode_precisions),
)

fig.tight_layout()
plt.show()

In [None]:
import torch

# Convert the warm-start precisions to a tensor. `as_tensor` returns an existing
# tensor unchanged, so re-running this cell does not trigger a copy warning.
episode_precisions = torch.as_tensor(episode_precisions)
max_precision = torch.max(episode_precisions)

# Budgets of all warm-start episodes that reached the best precision. The
# maximum is computed once above instead of once per loop iteration.
warm_start_batches = [
    duration
    for precision, duration in zip(episode_precisions, episode_durations)
    if precision >= max_precision
]

# Target budget: the smallest such budget, relative to the warm-start pool size.
TARGET_BUDGET = min(warm_start_batches)/(len(cifar10_dataset.warm_start_data))
print("Target budget is {}.".format(TARGET_BUDGET))
# NOTE: TARGET_PRECISION is not derived from the warm-start results here; the
# value set earlier in the notebook stays in effect.
print("Target precision is {}.".format(TARGET_PRECISION))

In [None]:
from batch_dqn import DQN

# Settings of the batch agent. `bias_average` receives the value computed at
# the end of the warm-start cell.
dqn_settings = dict(
    observation_length=STATE_DATA,
    learning_rate=LEARNING_RATE,
    batch_size=BATCH_SIZE,
    target_copy_factor=TARGET_COPY_FACTOR,
    bias_average=BIAS_INITIALIZATION,
)
batch_agent = DQN(**dqn_settings)

In [None]:
# Pre-train the agent on the transitions collected during the warm start.
for update_number in range(1, NN_UPDATES_PER_EPOCHS_BATCH_AGENT + 1):
    print("Update:", update_number)
    sampled_transitions = replay_buffer.sample_minibatch(BATCH_SIZE)
    td_errors = batch_agent.train(sampled_transitions)
    # Feed the new TD errors back to the buffer for the sampled entries.
    replay_buffer.update_td_errors(td_errors, sampled_transitions.indices)
    torch.cuda.empty_cache()  # Clear unused memory after each update.

In [None]:
# BATCH-AGENT TRAINING.
#
# Each epoch runs several episodes driven by the agent, stores their transitions
# in the replay buffer, records the epoch's best episode, and then performs a
# round of agent updates.

# Per-epoch summaries: precision, accuracy and budget of the best episode.
agent_epoch_durations = []
agent_epoch_scores = []
agent_epoch_precisions = []

for epoch in range(TRAINING_EPOCHS_BATCH_AGENT):
    print("Training epoch {}.".format(epoch+1))

    # Per-episode results of the current epoch.
    agent_episode_durations = []
    agent_episode_scores = []
    agent_episode_precisions = []

    for training_episode in range(TRAINING_EPISODES_PER_EPOCH_BATCH_AGENT):

        print("- Training episode {}.".format(training_episode+1))

        # Reset the environment to start a new episode. The action description
        # returned by reset() feeds the first get_action() call; afterwards the
        # one returned by the latest step() is used.
        print("- Reset.")
        state, next_action_batch_size, next_action_unlabeled_data, reward = batch_env.reset(code_state="Agent", target_precision=TARGET_PRECISION, target_budget=TARGET_BUDGET)
        done = False
        episode_duration = CLASSIFIER_NUMBER_OF_CLASSES

        # Run an episode.
        while not done:
            selected_batch, selected_indices = batch_agent.get_action(code_state="Agent", dataset=cifar10_dataset, model=classifier, state=state, next_action_batch=next_action_batch_size, next_action_unlabeled_data=next_action_unlabeled_data)
            print("- Step.")
            next_state, next_action_batch_size, next_action_unlabeled_data, reward, done = batch_env.step(selected_indices)
            # An empty next-action list is replaced by a single dummy entry before storing.
            if next_action_batch_size==[]:
                next_action_batch_size.append(np.array([0]))

            print("- Buffer.")
            replay_buffer.store_transition(state, selected_batch, reward, next_state, next_action_batch_size, done)

            # Change the state of the environment.
            # NOTE(review): the warm-start loop keeps `next_state` as returned
            # by the environment, whereas here it is converted to a float32
            # tensor on `device` -- confirm the replay buffer accepts both.
            state = torch.tensor(next_state, dtype=torch.float32).to(device)
            episode_duration += selected_batch
            print("---   Selected batch is {}.".format(selected_batch))

        print("\n")

        agent_episode_final_acc = batch_env.return_episode_qualities()
        agent_episode_scores.append(agent_episode_final_acc[-1])
        agent_episode_final_precision = batch_env.return_episode_precisions()
        agent_episode_precisions.append(agent_episode_final_precision[-1])
        agent_episode_durations.append(episode_duration)

    # Best episode of the epoch: highest precision, ties broken by the smallest
    # budget. All three epoch lists now receive one scalar from that episode
    # (previously the score list received a list of accuracies).
    maximum_epoch_precision = max(agent_episode_precisions)
    best_episode = min(
        (i for i, precision in enumerate(agent_episode_precisions) if precision == maximum_epoch_precision),
        key=lambda i: agent_episode_durations[i],
    )
    agent_epoch_precisions.append(maximum_epoch_precision)
    agent_epoch_scores.append(agent_episode_scores[best_episode])
    agent_epoch_durations.append(agent_episode_durations[best_episode])

    torch.cuda.empty_cache()  # Clear unused memory after each epoch's episodes.

    # NEURAL NETWORK UPDATES.
    for update in range(NN_UPDATES_PER_EPOCHS_BATCH_AGENT):
        minibatch = replay_buffer.sample_minibatch(BATCH_SIZE)
        td_error = batch_agent.train(minibatch)
        replay_buffer.update_td_errors(td_error, minibatch.indices)
        torch.cuda.empty_cache()  # Clear unused memory after each update.

In [None]:
# Plot precisions.
from scipy.interpolate import make_interp_spline

# Build one smoothed curve (values scaled by 100) for the warm-start episodes
# and one for the agent's training epochs.
precision_curves = []
for precision_values, curve_color in (
    (episode_precisions, 'y'),
    (agent_epoch_precisions, 'm'),
):
    xpoints = np.array(range(0, len(precision_values)))
    ypoints = np.array([x*100 for x in precision_values])
    xnew = np.linspace(xpoints.min(), xpoints.max(), 150)
    spline = make_interp_spline(xpoints, ypoints, k=3)
    precision_curves.append((xnew, spline(xnew), curve_color))

plt.figure(figsize=(20,10))
for xnew, power_smooth, curve_color in precision_curves:
    plt.plot(xnew, power_smooth, color=curve_color, linewidth=2.5)
plt.legend(["Warm-start", "Agent"])
plt.xlabel("Epoch")
plt.ylabel("Precision")

plt.show()

In [None]:
# Plot budgets.
#
# Budgets are shown as a percentage of the pool each phase draws from. The pool
# sizes are read from the dataset *instance* (`cifar10_dataset`); the class
# `CIFAR10Dataset` was referenced here before, unlike in every other cell.
warm_start_xpoints = np.array(range(0,len(episode_durations)))
warm_start_ypoints = np.array([(x/len(cifar10_dataset.warm_start_data))*100 for x in episode_durations])
warm_start_xnew = np.linspace(warm_start_xpoints.min(), warm_start_xpoints.max(), 150)
warm_start_spl = make_interp_spline(warm_start_xpoints, warm_start_ypoints, k=3)
warm_start_power_smooth = warm_start_spl(warm_start_xnew)

batch_agent_xpoints = np.array(range(0,len(agent_epoch_durations)))
batch_agent_ypoints = np.array([(x/len(cifar10_dataset.agent_data))*100 for x in agent_epoch_durations])
batch_agent_xnew = np.linspace(batch_agent_xpoints.min(), batch_agent_xpoints.max(), 150)
batch_agent_spl = make_interp_spline(batch_agent_xpoints, batch_agent_ypoints, k=3)
batch_agent_power_smooth = batch_agent_spl(batch_agent_xnew)

plt.figure(figsize=(20,10))
plt.plot(warm_start_xnew, warm_start_power_smooth, color='y', linewidth=2.5)
plt.plot(batch_agent_xnew, batch_agent_power_smooth, color='m', linewidth=2.5)
plt.legend(["Warm-start", "Agent"]) 
plt.xlabel("Epoch")
plt.ylabel("Budget")

plt.show()

In [None]:
# Compare the trained agent (Reinforced Active Learning) with random sampling
# on the "Test methods" split. Every enabled method starts each testing episode
# from a freshly reset environment.
TESTING_EPISODES = 5
CLASSIFIER_NUMBER_OF_CLASSES = 10

Reinforced_Active_Learning = True
RAL_episode_durations = []
RAL_episode_scores = []
RAL_episode_precisions = []

Random_Sampling = True
Random_Sampling_episode_durations = []
Random_Sampling_episode_scores = []
Random_Sampling_episode_precisions = []

for episode in range(TESTING_EPISODES):
    print("Testing episode {}.".format(episode + 1))

    # Reinforced Active Learning
    if Reinforced_Active_Learning:
        print("- Reinforced Active Learning.")

        state, next_action_batch_size, next_action_unlabeled_data, _ = batch_env.reset(code_state="Test methods", target_precision=TARGET_PRECISION, target_budget=TARGET_BUDGET)
        RAL_episode_duration = CLASSIFIER_NUMBER_OF_CLASSES
        done = False

        while not done:
            selected_batch, selected_indices = batch_agent.get_action(code_state="Test methods", dataset=cifar10_dataset, model=classifier, state=state, next_action_batch=next_action_batch_size, next_action_unlabeled_data=next_action_unlabeled_data)
            next_state, next_action_batch_size, next_action_unlabeled_data, _, done = batch_env.step(selected_indices)

            # Advance the state exactly as the training loop does; previously
            # the agent kept acting on the state returned by reset().
            state = torch.tensor(next_state, dtype=torch.float32).to(device)
            RAL_episode_duration += selected_batch

        agent_episode_final_acc = batch_env.return_episode_qualities()
        RAL_episode_scores.append(agent_episode_final_acc[-1])
        agent_episode_final_precision = batch_env.return_episode_precisions()
        RAL_episode_precisions.append(agent_episode_final_precision[-1])
        RAL_episode_durations.append(RAL_episode_duration)

    # Random Sampling
    if Random_Sampling:
        print("- Random Sampling.")

        # Reset again: without it the baseline would start on the environment
        # that the agent above has already driven to its terminal state.
        # NOTE(review): whether two resets yield the same initial labelled set
        # depends on LalEnvFirstAccuracy.reset -- confirm if an identical
        # starting point is required for the comparison.
        batch_env.reset(code_state="Test methods", target_precision=TARGET_PRECISION, target_budget=TARGET_BUDGET)
        Random_Sampling_episode_duration = CLASSIFIER_NUMBER_OF_CLASSES
        done = False

        while not done:
            # Random batch size, then that many distinct random action indices.
            batch = random.randint(1, batch_env.n_actions)
            batch_actions_indices = np.array(random.sample(range(0, batch_env.n_actions), batch))
            _, _, _, _, done = batch_env.step(batch_actions_indices)

            Random_Sampling_episode_duration += batch

        episode_final_acc = batch_env.return_episode_qualities()
        Random_Sampling_episode_scores.append(episode_final_acc[-1])
        episode_final_precision = batch_env.return_episode_precisions()
        Random_Sampling_episode_precisions.append(episode_final_precision[-1])
        Random_Sampling_episode_durations.append(Random_Sampling_episode_duration)


In [None]:
# Plot precisions.

# Build one smoothed curve (values scaled by 100) per sampling strategy.
precision_curves = []
for strategy_precisions, curve_color in (
    (Random_Sampling_episode_precisions, 'y'),
    (RAL_episode_precisions, 'm'),
):
    xpoints = np.array(range(0, len(strategy_precisions)))
    ypoints = np.array([x*100 for x in strategy_precisions])
    xnew = np.linspace(xpoints.min(), xpoints.max(), 150)
    spline = make_interp_spline(xpoints, ypoints, k=3)
    precision_curves.append((xnew, spline(xnew), curve_color))

plt.figure(figsize=(20,10))
for xnew, power_smooth, curve_color in precision_curves:
    plt.plot(xnew, power_smooth, color=curve_color, linewidth=2.5)
plt.legend(["RS", "RAL"])
plt.xlabel("Episode")
plt.ylabel("Precision")

plt.show()

In [None]:
# Plot budgets.

# Build one smoothed curve per sampling strategy; budgets are expressed as a
# percentage of the test-methods pool.
budget_curves = []
for strategy_durations, curve_color in (
    (Random_Sampling_episode_durations, 'y'),
    (RAL_episode_durations, 'm'),
):
    xpoints = np.array(range(0, len(strategy_durations)))
    ypoints = np.array([(x/len(cifar10_dataset.test_methods_data))*100 for x in strategy_durations])
    xnew = np.linspace(xpoints.min(), xpoints.max(), 150)
    spline = make_interp_spline(xpoints, ypoints, k=3)
    budget_curves.append((xnew, spline(xnew), curve_color))

plt.figure(figsize=(20,10))
for xnew, power_smooth, curve_color in budget_curves:
    plt.plot(xnew, power_smooth, color=curve_color, linewidth=2.5)
plt.legend(["RS", "RAL"])
plt.xlabel("Episode")
plt.ylabel("Budget")

plt.show()