In [None]:
import numpy as np
import tensorflow as tf
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
# Depending on the classification model use, we might need to import other packages.
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from datasets import DatasetUCI
from envs import LalEnvFirstAccuracy
from helpers import Minibatch, ReplayBuffer
from dqn import DQN
from Test_AL import policy_rl
import matplotlib.pyplot as plt
import random

Parameters for the dataset and the model. Available UCI datasets and their sizes (number of data points):

- australian: 690
- breast_cancer: 263
- diabetis: 768
- flare_solar: 144
- german: 1000
- heart: 270
- mushrooms: 8124
- waveform: 5000
- wdbc: 569

In [None]:
# Configuration shared by the dataset, environment and agent cells below.
N_STATE_ESTIMATION = 30 # Passed to DatasetUCI as n_state_estimation and to DQN as observation_length.
SIZE = -1 # Passed to DatasetUCI as size. NOTE(review): -1 presumably means "no size limit" - confirm in DatasetUCI.
SUBSET = -1 # -1 for using all data points, 0 for even, 1 for odd.
N_JOBS = 1 # Can set more if we want to parallelise.
# Datasets used to generate training and validation episodes.
# Remove from this list the dataset that will be used for testing.
# All available names:
# ['australian', 'breast_cancer', 'diabetis', 'flare_solar', 'german', 'heart', 'mushrooms', 'waveform', 'wdbc']
# possible_dataset_names = ['breast_cancer', 'diabetis', 'flare_solar', 'german', 'heart', 'mushrooms', 'waveform', 'wdbc']
possible_dataset_names = ['mushrooms']
# Dataset(s) used to generate the test episodes.
test_dataset_names = ['waveform']
# The quality of the classifier is measured with the callable "quality_method".
QUALITY_METHOD = metrics.accuracy_score

In [None]:
import shutil
import os

# Start from a clean slate: remove artefacts of previous runs that live
# next to the notebook. Missing directories are silently ignored.
cwd = os.getcwd()

stale_directories = (
    '/__pycache__',
    '/agents',
    '/AL_results',
    '/checkpoints',
    '/summaries',
    '/Output images',
)
for stale_directory in stale_directories:
    shutil.rmtree(cwd+stale_directory, ignore_errors=True)

Initialise a dataset that will contain a sample of datapoints from one of the indicated datasets.

In [None]:
# Dataset used for training (and validation) episodes, drawn from possible_dataset_names.
dataset = DatasetUCI(possible_dataset_names, n_state_estimation=N_STATE_ESTIMATION, subset=SUBSET, size=SIZE)
# If we want to measure test error along with training.
# Built with the same settings, but drawn from test_dataset_names.
dataset_test = DatasetUCI(test_dataset_names, n_state_estimation=N_STATE_ESTIMATION, subset=SUBSET, size=SIZE)
# dataset_test is built from test_dataset_names; see the size table above for the number of data points.

In [None]:
# Classifier handed to the active-learning environments created below.
# Other imported options (svm, RandomForestClassifier) can be substituted here.
model = LogisticRegression(n_jobs=N_JOBS)

Initialise the environment.

In [None]:
# Environment for training and validation episodes (training dataset).
env = LalEnvFirstAccuracy(dataset, model, quality_method=QUALITY_METHOD)
# Environment for test episodes (held-out dataset).
# NOTE(review): both environments receive the same `model` object - confirm they do not
# interfere with each other through shared classifier state.
env_test = LalEnvFirstAccuracy(dataset_test, model, quality_method=QUALITY_METHOD)
# Clear the default TensorFlow graph before the agent is built.
# NOTE(review): tf.reset_default_graph is the TF1 API name - confirm the installed TensorFlow version exposes it.
tf.reset_default_graph()

Parameters for training RL.

In [None]:
# Directory handed to DQN as experiment_dir; the resulting agent of this experiment is written there.
# NOTE(review): the name mentions "australian" and "8-to-1", while the datasets configured above are
# possible_dataset_names / test_dataset_names - confirm the directory name is still the intended one.
DIRNAME = './agents/1-australian-logreg-8-to-1/'

# Adaptive batch size.
MAX_BATCH = 20 # Maximum batch size (number of datapoints queried) per iteration.

# Replay buffer parameters.
# NOTE(review): 1e4 is a float literal - confirm ReplayBuffer accepts a non-integer buffer_size.
REPLAY_BUFFER_SIZE = 1e4
PRIOROTIZED_REPLAY_EXPONENT = 3 # Passed to ReplayBuffer as prior_exp (prioritized replay exponent).

# Agent parameters.
BATCH_SIZE = 32 # Minibatch size sampled from the replay buffer for each network update.
LEARNING_RATE = 1e-3
TARGET_COPY_FACTOR = 0.01
BIAS_INITIALIZATION = 0 # Default 0. Overwritten with minus half of the average episode duration by the warm-start cell.

# Warm start parameters.
WARM_START_EPISODES = 150 # Number of random episodes used to fill the replay buffer. Reduce for test.
NN_UPDATES_PER_WARM_START = 100 # Number of network updates performed on the warm-start transitions.

# Episode simulation parameters.
# Epsilon is decreased linearly from EPSILON_START to EPSILON_END over EPSILON_STEPS training iterations.
EPSILON_START = 1
EPSILON_END = 0.1
EPSILON_STEPS = 1000

# Training parameters.
TRAINING_EPOCHS = 1000 # Number of training iterations. Reduce for test.
TRAINING_EPISODES_PER_EPOCH = 10 # At each training iteration x episodes are simulated.
NN_UPDATES_PER_EPOCHS = 60 # At each training iteration x gradient steps are made.

# Validation and test parameters.
VALIDATION_ITERATIONS = 500 # Number of validation episodes per round. Reduce for test.
TESTING_ITERATIONS = 500 # Number of test episodes per round. Reduce for test.
VALIDATION_TESTING_FREQUENCY = 100 # Every x iterations, val and test are performed.

Initialise replay buffer.

In [None]:
# Buffer that stores the transitions collected during warm start and training;
# minibatches for the Q-network updates are sampled from it.
replay_buffer = ReplayBuffer(buffer_size=REPLAY_BUFFER_SIZE, prior_exp=PRIOROTIZED_REPLAY_EXPONENT)

Warm-start the replay buffer with random episodes. 

Collect episodes.

In [None]:
# Warm start: fill the replay buffer with transitions from episodes in which
# both the batch size and the queried datapoints are chosen at random.
# Keep track of episode durations (budget spent) and final scores.
episode_durations = []
episode_scores = []
episode_number = 1

for _ in range(WARM_START_EPISODES):

    print("Episode {}.".format(episode_number))
    # Reset the environment to start a new episode.
    # state: vector representation of the state of the environment (depends on the classifier).
    # next_action: vector representations of all actions available at the next step (one column per action).
    state, next_action, reward = env.reset()
    done = False
    # NOTE(review): the budget counter starts at 6, presumably the number of datapoints
    # that are already labelled after env.reset() - confirm against the environment.
    episode_duration = 6

    # Before we reach a terminal state, make steps.
    while not done:

        # Choose a random batch size; the episode ends as soon as the drawn
        # batch does not fit into the remaining pool of actions.
        batch = random.randint(1, MAX_BATCH)
        if batch > env.n_actions:
            done = True
            break

        # Draw `batch` distinct action indices from 0..n_actions-1.
        a = np.array(random.sample(range(env.n_actions), batch))
        action = next_action[:, a]
        next_state, next_action, reward, done = env.step(a)

        # The buffer stores a fixed-size action block: MAX_BATCH slots,
        # zero-padded beyond the actions actually taken.
        buffer_action = [[0, 0, 0] for _slot in range(MAX_BATCH)]
        # Fix: store the i-th taken action in slot i. The previous code wrote
        # action.T[0] into every slot, so only the first action was recorded.
        for i, action_vector in enumerate(action.T):
            buffer_action[i] = action_vector
        replay_buffer.store_transition(state, buffer_action, reward, next_state, next_action, done)

        # Get ready for next step.
        state = next_state
        episode_duration += batch

    episode_final_acc = env.return_episode_qualities()
    episode_scores.append(episode_final_acc[-1])
    episode_durations.append(episode_duration)
    episode_number += 1

# Compute the average episode duration of episodes generated during the warm start procedure.
av_episode_duration = np.mean(episode_durations)
# The agent's bias is initialised with minus half of the average duration.
BIAS_INITIALIZATION = -av_episode_duration/2

Plots for warm-start episodes.

In [None]:
# Budget spent in each warm-start episode, as a fraction of the unlabeled pool.
# One point per warm-start episode (WARM_START_EPISODES in total).
pool_size = len(dataset.train_data)
xpoints = np.arange(len(episode_durations))
ypoints = np.array([duration/pool_size for duration in episode_durations])
plt.figure(figsize=(20,10))
plt.subplot(2, 1, 1)
plt.plot(xpoints, ypoints)
plot_label = "Budget per episode. *Size of unlabeled data: " + str(pool_size)
plt.title(plot_label, loc="left")
plt.xlabel("Episodes")
plt.ylabel("Budget size (percentage of the UD)")

# Final accuracy reached in each warm-start episode (drawn in its own figure).
xpoints = np.arange(len(episode_scores))
ypoints = np.asarray(episode_scores)
plt.figure(figsize=(20,10))
plt.subplot(2, 1, 2)
plt.plot(xpoints, ypoints)
plt.title("Final achieved accuracy per episode", loc="left")
plt.xlabel("Episodes")
plt.ylabel("ACC")
# The legend reports the best accuracy, truncated to four characters.
best_warm_start_score = str(max(episode_scores))[:4]
legend_label = "Maximum ACC: " + best_warm_start_score
plt.legend([legend_label])

plt.show()

Initialize the DQN agent.

In [None]:
# Build the DQN agent. BIAS_INITIALIZATION holds the value computed by the
# warm-start cell (minus half of the average warm-start episode duration),
# so that cell must be run before this one.
agent = DQN(experiment_dir=DIRNAME,
            observation_length=N_STATE_ESTIMATION,
            learning_rate=LEARNING_RATE,
            batch_size=BATCH_SIZE,
            target_copy_factor=TARGET_COPY_FACTOR,
            bias_average=BIAS_INITIALIZATION,
            max_batch=MAX_BATCH,
           )

Do updates of the network based on warm start episodes.

In [None]:
# Pre-train the Q-network on the transitions gathered during warm start.
for update_step in range(NN_UPDATES_PER_WARM_START):
    # Draw a minibatch from the replay buffer (sampling follows the buffer's
    # per-transition sampling probabilities).
    minibatch = replay_buffer.sample_minibatch(BATCH_SIZE)
    # One training step; the agent reports the temporal difference errors.
    td_error = agent.train(minibatch)
    # Feed the errors back so the buffer can refresh its sampling probabilities.
    replay_buffer.update_td_errors(td_error, minibatch.indices)

# Train RL

Run multiple training iterations. Each iteration consists of:
- Generating episodes following agent's actions with exploration.
- Validation and test episodes for evaluating performance.
- Q-network updates.

In [None]:
# Bookkeeping containers initialised before the training loop.
train_episode_rewards, i_episode = [], 0

In [None]:
# Main RL training loop. Every iteration (epoch):
#   1. simulates TRAINING_EPISODES_PER_EPOCH epsilon-greedy episodes and stores
#      their transitions in the replay buffer,
#   2. every VALIDATION_TESTING_FREQUENCY iterations runs validation episodes
#      (on env) and test episodes (on env_test) with the current policy,
#   3. performs NN_UPDATES_PER_EPOCHS Q-network updates.
# The final_* lists hold one inner list per epoch / evaluation round, with one
# entry (final score or spent budget) per episode.
is_training = True
final_episode_scores_training = []
final_episode_durations_training = []
final_episode_scores_validation = []
final_episode_durations_validation = []
final_episode_scores_testing = []
final_episode_durations_testing = []
validation_and_testing_round = 0

for iteration in range(TRAINING_EPOCHS):

    print("ITERATION {}.".format(iteration+1))
    # GENERATE NEW EPISODES.
    # Compute epsilon value according to the schedule (linear decay, floored at EPSILON_END).
    epsilon = max(EPSILON_END, EPSILON_START-iteration*(EPSILON_START-EPSILON_END)/EPSILON_STEPS)

    # Simulate training episodes.
    episode_scores_training = []
    episode_durations_training = []

    for _ in range(TRAINING_EPISODES_PER_EPOCH):

        # Reset the environment to start a new episode.
        state, next_action, reward = env.reset()
        batch = 6
        done = False
        # Fix: the budget counter is reset for every episode. It used to be
        # reset once per epoch, so budgets accumulated across episodes.
        # NOTE(review): 6 is presumably the number of initially labelled points - confirm.
        episode_duration = 6

        # Run an episode.
        while not done:
            train_batch = env._find_batch_size(batch, reward, env.n_actions, MAX_BATCH)
            if train_batch > env.n_actions:
                done = True
                break

            # Action proposed by the agent for the adaptive batch size.
            action = agent.get_action(state, next_action, train_batch)
            # Assumes the agent selects train_batch datapoints - TODO confirm in DQN.get_action.
            n_queried = train_batch

            # With epsilon probability, take a random action instead.
            if np.random.ranf() < epsilon:
                # Fix: cap the random batch at the number of remaining actions,
                # otherwise random.sample raises ValueError near the end of an episode.
                batch = random.randint(1, min(MAX_BATCH, env.n_actions))
                action = np.array(random.sample(range(env.n_actions), batch))
                n_queried = batch

            # taken_action holds the vector representation of every taken action (one column each).
            taken_action = next_action[:, action]

            # Make another step.
            next_state, next_action, reward, done = env.step(action)

            # Store the step in the replay buffer as a fixed-size action block:
            # MAX_BATCH slots, zero-padded beyond the actions actually taken.
            buffer_action = [[0, 0, 0] for _slot in range(MAX_BATCH)]
            # Fix: store the i-th taken action in slot i (previously taken_action.T[0]
            # was written into every slot).
            for i, action_vector in enumerate(taken_action.T):
                buffer_action[i] = action_vector
            replay_buffer.store_transition(state, buffer_action, reward, next_state, next_action, done)

            # Change a state of environment.
            state = next_state

            # Fix: count the queried datapoints exactly once per step (the budget
            # used to be incremented twice, always by train_batch).
            episode_duration += n_queried

        episode_final_acc_training = env.return_episode_qualities()
        episode_scores_training.append(episode_final_acc_training[-1])
        episode_durations_training.append(episode_duration)

    final_episode_scores_training.append(episode_scores_training)
    final_episode_durations_training.append(episode_durations_training)

    # VALIDATION AND TEST EPISODES.
    if iteration%VALIDATION_TESTING_FREQUENCY == 0:

        validation_and_testing_round += 1
        print("Validation and testing round: ", validation_and_testing_round)

        # Validation episodes are run. Use env for it.
        episode_scores_validation = []
        episode_durations_validation = []

        for i in range(VALIDATION_ITERATIONS):
            print("Validation round:", i)
            episode_duration = 6
            validation_batch = 6
            done = False
            state, next_action, reward = env.reset()
            while not(done):
                validation_batch = env._find_batch_size(validation_batch, reward, env.n_actions, MAX_BATCH)
                if validation_batch > env.n_actions:
                    done = True
                else:
                    action = policy_rl(agent, state, next_action, validation_batch)
                    next_state, next_action, reward, done = env.step(action)
                    state = next_state
                    # Fix: only count a batch towards the budget when it was actually queried.
                    episode_duration += validation_batch
            episode_final_acc_validation = env.return_episode_qualities()
            episode_scores_validation.append(episode_final_acc_validation[-1])
            episode_durations_validation.append(episode_duration)

        # Test episodes are run. Use env_test for it.
        episode_scores_testing = []
        episode_durations_testing = []

        for i in range(TESTING_ITERATIONS):
            print("Testing round:", i)
            episode_duration = 6
            test_batch = 6
            done = False
            state, next_action, reward = env_test.reset()
            while not(done):
                test_batch = env_test._find_batch_size(test_batch, reward, env_test.n_actions, MAX_BATCH)
                if test_batch > env_test.n_actions:
                    done = True
                else:
                    action = policy_rl(agent, state, next_action, test_batch)
                    next_state, next_action, reward, done = env_test.step(action)
                    state = next_state
                    # Fix: only count a batch towards the budget when it was actually queried.
                    episode_duration += test_batch
            # Fix: record the result once per test episode. These three lines
            # used to sit inside the while loop and appended one entry per step.
            episode_final_acc_testing = env_test.return_episode_qualities()
            episode_scores_testing.append(episode_final_acc_testing[-1])
            episode_durations_testing.append(episode_duration)

        final_episode_scores_validation.append(episode_scores_validation)
        final_episode_durations_validation.append(episode_durations_validation)
        final_episode_scores_testing.append(episode_scores_testing)
        final_episode_durations_testing.append(episode_durations_testing)

    # NEURAL NETWORK UPDATES.
    for _ in range(NN_UPDATES_PER_EPOCHS):
        # Sample a minibatch, train on it, and feed the temporal difference
        # errors back into the replay buffer.
        minibatch = replay_buffer.sample_minibatch(BATCH_SIZE)
        td_error = agent.train(minibatch)
        replay_buffer.update_td_errors(td_error, minibatch.indices)

- Total number of validation and testing epochs: 10
- Total number of episodes per validation and testing epochs: 500

#### Plots.

In [None]:
# Make sure the folder that receives the figures of this experiment exists.
results_path = r'./Output images'
results_dir_missing = not os.path.exists(results_path)
if results_dir_missing:
    os.makedirs(results_path)

Maximum accuracy per epoch.

In [None]:
# For every training epoch, record the best final accuracy among its episodes
# and the smallest budget with which that accuracy was reached.
max_score_per_training_epoch = []
budget_for_max_score = []
for epoch_scores, epoch_budgets in zip(final_episode_scores_training, final_episode_durations_training):
    best_epoch_score = max(epoch_scores)
    max_score_per_training_epoch.append(best_epoch_score)
    # Budgets of all episodes that tie for the best score of this epoch.
    minimum_budget_per_max_ACC = [
        budget for score, budget in zip(epoch_scores, epoch_budgets) if score == best_epoch_score
    ]
    budget_for_max_score.append(np.array(minimum_budget_per_max_ACC).min())

# Report the best accuracy over all epochs.
max_ACC = max(max_score_per_training_epoch)
print("The maximum accuracy is {}.".format(max_ACC))

# Report the smallest budget among the epochs that reached that accuracy.
minimum_budget = [
    budget
    for score, budget in zip(max_score_per_training_epoch, budget_for_max_score)
    if score == max_ACC
]
print("The budget for the maximum accuracy is {}.".format(np.array(minimum_budget).min()))

In [None]:
# Three figures summarising training, one point per epoch:
#   1. absolute budget at which the best accuracy of the epoch was reached,
#   2. the same budget as a fraction of the unlabeled pool,
#   3. the best accuracy of the epoch.
# Each figure is also written to the "Output images" folder.
pool_size = len(dataset.train_data)

# 1. Absolute budget.
xpoints = np.arange(len(budget_for_max_score))
ypoints = np.asarray(budget_for_max_score)
plt.figure(figsize=(20,10))
plt.subplot(2, 1, 1)
plt.plot(xpoints, ypoints, color='m')
plot_label = "Budget per max ACC. | Unlabeled data: " + str(pool_size)
plt.title(plot_label, loc="left")
plt.xlabel("Epochs")
plt.ylabel("Budget")
plt.savefig("Output images/TRAINING, Budget.png")

# 2. Budget relative to the size of the unlabeled pool.
xpoints = np.arange(len(budget_for_max_score))
ypoints = np.array([budget/pool_size for budget in budget_for_max_score])
plt.figure(figsize=(20,10))
plt.subplot(2, 1, 1)
plt.plot(xpoints, ypoints, color='k')
plot_label = "Budget (percentage of the UD) per max ACC. | Unlabeled data: " + str(pool_size)
plt.title(plot_label, loc="left")
plt.xlabel("Epochs")
plt.ylabel("Budget")
plt.savefig("Output images/TRAINING, Budget percentage.png")

# 3. Best accuracy per epoch; the legend carries the overall best accuracy and
#    its smallest budget, both truncated to four characters.
xpoints = np.arange(len(max_score_per_training_epoch))
ypoints = np.asarray(max_score_per_training_epoch)
plt.figure(figsize=(20,15))
plt.subplot(2, 1, 2)
plt.plot(xpoints, ypoints, color='c')
plt.title("Max ACC per epoch", loc="left")
plt.xlabel("Epochs")
plt.ylabel("Max ACC")
legend_1 = "Maximum ACC: " + str(np.array(max_score_per_training_epoch).max())[:4]
legend_2 = ", "
legend_3 = "Budget: " + str(np.array(minimum_budget).min())[:4]
plt.legend(["".join((legend_1, legend_2, legend_3))])
plt.savefig("Output images/TRAINING, Max ACC.png")

plt.show()

In [None]:
# Per-episode view of the last training epoch (one point per episode):
# absolute budget, budget relative to the unlabeled pool, and final accuracy.
pool_size = len(dataset.train_data)
last_epoch_budgets = final_episode_durations_training[-1]
last_epoch_scores = final_episode_scores_training[-1]

# Each entry: (figure size, subplot index, x values, y values, colour, title, y-axis label).
final_epoch_panels = [
    (
        (20,10), 1,
        np.arange(len(last_epoch_budgets)),
        np.asarray(last_epoch_budgets),
        'm',
        "Budget | Unlabeled data: " + str(pool_size),
        "Budget",
    ),
    (
        (20,10), 1,
        np.arange(len(last_epoch_budgets)),
        np.array([budget/pool_size for budget in last_epoch_budgets]),
        'k',
        "Budget size (percentage of the UD) | Unlabeled data: " + str(pool_size),
        "Budget",
    ),
    (
        (20,15), 2,
        np.arange(len(last_epoch_scores)),
        np.asarray(last_epoch_scores),
        'c',
        "ACC per episode | Final epoch",
        "ACC",
    ),
]

# Every panel is drawn in a figure of its own.
for panel_size, panel_index, xpoints, ypoints, line_colour, plot_label, y_label in final_epoch_panels:
    plt.figure(figsize=panel_size)
    plt.subplot(2, 1, panel_index)
    plt.plot(xpoints, ypoints, color=line_colour)
    plt.title(plot_label, loc="left")
    plt.xlabel("Episodes")
    plt.ylabel(y_label)

plt.show()

Plots for validation episodes.

In [None]:
# For every validation round (each round consists of VALIDATION_ITERATIONS episodes),
# record the best final accuracy and the smallest budget with which it was reached.
#
# Fixes compared to the previous version of this cell:
#   - the budget is now the minimum over the episodes that actually reached the
#     best accuracy (as in the training and testing cells); it used to be the
#     minimum over almost all episodes of the round, regardless of their score;
#   - no variable named `min` is created, so the builtin min() stays usable in
#     the rest of the notebook.
max_score_per_validation_epoch = []
budget_for_max_score = []
for i in range(len(final_episode_scores_validation)):
    round_scores = final_episode_scores_validation[i]
    round_budgets = final_episode_durations_validation[i]
    best_round_score = np.array(round_scores).max()
    max_score_per_validation_epoch.append(best_round_score)
    # Budgets of all episodes that tie for the best score of this round.
    minimum_budget_per_max_ACC = [
        budget for score, budget in zip(round_scores, round_budgets) if score == best_round_score
    ]
    budget_for_max_score.append(np.array(minimum_budget_per_max_ACC).min())
print("The maximum accuracy is {}.".format(np.array(max_score_per_validation_epoch).max()))

In [None]:
# Plot, per validation round, the best achieved accuracy and the budget at which
# it was reached. There is one point per validation round
# (a round is run every VALIDATION_TESTING_FREQUENCY training iterations and
# consists of VALIDATION_ITERATIONS episodes).
#
# Fix: the titles and axis labels used to say "Mean budget" and "Episodes",
# although budget_for_max_score holds one budget per validation round and is
# not an average. The labels now describe the plotted data.
pool_size = len(dataset.train_data)

# Absolute budget per validation round.
xpoints = np.array(range(0,len(budget_for_max_score)))
ypoints = np.array(budget_for_max_score)
plt.figure(figsize=(20,10))
plt.subplot(2, 1, 1)
plt.plot(xpoints, ypoints, color='m')
plot_label = "Budget per max ACC. | Unlabeled data: " + str(pool_size)
plt.title(plot_label, loc = "left")
plt.xlabel("Validation rounds")
plt.ylabel("Budget")

# Budget relative to the size of the unlabeled pool.
xpoints = np.array(range(0,len(budget_for_max_score)))
ypoints = np.array([x/pool_size for x in budget_for_max_score])
plt.figure(figsize=(20,10))
plt.subplot(2, 1, 1)
plt.plot(xpoints, ypoints, color='k')
plot_label = "Budget (percentage of the UD) per max ACC. | Unlabeled data: " + str(pool_size)
plt.title(plot_label, loc = "left")
plt.xlabel("Validation rounds")
plt.ylabel("Budget")

# Best accuracy per validation round.
xpoints = np.array(range(0,len(max_score_per_validation_epoch)))
ypoints = np.array(max_score_per_validation_epoch)
plt.figure(figsize=(20,15))
plt.subplot(2, 1, 2)
plt.plot(xpoints, ypoints, color='c')
plt.title("Max ACC per validation round", loc = "left")
plt.xlabel("Validation rounds")
plt.ylabel("Max ACC")

plt.show()

Plots for testing episodes.

In [None]:
# For every testing round (each round consists of TESTING_ITERATIONS episodes),
# record the best final accuracy and the smallest budget with which it was reached.
#
# Fix: the unused assignment `min = 0` was removed. It shadowed the builtin
# min() for every cell executed afterwards.
max_score_per_testing_epoch = []
budget_for_max_score = []
for i in range(len(final_episode_scores_testing)):
    round_scores = final_episode_scores_testing[i]
    round_budgets = final_episode_durations_testing[i]
    best_round_score = np.array(round_scores).max()
    max_score_per_testing_epoch.append(best_round_score)
    # Budgets of all entries that tie for the best score of this round.
    minimum_budget_per_max_ACC = [
        budget for score, budget in zip(round_scores, round_budgets) if score == best_round_score
    ]
    budget_for_max_score.append(np.array(minimum_budget_per_max_ACC).min())

# Print maximum ACC.
max_ACC = max(max_score_per_testing_epoch)
print("The maximum accuracy is {}.".format(max_ACC))

# Print smallest budget for the maximum ACC (among the rounds that reached it).
minimum_budget = [
    budget
    for score, budget in zip(max_score_per_testing_epoch, budget_for_max_score)
    if score == max_ACC
]
print("The budget for the maximum accuracy is {}.".format(np.array(minimum_budget).min()))

In [None]:
# Plot, per testing round, the best achieved accuracy and the budget at which
# it was reached. There is one point per testing round
# (a round is run every VALIDATION_TESTING_FREQUENCY training iterations and
# consists of TESTING_ITERATIONS episodes).
#
# Fixes compared to the previous version of this cell:
#   - the size of the unlabeled pool was read from dataset.test_data, i.e. from
#     the *training* dataset object. Test episodes run on env_test, which is
#     built from dataset_test, so the pool size is now taken from
#     dataset_test.train_data (mirroring the use of dataset.train_data for env).
#     NOTE(review): assumes train_data is the pool the environment queries from - confirm in LalEnvFirstAccuracy.
#   - titles and axis labels said "Mean budget" and "Episodes", although the
#     data holds one budget per testing round and is not an average.
pool_size = len(dataset_test.train_data)

# Absolute budget per testing round.
xpoints = np.array(range(0,len(budget_for_max_score)))
ypoints = np.array(budget_for_max_score)
plt.figure(figsize=(20,10))
plt.subplot(2, 1, 1)
plt.plot(xpoints, ypoints, color='m')
plot_label = "Budget per max ACC. | Unlabeled data: " + str(pool_size)
plt.title(plot_label, loc = "left")
plt.xlabel("Testing rounds")
plt.ylabel("Budget")

# Budget relative to the size of the unlabeled pool.
xpoints = np.array(range(0,len(budget_for_max_score)))
ypoints = np.array([x/pool_size for x in budget_for_max_score])
plt.figure(figsize=(20,10))
plt.subplot(2, 1, 1)
plt.plot(xpoints, ypoints, color='k')
plot_label = "Budget (percentage of the UD) per max ACC. | Unlabeled data: " + str(pool_size)
plt.title(plot_label, loc = "left")
plt.xlabel("Testing rounds")
plt.ylabel("Budget")

# Best accuracy per testing round.
xpoints = np.array(range(0,len(max_score_per_testing_epoch)))
ypoints = np.array(max_score_per_testing_epoch)
plt.figure(figsize=(20,15))
plt.subplot(2, 1, 2)
plt.plot(xpoints, ypoints, color='c')
plt.title("Max ACC per testing round", loc = "left")
plt.xlabel("Testing rounds")
plt.ylabel("Max ACC")

plt.show()