In [1]:
import numpy as np
import tensorflow as tf
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
# Depending on the classification model used, we might need to import other packages.
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from datasets import DatasetUCI
from envs import LalEnvFirstAccuracy
from helpers import Minibatch, ReplayBuffer
from dqn import DQN
from Test_AL import policy_rl
import matplotlib.pyplot as plt
import random

import shutil
import os

from scipy.interpolate import make_interp_spline, BSpline

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


Parameters for dataset and model.

Available datasets and the number of data points in each:

- australian: 690
- breast_cancer: 263
- diabetis: 768
- flare_solar: 144
- german: 1000
- heart: 270
- mushrooms: 8124
- waveform: 5000
- wdbc: 569

In [2]:
# --- Dataset construction (all three are forwarded to DatasetUCI) ---
N_STATE_ESTIMATION = 30  # Also used as the DQN observation length.
SUBSET = -1  # -1 for using all data points, 0 for even, 1 for odd.
SIZE = -1

# --- Which datasets to use ---
# Available names: 'australian', 'breast_cancer', 'diabetis', 'flare_solar',
# 'german', 'heart', 'mushrooms', 'waveform', 'wdbc'.
# Keep the dataset(s) used for testing out of the training list.
test_dataset_names = ['waveform']
possible_dataset_names = ['mushrooms']

# --- Classifier / evaluation ---
N_JOBS = 1  # Can set more if we want to parallelise.
# The quality is measured according to a given quality measure "quality_method".
QUALITY_METHOD = metrics.accuracy_score

In [3]:
# Start from a clean slate: remove artefacts of earlier runs from the current
# directory. ignore_errors=True turns a missing directory into a no-op.
cwd = os.getcwd()
for stale_dir in ('__pycache__', 'agents', 'AL_results', 'checkpoints', 'summaries', 'Output images'):
    shutil.rmtree(cwd + '/' + stale_dir, ignore_errors=True)

Initialise a dataset that will contain a sample of datapoints from one of the indicated classes.

In [4]:
# Dataset used by the training environment, built from possible_dataset_names.
dataset = DatasetUCI(possible_dataset_names, n_state_estimation=N_STATE_ESTIMATION, subset=SUBSET, size=SIZE)
# If we want to measure test error along with training: a second dataset built
# from test_dataset_names with the same settings.
dataset_test = DatasetUCI(test_dataset_names, n_state_estimation=N_STATE_ESTIMATION, subset=SUBSET, size=SIZE)
# NOTE(review): a previous comment here described the Diabetis dataset (768 points),
# but test_dataset_names is ['waveform'], listed with 5000 points in the table above.

In [5]:
# Classifier passed to both environments below. svm and RandomForestClassifier are
# imported at the top of the notebook as alternatives.
model = LogisticRegression(n_jobs=N_JOBS)

Initialise the environment.

In [6]:
# Training and test environments. Both are given the same `model` object and the
# same quality measure; they differ only in the dataset they wrap.
env = LalEnvFirstAccuracy(dataset, model, quality_method=QUALITY_METHOD)
env_test = LalEnvFirstAccuracy(dataset_test, model, quality_method=QUALITY_METHOD)
# Reset TensorFlow's default graph before the DQN agent is created further down.
# NOTE(review): this is the TF 1.x spelling; under TF 2.x it lives in tf.compat.v1.
tf.reset_default_graph()

LalEnv init


self.dataset
<datasets.DatasetUCI object at 0x7f1f6f3ffc88>


self.model
LogisticRegression(n_jobs=1)


self.quality_method
<function accuracy_score at 0x7f1f7088fe18>


LalEnv init


self.dataset
<datasets.DatasetUCI object at 0x7f1f6f3ffda0>


self.model
LogisticRegression(n_jobs=1)


self.quality_method
<function accuracy_score at 0x7f1f7088fe18>




Parameters for training RL.

In [7]:
# Output location: the agent resulting from this experiment is written here.
DIRNAME = './agents/'

# Replay buffer (passed to ReplayBuffer as buffer_size and prior_exp).
REPLAY_BUFFER_SIZE, PRIOROTIZED_REPLAY_EXPONENT = 1e4, 3

# Agent.
BATCH_SIZE, LEARNING_RATE, TARGET_COPY_FACTOR = 32, 1e-3, 0.01
# Default 0. Overwritten after the warm-start episodes with minus half of the
# average episode duration.
BIAS_INITIALIZATION = 0

# Warm start (episode count reduced for test).
WARM_START_EPISODES, NN_UPDATES_PER_WARM_START = 5, 100

# Epsilon schedule used while simulating training episodes.
EPSILON_START, EPSILON_END, EPSILON_STEPS = 1, 0.1, 1000

# Training (epoch count reduced for test).
TRAINING_EPOCHS = 5
# Per training iteration: number of simulated episodes, then number of gradient steps.
TRAINING_EPISODES_PER_EPOCH, NN_UPDATES_PER_EPOCHS = 2, 2

# Validation and test (both reduced for test).
VALIDATION_EPISODES, TESTING_EPISODES = 10, 10

Initialise replay buffer.

In [8]:
# Buffer that collects the transitions of the warm-start and training episodes and
# from which minibatches for the Q-network updates are sampled.
replay_buffer = ReplayBuffer(buffer_size=REPLAY_BUFFER_SIZE, prior_exp=PRIOROTIZED_REPLAY_EXPONENT)

Warm-start the replay buffer with random episodes. 

Collect episodes.

In [9]:
# Warm start: fill the replay buffer with transitions from episodes in which the
# batch size is picked at random. Durations and final scores are recorded per episode.
episode_durations = []
episode_scores = []
episode_number = 1

for _ in range(WARM_START_EPISODES):

    print("Episode {}.".format(episode_number))
    # A new episode begins with a reset of the environment.
    # state: vector representation of the environment state (depends on the classifier).
    # next_action: vector representations of all actions available at the next step.
    state, next_action, reward = env.reset()
    # Budget counter starts at 2.
    # NOTE(review): presumably the two points labelled by reset (n_start=2) - confirm.
    episode_duration = 2
    done = False

    # Keep stepping until the environment reports a terminal state.
    while not done:

        # Random action: the batch size is the first entry of a randomly chosen action vector.
        batch = random.choice(next_action)[0]
        print("WARM-STARM, batch:", batch)
        print("\n")

        # Draw `batch` distinct indices out of 0 .. n_actions-1 (sampling without replacement).
        candidate_indices = range(env.n_actions)
        batch_actions_indices = np.array(random.sample(candidate_indices, batch))
        print("WARM-STARM, batch_actions_indices:", batch_actions_indices)
        print("WARM-STARM, batch_actions_indices length:", len(batch_actions_indices))
        print("\n")

        # The stored action is the batch size; the environment is stepped with the indices.
        action = batch
        next_state, next_action, reward, done = env.step(batch_actions_indices)

        # Record the transition in the replay buffer.
        print("\n")
        print("REPLAY BUFFER STORE TRANSITION")
        replay_buffer.store_transition(state, action, reward, next_state, next_action, done)

        # Advance to the next step.
        episode_duration += batch
        state = next_state
    print("TOTAL BUDGET", episode_duration)

    # Only the last quality value of the episode is kept as its score.
    episode_final_acc = env.return_episode_qualities()
    last_quality = episode_final_acc[-1]
    episode_scores.append(last_quality)
    print("Final ACC", last_quality)
    episode_durations.append(episode_duration)
    episode_number += 1

# The bias initialisation is minus half of the mean warm-start episode duration.
av_episode_duration = np.mean(episode_durations)
BIAS_INITIALIZATION = -av_episode_duration/2

Episode 1.
envs, def reset(self, n_start=2):


IndexError: list index out of range

Plots for warm-start episodes.

In [None]:
# Summary of the warm-start episodes, drawn as three stacked panels of one figure:
#   1. absolute budget per episode,
#   2. budget divided by the size of the unlabeled data (len(dataset.train_data)),
#   3. final accuracy per episode.
# The number of episodes is whatever the warm-start cell produced (WARM_START_EPISODES).
n_unlabeled = len(dataset.train_data)
fig, (ax_budget, ax_fraction, ax_acc) = plt.subplots(3, 1, figsize=(20, 10))

# Panel 1: total budget size per episode (absolute, so the label carries no "percentage").
xpoints = np.array(range(0, len(episode_durations)))
ypoints = np.array(episode_durations)
ax_budget.plot(xpoints, ypoints)
plot_label = "Budget per episode. *Size of unlabeled data: " + str(n_unlabeled)
ax_budget.set_title(plot_label, loc="left")
ax_budget.set_xlabel("Episodes")
ax_budget.set_ylabel("Budget size")

# Panel 2: the same budgets relative to the size of the unlabeled data.
ypoints = np.array([x / n_unlabeled for x in episode_durations])
ax_fraction.plot(xpoints, ypoints)
ax_fraction.set_title(plot_label, loc="left")
ax_fraction.set_xlabel("Episodes")
ax_fraction.set_ylabel("Budget size (percentage of the UD)")

# Panel 3: final achieved accuracy per episode.
xpoints = np.array(range(0, len(episode_scores)))
ypoints = np.array(episode_scores)
ax_acc.plot(xpoints, ypoints)
ax_acc.set_title("Final achieved accuracy per episode", loc="left")
ax_acc.set_xlabel("Episodes")
ax_acc.set_ylabel("ACC")
# max() of an empty list raises, so the legend is only added when there are scores.
if episode_scores:
    legend_label = "Maximum ACC: " + str(max(episode_scores))[:4]
    ax_acc.legend([legend_label])

# Keep titles and axis labels of neighbouring panels from overlapping.
fig.tight_layout()
plt.show()

Initialize the DQN agent.

In [None]:
# Build the DQN agent from the hyper-parameters defined above.
# observation_length is N_STATE_ESTIMATION, the same value the datasets were built with.
# bias_average is BIAS_INITIALIZATION, which the warm-start cell sets to minus half
# of the average warm-start episode duration.
agent = DQN(experiment_dir=DIRNAME,
            observation_length=N_STATE_ESTIMATION,
            learning_rate=LEARNING_RATE,
            batch_size=BATCH_SIZE,
            target_copy_factor=TARGET_COPY_FACTOR,
            bias_average=BIAS_INITIALIZATION,
           )

Do updates of the network based on warm start episodes.

In [None]:
# Train the Q-network on the transitions gathered during the warm start.
for update in range(NN_UPDATES_PER_WARM_START):

    print("Update:", update)

    # One step: sample a minibatch from the replay buffer (proportionally to the
    # sampling probabilities), train the agent on it, then feed the resulting
    # temporal-difference errors back so the sampling probabilities are updated.
    warm_minibatch = replay_buffer.sample_minibatch(BATCH_SIZE)
    replay_buffer.update_td_errors(agent.train(warm_minibatch), warm_minibatch.indices)

# Train RL

Run multiple training iterations. Each iteration consists of:
- Generating episodes following agent's actions with exploration.
- Validation and test episodes for evaluating performance.
- Q-network updates.

In [None]:
# Training: for every epoch, simulate TRAINING_EPISODES_PER_EPOCH episodes with an
# epsilon-greedy agent, store their transitions, then run NN_UPDATES_PER_EPOCHS
# Q-network updates. Final score and duration of every episode are collected
# as one inner list per epoch.
final_episode_scores_training = []
final_episode_durations_training = []

for epoch in range(TRAINING_EPOCHS):

    print("EPOCH {}.".format(epoch+1))

    # Epsilon decays linearly with the epoch index from EPSILON_START, floored at EPSILON_END.
    epsilon = max(EPSILON_END, EPSILON_START-epoch*(EPSILON_START-EPSILON_END)/EPSILON_STEPS)

    # Simulate training episodes.
    episode_scores_training = []
    episode_durations_training = []

    for training_episode in range(TRAINING_EPISODES_PER_EPOCH):

        print("Training episode:", training_episode+1)
        # Reset the environment to start a new episode.
        state, next_action, reward = env.reset()
        print("state", state)
        print("length state", len(state))
        print("length next_action", len(next_action))
        print("indices known", len(env.indices_known), env.indices_known)
        print("indices unknown", len(env.indices_unknown), env.indices_unknown)
        print("n_actions", env.n_actions)
        done = False
        episode_duration = 2

        # Run an episode.
        while not done:
            # Greedy choice: the agent returns an index into next_action, and the entry
            # at that index is the batch size. (Previously this lookup used `action`,
            # a stale value from the preceding step or the warm-start cell, so the
            # agent's choice was ignored.)
            action_index = agent.get_action(state, next_action)
            batch = int(next_action[action_index])
            # Exploration: with probability epsilon use a random batch size instead.
            if np.random.ranf() < epsilon:
                batch = random.choice(next_action)[0]
            print("batch", batch)
            # Draw `batch` distinct indices out of 0 .. n_actions-1.
            inputNumbers = range(0, env.n_actions)
            batch_actions_indices = np.array(random.sample(inputNumbers, batch))
            print(batch_actions_indices)
            # Make another step.
            next_state, next_action, reward, done = env.step(batch_actions_indices)

            # Store a step in replay buffer; the stored action is the batch size.
            action = batch
            replay_buffer.store_transition(state, action, reward, next_state, next_action, done)

            # Change a state of environment.
            state = next_state
            episode_duration += batch

        # Only the last quality value of the episode is kept as its score.
        episode_accuracies_training = env.return_episode_qualities()
        episode_scores_training.append(episode_accuracies_training[-1])
        episode_durations_training.append(episode_duration)
    final_episode_durations_training.append(episode_durations_training)
    final_episode_scores_training.append(episode_scores_training)

    # NEURAL NETWORK UPDATES.
    for _ in range(NN_UPDATES_PER_EPOCHS):
        minibatch = replay_buffer.sample_minibatch(BATCH_SIZE)
        td_error = agent.train(minibatch)
        replay_buffer.update_td_errors(td_error, minibatch.indices)

Create folder to store the results.

In [None]:
# Create the folder to store the results from the experiments.
# exist_ok=True makes this a no-op when the folder is already there and avoids the
# gap between a separate existence check and the creation.
results_path = r'./Output images'
os.makedirs(results_path, exist_ok=True)

Plots for training episodes.

In [None]:
# Summarise the training episodes.
#   max_scores[i]             - best final accuracy reached in epoch i
#   budgets_for_max_scores[i] - smallest budget among the episodes of epoch i that reached it
#   max_score                 - best final accuracy over all epochs
#   budgets                   - budgets of every episode that reached max_score
# The loops walk over the recorded results themselves rather than over
# TRAINING_EPOCHS / TRAINING_EPISODES_PER_EPOCH, so the cell stays correct if those
# constants are edited after the training cell has run.
max_scores = []
budgets_for_max_scores = []
for epoch_scores, epoch_durations in zip(final_episode_scores_training, final_episode_durations_training):
    best_in_epoch = max(epoch_scores)
    max_scores.append(best_in_epoch)
    budgets_of_best = [
        duration
        for score, duration in zip(epoch_scores, epoch_durations)
        if score == best_in_epoch
    ]
    budgets_for_max_scores.append(np.array(budgets_of_best).min())

max_score = max(max_scores)
budgets = [
    duration
    for epoch_scores, epoch_durations in zip(final_episode_scores_training, final_episode_durations_training)
    for score, duration in zip(epoch_scores, epoch_durations)
    if score == max_score
]
print("The maximum accuracy is {}.".format(max_score))
print("The budget for the maximum accuracy is {}.".format(np.array(budgets).min()))

In [None]:
# Three figures for the training run, one point per epoch:
#   - budget of the best episode of each epoch (saved to the results folder),
#   - the same budget divided by the size of the unlabeled data,
#   - best accuracy of each epoch, with the overall maximum and its budget in the legend.
n_unlabeled_training = len(dataset.train_data)

# Figure 1: budget of the best episode per epoch.
fig_budget = plt.figure(figsize=(20,10))
ax_budget = fig_budget.add_subplot(2, 1, 1)
ax_budget.plot(
    np.array(range(1, len(budgets_for_max_scores)+1)),
    np.array(budgets_for_max_scores),
    color='m',
)
ax_budget.set_title("Budget per max ACC. | Unlabeled data: " + str(n_unlabeled_training), loc="left")
ax_budget.set_xlabel("Epochs")
ax_budget.set_ylabel("Budget")
fig_budget.savefig("Output images/TRAINING, Budget.png")

# Figure 2: the same budgets relative to the size of the unlabeled data (not saved).
fig_fraction = plt.figure(figsize=(20,10))
ax_fraction = fig_fraction.add_subplot(2, 1, 1)
ax_fraction.plot(
    np.array(range(1, len(budgets_for_max_scores)+1)),
    np.array([x/n_unlabeled_training for x in budgets_for_max_scores]),
    color='k',
)
ax_fraction.set_title(
    "Budget (percentage of the UD) per max ACC. | Unlabeled data: " + str(n_unlabeled_training),
    loc="left",
)
ax_fraction.set_xlabel("Epochs")
ax_fraction.set_ylabel("Budget")
#fig_fraction.savefig("Output images/TRAINING, Budget percentage.png")

# Figure 3: best accuracy per epoch (not saved).
fig_acc = plt.figure(figsize=(20,15))
ax_acc = fig_acc.add_subplot(2, 1, 2)
ax_acc.plot(np.array(range(1, len(max_scores)+1)), np.array(max_scores), color='c')
ax_acc.set_title("Max ACC per epoch", loc="left")
ax_acc.set_xlabel("Epochs")
ax_acc.set_ylabel("Max ACC")
ax_acc.legend(["Maximum ACC: " + str(max_score)[:4] + ", " + "Budget: " + str(np.array(budgets).min())[:4]])
#fig_acc.savefig("Output images/TRAINING, Max ACC.png")

plt.show()

Validation.

Testing.

In [None]:
# Test episodes are run on env_test with the agent acting greedily (no exploration,
# nothing is stored in the replay buffer). The final score and the duration of each
# episode are collected, then wrapped in an outer list.
episode_scores_testing = []
episode_durations_testing = []
final_episode_scores_testing = []
final_episode_durations_testing = []
for testing_episode in range(TESTING_EPISODES):
    print("Testing episode:", testing_episode)
    episode_duration = 2
    done = False
    state, next_action, reward = env_test.reset()
    print("next_action",next_action)
    print("state", state)
    while not(done):
        # The agent returns an index into next_action; the entry there is the batch size.
        batch = agent.get_action(state, next_action)
        batch = int(next_action[batch])
        print("batch",batch)
        # Sample the indices from the environment that is actually being stepped.
        # (This used env.n_actions, i.e. the action count of the training environment.)
        inputNumbers = range(0, env_test.n_actions)
        print(inputNumbers)
        batch_actions_indices = np.array(random.sample(inputNumbers, batch))
        print("batch_actions_indices",batch_actions_indices)
        next_state, next_action, reward, done = env_test.step(batch_actions_indices)
        state = next_state
        episode_duration += batch
    # Only the last quality value of the episode is kept as its score.
    episode_accuracies_testing = env_test.return_episode_qualities()
    episode_scores_testing.append(episode_accuracies_testing[-1])
    episode_durations_testing.append(episode_duration)
final_episode_scores_testing.append(episode_scores_testing)
final_episode_durations_testing.append(episode_durations_testing)

Plots for testing episodes.

In [None]:
# Best final accuracy over the testing episodes, and the budgets of the episodes
# that reached it (the smallest of them is reported).
testing_scores = final_episode_scores_testing[0]
testing_durations = final_episode_durations_testing[0]
max_score = max(testing_scores)
budgets = [
    duration
    for score, duration in zip(testing_scores, testing_durations)
    if score == max_score
]
print("The maximum accuracy is {}.".format(max_score))
print("The budget for the maximum accuracy is {}.".format(np.array(budgets).min()))

In [None]:
# Three figures for the testing run, one point per testing episode:
#   - budget of each episode,
#   - the same budget divided by len(dataset.test_data),
#   - final accuracy of each episode, with the maximum and its budget in the legend.
# NOTE(review): the test episodes run on env_test / dataset_test, but the size used
# here comes from dataset.test_data - confirm which object and attribute is intended.
n_unlabeled_testing = len(dataset.test_data)
testing_durations = final_episode_durations_testing[0]
testing_scores = final_episode_scores_testing[0]

# Figure 1: budget per testing episode. The x axis counts episodes, so it is
# labelled "Episodes" like the other two figures (it used to say "Epochs").
xpoints = np.array(range(1, len(testing_durations)+1))
ypoints = np.array(testing_durations)
fig_budget = plt.figure(figsize=(20,10))
ax_budget = fig_budget.add_subplot(2, 1, 1)
ax_budget.plot(xpoints, ypoints, color='m')
plot_label = "Budget per max ACC. | Unlabeled data: " + str(n_unlabeled_testing)
ax_budget.set_title(plot_label, loc="left")
ax_budget.set_xlabel("Episodes")
ax_budget.set_ylabel("Mean budget")

# Figure 2: the same budgets relative to n_unlabeled_testing.
ypoints = np.array([x/n_unlabeled_testing for x in testing_durations])
fig_fraction = plt.figure(figsize=(20,10))
ax_fraction = fig_fraction.add_subplot(2, 1, 1)
ax_fraction.plot(xpoints, ypoints, color='k')
plot_label = "Budget (percentage of the UD) per max ACC. | Unlabeled data: " + str(n_unlabeled_testing)
ax_fraction.set_title(plot_label, loc="left")
ax_fraction.set_xlabel("Episodes")
ax_fraction.set_ylabel("Mean budget")

# Figure 3: final accuracy per testing episode.
xpoints = np.array(range(1, len(testing_scores)+1))
ypoints = np.array(testing_scores)
fig_acc = plt.figure(figsize=(20,15))
ax_acc = fig_acc.add_subplot(2, 1, 2)
ax_acc.plot(xpoints, ypoints, color='c')
ax_acc.set_title("Max ACC", loc="left")
ax_acc.set_xlabel("Episodes")
ax_acc.set_ylabel("Max ACC")
legend_1 = "Maximum ACC: " + str(np.array(testing_scores).max())[:4]
legend_2 = ", "
legend_3 = "Budget: " + str(np.array(budgets).min())[:4]
ax_acc.legend([legend_1 + legend_2 + legend_3])

plt.show()