In [None]:
import numpy as np

import matplotlib.pyplot as plt
import random
import shutil
import os
from scipy.interpolate import make_interp_spline

# Classifier.
from keras.models import Sequential
from keras.layers import Dense,Dropout,Conv2D,Flatten,MaxPooling2D
from keras import optimizers
from keras.constraints import maxnorm
from keras.optimizers import SGD

from dataset import DatasetCIFAR10

In [None]:
# BatchAgent
from batch_envs import LalEnvFirstAccuracy
from batch_helpers import ReplayBuffer
from batch_dqn import DQN

In [None]:
# Start time.
# Wall-clock timestamp taken when this cell runs; the "End time" cell subtracts it from
# time.time() to report the total run time of the notebook.
import time
start_time = time.time()

### Parameters.

In [None]:
# Classifier parameters.
CLASSIFIER_NUMBER_OF_CLASSES = 10
CLASSIFIER_NUMBER_OF_EPOCHS = 5
# NOTE(review): the classifier cell hard-codes lr=0.001 and does not read this constant;
# confirm which learning rate is intended.
CLASSIFIER_LEARNING_RATE = 0.01
CLASSIFIER_BATCH_SIZE = 64

# Parameters for the agent.

REPLAY_BUFFER_SIZE = 50000  # A capacity is a count, so use an int (same value as the former 5e4).
PRIORITIZED_REPLAY_EXPONENT = 3
# Backward-compatible alias: other cells refer to the misspelled name.
PRIOROTIZED_REPLAY_EXPONENT = PRIORITIZED_REPLAY_EXPONENT
BATCH_SIZE = 32
LEARNING_RATE = 1e-3
TARGET_COPY_FACTOR = 0.01
BIAS_INITIALIZATION = 0  # Placeholder; recomputed from the warm-start episodes.

NUMBER_OF_STATE_DATA = 1000
TRAIN_DATASET_LENGTH = 1000

# BatchAgent's parameters.

DIRNAME = './batch_agent/' # The resulting batch_agent of this experiment will be written in a file.

# Both values were previously derived as int(TRAIN_DATASET_LENGTH*(5/100)) and then
# immediately overwritten with the literals below; only the effective assignments are kept.
WARM_START_EPISODES_BATCH_AGENT = 50
TRAINING_EPOCHS_BATCH_AGENT = 50
TRAINING_EPISODES_PER_EPOCH_BATCH_AGENT = 10
NN_UPDATES_PER_EPOCHS_BATCH_AGENT = 1

print("Warm-start episodes: {}.".format(WARM_START_EPISODES_BATCH_AGENT))
print("Training epochs: {}.".format(TRAINING_EPOCHS_BATCH_AGENT))

In [None]:
# Remove artefacts of previous runs from the current working directory.
# ignore_errors=True makes rmtree skip anything it cannot remove (e.g. a missing directory).
cwd = os.getcwd()

stale_directories = (
    '/__pycache__',
    '/wandb',
    '/batch_agent',
    '/libact',
    '/AL_results',
    '/checkpoints',
    '/summaries',
)
for stale_directory in stale_directories:
    shutil.rmtree(cwd+stale_directory, ignore_errors=True)

Initialise the dataset.

In [None]:
# Instantiate DatasetCIFAR10 with the configured sizes and report the length of each split.
dataset = DatasetCIFAR10(
    number_of_state_data=NUMBER_OF_STATE_DATA,
    train_dataset_length=TRAIN_DATASET_LENGTH,
)
for message, split in (
    ("Train data are {}.", dataset.train_data),
    ("State data are {}.", dataset.state_data),
    ("Test data are {}.", dataset.test_data),
):
    print(message.format(len(split)))

# Classifier.

In [None]:
#"""
# Deep CNN: three stages of (conv, conv, max-pool) with 64/128/256 filters, followed by a
# 1024-unit dense layer with dropout and a softmax output.

from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from keras.optimizers import SGD

# Parameters.
input_shape = (32, 32, 3)
# NOTE(review): CLASSIFIER_LEARNING_RATE from the parameters cell is not used here; the
# learning rate is hard-coded to 0.001 — confirm which value is intended.
optimizer = SGD(lr=0.001, momentum=0.9, nesterov=True)

# Create the classifier.
classifier = Sequential()
for stage_index, filters in enumerate((64, 128, 256)):
    # Only the very first layer of the network declares the input shape.
    first_layer_kwargs = {'input_shape': input_shape} if stage_index == 0 else {}
    classifier.add(Conv2D(filters, (3, 3), padding='same', activation='relu',
                          kernel_constraint=maxnorm(3), **first_layer_kwargs))
    classifier.add(Conv2D(filters, (3, 3), padding='same', activation='relu',
                          kernel_constraint=maxnorm(3)))
    classifier.add(MaxPooling2D(pool_size=(2, 2)))

# Classification head.
classifier.add(Flatten())
classifier.add(Dense(1024, activation='relu', kernel_constraint=maxnorm(3)))
classifier.add(Dropout(0.5))
classifier.add(Dense(CLASSIFIER_NUMBER_OF_CLASSES, activation='softmax'))

# Compile classifier.
classifier.compile(
    loss='categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)
#"""

In [None]:
from sklearn.metrics import precision_score

# Weighted precision of `classifier` on the training split; the value is later handed to
# the environment as `target_precision`.
# NOTE(review): no classifier.fit(...) call precedes this cell in the notebook, so despite
# the message printed below this appears to score an unfitted model — confirm the intent.
true_labels = np.argmax(dataset.train_labels_one_hot_encoding, axis=1)
predictions = classifier.predict(dataset.train_data)
predicted_labels = np.argmax(predictions, axis=1)
TARGE_PRECISION = precision_score(
    true_labels,
    predicted_labels,
    average='weighted',
    zero_division=0,
)
print("Precision after training with all the data: {}.".format(TARGE_PRECISION))

Initialise the BatchAgent environment.

In [None]:
# Environment the agent interacts with; it is given the dataset, the classifier, the
# classifier's training settings and the reference precision computed above.
batch_env = LalEnvFirstAccuracy(
    dataset,
    classifier,
    epochs=CLASSIFIER_NUMBER_OF_EPOCHS,
    classifier_batch_size=CLASSIFIER_BATCH_SIZE,
    target_precision=TARGE_PRECISION,
)

Initialise the replay buffer.

In [None]:
# Buffer that stores the transitions of both the warm-start and the training episodes.
replay_buffer = ReplayBuffer(
    buffer_size=REPLAY_BUFFER_SIZE,
    prior_exp=PRIOROTIZED_REPLAY_EXPONENT,
)

### Warm-start Episodes. | BatchAgent.

In [None]:
# Warm-start: play episodes with randomly drawn batch sizes, store every transition in the
# replay buffer and record per-episode statistics for the plots and the target budget.
episode_durations = []    # Total number of samples used per episode.
episode_scores = []       # Last entry of batch_env.return_episode_qualities() per episode.
episode_losses = []       # Kept for compatibility; never filled in this cell.
episode_precisions = []   # Last entry of batch_env.return_episode_precisions() per episode.
batches = []              # Every batch size drawn so far, across all episodes.

# Upper bound on re-draws when looking for a batch size that has not been used before.
MAX_BATCH_RESAMPLES = 50

for episode_number in range(1, WARM_START_EPISODES_BATCH_AGENT + 1):

    print("Episode {}.".format(episode_number))
    # Reset the environment to start a new episode.
    # The state value contains a vector representation of state of the environment (depends on the classifier).
    # The next_action contains a vector representations of all actions available to be taken at the next step.
    state, next_action, indicies_unknown, reward = batch_env.reset(isBatchAgent=False, target_budget=1.0)
    done = False
    # Every episode starts its count at CLASSIFIER_NUMBER_OF_CLASSES
    # (presumably the size of the initial labelled set — confirm against the environment).
    episode_duration = CLASSIFIER_NUMBER_OF_CLASSES

    # Before we reach a terminal state, make steps.
    while not done:

        # Choose a random batch size, preferring one that was not drawn before. After
        # MAX_BATCH_RESAMPLES re-draws the last candidate is accepted even if it is a repeat.
        # On the very first draw `batches` is empty, so no re-draw happens.
        batch = random.choice(next_action)[0]
        for _attempt in range(MAX_BATCH_RESAMPLES):
            if batch not in batches:
                break
            batch = random.choice(next_action)[0]
        batches.append(batch)

        # Pick `batch` distinct action indices out of the n_actions currently available.
        batch_actions_indices = np.array(random.sample(range(0, batch_env.n_actions), batch))
        next_state, next_action, indicies_unknown, reward, done = batch_env.step(batch_actions_indices)

        # Keep the stored next-action list non-empty.
        if next_action==[]:
            next_action.append(np.array([0]))

        # Store the transition in the replay buffer (the action is the batch size).
        replay_buffer.store_transition(state, batch, reward, next_state, next_action, done)

        # Get ready for the next step.
        state = next_state
        episode_duration += batch

    episode_scores.append(batch_env.return_episode_qualities()[-1])
    episode_precisions.append(batch_env.return_episode_precisions()[-1])
    episode_durations.append(episode_duration)

# Compute the average episode duration of episodes generated during the warm start procedure.
av_episode_duration = np.mean(episode_durations)
BIAS_INITIALIZATION = -av_episode_duration/2

In [None]:
# Warm-start summary: budget (absolute and relative), accuracy and precision per episode,
# drawn as four stacked panels of a single figure. The previous version opened a new figure
# before every plt.subplot(3, 1, k) call, which produced four separate, mostly empty figures.

def _plot_smoothed_series(ax, values, color, title, ylabel, legend_prefix=None):
    """Draw one marker per episode on `ax` and overlay a cubic-spline curve through them.

    Args:
        ax: Matplotlib axes to draw on.
        values: Sequence with one number per episode.
        color: Matplotlib colour used for both the markers and the curve.
        title: Left-aligned panel title.
        ylabel: Label of the y axis.
        legend_prefix: If given, a legend entry is added that consists of this prefix
            followed by the first four characters of str(max(values)).
    """
    xpoints = np.arange(len(values))
    ypoints = np.array(values)
    ax.plot(xpoints, ypoints, 'o', color=color)
    # A cubic spline (k=3) needs at least four points; with fewer, show the markers only.
    if len(xpoints) >= 4:
        xnew = np.linspace(xpoints.min(), xpoints.max(), 500)
        ax.plot(xnew, make_interp_spline(xpoints, ypoints, k=3)(xnew), color=color)
    ax.set_title(title, loc="left")
    ax.set_xlabel("Episodes")
    ax.set_ylabel(ylabel)
    if legend_prefix is not None:
        ax.legend([legend_prefix + str(max(values))[:4]])


unlabeled_pool_size = len(dataset.train_data)
budget_title = "Budget per episode. *Size of unlabeled data: " + str(unlabeled_pool_size)

fig, axes = plt.subplots(4, 1, figsize=(20, 20))

# Total budget per episode, in samples.
_plot_smoothed_series(axes[0], episode_durations, 'm', budget_title,
                      "Budget size (number of samples)")

# Total budget per episode as a percentage of the unlabeled data (UD).
_plot_smoothed_series(axes[1], [100*x/unlabeled_pool_size for x in episode_durations], 'k',
                      budget_title, "Budget size (percentage of the UD)")

# Final achieved accuracy per episode.
_plot_smoothed_series(axes[2], episode_scores, 'c', "Final achieved accuracy per episode",
                      "ACC", legend_prefix="Maximum ACC: ")

# Final achieved precision per episode.
_plot_smoothed_series(axes[3], episode_precisions, 'y', "Final achieved precision per episode",
                      "Precision", legend_prefix="Maximum precision: ")

fig.tight_layout()
plt.show()

In [None]:
# Target budget: the smallest warm-start budget among the episodes that reached the best
# warm-start precision, expressed as a fraction of the training pool.
best_warm_start_precision = max(episode_precisions)  # Computed once instead of per episode.
warm_start_batches = [
    duration
    for duration, precision in zip(episode_durations, episode_precisions)
    if precision >= best_warm_start_precision
]
TARGET_BUDGET = min(warm_start_batches)/(len(dataset.train_data))
print("Target budget is {}.".format(TARGET_BUDGET))

In [None]:
#"""
# Train BatchAgent with all the data.
# But keep the ReplayBuffer the same.
# From here on `dataset` and `batch_env` are rebound to a dataset requested with
# train_dataset_length=50000; `replay_buffer` is not touched by this cell.

# Overrides the value assigned in the parameters cell.
TRAIN_DATASET_LENGTH = 50000

dataset = DatasetCIFAR10(number_of_state_data=NUMBER_OF_STATE_DATA, train_dataset_length=TRAIN_DATASET_LENGTH)
print("Train data are {}.".format(len(dataset.train_data)))
print("State data are {}.".format(len(dataset.state_data)))
print("Test data are {}.".format(len(dataset.test_data)))

# The bare triple-quoted string below is an expression statement with no effect; it holds a
# disabled variant that would recompute TARGE_PRECISION on the new dataset.
"""
predictions = classifier.predict(dataset.train_data)
predicted_labels = np.argmax(predictions, axis=1)
true_labels = np.argmax(dataset.train_labels_one_hot_encoding, axis=1)
TARGE_PRECISION = precision_score(true_labels, predicted_labels, average='weighted', zero_division=0)
print("Precision after training with all the data: {}.".format(TARGE_PRECISION))
"""

# Because the block above is disabled, TARGE_PRECISION keeps the value computed earlier in
# the notebook on the previous dataset.
batch_env = LalEnvFirstAccuracy(dataset, classifier, epochs=CLASSIFIER_NUMBER_OF_EPOCHS, classifier_batch_size=CLASSIFIER_BATCH_SIZE, target_precision=TARGE_PRECISION)
#"""

Initialize the DQN for the BatchAgent.

In [None]:
# Build the DQN agent. BIAS_INITIALIZATION is the value derived from the average warm-start
# episode duration when the warm-start cell has been run.
dqn_settings = dict(
    experiment_dir=DIRNAME,
    observation_length=NUMBER_OF_STATE_DATA,
    learning_rate=LEARNING_RATE,
    batch_size=BATCH_SIZE,
    target_copy_factor=TARGET_COPY_FACTOR,
    bias_average=BIAS_INITIALIZATION,
)
batch_agent = DQN(**dqn_settings)

Update the network using the transitions collected during the warm-start episodes.

In [None]:
# Train the DQN on minibatches drawn from the replay buffer and feed the resulting
# TD errors back to the buffer for the sampled transitions.
for update_number in range(1, NN_UPDATES_PER_EPOCHS_BATCH_AGENT + 1):
    print("Update:", update_number)
    minibatch = replay_buffer.sample_minibatch(BATCH_SIZE)
    td_error = batch_agent.train(minibatch)
    replay_buffer.update_td_errors(td_error, minibatch.indices)

### Train BatchAgent.

In [None]:
# Train the BatchAgent: every epoch plays several episodes with the current policy, keeps
# the statistics of the best episode(s) of the epoch and then updates the DQN.

agent_epoch_durations = []    # Per epoch: smallest budget among the best-precision episodes.
agent_epoch_scores = []       # Per epoch: LIST of accuracies of the best-precision episodes.
agent_epoch_precisions = []   # Per epoch: best final precision.

for epoch in range(TRAINING_EPOCHS_BATCH_AGENT):

    print("Training epoch {}.".format(epoch+1))

    agent_episode_durations = []
    agent_episode_scores = []
    agent_episode_precisions = []

    for training_episode in range(TRAINING_EPISODES_PER_EPOCH_BATCH_AGENT):

        # Reset the environment; the reset supplies the candidates for the first step,
        # every later step takes them from the previous call to step().
        state, next_batch, next_unlabeled_data, reward = batch_env.reset(isBatchAgent=True, target_budget=TARGET_BUDGET)
        done = False
        episode_duration = CLASSIFIER_NUMBER_OF_CLASSES

        # Run an episode.
        while not done:

            selected_batch, selected_indices = batch_agent.get_action(
                dataset=dataset,
                model=classifier,
                state=state,
                next_action_batch=next_batch,
                next_action_unlabeled_data=next_unlabeled_data,
            )
            next_state, next_batch, next_unlabeled_data, reward, done = batch_env.step(selected_indices)
            # Keep the stored next-action list non-empty.
            if next_batch==[]:
                next_batch.append(np.array([0]))

            replay_buffer.store_transition(state, selected_batch, reward, next_state, next_batch, done)

            # Change the state of the environment.
            state = next_state
            episode_duration += selected_batch
            print("Selected batch is {}.".format(selected_batch))

        agent_episode_scores.append(batch_env.return_episode_qualities()[-1])
        agent_episode_precisions.append(batch_env.return_episode_precisions()[-1])
        agent_episode_durations.append(episode_duration)

    # Summarise the epoch by its best-precision episode(s).
    maximum_epoch_precision = max(agent_episode_precisions)
    best_episode_indices = [
        index
        for index, episode_precision in enumerate(agent_episode_precisions)
        if episode_precision == maximum_epoch_precision
    ]
    agent_epoch_precisions.append(maximum_epoch_precision)
    # NOTE(review): this appends a list (one accuracy per tied episode), unlike the scalar
    # entries of the other two epoch lists — confirm this is intended.
    agent_epoch_scores.append([agent_episode_scores[index] for index in best_episode_indices])
    agent_epoch_durations.append(min(agent_episode_durations[index] for index in best_episode_indices))

    # NEURAL NETWORK UPDATES.
    for update in range(NN_UPDATES_PER_EPOCHS_BATCH_AGENT):
        minibatch = replay_buffer.sample_minibatch(BATCH_SIZE)
        td_error = batch_agent.train(minibatch)
        replay_buffer.update_td_errors(td_error, minibatch.indices)

In [None]:
# End time.
seconds = time.time() - start_time
# Format as HH:MM:SS by hand: time.strftime("%H:%M:%S", time.gmtime(seconds)) wraps around
# after 24 hours, which would under-report long runs. Output is unchanged for shorter runs.
elapsed_hours, remainder = divmod(int(seconds), 3600)
elapsed_minutes, elapsed_seconds = divmod(remainder, 60)
print("Total run time is {}.".format("{:02d}:{:02d}:{:02d}".format(elapsed_hours, elapsed_minutes, elapsed_seconds)))

In [None]:
# Plot precisions (in percent): warm-start episodes against training epochs of the agent,
# each drawn as a cubic-spline curve through the recorded values.

precision_curves = []
for precisions, curve_color in ((episode_precisions, 'y'), (agent_epoch_precisions, 'm')):
    xs = np.array(range(0, len(precisions)))
    ys = np.array([x*100 for x in precisions])
    xs_dense = np.linspace(xs.min(), xs.max(), 150)
    ys_smooth = make_interp_spline(xs, ys, k=3)(xs_dense)
    precision_curves.append((xs_dense, ys_smooth, curve_color))

plt.figure(figsize=(20,10))
for xs_dense, ys_smooth, curve_color in precision_curves:
    plt.plot(xs_dense, ys_smooth, color=curve_color, linewidth=2.5)
plt.legend(["Warm-start", "Agent"])
plt.xlabel("Epoch")
plt.ylabel("Precision")

plt.show()

In [None]:
# Plot budgets as a percentage of the training pool each phase actually ran on.
#
# `dataset` may have been rebound to a larger training set after the warm-start phase, so
# dividing the warm-start budgets by the current len(dataset.train_data) would understate
# them. TARGET_BUDGET was computed as min(warm_start_batches) / <warm-start pool size>, so
# the warm-start pool size is recovered from those two values.
warm_start_pool_size = int(round(min(warm_start_batches) / TARGET_BUDGET))
agent_pool_size = len(dataset.train_data)

warm_start_xpoints = np.array(range(0,len(episode_durations)))
warm_start_ypoints = np.array([(x/warm_start_pool_size)*100 for x in episode_durations])
warm_start_xnew = np.linspace(warm_start_xpoints.min(), warm_start_xpoints.max(), 150)
warm_start_spl = make_interp_spline(warm_start_xpoints, warm_start_ypoints, k=3)
warm_start_power_smooth = warm_start_spl(warm_start_xnew)

batch_agent_xpoints = np.array(range(0,len(agent_epoch_durations)))
batch_agent_ypoints = np.array([(x/agent_pool_size)*100 for x in agent_epoch_durations])
batch_agent_xnew = np.linspace(batch_agent_xpoints.min(), batch_agent_xpoints.max(), 150)
batch_agent_spl = make_interp_spline(batch_agent_xpoints, batch_agent_ypoints, k=3)
batch_agent_power_smooth = batch_agent_spl(batch_agent_xnew)

plt.figure(figsize=(20,10))
plt.plot(warm_start_xnew, warm_start_power_smooth, color='y', linewidth=2.5)
plt.plot(batch_agent_xnew, batch_agent_power_smooth, color='m', linewidth=2.5)
plt.legend(["Warm-start", "Agent"])
plt.xlabel("Epoch")
plt.ylabel("Budget")

plt.show()

### Testing episodes.

In [None]:
import copy

In [None]:
# Simulate testing episodes: in every episode the trained agent (RAL) and a random-sampling
# baseline each act on their own deep copy of the same freshly reset environment.

TEST_EPISODES = 50

Reinforced_Active_Learning = True
RAL_episode_durations = []
RAL_episode_scores = []
RAL_episode_precisions = []

Random_Sampling = True
Random_Sampling_episode_durations = []
Random_Sampling_episode_scores = []
Random_Sampling_episode_precisions = []

for episode in range(TEST_EPISODES):

    print("Testing episode {}.".format(episode+1))

    # Reset the environment to start a new episode.
    state, action_batch, action_unlabeled_data, reward = batch_env.reset(isBatchAgent=True, target_budget=TARGET_BUDGET)

    if Reinforced_Active_Learning:

        print("- Reinforced Active Learning.")

        RAL_episode_duration = CLASSIFIER_NUMBER_OF_CLASSES
        RAL_env = copy.deepcopy(batch_env)
        # The reset supplies the state and candidates for the first step; afterwards they
        # come from the previous call to step(). `state` itself is left untouched.
        RAL_state = state
        next_batch = action_batch
        next_unlabeled_data = action_unlabeled_data
        done = False

        while not done:

            # NOTE(review): the agent is queried with the global `classifier` while the
            # episode runs on a deep copy of the environment — confirm this is the model
            # that should be used here.
            selected_batch, selected_indices = batch_agent.get_action(
                dataset=dataset,
                model=classifier,
                state=RAL_state,
                next_action_batch=next_batch,
                next_action_unlabeled_data=next_unlabeled_data,
            )
            # Advance the state as the training loop does; previously the agent was
            # queried with the initial state on every step of the episode.
            RAL_state, next_batch, next_unlabeled_data, reward, done = RAL_env.step(selected_indices)

            RAL_episode_duration += selected_batch

        RAL_episode_scores.append(RAL_env.return_episode_qualities()[-1])
        RAL_episode_precisions.append(RAL_env.return_episode_precisions()[-1])
        RAL_episode_durations.append(RAL_episode_duration)

        # wandb.log({"RAL | Precision": RAL_episode_precisions[-1], "RAL | Budget": (RAL_episode_durations[-1]/len(dataset.train_data))*100})

    if Random_Sampling:

        print("- Random Sampling.")

        Random_Sampling_episode_duration = CLASSIFIER_NUMBER_OF_CLASSES
        Random_Sampling_env = copy.deepcopy(batch_env)
        done = False

        while not done:

            # Draw a batch size uniformly from 1..n_actions, then that many distinct indices.
            batch = random.randint(1, Random_Sampling_env.n_actions)
            batch_actions_indices = np.array(random.sample(range(0, Random_Sampling_env.n_actions), batch))
            _next_state, _next_action, _unknown, _reward, done = Random_Sampling_env.step(batch_actions_indices)

            Random_Sampling_episode_duration += batch

        Random_Sampling_episode_scores.append(Random_Sampling_env.return_episode_qualities()[-1])
        Random_Sampling_episode_precisions.append(Random_Sampling_env.return_episode_precisions()[-1])
        Random_Sampling_episode_durations.append(Random_Sampling_episode_duration)

In [None]:
# Plot precisions (in percent) of the test episodes: random sampling (RS) against the
# trained agent (RAL), each drawn as a cubic-spline curve through the recorded values.

test_precision_curves = []
for precisions, curve_color in ((Random_Sampling_episode_precisions, 'y'), (RAL_episode_precisions, 'm')):
    xs = np.array(range(0, len(precisions)))
    ys = np.array([x*100 for x in precisions])
    xs_dense = np.linspace(xs.min(), xs.max(), 150)
    ys_smooth = make_interp_spline(xs, ys, k=3)(xs_dense)
    test_precision_curves.append((xs_dense, ys_smooth, curve_color))

plt.figure(figsize=(20,10))
for xs_dense, ys_smooth, curve_color in test_precision_curves:
    plt.plot(xs_dense, ys_smooth, color=curve_color, linewidth=2.5)
plt.legend(["RS", "RAL"])
plt.xlabel("Episode")
plt.ylabel("Precision")

plt.show()

In [None]:
# Plot the budget of every test episode as a percentage of the training data: random
# sampling (RS) against the trained agent (RAL), each drawn as a cubic-spline curve.

test_budget_curves = []
for durations, curve_color in ((Random_Sampling_episode_durations, 'y'), (RAL_episode_durations, 'm')):
    xs = np.array(range(0, len(durations)))
    ys = np.array([(x/len(dataset.train_data))*100 for x in durations])
    xs_dense = np.linspace(xs.min(), xs.max(), 150)
    ys_smooth = make_interp_spline(xs, ys, k=3)(xs_dense)
    test_budget_curves.append((xs_dense, ys_smooth, curve_color))

plt.figure(figsize=(20,10))
for xs_dense, ys_smooth, curve_color in test_budget_curves:
    plt.plot(xs_dense, ys_smooth, color=curve_color, linewidth=2.5)
plt.legend(["RS", "RAL"])
plt.xlabel("Episode")
plt.ylabel("Budget")

plt.show()

In [None]:
# wandb.finish()