In [None]:
%matplotlib inline
import numpy as np
import sys
import tensorflow as tf
import copy
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
# depending on the classification model use, we might need to import other packages
#from sklearn import svm
#from sklearn.ensemble import RandomForestClassifier
import sklearn
from matplotlib import pyplot as plt
import pickle as pkl

from datasets import DatasetUCI
from envs import LalEnvFirstAccuracy

from estimator import Estimator
from helpers import Minibatch, ReplayBuffer
from dqn import DQN

import os

Strategies to test.

In [None]:
# Each switch enables one query strategy in the comparison. A strategy that is
# switched off is neither imported nor executed in the experiment loop.
reinforced_active_learning = True  # DQN agent loaded from DIRNAME_TRANSFER
uncertainty_sampling = True        # policy_uncertainty baseline
random_sampling = True             # policy_random baseline

Experiment parameters.

In [None]:
# --- Experiment loop ---------------------------------------------------------
N_EXPERIMENTS = 1000  # Number of rounds executed for every enabled strategy.
MAX_BATCH = 20        # Passed as max_batch to the policies and the batch-size search.

# --- Dataset -----------------------------------------------------------------
test_dataset_names = ['waveform']
N_STATE_ESTIMATION = 30  # Also used as the DQN observation_length.
SUBSET = -1  # Choose -1 for using all datapoints, 0 for even, 1 for odd.
SIZE = -1

# --- Classifier and evaluation -----------------------------------------------
N_JOBS = 1  # Can set more if we want to parallelise.
QUALITY_METHOD = metrics.accuracy_score  # Handed to the environment as quality_method.

# --- Paths -------------------------------------------------------------------
# Directory given to DQN(experiment_dir=...) when the agent is loaded.
DIRNAME_TRANSFER = './agents/1-australian-logreg-8-to-1/'
# Pickle file the collected scores and durations are written to (a file path,
# despite the DIRNAME_ prefix).
DIRNAME_RESULTS = './AL_results/test-agent-australian.p'

In [None]:
# Create the folder to store the results from the experiments.
# exist_ok=True makes the call a no-op when the folder is already present, so
# the cell can be re-run safely and there is no window between an existence
# check and the creation in which another process could create the folder.
results_path = r'./AL_results'
os.makedirs(results_path, exist_ok=True)

Models for classifier.

- Logistic Regression: LogisticRegression(n_jobs=N_JOBS)
- SVM: svm.SVC(probability=True)
- RF: RandomForestClassifier(50, oob_score=True, n_jobs=N_JOBS)

In [None]:
# Test dataset(s), configured from the experiment parameters.
dataset = DatasetUCI(
    possible_names=test_dataset_names,
    n_state_estimation=N_STATE_ESTIMATION,
    subset=SUBSET,
    size=SIZE,
)

# Classifier handed to the environment; swap in one of the alternatives listed
# above to test a different model.
model = LogisticRegression(n_jobs=N_JOBS)

# Environment shared by all strategies; each round works on a deep copy of it.
env = LalEnvFirstAccuracy(dataset, model, quality_method=QUALITY_METHOD)

Prepare AL methods.

Methods for random sampling and uncertainty sampling.

In [None]:
# Only the baseline policies that are switched on get imported.
if uncertainty_sampling:
    from Test_AL import policy_uncertainty
if random_sampling:
    from Test_AL import policy_random

Load RL model.

In [None]:
if reinforced_active_learning:
    from Test_AL import policy_rl

    # Reset the default TensorFlow graph before the agent is constructed.
    tf.reset_default_graph()

    # Build the DQN agent, pointing it at the directory in DIRNAME_TRANSFER.
    agent = DQN(
        experiment_dir=DIRNAME_TRANSFER,
        observation_length=N_STATE_ESTIMATION,
        learning_rate=1e-3,
        batch_size=32,
        target_copy_factor=0.01,
        bias_average=0,
    )

Run the experiments.

In [None]:
# Run every enabled strategy N_EXPERIMENTS times and collect, per round, the
# environment's episode_qualities and a "duration" counter (sum of the batch
# sizes requested during the episode). Everything is pickled to
# DIRNAME_RESULTS at the end.
#
# NOTE(review): every strategy starts its duration counter at 6 and the RL
# batch-size search also starts at 6 — presumably the size of the initial
# labelled set / first batch; confirm against the environment.
all_scores_rand = []
all_scores_uncert = []
all_scores_rl = []
all_durations_rand = []
all_durations_uncert = []
all_durations_rl = []

for experiment in range(N_EXPERIMENTS):

    print("Round {}.".format(experiment+1))

    if random_sampling:
        duration = 6
        # Work on a copy so every strategy starts from the same environment.
        env_rand = copy.deepcopy(env)
        state_rand, next_action_rand, reward_rand = env_rand.reset()
        done_rand = False
        while not done_rand:
            action_rand, done_rand = policy_random(env_rand.n_actions, max_batch=MAX_BATCH)
            if done_rand:
                # Must be `+=`: the former `=+` assigned +len(...) and threw
                # away the duration accumulated over the episode.
                duration += len(action_rand)
                break
            _, _, _, done_rand = env_rand.step(action_rand)
            duration += len(action_rand)
        all_scores_rand.append(env_rand.episode_qualities)
        all_durations_rand.append(duration)

    if uncertainty_sampling:
        duration = 6
        env_uncert = copy.deepcopy(env)
        state_uncert, next_action_uncert, reward_uncert = env_uncert.reset()
        done_uncert = False
        while not done_uncert:
            action_uncert = policy_uncertainty(next_action_uncert[0,:], env_uncert.n_actions, max_batch=MAX_BATCH)
            # done_uncert is only reassigned by env_uncert.step() below, so it
            # is still False here and this branch does not fire in the current
            # code; it is kept (with the corrected `+=`) to mirror the
            # random-sampling loop.
            if done_uncert:
                duration += len(action_uncert)
                break
            next_state_uncert, next_action_uncert, reward_uncert, done_uncert = env_uncert.step(action_uncert)
            duration += len(action_uncert)
        all_scores_uncert.append(env_uncert.episode_qualities)
        all_durations_uncert.append(duration)

    if reinforced_active_learning:
        test_batch = 6
        duration = 6
        env_rl = copy.deepcopy(env)
        state_rl, next_action_rl, reward_rl = env_rl.reset()
        done_rl = False
        while not done_rl:
            # Let the environment pick the next batch size from the previous
            # batch size and the last reward.
            test_batch = env_rl._find_batch_size(test_batch, reward_rl, env_rl.n_actions, max_batch=MAX_BATCH)
            if test_batch > env_rl.n_actions:
                # The requested batch exceeds the remaining actions: count it
                # and end the episode without stepping the environment.
                duration += test_batch
                done_rl = True
            else:
                action_rl = policy_rl(agent, state_rl, next_action_rl, test_batch)
                next_state_rl, next_action_rl, reward_rl, done_rl = env_rl.step(action_rl)
                state_rl = next_state_rl
                duration += test_batch
        all_scores_rl.append(env_rl.episode_qualities)
        all_durations_rl.append(duration)

# Record the results. Lists of disabled strategies are stored empty.
all_results = {
    'all_scores_rand': all_scores_rand,
    'all_scores_uncert': all_scores_uncert,
    'all_scores_rl': all_scores_rl,
    'all_durations_rand': all_durations_rand,
    'all_durations_uncert': all_durations_uncert,
    'all_durations_rl': all_durations_rl,
}
with open(DIRNAME_RESULTS, 'wb') as results_file:
    pkl.dump(all_results, results_file)

Load the results.

In [None]:
# Read the results back from disk. The with-block closes the file handle as
# soon as unpickling finishes instead of leaving it open for the kernel's
# lifetime.
# NOTE: unpickling can execute arbitrary code — only load result files written
# by this notebook or another trusted source.
with open(DIRNAME_RESULTS, "rb") as results_file:
    all_results = pkl.load(results_file)

In [None]:
# Pull the per-round score lists of the three strategies out of the results.
all_scores_rand, all_scores_uncert, all_scores_reinforced_active_learning = (
    all_results['all_scores_rand'],
    all_results['all_scores_uncert'],
    all_results['all_scores_rl'],
)

In [None]:
# For every strategy, keep the highest value recorded within each episode.

# Random Sampling
random_sampling_scores = [
    np.array(episode_scores).max() for episode_scores in all_scores_rand
]

# Uncertainty Sampling
uncertainty_sampling_scores = [
    np.array(episode_scores).max() for episode_scores in all_scores_uncert
]

# Reinforced Active Learning
reinforced_active_learning_scores = [
    np.array(episode_scores).max()
    for episode_scores in all_scores_reinforced_active_learning
]

In [None]:
# Plot the per-episode maximum score of each strategy and save the figure.

# gaussian_filter1d is only needed by the optional smoothing lines below. It is
# imported from scipy.ndimage because the scipy.ndimage.filters namespace is
# deprecated.
from scipy.ndimage import gaussian_filter1d

plt.figure(figsize=(30,10))

# Random sampling
xpoints_rand = np.arange(len(random_sampling_scores))
ypoints_rand = np.array(random_sampling_scores)
# ypoints_rand = gaussian_filter1d(ypoints_rand, sigma=1)
plt.plot(xpoints_rand, ypoints_rand, color='m', label='RANDOM')

# Uncertainty sampling (variable names now match the data they hold)
xpoints_uncert = np.arange(len(uncertainty_sampling_scores))
ypoints_uncert = np.array(uncertainty_sampling_scores)
# ypoints_uncert = gaussian_filter1d(ypoints_uncert, sigma=1)
plt.plot(xpoints_uncert, ypoints_uncert, color='c', label='UNCERTAINTY')

# Reinforced active learning
xpoints_rl = np.arange(len(reinforced_active_learning_scores))
ypoints_rl = np.array(reinforced_active_learning_scores)
# ypoints_rl = gaussian_filter1d(ypoints_rl, sigma=1)
plt.plot(xpoints_rl, ypoints_rl, color='k', label='RAL')

plt.title("ACC per episode")
plt.xlabel("Episodes")
plt.ylabel("ACC")

plt.legend()

# savefig does not create missing folders, so make sure the target exists.
output_path = "Output images/TESTING, Max ACC per episode.png"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
plt.savefig(output_path)

plt.show()