In [1]:
%matplotlib inline
import numpy as np
import sys
import tensorflow as tf
import copy
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
# depending on the classification model used, we might need to import other packages
#from sklearn import svm
#from sklearn.ensemble import RandomForestClassifier
import sklearn
from matplotlib import pyplot as plt
import pickle as pkl

from datasets import DatasetUCI
from envs import LalEnvFirstAccuracy

from estimator import Estimator
from helpers import Minibatch, ReplayBuffer
from dqn import DQN

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


Strategies to test

In [3]:
# --- Strategies evaluated in this run ---
rs = True  # random sampling
us = True  # uncertainty sampling
rl = True  # LAL-RL strategy learnt on other datasets

# --- Strategies switched off in this run ---
rl_notransfer = False  # LAL-RL strategy learnt on the same dataset (another half)
lal = False  # LAL-independent and LAL-iterative strategies
albe = False  # ALBE strategy that learns a combination of rs, us and quire
quire = False  # QUIRE strategy

## Experiment parameters

In [4]:
# --- Paths ---
# Experiment directory handed to the DQN agent when it is constructed.
DIRNAME_TRANSFER = './agents/1-australian-logreg-8-to-1/'
# Not referenced by any visible cell in this notebook.
DIRNAME_NOTRANSFER = ''
# Despite the DIRNAME_ prefix this is the pickle *file* the scores are written to and read from.
DIRNAME_RESULTS = './AL_results/test-agent-australian.p'

# --- Dataset ---
test_dataset_names = ['australian']
SUBSET = -1 # -1 for using all datapoints, 0 for even, 1 for odd
SIZE = 100
# Passed to DatasetUCI as n_state_estimation and to DQN as observation_length.
N_STATE_ESTIMATION = 30

# --- Classifier and quality measure ---
N_JOBS = 1 # can set more if we want to parallelise
QUALITY_METHOD = metrics.accuracy_score

# --- Evaluation ---
# Number of iterations of the experiment loop.
N_EXPERIMENTS = 500
# Not referenced by any visible cell in this notebook.
TOLERANCE_LEVEL = 0.98

Different models can be used as the classifier:
<br>
`LogisticRegression(n_jobs=N_JOBS)` <br>
SVM: <br>
`svm.SVC(probability=True)` <br>
RF: <br>
`RandomForestClassifier(50, oob_score=True, n_jobs=N_JOBS)`

In [6]:
# Dataset wrapper configured from the experiment parameters.
dataset = DatasetUCI(
    possible_names=test_dataset_names,
    n_state_estimation=N_STATE_ESTIMATION,
    subset=SUBSET,
    size=SIZE,
)

# Classifier handed to the environment together with the dataset.
model = LogisticRegression(n_jobs=N_JOBS)

# Environment built from the dataset and classifier, scored with QUALITY_METHOD.
env = LalEnvFirstAccuracy(dataset, model, quality_method=QUALITY_METHOD)

## Prepare AL methods

#### Methods for random sampling and uncertainty sampling

In [7]:
# Import only the baseline policies whose strategy flag is switched on.
if rs:
    from Test_AL import policy_random
if us:
    from Test_AL import policy_uncertainty

#### Load RL model

In [8]:
if rl:
    from Test_AL import policy_rl

    # Start from an empty default TensorFlow graph before the agent is built.
    tf.reset_default_graph()

    # Build the DQN agent with DIRNAME_TRANSFER as its experiment directory.
    agent = DQN(
        experiment_dir=DIRNAME_TRANSFER,
        observation_length=N_STATE_ESTIMATION,
        learning_rate=1e-3,
        batch_size=32,
        target_copy_factor=0.01,
        bias_average=0,
    )


The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from ./agents/1-australian-logreg-8-to-1/checkpoints/model
Loading checkpoint ./agents/1-australian-logreg-8-to-1/checkpoints/model...



## Run the experiments

In [9]:
def run_episode(env_start, state_start, next_action_state_start, select_action):
    """Play one episode on a private copy of the environment.

    The environment and the initial state are deep-copied, so every strategy
    starts from the same point and the caller's objects are left untouched.

    Args:
        env_start: environment right after ``reset()``; it is copied, not modified.
        state_start: first value returned by ``env.reset()``.
        next_action_state_start: second value returned by ``env.reset()``.
        select_action: callable ``(env, state, next_action_state) -> action``
            that is asked for an action before every step.

    Returns:
        The ``episode_qualities`` attribute of the copied environment once
        ``step()`` has reported ``done``.
    """
    env_episode = copy.deepcopy(env_start)
    state_current = copy.deepcopy(state_start)
    next_action_state_current = next_action_state_start
    done = False
    while not done:
        action = select_action(env_episode, state_current, next_action_state_current)
        # step() is unpacked as (state, next_action_state, reward, done); the reward is not used here.
        state_current, next_action_state_current, _, done = env_episode.step(action)
    return env_episode.episode_qualities


# Results will be stored in all_results dictionary
all_results = {}
all_scores_rand = []
all_scores_uncert = []
all_scores_rl = []
# The lists below are not filled by this cell; they are kept so the names stay defined.
all_scores_rl_notransfer = []
all_scores_LAL_independant = []
all_scores_LAL_iterative = []
all_scores_ALBE = []
all_scores_QUIRE = []

# NOTE(review): the output saved with this notebook shows this cell stopping at
# experiment 0 with "ValueError: too many values to unpack (expected 2)". The only
# two-value unpacking in this cell is the env.reset() call below, but the failing
# line may also be inside a callee - confirm against the full traceback.
for experiment in range(N_EXPERIMENTS):
    print(experiment, end=' ')
    # reset the environment
    state, next_action_state = env.reset()

    if rs:
        # Random sampling only needs the current number of available actions.
        all_scores_rand.append(run_episode(
            env, state, next_action_state,
            lambda env_ep, state_ep, next_action_state_ep: policy_random(env_ep.n_actions),
        ))
    if us:
        # Uncertainty sampling looks at the first row of the next-action state.
        # to see the prob of selected action: taken_action_state = next_action_state_ep[:,action]
        all_scores_uncert.append(run_episode(
            env, state, next_action_state,
            lambda env_ep, state_ep, next_action_state_ep: policy_uncertainty(next_action_state_ep[0,:]),
        ))
    if rl:
        # The learnt policy is given the agent, the current state and the next-action state.
        all_scores_rl.append(run_episode(
            env, state, next_action_state,
            lambda env_ep, state_ep, next_action_state_ep: policy_rl(agent, state_ep, next_action_state_ep),
        ))

# record the results
all_results['all_scores_rand'] = all_scores_rand
all_results['all_scores_uncert'] = all_scores_uncert
all_results['all_scores_rl'] = all_scores_rl
# Use a context manager so the file is flushed and closed even if pickling fails.
with open(DIRNAME_RESULTS, "wb") as results_file:
    pkl.dump(all_results, results_file)

0 

ValueError: too many values to unpack (expected 2)

## Load the results

In [None]:
# NOTE: unpickling can execute arbitrary code; only load result files you trust.
# The context manager closes the file handle as soon as the results are read.
with open(DIRNAME_RESULTS, "rb") as results_file:
    all_results = pkl.load(results_file)

In [None]:
# Pull the per-strategy score lists back out of the loaded results dictionary.
all_scores_rand, all_scores_uncert, all_scores_rl = (
    all_results[key]
    for key in ('all_scores_rand', 'all_scores_uncert', 'all_scores_rl')
)

## Check the results

#### Compute the mean duration, its standard deviation, median and max.

In [None]:
# NOTE(review): no visible cell imports check_performance, so this cell raises
# NameError on a fresh kernel. It is presumably defined in Test_AL next to the
# policy_* helpers - confirm the module before relying on this import.
from Test_AL import check_performance

# NOTE: each branch overwrites all_scores_* with the first value returned by
# check_performance, so re-running this cell feeds already-processed scores back
# in. Reload the results before running it again.
max_duration = 0  # longest duration seen across the enabled strategies
if rs:
    print("Random")
    all_scores_rand, all_durations_rand = check_performance(all_scores_rand)
    max_duration = max(max_duration, max(all_durations_rand))
if us:
    print("Uncertainty")
    all_scores_uncert, all_durations_uncert = check_performance(all_scores_uncert)
    max_duration = max(max_duration, max(all_durations_uncert))
if rl:
    print("RL")
    all_scores_rl, all_durations_rl = check_performance(all_scores_rl)
    max_duration = max(max_duration, max(all_durations_rl))

#### Compute the relative scores that can be used to plot the results

In [None]:
# NOTE(review): no visible cell imports check_performance_for_figure; it is
# presumably defined in Test_AL next to the policy_* helpers - confirm.
from Test_AL import check_performance_for_figure

# Only process strategies that were actually run, matching the rs/us/rl guards
# used by the surrounding cells; a disabled strategy has no scores to convert.
if rs:
    scores_relative_rand = check_performance_for_figure(all_scores_rand, max_duration)
if us:
    scores_relative_uncert = check_performance_for_figure(all_scores_uncert, max_duration)
if rl:
    scores_relative_rl = check_performance_for_figure(all_scores_rl, max_duration)

#### Plot the results

In [None]:
plt.figure(figsize=(20,10))

# (relative scores, legend label, colour) for every enabled strategy.
curves = []
if rs:
    curves.append((scores_relative_rand, 'random', 'k'))
if us:
    curves.append((scores_relative_uncert, 'uncertainty', 'b'))
if rl:
    curves.append((scores_relative_rl, 'rl', 'red'))

for scores_relative, curve_label, curve_color in curves:
    # Mean curve over axis 0 of the relative scores.
    m_line = np.mean(scores_relative, axis=0)
    # NOTE: the shaded band is mean +/- variance (np.var), not standard deviation.
    var_line = np.var(scores_relative, axis=0)
    plt.plot(m_line, linewidth=2.0, label=curve_label, color=curve_color)
    plt.fill_between(range(np.size(m_line)), m_line - var_line, m_line + var_line, color=curve_color, alpha=0.2)

# The curves are labelled, but labels only become visible once a legend is drawn.
if curves:
    plt.legend()