# Training and parameter optimisation notebook

In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# Load IPython's autoreload extension. Mode 2 reloads all imported modules
# before each cell executes, so edits to project modules are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import sys

# Add the current working directory to the module search path.
# NOTE(review): presumably this lets the project-local `experiments` and
# `training` modules imported in later cells resolve — confirm which
# directory the notebook is expected to be launched from.
sys.path.append('.')

In [3]:
from vimms.Common import create_if_not_exist, save_obj
from vimms_gym.common import METHOD_PPO, METHOD_PPO_RECURRENT, METHOD_DQN, ALPHA, BETA, EVAL_METRIC_REWARD, \
    EVAL_METRIC_F1, EVAL_METRIC_COVERAGE_PROP, EVAL_METRIC_INTENSITY_PROP, \
    EVAL_METRIC_MS1_MS2_RATIO, EVAL_METRIC_EFFICIENCY
from experiments import preset_qcb_small, ENV_QCB_SMALL_GAUSSIAN, ENV_QCB_MEDIUM_GAUSSIAN, \
    ENV_QCB_LARGE_GAUSSIAN, ENV_QCB_SMALL_EXTRACTED, ENV_QCB_MEDIUM_EXTRACTED, \
    ENV_QCB_LARGE_EXTRACTED, preset_qcb_medium, preset_qcb_large

In [4]:
from training import train, tune

### Parameters

In [5]:
# Experiment configuration for one run of this notebook.

# Key used to pick an entry from the `presets` lookup built in a later cell.
# NOTE(review): that lookup is keyed by the ENV_QCB_* constants; this string
# must equal one of them — consider using the constant directly.
preset = 'QCB_chems_small'

# Step counts are whole numbers, so store them as ints rather than floats.
# The scientific notation is kept for readability; int(2E5) == 200000 exactly.
timesteps = int(2E5)
n_trials = 3
n_eval_episodes = 10

# NOTE(review): eval_freq is larger than timesteps for this configuration —
# confirm that evaluating less often than the training budget is intended.
eval_freq = int(1E6)

# Metric forwarded to tune() and embedded in the output directory name.
eval_metric = EVAL_METRIC_F1

In [6]:
# Alternative configuration, kept disabled. Uncomment these lines to override
# the values assigned in the previous cell.
# preset = 'QCB_resimulated_medium'
# timesteps = 5E5
# n_trials = 10
# n_eval_episodes = 5
# eval_freq = 1E5
# eval_metric = EVAL_METRIC_REWARD

In [7]:
# Tuning switches. Both are forwarded to tune(); if either one is True the
# notebook calls tune() instead of train().
tune_model, tune_reward = True, True

# Reward parameters handed to the preset function. When tune_reward is True,
# a later cell replaces both with None before they are used.
alpha, beta = 0.25, 0.00

In [8]:
# Method to train or tune.
# NOTE(review): the saved output further down reports an existing study named
# 'DQN', which does not match this selection — confirm which method the
# recorded run actually used.
model_name = METHOD_PPO_RECURRENT

# Verbosity level forwarded to train() / tune().
verbose = 0

# Root directory for this preset's results: the relative path tune/<preset>.
results = os.path.join('tune', preset)

In [9]:
# Build the output directory for this run. The directory name encodes the
# evaluation metric, the training budget and the reward parameters, and the
# model name is appended as the final path component.
if tune_reward:
    # alpha/beta are cleared before being handed to the preset function;
    # presumably tune() then chooses them itself — confirm.
    alpha = None
    beta = None
    reward_tag = 'alpha_auto_beta_auto'
else:
    # If this cell was previously run with tune_reward=True, alpha and beta
    # are still None in the kernel. Fail with a clear message instead of an
    # opaque TypeError from the '%.2f' format below.
    if alpha is None or beta is None:
        raise ValueError(
            'alpha and beta must be numbers when tune_reward is False; '
            're-run the cell that defines them'
        )
    reward_tag = 'alpha_%.2f_beta_%.2f' % (alpha, beta)

out_dir = 'metric_%s_timesteps_%d_%s' % (eval_metric, timesteps, reward_tag)
out_dir = os.path.join(results, out_dir, model_name)
out_dir = os.path.abspath(out_dir)
create_if_not_exist(out_dir)

# Only the last expression of a cell is displayed, so echo the resolved path
# here rather than in the middle of the cell.
out_dir

In [10]:
# Registry of selectable environments. Each entry maps an environment name to
# the preset function that builds it ('f') and to a flag ('extract') that is
# forwarded as `extract_chromatograms`.
presets = {
    env_name: {'f': make_preset, 'extract': use_extracted}
    for env_name, make_preset, use_extracted in (
        (ENV_QCB_SMALL_GAUSSIAN, preset_qcb_small, False),
        (ENV_QCB_MEDIUM_GAUSSIAN, preset_qcb_medium, False),
        (ENV_QCB_LARGE_GAUSSIAN, preset_qcb_large, False),
        (ENV_QCB_SMALL_EXTRACTED, preset_qcb_small, True),
        (ENV_QCB_MEDIUM_EXTRACTED, preset_qcb_medium, True),
        (ENV_QCB_LARGE_EXTRACTED, preset_qcb_large, True),
    )
}

# Look the chosen preset up once, then unpack its two fields.
selected = presets[preset]
preset_func = selected['f']
extract = selected['extract']

# The preset function returns the environment parameters and the peak limit
# that are passed on to train() / tune().
params, max_peaks = preset_func(
    model_name,
    alpha=alpha,
    beta=beta,
    extract_chromatograms=extract,
)

2022-08-21 21:33:21.117 | INFO     | experiments:get_samplers:238 - Loaded /notebooks/vimms-gym/pickles/samplers_QCB_small_gaussian.p


In [11]:
# Run the experiment: call tune() when either tuning flag is set, otherwise
# do a single train() run with the parameters built above.
run_tuning = tune_model or tune_reward
if not run_tuning:
    train(model_name, timesteps, params, max_peaks, out_dir, verbose=verbose)
else:
    # eval_freq is cast to int at the call site before being handed to tune().
    tune(
        model_name, timesteps, params, max_peaks, out_dir, n_trials,
        n_eval_episodes, int(eval_freq), eval_metric,
        tune_model, tune_reward,
        verbose=verbose,
    )

2022-08-21 21:33:21.153 | INFO     | training:tune:72 - Doing 1 intermediate evaluations for pruning based on the number of timesteps. (1 evaluation every 1000000 timesteps)
[32m[I 2022-08-21 21:33:21,358][0m Using an existing study with name 'DQN' instead of creating a new one.[0m
[32m[I 2022-08-21 21:39:45,066][0m Trial 40 pruned. [0m
[32m[I 2022-08-21 21:48:33,677][0m Trial 41 finished with value: 0.4849345763481348 and parameters: {'gamma': 0.99, 'learning_rate': 0.0028518895639868505, 'lr_schedule': 'linear', 'batch_size': 128, 'buffer_size': 50000, 'exploration_final_eps': 0.08448814796533216, 'exploration_fraction': 0.0836859793307714, 'target_update_interval': 5000, 'learning_starts': 0, 'train_freq': 4, 'subsample_steps': 2, 'net_arch': 'small', 'alpha': 0.13822155728746427, 'beta': 0.645306790984753}. Best is trial 23 with value: 0.49883431650551363.[0m
[32m[I 2022-08-21 21:57:58,737][0m Trial 42 pruned. [0m
