# Training and parameter optimisation notebook

In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# Load the autoreload extension and reload imported modules before each cell
# runs, so edits to local modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import sys

sys.path.append('.')

In [3]:
from vimms.Common import create_if_not_exist, save_obj
from vimms_gym.common import METHOD_PPO, METHOD_DQN, ALPHA, BETA, EVAL_METRIC_REWARD, \
    EVAL_METRIC_F1, EVAL_METRIC_COVERAGE_PROP, EVAL_METRIC_INTENSITY_PROP, \
    EVAL_METRIC_MS1_MS2_RATIO, EVAL_METRIC_EFFICIENCY
from experiments import preset_qcb_small, ENV_QCB_SMALL_GAUSSIAN, ENV_QCB_MEDIUM_GAUSSIAN, \
    ENV_QCB_LARGE_GAUSSIAN, ENV_QCB_SMALL_EXTRACTED, ENV_QCB_MEDIUM_EXTRACTED, \
    ENV_QCB_LARGE_EXTRACTED, preset_qcb_medium, preset_qcb_large

In [4]:
from training import train, tune

### Parameters

In [5]:
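# Alternative configuration (disabled, kept for reference); the active settings are in the next cell.
# preset = 'QCB_chems_small'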
# timesteps = 1E5
# n_trials = 30
# n_eval_episodes = 10
# eval_freq = 5E4
# eval_metric = EVAL_METRIC_REWARD

In [6]:
# --- Active run configuration ------------------------------------------------

# Name of the environment preset. It is used both as the key into the `presets`
# lookup table and as a path component of the results directory.
preset = 'QCB_resimulated_medium'

# Metric name; forwarded to tune() and also used as a path component of the
# results directory.
eval_metric = EVAL_METRIC_REWARD

# Budget: number of timesteps handed to train()/tune(), and number of trials
# handed to tune().
timesteps = 5e5
n_trials = 1

# Evaluation settings forwarded to tune(). `eval_freq` is kept as a float here
# and converted with int() at the call site.
n_eval_episodes = 5
eval_freq = 1e5

In [7]:
# Tuning switches; both are forwarded to tune(). If neither is set, the run
# falls back to a plain train() call.
tune_model, tune_reward = True, False

# Fixed reward weights handed to the preset function. They are only used when
# `tune_reward` is False; otherwise a later cell replaces them with None.
alpha, beta = 0.25, 0.00

In [8]:
# Which method to run, and the verbosity level passed to train()/tune().
model_name = METHOD_PPO
verbose = 0

# Root folder for this experiment's outputs: tune/<eval_metric>/<preset>
results = os.path.join('tune', eval_metric, preset)

In [9]:
# Work out where this run writes its results:
#   <results>/results/<model_name>                        when tuning the reward
#   <results>/results_alpha_X.XX_beta_Y.YY/<model_name>   when alpha/beta are fixed
if tune_reward:
    # No fixed reward weights in this mode: alpha/beta are passed on as None.
    # NOTE: this overwrites the values set in the flags cell, so re-run that
    # cell before switching `tune_reward` back to False (formatting None with
    # '%.2f' below would raise a TypeError).
    alpha = beta = None
    results_subdir = 'results'
else:
    results_subdir = 'results_alpha_%.2f_beta_%.2f' % (alpha, beta)

out_dir = os.path.abspath(os.path.join(results, results_subdir, model_name))
create_if_not_exist(out_dir)

In [10]:
# Registry of the available environment presets. Each entry holds:
#   'f'       - the preset function that builds the parameters
#   'extract' - the flag passed to that function as `extract_chromatograms`
presets = {
    ENV_QCB_SMALL_GAUSSIAN: {'f': preset_qcb_small, 'extract': False},
    ENV_QCB_MEDIUM_GAUSSIAN: {'f': preset_qcb_medium, 'extract': False},
    ENV_QCB_LARGE_GAUSSIAN: {'f': preset_qcb_large, 'extract': False},
    ENV_QCB_SMALL_EXTRACTED: {'f': preset_qcb_small, 'extract': True},
    ENV_QCB_MEDIUM_EXTRACTED: {'f': preset_qcb_medium, 'extract': True},
    ENV_QCB_LARGE_EXTRACTED: {'f': preset_qcb_large, 'extract': True},
}

# `preset` is a free-form string set in the parameters cell, so fail early with
# an actionable message (still a KeyError) if it does not match a known preset.
if preset not in presets:
    raise KeyError('Unknown preset %r; expected one of: %s'
                   % (preset, ', '.join(str(name) for name in presets)))

preset_config = presets[preset]
preset_func = preset_config['f']
extract = preset_config['extract']

# Build the parameters and the peak limit for the chosen method and reward
# weights (alpha/beta are None when the reward is being tuned).
params, max_peaks = preset_func(model_name, alpha=alpha, beta=beta,
                                extract_chromatograms=extract)

2022-08-16 12:51:37.640 | INFO     | experiments:get_samplers:229 - Loaded /notebooks/vimms-gym/pickles/samplers_QCB_medium_extracted.p


In [None]:
# Launch the run: go through tune() when either tuning flag is enabled,
# otherwise make a single train() call with the preset parameters.
run_tuning = tune_model or tune_reward
if not run_tuning:
    train(model_name, timesteps, params, max_peaks, out_dir, verbose=verbose)
else:
    # eval_freq is configured as a float (e.g. 1E5), so it is cast to int here.
    tune(model_name, timesteps, params, max_peaks, out_dir, n_trials, n_eval_episodes,
         int(eval_freq), eval_metric, tune_model, tune_reward, verbose=verbose)

2022-08-16 12:51:37.700 | INFO     | training:tune:82 - Doing 5 intermediate evaluations for pruning based on the number of timesteps. (1 evaluation every 100000 timesteps)
[32m[I 2022-08-16 12:51:37,969][0m Using an existing study with name 'PPO' instead of creating a new one.[0m
