# Training and parameter optimisation notebook

In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# Enable the autoreload extension in mode 2 so edits to imported modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import sys

# Add the current working directory to the module search path.
# NOTE(review): presumably so the local `experiments` and `training` modules
# imported below resolve; this depends on the directory the kernel was started in.
sys.path.append('.')

In [None]:
from vimms.Common import create_if_not_exist, save_obj
from vimms_gym.common import METHOD_PPO, METHOD_PPO_RECURRENT, METHOD_DQN, ALPHA, BETA, EVAL_METRIC_REWARD, \
    EVAL_METRIC_F1, EVAL_METRIC_COVERAGE_PROP, EVAL_METRIC_INTENSITY_PROP, \
    EVAL_METRIC_MS1_MS2_RATIO, EVAL_METRIC_EFFICIENCY
from experiments import preset_qcb_small, ENV_QCB_SMALL_GAUSSIAN, ENV_QCB_MEDIUM_GAUSSIAN, \
    ENV_QCB_LARGE_GAUSSIAN, ENV_QCB_SMALL_EXTRACTED, ENV_QCB_MEDIUM_EXTRACTED, \
    ENV_QCB_LARGE_EXTRACTED, preset_qcb_medium, preset_qcb_large

In [None]:
from training import train, tune

### Parameters

In [None]:
# Alternative parameter set, currently disabled (every line is commented out);
# the values actually used are assigned in the following cell.
# preset = 'QCB_chems_small'
# timesteps = 1E5
# n_trials = 30
# n_eval_episodes = 10
# eval_freq = 5E4
# eval_metric = EVAL_METRIC_REWARD

In [3]:
# Key into the `presets` table defined further down; also used as the
# sub-directory name under the results root.
preset = 'QCB_resimulated_medium'

# Metric handed to tune() and embedded in the output directory name.
eval_metric = EVAL_METRIC_REWARD

# Training budget passed to tune()/train(); also formatted with %d into the
# output directory name.
timesteps = 5e5

# Settings forwarded to tune(); eval_freq is cast to int at the call site.
n_trials = 10
n_eval_episodes = 5
eval_freq = 1e5

In [5]:
# Switches forwarded to tune(). If either is on, the final cell calls tune();
# otherwise it calls train().
tune_reward = False
tune_model = True

# Values passed to the preset function as alpha/beta. A later cell resets
# both to None when tune_reward is on.
alpha, beta = 0.25, 0.0

In [6]:
# Model identifier; passed to the preset function and to tune()/train(), and
# used as the innermost output sub-directory.
model_name = METHOD_PPO_RECURRENT

# Verbosity level forwarded to tune()/train().
verbose = 0

# Root folder for this preset's results (relative to the working directory).
results = os.path.join('tune', preset)

In [8]:
# Build the absolute output directory for this run and make sure it exists.
# Layout: <results>/<run name>/<model_name>, where the run name encodes the
# evaluation metric, the number of timesteps and -- unless the reward is being
# tuned -- the fixed alpha/beta values.
if tune_reward:
    # alpha and beta are not fixed when the reward is tuned, so they are
    # cleared here and left out of the directory name.
    alpha = None
    beta = None
    run_name = 'metric_%s_timesteps_%d' % (eval_metric, timesteps)
else:
    run_name = 'metric_%s_timesteps_%d_alpha_%.2f_beta_%.2f' % (
        eval_metric, timesteps, alpha, beta)

out_dir = os.path.abspath(os.path.join(results, run_name, model_name))
create_if_not_exist(out_dir)

# Keep this as the last expression of the cell so the notebook displays the
# resolved path; a bare expression anywhere else in a cell is discarded.
out_dir

'/notebooks/vimms-gym/tune/QCB_resimulated_medium/metric_reward_timesteps_500000_alpha_0.25_beta_0.00/RecurrentPPO'

In [None]:
# Table of known environment presets: each name maps to the function that
# builds its parameters ('f') and the value passed to that function as
# `extract_chromatograms` ('extract').
presets = {
    env_name: {'f': factory, 'extract': use_extracted}
    for env_name, factory, use_extracted in (
        (ENV_QCB_SMALL_GAUSSIAN, preset_qcb_small, False),
        (ENV_QCB_MEDIUM_GAUSSIAN, preset_qcb_medium, False),
        (ENV_QCB_LARGE_GAUSSIAN, preset_qcb_large, False),
        (ENV_QCB_SMALL_EXTRACTED, preset_qcb_small, True),
        (ENV_QCB_MEDIUM_EXTRACTED, preset_qcb_medium, True),
        (ENV_QCB_LARGE_EXTRACTED, preset_qcb_large, True),
    )
}

# Look the selected preset up once; an unknown name raises KeyError here.
chosen = presets[preset]
preset_func = chosen['f']
extract = chosen['extract']

# Build the parameters and the max_peaks value for the selected preset.
params, max_peaks = preset_func(
    model_name,
    alpha=alpha,
    beta=beta,
    extract_chromatograms=extract,
)

In [None]:
# Run the experiment: a plain training run when neither tuning switch is on,
# otherwise a tuning run.
# NOTE(review): eval_freq is cast to int here but timesteps is passed through
# as assigned (a float in the parameters cell) -- confirm train()/tune()
# accept that.
if not (tune_model or tune_reward):
    train(model_name, timesteps, params, max_peaks, out_dir, verbose=verbose)
else:
    tune(
        model_name, timesteps, params, max_peaks, out_dir,
        n_trials, n_eval_episodes, int(eval_freq), eval_metric,
        tune_model, tune_reward,
        verbose=verbose,
    )