# Debugging DQN_9

DQN_9 was the best-performing model (by F1-score) saved during the Optuna optimisation. However, during validation its results were poor. This notebook troubleshoots that by comparing the saved model against models re-trained with the same best parameters. It turns out that the problem is that the best-performing model wasn't saved correctly.

In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# Reload imported modules automatically before each cell executes, so edits
# to project code are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import sys
from os.path import exists

# Extend the module search path two directories up from the notebook.
# NOTE(review): presumably so that project-local modules such as
# `experiments` and `tune` can be imported — confirm against the repo layout.
sys.path.append('../..')

In [3]:
from timeit import default_timer as timer
from datetime import timedelta
import copy

import numpy as np
import pandas as pd

from stable_baselines3 import DQN
from stable_baselines3.common.monitor import Monitor

from vimms.Evaluation import EvaluationData
from vimms_gym.env import DDAEnv
from vimms_gym.common import EVAL_METRIC_REWARD
from vimms_gym.evaluation import evaluate

from experiments import preset_qcb_small
from tune import TrialEvalCallback

## Define helper functions

In [4]:
def debug_run(fname, max_peaks, params, n_eval_episodes=1, deterministic=True):
    """Load a saved DQN model and roll it out for a number of evaluation episodes.

    A fresh ``DDAEnv`` is built from ``max_peaks`` and ``params``, wrapped in a
    ``Monitor`` and handed to ``TrialEvalCallback``; the callback's own
    ``eval_env`` is then stepped manually so each finished episode can be
    scored with ``evaluate``.

    Args:
        fname: path of the saved model, passed to ``DQN.load``.
        max_peaks: first positional argument of ``DDAEnv``.
        params: second positional argument of ``DDAEnv``.
        n_eval_episodes: number of episodes to run before returning.
        deterministic: forwarded to ``model.predict``.

    Returns:
        A tuple ``(episode_rewards, episode_eval_results)``:
        ``episode_rewards`` holds the summed reward of each episode and
        ``episode_eval_results`` holds the matching return values of
        ``evaluate(eval_data, format_output=False)``.

    Raises:
        RuntimeError: if an episode finishes on its very first step, before
            any evaluation snapshot has been taken.
    """
    # Replace the stored schedules/learning rate when loading.
    # NOTE(review): presumably to avoid deserialisation problems with the
    # pickled schedules; only inference is done here, so zeros are harmless.
    custom_objects = {
        "learning_rate": 0.0,
        "lr_schedule": lambda _: 0.0,
        "clip_range": lambda _: 0.0,
    }

    model = DQN.load(fname, custom_objects=custom_objects)
    eval_env = DDAEnv(max_peaks, params)
    print(eval_env.env_params)

    # wrap env in Monitor, create the trial callback
    eval_env = Monitor(eval_env)
    eval_metric = EVAL_METRIC_REWARD
    eval_callback = TrialEvalCallback(eval_env, None, eval_metric)
    env = eval_callback.eval_env

    assert eval_callback.deterministic, \
        'TrialEvalCallback is expected to evaluate deterministically'

    # actual evaluation starts here
    episode_count = 0
    current_reward = 0
    observations = env.reset()
    states = None
    episode_starts = np.ones((env.num_envs,), dtype=bool)

    episode_rewards = []
    episode_eval_results = []
    eval_data = None  # snapshot of the inner env taken after every step
    start = timer()
    while episode_count < n_eval_episodes:
        actions, states = model.predict(observations, state=states,
                                        episode_start=episode_starts,
                                        deterministic=deterministic)
        observations, rewards, dones, infos = env.step(actions)
        episode_starts = dones
        current_reward += rewards[0]

        if dones[0]:  # when done, episode would be reset automatically
            if eval_data is None:
                # No snapshot exists yet, so there is nothing to evaluate.
                raise RuntimeError(
                    'Episode %d finished on its first step; no evaluation '
                    'data is available' % episode_count)
            eval_res = evaluate(eval_data, format_output=False)
            episode_eval_results.append(eval_res)
            end = timer()
            print('Evaluation episode %d finished: metric %f, timedelta=%s' % (
                episode_count, current_reward, str(timedelta(seconds=end - start))))
            start = timer()
            episode_rewards.append(current_reward)
            episode_count += 1
            current_reward = 0

        # store previous results for evaluation before 'done'
        # this needs to be here, because VecEnv is automatically reset when done
        inner_env = env.envs[0].env
        eval_data = EvaluationData(inner_env.vimms_env)

    return episode_rewards, episode_eval_results

In [5]:
def eval_res_to_df(rewards, eval_res):
    """Summarise evaluation episodes as a table of per-metric mean and std.

    Args:
        rewards: sequence of per-episode reward values.
        eval_res: sequence of per-episode mappings, each providing the keys
            'coverage_prop', 'intensity_prop' and 'f1'.

    Returns:
        A DataFrame with columns 'metric', 'mean' and 'std' and one row each
        for 'reward', 'coverage_prop', 'intensity_prop' and 'f1', in that
        order.
    """
    # the first row comes straight from the episode rewards
    rows = [['reward', np.mean(rewards), np.std(rewards)]]

    # the remaining rows are pulled out of the per-episode result mappings
    for key in ('coverage_prop', 'intensity_prop', 'f1'):
        values = [episode[key] for episode in eval_res]
        rows.append([key, np.mean(values), np.std(values)])

    return pd.DataFrame(rows, columns=['metric', 'mean', 'std'])

## Generate environment preset

In [6]:
# Values forwarded to the preset; in the displayed result they show up under
# params['env'] as 'alpha' and 'beta'.
# NOTE(review): presumably the best values found by the Optuna study — confirm.
alpha = 0.191500954
beta = 0.030798858
extract = False  # passed to the preset as extract_chromatograms
params, max_peaks = preset_qcb_small(None, alpha=alpha, beta=beta, extract_chromatograms=extract)
params, max_peaks  # display the generated preset

2022-08-23 11:41:06.267 | INFO     | experiments:get_samplers:283 - Loaded /Users/joewandy/Work/git/vimms-gym/pickles/samplers_QCB_small_gaussian.p


({'chemical_creator': {'mz_range': (100, 110),
   'rt_range': (400, 500),
   'intensity_range': (10000.0, 1e+20),
   'n_chemicals': (20, 50),
   'mz_sampler': <vimms.ChemicalSamplers.MZMLFormulaSampler at 0x7fa7cfb06700>,
   'ri_sampler': <vimms.ChemicalSamplers.MZMLRTandIntensitySampler at 0x7fa7cfb06370>,
   'cr_sampler': <vimms.ChemicalSamplers.GaussianChromatogramSampler at 0x7fa7cfb19dc0>},
  'noise': {'enable_spike_noise': True,
   'noise_density': 0.1,
   'noise_max_val': 1000.0,
   'mz_range': (100, 110)},
  'env': {'ionisation_mode': 'Positive',
   'rt_range': (400, 500),
   'isolation_window': 0.7,
   'mz_tol': 10,
   'rt_tol': 120,
   'alpha': 0.191500954,
   'beta': 0.030798858}},
 100)

In [7]:
# Number of evaluation episodes used for every debug_run call in this notebook.
n_eval_episodes = 10

## Load best saved model from Optuna

In [8]:
# Saved model to evaluate, given as a path relative to the working directory.
fname = os.path.join('..', 'DQN', 'DQN_9.zip') 

### Deterministic = True

In [9]:
# Roll out the model in `fname` with deterministic=True, then tabulate the
# mean and std of the reward and evaluation metrics over the episodes.
rewards, eval_res = debug_run(fname, max_peaks, params, n_eval_episodes=n_eval_episodes, deterministic=True)
df = eval_res_to_df(rewards, eval_res)
df


This system does not have apparently enough memory to store the complete replay buffer 9.87GB > 0.35GB


The number of environments used is greater than the target network update interval (20 > 1), therefore the target network will be updated after each call to env.step() which corresponds to 20 steps.



{'ionisation_mode': 'Positive', 'rt_range': (400, 500), 'isolation_window': 0.7, 'mz_tol': 10, 'rt_tol': 120, 'alpha': 0.191500954, 'beta': 0.030798858}
Evaluation episode 0 finished: metric -499.000000, timedelta=0:00:00.322370
Evaluation episode 1 finished: metric -499.000000, timedelta=0:00:00.316084
Evaluation episode 2 finished: metric -499.000000, timedelta=0:00:00.315718
Evaluation episode 3 finished: metric -499.000000, timedelta=0:00:00.305773
Evaluation episode 4 finished: metric -499.000000, timedelta=0:00:00.374677
Evaluation episode 5 finished: metric -499.000000, timedelta=0:00:00.323484
Evaluation episode 6 finished: metric -499.000000, timedelta=0:00:00.326402
Evaluation episode 7 finished: metric -499.000000, timedelta=0:00:00.308955
Evaluation episode 8 finished: metric -499.000000, timedelta=0:00:00.311999
Evaluation episode 9 finished: metric -499.000000, timedelta=0:00:00.314115


Unnamed: 0,metric,mean,std
0,reward,-499.0,0.0
1,coverage_prop,0.0,0.0
2,intensity_prop,0.0,0.0
3,f1,0.0,0.0


### Deterministic = False

In [10]:
# Same evaluation of the model in `fname`, but with deterministic=False
# forwarded to model.predict.
rewards, eval_res = debug_run(fname, max_peaks, params, n_eval_episodes=n_eval_episodes, deterministic=False)
df = eval_res_to_df(rewards, eval_res)
df

{'ionisation_mode': 'Positive', 'rt_range': (400, 500), 'isolation_window': 0.7, 'mz_tol': 10, 'rt_tol': 120, 'alpha': 0.191500954, 'beta': 0.030798858}



This system does not have apparently enough memory to store the complete replay buffer 9.87GB > 0.35GB


The number of environments used is greater than the target network update interval (20 > 1), therefore the target network will be updated after each call to env.step() which corresponds to 20 steps.



Evaluation episode 0 finished: metric -471.907695, timedelta=0:00:00.305524
Evaluation episode 1 finished: metric -469.009215, timedelta=0:00:00.317895
Evaluation episode 2 finished: metric -460.838806, timedelta=0:00:00.474006
Evaluation episode 3 finished: metric -481.639023, timedelta=0:00:00.243609
Evaluation episode 4 finished: metric -474.898733, timedelta=0:00:00.396137
Evaluation episode 5 finished: metric -479.285758, timedelta=0:00:00.351159
Evaluation episode 6 finished: metric -465.367322, timedelta=0:00:00.527462
Evaluation episode 7 finished: metric -469.887116, timedelta=0:00:00.355267
Evaluation episode 8 finished: metric -474.777023, timedelta=0:00:00.256751
Evaluation episode 9 finished: metric -481.679283, timedelta=0:00:00.235541


Unnamed: 0,metric,mean,std
0,reward,-472.928997,6.550939
1,coverage_prop,0.649381,0.180341
2,intensity_prop,0.396457,0.145935
3,f1,0.226063,0.098639


## Load first re-trained model

In [11]:
# Switch to the first re-trained model file for the evaluations that follow.
fname = os.path.join('..', 'DQN', 'DQN_9_rerun_1.zip') 

### Deterministic = True

In [12]:
# Roll out the model in `fname` with deterministic=True, then tabulate the
# mean and std of the reward and evaluation metrics over the episodes.
rewards, eval_res = debug_run(fname, max_peaks, params, n_eval_episodes=n_eval_episodes, deterministic=True)
df = eval_res_to_df(rewards, eval_res)
df

{'ionisation_mode': 'Positive', 'rt_range': (400, 500), 'isolation_window': 0.7, 'mz_tol': 10, 'rt_tol': 120, 'alpha': 0.191500954, 'beta': 0.030798858}



This system does not have apparently enough memory to store the complete replay buffer 9.87GB > 0.34GB



Evaluation episode 0 finished: metric 125.119552, timedelta=0:00:02.810154
Evaluation episode 1 finished: metric 147.436301, timedelta=0:00:03.308261
Evaluation episode 2 finished: metric 143.023466, timedelta=0:00:01.739373
Evaluation episode 3 finished: metric 142.359745, timedelta=0:00:03.149774
Evaluation episode 4 finished: metric 150.115175, timedelta=0:00:04.013260
Evaluation episode 5 finished: metric 160.712618, timedelta=0:00:02.833915
Evaluation episode 6 finished: metric 119.778864, timedelta=0:00:02.997854
Evaluation episode 7 finished: metric 144.988713, timedelta=0:00:02.988392
Evaluation episode 8 finished: metric 113.195536, timedelta=0:00:03.152961
Evaluation episode 9 finished: metric 145.361616, timedelta=0:00:02.212373


Unnamed: 0,metric,mean,std
0,reward,139.209159,14.133361
1,coverage_prop,0.976379,0.022579
2,intensity_prop,0.849789,0.030132
3,f1,0.521972,0.079312


### Deterministic = False

In [13]:
# Same evaluation of the model in `fname`, but with deterministic=False
# forwarded to model.predict.
rewards, eval_res = debug_run(fname, max_peaks, params, n_eval_episodes=n_eval_episodes, deterministic=False)
df = eval_res_to_df(rewards, eval_res)
df

{'ionisation_mode': 'Positive', 'rt_range': (400, 500), 'isolation_window': 0.7, 'mz_tol': 10, 'rt_tol': 120, 'alpha': 0.191500954, 'beta': 0.030798858}



This system does not have apparently enough memory to store the complete replay buffer 9.87GB > 0.33GB



Evaluation episode 0 finished: metric 51.047035, timedelta=0:00:01.286196
Evaluation episode 1 finished: metric 52.058043, timedelta=0:00:02.592784
Evaluation episode 2 finished: metric 43.937356, timedelta=0:00:02.109299
Evaluation episode 3 finished: metric 34.198545, timedelta=0:00:01.723674
Evaluation episode 4 finished: metric 79.437551, timedelta=0:00:02.568079
Evaluation episode 5 finished: metric 50.119367, timedelta=0:00:02.337249
Evaluation episode 6 finished: metric 46.410382, timedelta=0:00:02.432139
Evaluation episode 7 finished: metric 44.504599, timedelta=0:00:04.275583
Evaluation episode 8 finished: metric 50.312273, timedelta=0:00:02.385027
Evaluation episode 9 finished: metric 50.224249, timedelta=0:00:02.250241


Unnamed: 0,metric,mean,std
0,reward,50.22494,10.959397
1,coverage_prop,0.954365,0.033858
2,intensity_prop,0.828065,0.048414
3,f1,0.541052,0.106116


## Load second re-trained model

In [14]:
# Switch to the second re-trained model file for the evaluations that follow.
fname = os.path.join('..', 'DQN', 'DQN_9_rerun_2.zip') 

### Deterministic = True

In [15]:
# Roll out the model in `fname` with deterministic=True, then tabulate the
# mean and std of the reward and evaluation metrics over the episodes.
rewards, eval_res = debug_run(fname, max_peaks, params, n_eval_episodes=n_eval_episodes, deterministic=True)
df = eval_res_to_df(rewards, eval_res)
df


This system does not have apparently enough memory to store the complete replay buffer 9.87GB > 0.33GB



{'ionisation_mode': 'Positive', 'rt_range': (400, 500), 'isolation_window': 0.7, 'mz_tol': 10, 'rt_tol': 120, 'alpha': 0.191500954, 'beta': 0.030798858}
Evaluation episode 0 finished: metric 132.219249, timedelta=0:00:04.016300
Evaluation episode 1 finished: metric 134.872074, timedelta=0:00:02.738245
Evaluation episode 2 finished: metric 130.427843, timedelta=0:00:02.071264
Evaluation episode 3 finished: metric 126.086919, timedelta=0:00:03.131886
Evaluation episode 4 finished: metric 136.770174, timedelta=0:00:02.624029
Evaluation episode 5 finished: metric 116.921459, timedelta=0:00:02.083195
Evaluation episode 6 finished: metric 124.990844, timedelta=0:00:03.497837
Evaluation episode 7 finished: metric 109.730944, timedelta=0:00:02.914869
Evaluation episode 8 finished: metric 119.027010, timedelta=0:00:01.894763
Evaluation episode 9 finished: metric 108.694060, timedelta=0:00:01.202462


Unnamed: 0,metric,mean,std
0,reward,123.974058,9.529937
1,coverage_prop,0.939156,0.039377
2,intensity_prop,0.799242,0.058815
3,f1,0.46665,0.08042


### Deterministic = False

In [16]:
# Same evaluation of the model in `fname`, but with deterministic=False
# forwarded to model.predict.
rewards, eval_res = debug_run(fname, max_peaks, params, n_eval_episodes=n_eval_episodes, deterministic=False)
df = eval_res_to_df(rewards, eval_res)
df


This system does not have apparently enough memory to store the complete replay buffer 9.87GB > 0.32GB



{'ionisation_mode': 'Positive', 'rt_range': (400, 500), 'isolation_window': 0.7, 'mz_tol': 10, 'rt_tol': 120, 'alpha': 0.191500954, 'beta': 0.030798858}
Evaluation episode 0 finished: metric 31.888336, timedelta=0:00:01.581181
Evaluation episode 1 finished: metric 23.143872, timedelta=0:00:01.509766
Evaluation episode 2 finished: metric 36.427806, timedelta=0:00:02.155045
Evaluation episode 3 finished: metric 36.680873, timedelta=0:00:01.426064
Evaluation episode 4 finished: metric 14.809011, timedelta=0:00:01.454375
Evaluation episode 5 finished: metric 25.413376, timedelta=0:00:01.590618
Evaluation episode 6 finished: metric 53.616078, timedelta=0:00:02.351283
Evaluation episode 7 finished: metric 50.523153, timedelta=0:00:02.262978
Evaluation episode 8 finished: metric 50.846056, timedelta=0:00:02.531238
Evaluation episode 9 finished: metric 50.139359, timedelta=0:00:02.055096


Unnamed: 0,metric,mean,std
0,reward,37.348792,12.912589
1,coverage_prop,0.952008,0.041648
2,intensity_prop,0.795199,0.062697
3,f1,0.490124,0.067185
