# Select Models

This notebook is used to select the best trained model, based on a policy search over the saved checkpoints.

In [2]:
import deepQLearningSimple as dql
import gym
from keras.models import load_model 
from gym.envs.classic_control.mountain_car import MountainCarEnv
from gym.wrappers.time_limit import TimeLimit

import json


Using TensorFlow backend.


# Test Model: Version Simple_V1
***

There are 4 models to be tested for their performance in terms of win rate, number of steps, and average action values over 1000 games:

- end-of-run-model  / episode 1000
- max Q-value model / episode 975
- min stepsize / episode 318
- additional checkpoint / episode 869

In [16]:
# Evaluate every saved checkpoint with a fresh, non-training agent and keep
# the recorded history of each run, keyed by the checkpoint's file name.
history_store = {}

# Evaluation settings, named once instead of being repeated as magic numbers.
NUM_TEST_EPISODES = 1000
MAX_STEPS_PER_EPISODE = 500

# Directory holding the checkpoints; loop-invariant, so built once.
pathImp = "data/model/version_simple_v1/"

for model in ["end_of_run_model.h5", "success_model_episode_975.h5", "success_model_episode_318.h5", "success_model_episode_869.h5"]:
    action_model = load_model(pathImp + model)

    # load new agent
    env = dql.patientMountainCar()
    try:
        agent = dql.agent(env  = env, training = False, render = False)

        # Implement the action model: swap the loaded network into the agent.
        agent.action_dqn.dqn = action_model

        # Run
        agent.run(num_episode = NUM_TEST_EPISODES, num_steps = MAX_STEPS_PER_EPISODE)
    finally:
        # Always release the environment, even when the run fails midway.
        env.close()

    # Store history
    history_store[model] = agent.writer_history.history

[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
 Game :: 1 Wins :: 1 Steps :: 210 Reward -64.136069 Mean Q Value :: -10.111550 Max position 0.502272 
 Game :: 2 Wins :: 2 Steps :: 290 Reward -91.721551 Mean Q Value :: -8.208216 Max position 0.514294 
 Game :: 3 Wins :: 3 Steps :: 273 Reward -98.777166 Mean Q Value :: -10.490991 Max position 0.503865 
 Game :: 4 Wins :: 4 Steps :: 208 Reward -62.684050 Mean Q Value :: -12.017440 Max position 0.500173 
 Game :: 5 Wins :: 5 Steps :: 286 Reward -94.863659 Mean Q Value :: -8.194080 Max position 0.528087 
 Game :: 6 Wins :: 6 Steps :: 469 Reward -177.245651 Mean Q Value :: -16.893852 Max position 0.536858 
 Game :: 7 Wins :: 7 Steps :: 306 Reward -82.376707 Mean Q Value :: -6.946870 Max position 0.501268 
 Game :: 8 Wins :: 7 Steps :: 500 Reward -190.764531 Mean Q Value :: -21.053814 Max position 0.285637 
 Game :: 9 Wins :: 7 Steps :: 500 Reward -193.559996 Mean Q Value :: -24.3533

In [18]:
# Results Printer
def resultPrinter(history, model_name, num_episodes=1000):
    """Print summary statistics for one model's evaluation run.

    Parameters
    ----------
    history : mapping
        Must provide the keys "mean_q_values", "reward", "steps" and
        "cum_win", each holding a sequence of numbers (presumably one
        entry per game -- TODO confirm against the history writer).
    model_name : str
        Label printed in the report header.
    num_episodes : int, optional
        Number of games played during the evaluation. It is printed as the
        test horizon and used as the win-rate denominator. Defaults to 1000,
        the value that was previously hard-coded.

    Raises
    ------
    ValueError
        If ``num_episodes`` is not a positive number.
    """
    # Local import: numpy is not part of the notebook's top-level import cell.
    import numpy as np

    if num_episodes <= 0:
        raise ValueError("num_episodes must be positive, got %r" % (num_episodes,))

    # The largest value of the win counter is taken as the total number of
    # wins; an empty counter counts as zero wins instead of raising.
    total_wins = max(history["cum_win"], default=0)

    print("Model:: %s" % model_name)
    print("Test horizon :: %s" % num_episodes)
    print("Average Q-Value :: %s" % np.mean(history["mean_q_values"]))
    print("Average Reward :: %s" % np.mean(history["reward"]))
    print("Average Stepsize :: %s" % np.mean(history["steps"]))
    print("Win Rate :: %s" % (total_wins / num_episodes))
    print("="*50)

# Report the evaluation summary of each tested checkpoint, in test order.
evaluated_model_files = (
    "end_of_run_model.h5",
    "success_model_episode_975.h5",
    "success_model_episode_318.h5",
    "success_model_episode_869.h5",
)
for model in evaluated_model_files:
    resultPrinter(history=history_store[model], model_name=model)

Model:: end_of_run_model.h5
Test horizon :: 1000
Average Q-Value :: -10.23952720928192
Average Reward :: -88.005
Average Stepsize :: 265.697
Win Rate :: 0.942
Model:: success_model_episode_975.h5
Test horizon :: 1000
Average Q-Value :: -3.485122898124158
Average Reward :: -56.793
Average Stepsize :: 165.024
Win Rate :: 1.0
Model:: success_model_episode_318.h5
Test horizon :: 1000
Average Q-Value :: -8.571254022836685
Average Reward :: -34.134
Average Stepsize :: 117.632
Win Rate :: 1.0
Model:: success_model_episode_869.h5
Test horizon :: 1000
Average Q-Value :: -9.02769183731079
Average Reward :: -42.814
Average Stepsize :: 129.911
Win Rate :: 1.0
