# DQN


### Training of DQN in Tensorflow
--------------------------

In [1]:
#
from util import *
from dlg_manager import *
from alg import *
from agent import *
from user_sim import *
from state_tracker import *
import random
from config import *
import matplotlib.pyplot as plt
import numpy as np
from nlg import *
from six.moves import cPickle as pickle
import IPython
import copy, argparse, json

%matplotlib inline
%load_ext autoreload
%autoreload 2

because the backend has already been chosen;
matplotlib.use() must be called *before* pylab, matplotlib.pyplot,
or matplotlib.backends is imported for the first time.



# Load Data
-------------------

#### Action Set

In [2]:
#
act_set_path = './data/dia_acts.txt'
act_set = text_to_dict(act_set_path)
sample_dict(act_set)

keys = 11
Sample of dict:
- greeting: 4
- multiple_choice: 6
- closing: 5
- confirm_answer: 3
- deny: 9


### slot set

In [3]:
#
slots_set_path = "./data/slot_set.txt"
slot_set = text_to_dict(slots_set_path)
sample_dict(slot_set)

keys = 29
Sample of dict:
- state: 21
- closing: 3
- distanceconstraints: 7
- moviename: 12
- actress: 1


### movie dic: info about movie

In [4]:
#
movie_kb_path = "./data/movie_kb.1k.p"
movie_kb = pickle.load(open(movie_kb_path, 'rb'), encoding="latin")
sample_dict(movie_kb, sample_size=1)

keys = 991
Sample of dict:
- 173: {'city': 'Sacramento', 'theater': 'REGAL NATOMAS MARKETPLACE STADIUM 16 & RPX', 'distanceconstraints': 'close to 95833', 'video_format': 'IMAX', 'state': 'ca', 'starttime': 'morning', 'date': 'tomorrow', 'moviename': 'zoology'}


# Language Generator (pretrained)
-------------------

In [5]:
#
nlg_model_path = './data/trained_model/nlg/lstm_tanh_relu_[1468202263.38]_2_0.610.p'
nlg_model = Nlg()
nlg_model.load_nlg_model(nlg_model_path)
diaact_nl_pairs_path = "./data/nlg/dia_act_nl_pairs.v6.json"
nlg_model.load_predefine_act_nl_pairs(diaact_nl_pairs_path)

## Model Params

In [6]:
#
model_params = pickle.load(open(nlg_model_path, 'rb'), encoding='latin1')
params = model_params['params']
params['batch_size'] = 16
batch_size = 16
save_check_point = 20
params['trained_model_path'] = None
for k in params:
    print("{}: {}".format(k, params[k]))

grad_clip: -0.0001
dia_slot_val: 2
reg_cost: 0.001
data_path: .\data\movieMultiLine.Annot.Corrected.Final.v3.csv
save_check_point: 20
slot_rep: 1
max_epochs: 200
sdgtype: rmsprop
init_rnn: 0
cv_fold: 6
write_model_dir: .\checkpoints\template\07102016\
valid_test: 0
pretrained_model_path: None
check_point: 20
decay_rate: 0.999
feed_recurrence: 0
hidden_size: 100
activation_func: relu
momentum: 0.1
learning_rate: 0.001
batch_size: 16
act_set: data/dia_acts.txt
smooth_eps: 1e-08
split_method: 1
slot_set: data/slot_set.txt
eva_metric: 2
model: lstm_tanh
trained_model_path: None


# User Simulator
-------------------

### goal

In [7]:
#
goal_file_path = './data/user_goals_first_turn_template.part.movie.v1.p'
all_goal_set = pickle.load(open(goal_file_path, 'rb'), encoding="latin")
print("goals length: {}".format(len(all_goal_set)))
print("Sample the first goal: \n{}".format(all_goal_set[0]))

goals length: 128
Sample the first goal: 
{'request_slots': {}, 'diaact': 'request', 'inform_slots': {'city': 'birmingham', 'numberofpeople': '1', 'theater': 'carmike summit 16', 'state': 'al', 'starttime': 'around 2pm', 'date': 'today', 'moviename': 'zootopia'}}


### Split goal set

In [8]:
# split goal set
split_fold = params.get('split_fold', 5)
goal_set = {'train':[], 'valid':[], 'test':[], 'all':[]}
for u_goal_id, u_goal in enumerate(all_goal_set):
    if u_goal_id % split_fold == 1: goal_set['test'].append(u_goal)
    else: goal_set['train'].append(u_goal)
    goal_set['all'].append(u_goal)
print(len(goal_set['train']))
print(len(goal_set['valid']))
print(len(goal_set['test']))
print(len(goal_set['all']))

102
0
26
128


### user simulator param

In [9]:
#
usersim_params = {}
usersim_params['max_turn'] = 40
usersim_params['slot_err_prob'] = 0.00
# slot_err_mode: 0 for slot_val only; 1 for three errs
usersim_params['slot_err_mode'] = 0
usersim_params['intent_err_prob'] = 0
# run_mode: 0 for default NL; 1 for dia_act; 2 for both
usersim_params['run_mode'] = 0
# 0 for dia_act level; 1 for NL level
usersim_params['act_level'] = 0
# train/test/all; default is all
usersim_params['learn_phase'] = 'all'

### a movie dictionary for user simulator - slot:possible values

In [10]:
#
movie_dict_path = './data/user/dicts.v3.p'
movie_dictionary = pickle.load(open(movie_dict_path, 'rb'), encoding="latin")
samples = sample_dict(movie_dictionary, sample_size=1)

keys = 20
Sample of dict:
- numberofkids: ['two', '2', '1', 'no']


###  Create a User

In [11]:
user = RuleSimulator(movie_dictionary, act_set, slot_set, goal_set, usersim_params)
user.set_nlg_model(nlg_model)

# Agent
-------------------

## param

In [22]:
#
agent_params = {}
# maximum length of each dialog (default=20, 0=no maximum length)
agent_params['max_turn'] = 40
# Epsilon to determine stochasticity of epsilon-greedy agent policies
agent_params['epsilon'] = 0
# run_mode: 0 for default NL; 1 for dia_act; 2 for both
agent_params['agent_run_mode'] = 3
# 0 for dia_act level; 1 for NL level
agent_params['agent_act_level'] = 0

############### DQN #################
# the size for experience replay
agent_params['experience_replay_pool_size'] = 10000
# # the hidden size for DQN
agent_params['dqn_hidden_size'] = 60
agent_params['batch_size'] = 16
# # gamma for DQN
agent_params['gamma'] = 0.9
# # predict model for DQN
agent_params['predict_mode'] = True
agent_params['trained_model_path'] = params['pretrained_model_path']
#####################################
print("pretrained model path = {}".format(agent_params['trained_model_path']))
# 0: no warm start; 1: warm start for training
agent_params['warm_start'] = 1
# run_mode: 0 for NL; 1 for dia_act
agent_params['cmd_input_mode'] = 0

success_rate_threshold = 0.3

pretrained model path = None


### create an agent

In [31]:
# agent = RequestBasicsAgent(movie_kb, act_set, slot_set, agent_params)
agent = AgentDQN(movie_kb, act_set, slot_set, agent_params)
agt = 9
# agent = DQNAgentTF(movie_kb, act_set, slot_set, agent_params)
# agt = 10
agent.set_nlg_model(nlg_model)

agent_run_mode:3
scale_factor: 0.13443321448446624
scale_factor: 0.2413553960127389


# Dialog Manager
-------------------

In [32]:
dlg_manager = DlgManager(agent, user, act_set, slot_set, movie_kb)

## Running Episodes
-------------------

### Param

In [33]:
#
status = {'successes': 0, 'count': 0, 'cumulative_reward': 0}
# the size of validation set
simulation_epoch_size = 100
# the number of epochs for warm start 
warm_start_epochs = 120
num_episodes = 100

In [34]:
""" Warm_Start Simulation (by Rule Policy) """
def warm_start_simulation():
    successes = 0
    cumulative_reward = 0
    cumulative_turns = 0
    
    res = {}
    for episode in range(warm_start_epochs):
        dlg_manager.init_episode()
        episode_over = False
        while(not episode_over):
            episode_over, reward = dlg_manager.step()
            cumulative_reward += reward
            if episode_over:
                if reward > 0: 
                    successes += 1
#                     print ("warm_start simulation episode %s: Success" % (episode))
#                 else: print ("warm_start simulation episode %s: Fail" % (episode))
                cumulative_turns += dlg_manager.state_tracker.turn_count
        
        if len(agent.experience_replay_pool) >= agent.experience_replay_pool_size:
            break
    
    agent.warm_start = 2
    res['success_rate'] = float(successes)/simulation_epoch_size
    res['ave_reward'] = float(cumulative_reward)/simulation_epoch_size
    res['ave_turns'] = float(cumulative_turns)/simulation_epoch_size
    print ("Warm_Start %s epochs, success rate %s, ave reward [%s], ave turns %s" % (episode+1, res['success_rate'], res['ave_reward'], res['ave_turns']))
    print ("Current experience replay buffer size %s" % (len(agent.experience_replay_pool)))

In [35]:
def simulation_epoch(simulation_epoch_size):
    successes = 0
    cumulative_reward = 0
    cumulative_turns = 0
    
    res = {}
    for episode in range(simulation_epoch_size):
        dlg_manager.init_episode()
        episode_over = False
        while(not episode_over):
            episode_over, reward = dlg_manager.step()
            cumulative_reward += reward
            if episode_over:
                if reward > 0: 
                    successes += 1
#                     print ("simulation episode %s: Success" % (episode))
#                 else: print ("simulation episode %s: Fail" % (episode))
                cumulative_turns += dlg_manager.state_tracker.turn_count
    
    res['success_rate'] = float(successes)/simulation_epoch_size
    res['ave_reward'] = float(cumulative_reward)/simulation_epoch_size
    res['ave_turns'] = float(cumulative_turns)/simulation_epoch_size
    print("simulation success rate %s, ave reward [%s], ave turns %s" % (res['success_rate'], res['ave_reward'], res['ave_turns']))
    return res

In [36]:
def run_episodes(count, status):
    successes = 0
    cumulative_reward = 0
    cumulative_turns = 0
    
    
    if agt >= 9 and params['trained_model_path'] == None and agent.warm_start == 1:
        print ('warm_start starting ...')
        warm_start_simulation()
        print ('warm_start finished, start RL training ...')
    
    for episode in range(count):
        print ("----------------- Episode: %s ----------------- " % (episode))
        dlg_manager.init_episode()
        episode_over = False
        
        while(not episode_over):
            episode_over, reward = dlg_manager.step()
            cumulative_reward += reward
                
            if episode_over:
                if reward > 0:
                    print ("Successful Dialog!")
                    successes += 1
#                 else: print ("Failed Dialog!")
                
                cumulative_turns += dlg_manager.state_tracker.turn_count
        
        # simulation
        if agt >= 9 and params['trained_model_path'] == None:
            agent.predict_mode = True
            simulation_res = simulation_epoch(simulation_epoch_size)
            
            performance_records['success_rate'][episode] = simulation_res['success_rate']
            performance_records['ave_turns'][episode] = simulation_res['ave_turns']
            performance_records['ave_reward'][episode] = simulation_res['ave_reward']
            
            if simulation_res['success_rate'] >= best_res['success_rate']:
                if simulation_res['success_rate'] >= success_rate_threshold: # threshold = 0.30
                    agent.experience_replay_pool = [] 
                    simulation_epoch(simulation_epoch_size)
                
#             if simulation_res['success_rate'] > best_res['success_rate']:
#                 best_model['model'] = copy.deepcopy(agent)
#                 best_res['success_rate'] = simulation_res['success_rate']
#                 best_res['ave_reward'] = simulation_res['ave_reward']
#                 best_res['ave_turns'] = simulation_res['ave_turns']
#                 best_res['epoch'] = episode
                
            loss = agent.train(batch_size, 1)
            if agt == 10: agent.model.update_target_params()
            else: agent.clone_dqn = copy.deepcopy(agent.dqn)
                
            agent.predict_mode = False
            
            print ("Simulation success rate %s, Ave reward [%s], Ave turns %s, Best success rate %s" % (performance_records['success_rate'][episode], performance_records['ave_reward'][episode], performance_records['ave_turns'][episode], best_res['success_rate']))
#             if episode % save_check_point == 0 and params['trained_model_path'] == None: # save the model every 10 episodes
#                 save_model(params['write_model_dir'], agt, best_res['success_rate'], best_model['model'], best_res['epoch'], episode)
#                 save_performance_records(params['write_model_dir'], agt, performance_records)
        curve.append(successes/(episode+1))
        losses.append(loss)
        print("Progress: %s / %s, Success rate: %s / %s Avg reward: [%.2f] Avg turns: %.2f" % (episode+1, count, successes, episode+1, float(cumulative_reward)/(episode+1), float(cumulative_turns)/(episode+1)))
    print("Success rate: %s / %s Avg reward: [%.2f] Avg turns: %.2f" % (successes, count, float(cumulative_reward)/count, float(cumulative_turns)/count))
    status['successes'] += successes
    status['count'] += count
    
#     if agt == 9 and params['traained_model_path'] == None:
#         save_model(params['write_model_dir'], agt, float(successes)/count, best_model['model'], best_res['epoch'], count)
#         save_performance_records(params['write_model_dir'], agt, performance_records)


## Run a Warm Start

# Train & Eval
-------------------

In [37]:
# def train_agent(agent = agent, num_episode = 1000, num_batches = 100, batch_size = 10, status=status, eval_every = 10, show_every = 100):
#     succ_rates = []
#     epochs = list(range(eval_every, num_episode+1, eval_every))
#     print(epochs)
#     curr_succ_rate = None
#     curr_loss      = None
#     for n_epo in epochs:
#         print("-=-=-=-=-=-= Running: {} - Success = {} | Loss = {} -=-=-=-=-=-=".format(n_epo, curr_succ_rate, curr_loss))
#         # collect experience
#         run_episodes(agent, num_episode=eval_every, status=status, record_training_data = True, is_training = True)
#         # train
#         curr_loss = agent.train(batch_size=batch_size, num_batches=num_batches, show_every = show_every)
#         # eval
#         curr_succ_rate = run_episodes(agent, num_episode=eval_every, status=status, record_training_data = False)
#         display.HTML('<h3>{}</h3> '.format("Success Rate: {}".format(curr_succ_rate)))
#         succ_rates.append(curr_succ_rate)
        
#     return {"x": epochs, 'success_rate': succ_rates}

In [38]:
performance_records = {}
performance_records['success_rate'] = {}
performance_records['ave_turns'] = {}
performance_records['ave_reward'] = {}

best_model = {}
best_res = {'success_rate': 0, 'ave_reward':float('-inf'), 'ave_turns': float('inf'), 'epoch':0}

curve = []
losses = []
agent.warm_start = 1
run_episodes(20, status)

warm_start starting ...
Warm_Start 120 epochs, success rate 0.46, ave reward [-1.2], ave turns 19.2
Current experience replay buffer size 960
warm_start finished, start RL training ...
----------------- Episode: 0 ----------------- 
simulation success rate 0.0, ave reward [-59.53], ave turns 41.06
Train on : 3034
wh= 9.81611514668
wd= 0.700901582464
0.001
0.001
wh= 9.90948203528
wd= 0.831934883822
0.001
0.001
wh= 9.94062631059
wd= 0.959800272213
0.001
0.001
wh= 9.98283126614
wd= 1.0633320218
0.001
0.001
wh= 9.9804507885
wd= 1.13334809598
0.001
0.001
wh= 10.0594892295
wd= 1.22699909534
0.001
0.001
wh= 10.0577486997
wd= 1.21445083276
0.001
0.001
wh= 10.0787211919
wd= 1.2232019311
0.001
0.001
wh= 10.1839424671
wd= 1.31308126091
0.001
0.001
wh= 10.2076738025
wd= 1.33005123544
0.001
0.001
wh= 10.2162544829
wd= 1.31913793835
0.001
0.001
wh= 10.2949394643
wd= 1.34595216765
0.001
0.001
wh= 10.2953241978
wd= 1.33764534997
0.001
0.001
wh= 10.2951861976
wd= 1.36400231634
0.001
0.001
wh= 10.305772

0.001
0.001
wh= 15.7059475818
wd= 3.19093633047
0.001
0.001
wh= 15.7062947737
wd= 3.19270201142
0.001
0.001
wh= 15.7153468462
wd= 3.19944276423
0.001
0.001
wh= 15.7448598378
wd= 3.20830672973
0.001
0.001
wh= 15.7631371567
wd= 3.22089550183
0.001
0.001
wh= 15.8127827944
wd= 3.23995959033
0.001
0.001
wh= 15.8283087466
wd= 3.24425773461
0.001
0.001
wh= 15.8733736101
wd= 3.25251611201
0.001
0.001
wh= 15.9192780508
wd= 3.27287050002
0.001
0.001
wh= 15.9425637888
wd= 3.27747359638
0.001
0.001
wh= 15.9684231332
wd= 3.29210373765
0.001
0.001
wh= 16.0157007481
wd= 3.30442275234
0.001
0.001
wh= 16.0375253036
wd= 3.31448252823
0.001
0.001
wh= 16.0743865091
wd= 3.32235958435
0.001
0.001
wh= 16.1074065131
wd= 3.33136207513
0.001
0.001
wh= 16.125574259
wd= 3.34875359117
0.001
0.001
cur bellman err [9.8375], experience replay pool 3034
Simulation success rate 0.0, Ave reward [-59.53], Ave turns 41.06, Best success rate 0
Progress: 1 / 20, Success rate: 0 / 1 Avg reward: [-60.00] Avg turns: 42.00
----

0.001
0.001
wh= 19.7664338137
wd= 5.2276394906
0.001
0.001
wh= 19.7884295313
wd= 5.24012754861
0.001
0.001
wh= 19.8054211731
wd= 5.24592719958
0.001
0.001
wh= 19.8173850041
wd= 5.24941269521
0.001
0.001
wh= 19.8207053137
wd= 5.26375770376
0.001
0.001
wh= 19.8226568822
wd= 5.27074540836
0.001
0.001
wh= 19.8209693436
wd= 5.27519688908
0.001
0.001
wh= 19.9020003342
wd= 5.28960803686
0.001
0.001
wh= 19.9031977993
wd= 5.28955814077
0.001
0.001
wh= 19.9013193206
wd= 5.28923763302
0.001
0.001
wh= 19.8980742319
wd= 5.28856612563
0.001
0.001
wh= 19.9038755913
wd= 5.28780322007
0.001
0.001
wh= 19.9257943632
wd= 5.29720287953
0.001
0.001
wh= 19.937383577
wd= 5.30363667407
0.001
0.001
wh= 19.961399375
wd= 5.31468269697
0.001
0.001
wh= 19.9795547162
wd= 5.32263601261
0.001
0.001
wh= 19.9769292083
wd= 5.32675321936
0.001
0.001
wh= 19.9920920799
wd= 5.33157930303
0.001
0.001
wh= 19.993356557
wd= 5.34026556003
0.001
0.001
wh= 20.0247601557
wd= 5.34229681367
0.001
0.001
wh= 20.0264873858
wd= 5.34966409

0.001
0.001
wh= 21.4339186402
wd= 6.10627608798
0.001
0.001
wh= 21.4428301133
wd= 6.11593155236
0.001
0.001
wh= 21.4558230049
wd= 6.11920277036
0.001
0.001
wh= 21.4628768196
wd= 6.1190443464
0.001
0.001
wh= 21.4688621387
wd= 6.12431146654
0.001
0.001
wh= 21.4718844424
wd= 6.12447533238
0.001
0.001
wh= 21.4763489584
wd= 6.12620608353
0.001
0.001
wh= 21.475342551
wd= 6.12304140185
0.001
0.001
wh= 21.4988702935
wd= 6.13614242065
0.001
0.001
wh= 21.5033204254
wd= 6.14519791411
0.001
0.001
wh= 21.509993669
wd= 6.15483885081
0.001
0.001
wh= 21.5203313382
wd= 6.15813959969
0.001
0.001
wh= 21.5188925204
wd= 6.15487702244
0.001
0.001
wh= 21.5250618843
wd= 6.16842201778
0.001
0.001
wh= 21.5427321802
wd= 6.18493493498
0.001
0.001
wh= 21.5595159314
wd= 6.18887331253
0.001
0.001
wh= 21.5777011397
wd= 6.19301123226
0.001
0.001
wh= 21.5959819847
wd= 6.19724560699
0.001
0.001
wh= 21.6178944819
wd= 6.20425661629
0.001
0.001
wh= 21.6247418949
wd= 6.20477357221
0.001
0.001
wh= 21.6265903887
wd= 6.2087129

wh= 23.2301907973
wd= 6.84943350257
0.001
0.001
wh= 23.2357635963
wd= 6.85201167629
0.001
0.001
wh= 23.2477479653
wd= 6.85660907051
0.001
0.001
wh= 23.2602540075
wd= 6.85921996334
0.001
0.001
wh= 23.2661275485
wd= 6.85414111138
0.001
0.001
wh= 23.2782062179
wd= 6.85873940456
0.001
0.001
wh= 23.3009184306
wd= 6.86570020479
0.001
0.001
wh= 23.3091319193
wd= 6.871146617
0.001
0.001
wh= 23.3150142931
wd= 6.87238058866
0.001
0.001
wh= 23.3229554985
wd= 6.87448396834
0.001
0.001
wh= 23.3256642288
wd= 6.87954076224
0.001
0.001
wh= 23.3362243288
wd= 6.88429644942
0.001
0.001
wh= 23.3581075729
wd= 6.89044984817
0.001
0.001
wh= 23.3553613358
wd= 6.89277454644
0.001
0.001
wh= 23.3651302128
wd= 6.8943564284
0.001
0.001
wh= 23.3684981111
wd= 6.89387345327
0.001
0.001
wh= 23.369006017
wd= 6.89918675628
0.001
0.001
wh= 23.3689723797
wd= 6.8989610184
0.001
0.001
wh= 23.3788779984
wd= 6.90064765615
0.001
0.001
wh= 23.3837890803
wd= 6.90341879698
0.001
0.001
wh= 23.392570482
wd= 6.90466530495
0.001
0.00

simulation success rate 0.0, ave reward [-56.72], ave turns 35.44
Train on : 8764
wh= 24.8898959257
wd= 7.47214129587
0.001
0.001
wh= 24.9038995893
wd= 7.48089611679
0.001
0.001
wh= 24.9314308103
wd= 7.49315655116
0.001
0.001
wh= 24.9445230753
wd= 7.48924262332
0.001
0.001
wh= 24.9528123069
wd= 7.49126981298
0.001
0.001
wh= 24.9665090406
wd= 7.50507371572
0.001
0.001
wh= 24.9822881903
wd= 7.51140714184
0.001
0.001
wh= 24.9847943198
wd= 7.51128064736
0.001
0.001
wh= 25.0043833579
wd= 7.51722639428
0.001
0.001
wh= 25.0130279234
wd= 7.52335568324
0.001
0.001
wh= 25.029898117
wd= 7.52370419123
0.001
0.001
wh= 25.0465522911
wd= 7.52801620638
0.001
0.001
wh= 25.0501880581
wd= 7.54397844965
0.001
0.001
wh= 25.0617534484
wd= 7.55655847401
0.001
0.001
wh= 25.0735832379
wd= 7.56701173259
0.001
0.001
wh= 25.0922271119
wd= 7.57469031365
0.001
0.001
wh= 25.1040690927
wd= 7.58196338001
0.001
0.001
wh= 25.1352772416
wd= 7.59203363807
0.001
0.001
wh= 25.1531689344
wd= 7.59637496959
0.001
0.001
wh= 25.

wd= 8.37795907175
0.001
0.001
wh= 26.8566627204
wd= 8.3790487057
0.001
0.001
wh= 26.8586607168
wd= 8.38222176805
0.001
0.001
wh= 26.8734338433
wd= 8.38925363814
0.001
0.001
wh= 26.8703385605
wd= 8.39083950777
0.001
0.001
wh= 26.8754132558
wd= 8.39363583129
0.001
0.001
wh= 26.8842180127
wd= 8.40069401801
0.001
0.001
wh= 26.9191732381
wd= 8.40206771374
0.001
0.001
wh= 26.9158735988
wd= 8.40760958595
0.001
0.001
wh= 26.9351033836
wd= 8.40860342823
0.001
0.001
wh= 26.9477850752
wd= 8.41268706633
0.001
0.001
wh= 26.9546780254
wd= 8.41437971652
0.001
0.001
wh= 26.9809884664
wd= 8.42168148779
0.001
0.001
wh= 26.9851313649
wd= 8.4264622645
0.001
0.001
wh= 26.9905334845
wd= 8.42682281344
0.001
0.001
wh= 27.0009455527
wd= 8.43324123014
0.001
0.001
wh= 27.0092150365
wd= 8.44530999849
0.001
0.001
wh= 27.0147066882
wd= 8.44932440469
0.001
0.001
wh= 27.0269283986
wd= 8.45479903428
0.001
0.001
wh= 27.041683963
wd= 8.4599128872
0.001
0.001
wh= 27.0602556694
wd= 8.4617636469
0.001
0.001
wh= 27.07088930

wh= 28.05729308
wd= 8.83686248836
0.001
0.001
wh= 28.0699297258
wd= 8.84016955036
0.001
0.001
wh= 28.0708473195
wd= 8.83903031065
0.001
0.001
wh= 28.0693816999
wd= 8.83988618834
0.001
0.001
wh= 28.0746275915
wd= 8.8409547044
0.001
0.001
wh= 28.0762477979
wd= 8.84092050326
0.001
0.001
wh= 28.0824201033
wd= 8.84240157085
0.001
0.001
wh= 28.0823755786
wd= 8.84272669908
0.001
0.001
wh= 28.0840533967
wd= 8.84314833667
0.001
0.001
wh= 28.0895323072
wd= 8.84487661236
0.001
0.001
wh= 28.0916466932
wd= 8.84548229998
0.001
0.001
wh= 28.0927511621
wd= 8.84773661195
0.001
0.001
wh= 28.0963662589
wd= 8.84918881529
0.001
0.001
wh= 28.0991347715
wd= 8.84987321524
0.001
0.001
wh= 28.1002520347
wd= 8.85021392123
0.001
0.001
wh= 28.1083299264
wd= 8.85555779592
0.001
0.001
wh= 28.1136667286
wd= 8.85554114259
0.001
0.001
wh= 28.1109439438
wd= 8.85334163452
0.001
0.001
wh= 28.1130107635
wd= 8.85379644042
0.001
0.001
wh= 28.0930492641
wd= 8.85028226976
0.001
0.001
wh= 28.0991019303
wd= 8.85010262088
0.001
0

0.001
0.001
wh= 29.2952940272
wd= 9.27865664921
0.001
0.001
wh= 29.2958390834
wd= 9.28181208224
0.001
0.001
wh= 29.3079286588
wd= 9.28438486085
0.001
0.001
wh= 29.3143441345
wd= 9.28667730898
0.001
0.001
wh= 29.3126683783
wd= 9.28514608291
0.001
0.001
wh= 29.3202146257
wd= 9.28843091715
0.001
0.001
wh= 29.3334863488
wd= 9.29339956454
0.001
0.001
wh= 29.3515730312
wd= 9.29640035171
0.001
0.001
wh= 29.3561958804
wd= 9.29762643175
0.001
0.001
wh= 29.3674496238
wd= 9.30069715514
0.001
0.001
wh= 29.3704799468
wd= 9.30169361554
0.001
0.001
wh= 29.3857247957
wd= 9.30958800287
0.001
0.001
wh= 29.3983812562
wd= 9.31642195687
0.001
0.001
wh= 29.3977425618
wd= 9.31494132824
0.001
0.001
wh= 29.3976476639
wd= 9.31606222041
0.001
0.001
wh= 29.3875714768
wd= 9.32716335046
0.001
0.001
wh= 29.3862414993
wd= 9.32401128056
0.001
0.001
wh= 29.387793584
wd= 9.33073171192
0.001
0.001
wh= 29.3979606957
wd= 9.34078834873
0.001
0.001
wh= 29.4125434033
wd= 9.34299106653
0.001
0.001
wh= 29.414155896
wd= 9.349425

0.001
0.001
wh= 29.9611359995
wd= 9.61970070252
0.001
0.001
wh= 29.9667358797
wd= 9.62249371545
0.001
0.001
wh= 29.9705524561
wd= 9.62527706176
0.001
0.001
wh= 29.9698363012
wd= 9.62238331811
0.001
0.001
wh= 29.9658303919
wd= 9.6206894532
0.001
0.001
wh= 29.9699889461
wd= 9.62494432843
0.001
0.001
wh= 29.9698712589
wd= 9.62638448329
0.001
0.001
wh= 29.968672259
wd= 9.62498509866
0.001
0.001
wh= 29.9725236191
wd= 9.62835897454
0.001
0.001
wh= 29.9704373556
wd= 9.62522338234
0.001
0.001
wh= 29.9542371084
wd= 9.62330071439
0.001
0.001
wh= 29.9551783879
wd= 9.62511287864
0.001
0.001
wh= 29.9554587728
wd= 9.62494799719
0.001
0.001
wh= 29.9575953998
wd= 9.61974655802
0.001
0.001
wh= 29.9559480007
wd= 9.6187142702
0.001
0.001
wh= 29.9548969028
wd= 9.62368954413
0.001
0.001
wh= 29.9529339905
wd= 9.62709408091
0.001
0.001
wh= 29.9597159407
wd= 9.63117299078
0.001
0.001
wh= 29.96550832
wd= 9.6328544131
0.001
0.001
wh= 29.969387735
wd= 9.63429951615
0.001
0.001
wh= 29.9687341879
wd= 9.63436278565

0.001
wh= 30.8820255841
wd= 9.95681987652
0.001
0.001
wh= 30.8841242761
wd= 9.95697355516
0.001
0.001
wh= 30.8893691574
wd= 9.96018836855
0.001
0.001
wh= 30.8945702406
wd= 9.96294662622
0.001
0.001
wh= 30.9019199737
wd= 9.96700039063
0.001
0.001
wh= 30.9064348018
wd= 9.96894026421
0.001
0.001
wh= 30.9067420557
wd= 9.97579459893
0.001
0.001
wh= 30.9102349413
wd= 9.97933954666
0.001
0.001
wh= 30.9164209728
wd= 9.98272755236
0.001
0.001
wh= 30.921380419
wd= 9.98354930122
0.001
0.001
wh= 30.93081592
wd= 9.98522143746
0.001
0.001
wh= 30.9431016741
wd= 9.98867305041
0.001
0.001
wh= 30.9612803277
wd= 9.99482605199
0.001
0.001
wh= 30.9736974069
wd= 9.99775625583
0.001
0.001
wh= 30.9824500875
wd= 10.0004812471
0.001
0.001
wh= 30.993418112
wd= 10.0021019662
0.001
0.001
wh= 30.9979375809
wd= 10.0054841964
0.001
0.001
wh= 31.0005013714
wd= 10.0049796527
0.001
0.001
wh= 31.003016311
wd= 10.005507355
0.001
0.001
wh= 31.0074435418
wd= 10.007223588
0.001
0.001
wh= 31.0144398151
wd= 10.0113928538
0.001

0.001
0.001
wh= 31.7868554036
wd= 10.3077151069
0.001
0.001
wh= 31.7945485982
wd= 10.3088065751
0.001
0.001
wh= 31.7946478895
wd= 10.3098435283
0.001
0.001
wh= 31.798979846
wd= 10.3108791747
0.001
0.001
wh= 31.8009239374
wd= 10.3126716477
0.001
0.001
wh= 31.8074699077
wd= 10.3143970557
0.001
0.001
wh= 31.8134415969
wd= 10.3192629921
0.001
0.001
wh= 31.8222503875
wd= 10.3249846533
0.001
0.001
wh= 31.8358044668
wd= 10.3270421004
0.001
0.001
wh= 31.8387295396
wd= 10.3286557545
0.001
0.001
wh= 31.8401561465
wd= 10.3282698197
0.001
0.001
wh= 31.8362671112
wd= 10.3280204908
0.001
0.001
wh= 31.8387897265
wd= 10.3282646512
0.001
0.001
wh= 31.8411429548
wd= 10.3280526573
0.001
0.001
wh= 31.8454643812
wd= 10.3310899604
0.001
0.001
wh= 31.8503203285
wd= 10.3316378852
0.001
0.001
wh= 31.855399623
wd= 10.3344861137
0.001
0.001
wh= 31.8513735375
wd= 10.3329289517
0.001
0.001
wh= 31.8502861007
wd= 10.3334746307
0.001
0.001
wh= 31.8539082792
wd= 10.3346972798
0.001
0.001
wh= 31.8578496999
wd= 10.33718

KeyboardInterrupt: 

In [None]:
draw_learning_curve(curve)

In [None]:
draw_loss_curve(losses)

In [None]:
draw_loss_curve(losses[-10:])