# Model testing code for DDPG in simulation

In [1]:
import tensorflow as tf     
import numpy as np          
import os
from game_environment import Data_Env
import pymysql

In [2]:
def save_test_data(step,score):
    """Insert one (step, score) row into the ``test`` table of the local MySQL database.

    Args:
        step: Step count to store; converted with ``str()`` before insertion.
        score: Score to store; converted with ``str()`` before insertion.

    Returns:
        Whatever ``cursor.execute`` returns for the INSERT statement.
    """
    step = str(step)
    score = str(score)
    # NOTE(review): connection settings are hard-coded; consider moving them
    # to configuration or environment variables.
    # Keyword arguments are used because newer PyMySQL releases no longer
    # accept positional connection parameters.
    db = pymysql.connect(host="localhost", user="root", password="root", database="test")
    try:
        with db.cursor() as cursor:
            # Let the driver quote the values rather than formatting them into the SQL text.
            result = cursor.execute(
                "INSERT INTO test(step,score) VALUES(%s,%s)", (step, score))
        # PyMySQL does not autocommit by default; without an explicit commit
        # the inserted row can be discarded when the connection is closed.
        db.commit()
    finally:
        # Always release the connection, even when the INSERT fails.
        db.close()
    # Echo the statement in the same textual form the notebook has always logged.
    print("INSERT INTO test(step,score) VALUES('%s','%s')"%(step,score))
    return result

In [3]:
def test(env, start, end, noise_sigma, init_memory, model_dir, experiment_dir,
                         actor, critic, memory,
                         actor_lr, critic_lr, batch_size,
                         gamma, tau=0.01):
    """Run the DDPG actor in ``env`` for ``init_memory`` episodes and record each result.

    For every episode the environment is reset and the actor output is fed to
    ``env.step`` until it reports ``done``. The step count and the last score
    seen before the terminal step are then written with ``save_test_data``.

    Args:
        env: Environment exposing ``observation_shape``, ``action_shape``,
            ``reset()`` and ``step(action)``; ``step`` must return
            ``(obs, reward, done, score)``.
        start: Unused by this function; kept for interface compatibility.
        end: Unused by this function; kept for interface compatibility.
        noise_sigma: Unused by this function; kept for interface compatibility.
        init_memory: Number of episodes to run.
        model_dir: Checkpoint path handed to ``saver.restore``. When falsy,
            ``agent.initialize(sess)`` is called instead of restoring.
        experiment_dir: Directory under which the ``checkpoints`` and
            ``summaries`` sub-directories are created if missing.
        actor, critic, memory: Forwarded to the ``DDPG`` constructor.
        actor_lr, critic_lr, batch_size, gamma, tau: Forwarded to the ``DDPG``
            constructor as keyword arguments.

    Returns:
        None.
    """
    # Original author's note on DDPG defaults: action_range=(-1., 1.), reward_scale=1.
    agent = DDPG(actor, critic, memory, env.observation_shape, env.action_shape,
                 actor_lr=actor_lr, critic_lr=critic_lr, batch_size=batch_size,
                 gamma=gamma, tau=tau)
    # Built after the agent; presumably the DDPG constructor creates the
    # variables the saver has to cover -- TODO confirm.
    saver = tf.train.Saver(max_to_keep=20)

    # Make sure the experiment sub-directories exist.
    checkpoint_dir = os.path.join(experiment_dir, "checkpoints")
    summary_dir = os.path.join(experiment_dir, "summaries")
    for directory in (checkpoint_dir, summary_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    # Nothing is written through this writer here; it is kept so the summaries
    # directory is populated as before, and it is now closed on exit.
    summary_writer = tf.summary.FileWriter(summary_dir)
    try:
        with tf.Session() as sess:
            if model_dir:
                print("Loading model checkpoint {}...\n".format(model_dir))
                saver.restore(sess, model_dir)
                agent.sess = sess
            else:
                print('Building new model...')
                agent.initialize(sess)
            print('Generating ',init_memory,' memory... Please reset game!')
            for _ in range(init_memory):
                wechat_score = 0
                jump_step = 0
                obs0 = env.reset()
                while True:
                    # Query the actor for the current observation.
                    action = sess.run(agent.actor_tf, feed_dict={agent.obs0: [obs0]})
                    # Flatten before printing: applying "%f" to a multi-dimensional
                    # array relies on a deprecated NumPy conversion and fails
                    # outright when there is more than one action component.
                    action = action.flatten()
                    print("action:" + ",".join("%f" % component for component in action))
                    obs1, reward, done, score = env.step(action)
                    print(score)
                    jump_step += 1
                    if done:
                        # The score returned with the terminal step is not used
                        # (the captured output shows it as None); the last score
                        # seen before termination is stored instead.
                        save_test_data(jump_step,wechat_score)
                        break
                    wechat_score = score
                    obs0 = obs1
    finally:
        summary_writer.close()

In [4]:
from model import Actor, Critic
from memory import Memory
from ddpg import DDPG
import cv2
from game_environment import Jump_Env as wechat_env

In [5]:
# --- Agent settings (forwarded to DDPG / Actor / Memory in later cells) ---
actor_lr = 1e-4
critic_lr = 1e-3
tau = 0.01
nb_actions = 1
batch_size = 128
limit = 5000  # first argument handed to Memory
noise_sigma = 0.1
gamma = 0

# --- Evaluation settings ---
init_memory = 100  # number of episodes test() runs
# episodes = 10000
model_dir = "./experiments_9_1/checkpoints/model11700"#None
start = 0
end = 2000
experiment_dir = os.path.abspath("./experiments_9_1/")


def _load_template(path):
    """Read a template image with OpenCV, raising if it cannot be loaded."""
    image = cv2.imread(path)
    # cv2.imread reports failure by returning None rather than raising, so a
    # missing or unreadable template would otherwise only surface later.
    if image is None:
        raise IOError("Could not read template image: {}".format(path))
    return image


# --- Template images and environment ---
number_templet = [_load_template('templet/{}.jpg'.format(i)) for i in range(10)]
restart_templet = _load_template('templet/again.jpg')
env = wechat_env(number_templet=number_templet, restart_templet=restart_templet)

In [6]:
# Actor and critic models, both constructed with layer_norm enabled.
actor = Actor(
    nb_actions,
    layer_norm=True,
)
critic = Critic(
    layer_norm=True,
)
# Memory built with `limit` and the environment's action/observation shapes.
memory = Memory(
    limit,
    action_shape=env.action_shape,
    observation_shape=env.observation_shape,
)

In [7]:
# Launch the evaluation run with the environment, models and settings defined above.
test(
    env=env,
    start=start,
    end=end,
    noise_sigma=noise_sigma,
    init_memory=init_memory,
    model_dir=model_dir,
    experiment_dir=experiment_dir,
    actor=actor,
    critic=critic,
    memory=memory,
    actor_lr=actor_lr,
    critic_lr=critic_lr,
    batch_size=batch_size,
    gamma=gamma,
    tau=tau,
)

Actor
Critic
Critic
Actor
Critic
setting up target updates ...
len 16 = 16
{ target_actor/Conv/weights:0 } <- { actor/Conv/weights:0 }
{ target_actor/Conv/biases:0 } <- { actor/Conv/biases:0 }
{ target_actor/Conv_1/weights:0 } <- { actor/Conv_1/weights:0 }
{ target_actor/Conv_1/biases:0 } <- { actor/Conv_1/biases:0 }
{ target_actor/Conv_2/weights:0 } <- { actor/Conv_2/weights:0 }
{ target_actor/Conv_2/biases:0 } <- { actor/Conv_2/biases:0 }
{ target_actor/dense/kernel:0 } <- { actor/dense/kernel:0 }
{ target_actor/dense/bias:0 } <- { actor/dense/bias:0 }
{ target_actor/LayerNorm/beta:0 } <- { actor/LayerNorm/beta:0 }
{ target_actor/LayerNorm/gamma:0 } <- { actor/LayerNorm/gamma:0 }
{ target_actor/dense_1/kernel:0 } <- { actor/dense_1/kernel:0 }
{ target_actor/dense_1/bias:0 } <- { actor/dense_1/bias:0 }
{ target_actor/LayerNorm_1/beta:0 } <- { actor/LayerNorm_1/beta:0 }
{ target_actor/LayerNorm_1/gamma:0 } <- { actor/LayerNorm_1/gamma:0 }
{ target_actor/dense_2/kernel:0 } <- { actor/de

  warn("The default mode, 'constant', will be changed to 'reflect' in "


action:-0.256941
645
1
action:-0.335007
598
2
action:-0.519102
488
4
action:-0.025951
784
8
action:0.104089
862
9
action:-0.508083
495
11
action:-0.234171
659
12
action:-0.281956
630
14
action:-0.077830
753
15
action:-0.449055
530
26
action:-0.426147
544
27
action:-0.316719
609
28
action:-0.141733
714
None
INSERT INTO test(step,score) VALUES('13','28')
action:-0.140178
715
1
action:-0.124566
725
2
action:-0.740203
355
3
action:-0.741933
354
4
action:-0.279428
632
25
action:-0.261508
643
26
action:-0.388294
567
27
action:-0.596180
442
29
action:-0.367863
579
30
action:-0.365330
580
32
action:-0.948751
230
33
action:-0.208914
674
34
action:-0.252993
648
35
action:-0.422368
546
36
action:-0.300484
619
37
action:0.133888
880
9
action:-0.195994
682
40
action:-0.589942
446
41
action:-0.561007
463
4
action:-0.538756
476
6
action:-0.058273
765
64
action:-0.010796
793
6
action:-0.077828
753
None
INSERT INTO test(step,score) VALUES('23','6')
action:-0.124896
725
0
action:-0.046145
772
0
action:-

None
INSERT INTO test(step,score) VALUES('5','4')
action:-0.161700
702
0
action:-0.771510
337
0
action:-0.206605
676
4
action:-0.355637
586
None
INSERT INTO test(step,score) VALUES('4','4')
action:-0.180511
691
1
action:0.088805
853
0
action:-0.644360
413
0
action:-0.276473
634
None
INSERT INTO test(step,score) VALUES('4','0')
action:-0.123504
725
0
action:-0.250407
649
0
action:-0.522870
486
None
INSERT INTO test(step,score) VALUES('3','0')
action:-0.123504
725
0
action:-0.318392
608
0
action:-0.198277
681
None
INSERT INTO test(step,score) VALUES('3','0')
action:-0.152982
708
0
action:-0.117182
729
0
action:-0.145835
712
4
action:-0.046616
772
0
action:-0.464615
521
1
action:-0.527015
483
None
INSERT INTO test(step,score) VALUES('6','1')
action:-0.254784
647
1
action:-0.157410
705
0
action:-0.001029
799
0
action:-0.229646
662
1
action:-0.365217
580
None
INSERT INTO test(step,score) VALUES('5','1')
action:-0.161700
702
0
action:-0.377564
573
0
action:-0.161758
702
4
action:-0.410675
55

0
action:-0.256236
646
0
action:-0.154204
707
0
action:-0.197422
681
6
action:-0.323866
605
7
action:-0.233087
660
None
INSERT INTO test(step,score) VALUES('7','7')
action:-0.159292
704
0
action:-0.264938
641
0
action:-0.781607
331
4
action:-0.375303
574
0
action:-0.324061
605
6
action:-0.354766
587
None
INSERT INTO test(step,score) VALUES('6','6')
action:-0.124896
725
0
action:-0.765351
340
0
action:-0.246875
651
4
action:-0.558642
464
6
action:-0.590592
445
17
action:-0.538950
476
1
action:0.102940
861
19
action:-0.383288
570
0
action:-0.254189
647
1
action:-0.310554
613
0
action:-0.370139
577
4
action:-0.053752
767
0
action:-0.315214
610
None
INSERT INTO test(step,score) VALUES('13','0')
action:-0.169298
698
0
action:-0.388183
567
0
action:-0.056573
766
4
action:-0.385452
568
6
action:-0.307151
615
None
INSERT INTO test(step,score) VALUES('5','6')
action:-0.169298
698
1
action:-0.144815
713
None
INSERT INTO test(step,score) VALUES('2','1')
action:-0.159292
704
0
action:-0.304587
617