In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf      # Deep Learning library
import numpy as np           # Handle matrices
import os
# import tensorflow.contrib as tc

from skimage import transform # Help us to preprocess the frames
from skimage.color import rgb2gray # Help us to gray our frames

from collections import deque# Ordered collection with ends

  from ._conv import register_converters as _register_converters


In [2]:
from tools import state

DDPG

在现有模型基础上继续训练，不导入任何数据，先由模型生成init_memory episodes数据后开始训练

In [3]:
#train while playing game, we do not need any data
def train_jump(env, episodes, init_memory, experiment_dir,
                         actor, critic, memory,
                         actor_lr, critic_lr, batch_size,
                         gamma, tau=0.01):
    """Train a DDPG agent online while it plays the game exposed by ``env``.

    No pre-recorded data is used. If a checkpoint exists under
    ``experiment_dir/checkpoints`` it is restored, otherwise the agent is
    initialised from scratch. The current policy then plays ``init_memory``
    games to pre-fill the replay memory, after which ``episodes`` games are
    played with one training step before every game step.

    Args:
        env: game environment. Must provide ``reset()`` returning an
            observation, ``step(action)`` returning ``(obs, reward, done)``,
            and the attributes ``observation_shape`` and ``action_shape``.
        episodes: number of games played during the training phase.
        init_memory: number of games played before training starts, only to
            collect transitions.
        experiment_dir: root directory; the ``checkpoints`` and ``summaries``
            sub-directories are created inside it when missing.
        actor, critic, memory: objects handed unchanged to ``DDPG``.
        actor_lr, critic_lr, batch_size, gamma, tau: forwarded to ``DDPG``
            as keyword arguments of the same name.

    Returns:
        None. The model is checkpointed to ``experiment_dir/checkpoints/model``
        every 100 episodes and once more when training finishes; losses and
        per-episode reward/step counts are written as TensorBoard summaries.
    """
    # Original author's note: DDPG is left on its defaults
    # action_range=(-1., 1.), reward_scale=1. (not visible from this cell).
    agent = DDPG(actor, critic, memory, env.observation_shape, env.action_shape,
                 actor_lr=actor_lr, critic_lr=critic_lr, batch_size=batch_size,
                 gamma=gamma, tau=tau)

    # Created after the agent so the agent's variables already exist in the graph.
    saver = tf.train.Saver()

    checkpoint_dir = os.path.join(experiment_dir, "checkpoints")
    checkpoint_path = os.path.join(checkpoint_dir, "model")
    summary_dir = os.path.join(experiment_dir, "summaries")
    os.makedirs(checkpoint_dir, exist_ok=True)
    os.makedirs(summary_dir, exist_ok=True)

    def choose_action(sess, obs):
        """Run the actor network on a single observation and return a flat action array."""
        return sess.run(agent.actor_tf, feed_dict={agent.obs0: [obs]}).flatten()

    summary_writer = tf.summary.FileWriter(summary_dir)
    try:
        with tf.Session() as sess:

            # Resume from the newest checkpoint when there is one.
            latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
            if latest_checkpoint:
                print("Loading model checkpoint {}...\n".format(latest_checkpoint))
                saver.restore(sess, latest_checkpoint)
                agent.sess = sess
            else:
                print('Building new model...')
                agent.initialize(sess)

            # ---- Phase 1: fill the replay memory by playing, without training ----
            print('Generating ',init_memory,' memory... Please reset game!')
            # Kept from the original: one reset before the loop, which resets again per game.
            obs0 = env.reset()
            for _ in range(init_memory):
                print('new game')
                obs0 = env.reset()
                while True:
                    action = choose_action(sess, obs0)
                    obs1, reward, done = env.step(action)
                    agent.store_transition(obs0, action, reward, obs1, done)
                    if done:
                        break
                    obs0 = obs1

            # ---- Phase 2: one training step before every game step ----
            print('Training...')
            for episode in range(episodes):
                obs0 = env.reset()
                episode_reward = 0
                episode_step = 0

                while True:
                    cl, al = agent.train()
                    global_step = sess.run(agent.global_step)

                    # A fresh Summary per write: reusing one proto would keep
                    # appending values, so every add_summary would re-log all
                    # earlier losses under the newest step.
                    step_summary = tf.Summary()
                    step_summary.value.add(simple_value=cl, tag="critic_loss")
                    step_summary.value.add(simple_value=al, tag="actor_loss")
                    summary_writer.add_summary(step_summary, global_step)
                    summary_writer.flush()

                    agent.update_target_net()

                    action = choose_action(sess, obs0)
                    obs1, reward, done = env.step(action)
                    episode_reward += reward
                    episode_step += 1

                    agent.store_transition(obs0, action, reward, obs1, done)
                    obs0 = obs1

                    if done:
                        # Same reasoning as above: one new proto per episode.
                        episode_summary = tf.Summary()
                        episode_summary.value.add(simple_value=episode_reward, tag="episode_reward")
                        episode_summary.value.add(simple_value=episode_step, tag="episode_step")
                        summary_writer.add_summary(episode_summary, episode)
                        summary_writer.flush()
                        break

                # Save the model every 100 episodes.
                if episode%100 == 0:
                    saver.save(sess, checkpoint_path)

            # Persist the final weights unless the last episode was just saved;
            # otherwise up to 99 trailing episodes of training would be lost.
            if episodes > 0 and (episodes - 1) % 100 != 0:
                saver.save(sess, checkpoint_path)
    finally:
        # Release the event file even when training is interrupted or fails.
        summary_writer.close()

    print('Training completed!')



In [4]:
from models import Actor, Critic

In [5]:
from memory import Memory

In [6]:
from ddpg import DDPG

In [7]:
# Hyper-parameters used by the cells below.
actor_lr = 1e-4     # passed to train_jump as actor_lr
critic_lr = 1e-3    # passed to train_jump as critic_lr
batch_size = 64     # passed to train_jump as batch_size
gamma = 0.99        # passed to train_jump as gamma
tau = 0.01          # passed to train_jump as tau
nb_actions = 1      # first argument of Actor(...)
limit = 5000        # first argument of Memory(...)

In [8]:
# Directory the model is loaded from; it is also where TensorBoard logs are written.
# train_jump creates "checkpoints" and "summaries" sub-folders inside it; the latter
# records the losses and the per-episode step count & reward.
# NOTE(review): this path is anchored at the filesystem root — confirm that is intended.
experiment_dir = os.path.abspath("/ddpg-model/experiments/")

In [9]:
init_memory = 100  # games the model plays before training starts, to fill the memory; author's note: 100 seems about right
episodes = 10000  # total games played once training has started; author's note: 10000 may take about half a month but should be enough to reach a fairly good level

In [10]:
# Create the game environment.
import cv2
from jump_env import Jump_Env


def _load_templet(path):
    """Read a template image with OpenCV, failing loudly if it cannot be read.

    cv2.imread does not raise on a missing or unreadable file, it returns
    None, so a bad path would otherwise be passed silently into Jump_Env.

    Raises:
        OSError: if OpenCV could not read an image from ``path``.
    """
    image = cv2.imread(path)
    if image is None:
        raise OSError("Could not read template image: {}".format(path))
    return image


# Templates '0.jpg' .. '9.jpg', in list order.
number_templet = [_load_templet('templet/{}.jpg'.format(i)) for i in range(10)]
restart_templet = _load_templet('templet/again.jpg')
env = Jump_Env(number_templet=number_templet, restart_templet=restart_templet)

In [11]:
# Build the two networks and the replay memory that are later handed to train_jump.
actor = Actor(nb_actions, layer_norm=True)
critic = Critic(layer_norm=True)
# `limit` is Memory's first positional argument — presumably the buffer capacity; TODO confirm in memory.py.
memory = Memory(limit, action_shape=env.action_shape, observation_shape=env.observation_shape)

In [12]:
# Start online training with the environment, networks, memory and
# hyper-parameters defined in the earlier cells.
train_jump(
    env=env,
    episodes=episodes,
    init_memory=init_memory,
    experiment_dir=experiment_dir,
    actor=actor,
    critic=critic,
    memory=memory,
    actor_lr=actor_lr,
    critic_lr=critic_lr,
    batch_size=batch_size,
    gamma=gamma,
    tau=tau,
)

setting up target updates ...
len 16 = 16
{ target_actor/Conv/weights:0 } <- { actor/Conv/weights:0 }
{ target_actor/Conv/biases:0 } <- { actor/Conv/biases:0 }
{ target_actor/Conv_1/weights:0 } <- { actor/Conv_1/weights:0 }
{ target_actor/Conv_1/biases:0 } <- { actor/Conv_1/biases:0 }
{ target_actor/Conv_2/weights:0 } <- { actor/Conv_2/weights:0 }
{ target_actor/Conv_2/biases:0 } <- { actor/Conv_2/biases:0 }
{ target_actor/dense/kernel:0 } <- { actor/dense/kernel:0 }
{ target_actor/dense/bias:0 } <- { actor/dense/bias:0 }
{ target_actor/LayerNorm/beta:0 } <- { actor/LayerNorm/beta:0 }
{ target_actor/LayerNorm/gamma:0 } <- { actor/LayerNorm/gamma:0 }
{ target_actor/dense_1/kernel:0 } <- { actor/dense_1/kernel:0 }
{ target_actor/dense_1/bias:0 } <- { actor/dense_1/bias:0 }
{ target_actor/LayerNorm_1/beta:0 } <- { actor/LayerNorm_1/beta:0 }
{ target_actor/LayerNorm_1/gamma:0 } <- { actor/LayerNorm_1/gamma:0 }
{ target_actor/dense_2/kernel:0 } <- { actor/dense_2/kernel:0 }
{ target_actor/d

InternalError: Failed to create session.

In [22]:
# env = Data_Env()

In [24]:
# actor = Actor(nb_actions, layer_norm=True)

In [25]:
# critic = Critic(layer_norm=True)

In [26]:
# memory = Memory(limit, action_shape=env.action_shape, observation_shape=env.observation_shape)

In [27]:
# train_on_data(env=env, steps=steps, data=input_data, experiment_dir=experiment_dir, actor=actor, critic=critic, memory=memory, 
#               actor_lr=actor_lr, critic_lr=critic_lr, batch_size=batch_size, gamma=gamma, tau=tau)

In [28]:
# train_on_data_online(env=env, steps=steps, data=input_data, experiment_dir=experiment_dir, actor=actor, critic=critic, memory=memory, 
#               actor_lr=actor_lr, critic_lr=critic_lr, batch_size=batch_size, gamma=gamma, tau=tau)

In [13]:
# NOTE(review): scratch cell — this module-level object is not read by any other visible cell
# (train_jump works with its own local summaries); it looks safe to delete.
episode_summary = tf.Summary()

In [14]:
# Smoke test: only checks that a TensorFlow session can be opened at all.
# The recorded output of this cell is "InternalError: Failed to create session.",
# the same error raised by the train_jump call — presumably an environment/device
# problem rather than a bug in the training code; TODO confirm.
with tf.Session() as sess:
    print("123")

InternalError: Failed to create session.