In [15]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import module.keras
from keras.models import Model
from keras.layers import concatenate, Dense, Input, Flatten
from keras.optimizers import Adam
import gym
from rl.agents import eventDDPGAgent
from rl.memory import SequentialMemory

Using TensorFlow backend.


In [16]:
# Build the Gym Pendulum environment.
env = gym.make("Pendulum-v0")

# Table of candidate actions ("moves"): action id -> value, and how many there are.
ACT_ID_TO_VALUE = {
    0: [-1],
    1: [+1],
}
nb_actions = len(ACT_ID_TO_VALUE)

# Report the action space, then the length of the observation vector.
# NOTE(original author): "action dim = 1", "critic dim = 3 with ??" — open question left as-is.
print("Action Space: %s" % env.action_space)
print(env.observation_space.shape[0])

Action Space: Box(1,)
3


In [17]:
def actor_net(a_shape, s_shape):
    """Build the actor network: a small MLP over one observation window.

    Args:
        a_shape: Action-space shape. Currently unused: the output width is
            hard-coded to 3 instead of being sized by ``a_shape[0]``.
        s_shape: Observation-space shape tuple. The network input has shape
            ``(1,) + s_shape`` (a leading length-1 axis is prepended).

    Returns:
        A Keras ``Model`` mapping the observation input to a 3-unit tanh layer.
    """
    observation_input = Input(shape=(1,) + s_shape)
    hidden = Flatten()(observation_input)
    # Two identical 16-unit ReLU hidden layers.
    for _ in range(2):
        hidden = Dense(16, activation="relu")(hidden)
    # NOTE(review): 3 outputs rather than a_shape[0]; presumably what
    # eventDDPGAgent expects — confirm against the agent implementation.
    output = Dense(3, activation="tanh")(hidden)
    print(output)  # shows the symbolic output tensor when the model is built
    return Model(inputs=observation_input, outputs=output)

In [18]:
def critic_net(a_shape, s_shape):
    """Build the critic network, which takes an action and an observation window.

    Args:
        a_shape: Shape tuple used for the action input.
        s_shape: Observation-space shape tuple. The observation input has
            shape ``(1,) + s_shape`` and is flattened before use.

    Returns:
        A ``(critic, action_input)`` tuple: the Keras ``Model`` with inputs
        ``[action_input, observation_input]`` and a single linear output,
        plus the action input tensor itself.
    """
    # Inputs are created in the same order as the model's input list.
    action_input = Input(shape=a_shape)
    state_input = Input(shape=(1,) + s_shape)

    # Join the action with the flattened observation, then apply two 32-unit ReLU layers.
    merged = concatenate([action_input, Flatten()(state_input)])
    for _ in range(2):
        merged = Dense(32, activation="relu")(merged)
    critic_output = Dense(1, activation="linear")(merged)

    critic = Model(inputs=[action_input, state_input], outputs=critic_output)
    return critic, action_input


In [19]:
def agent(a_shape, s_shape):
    """Assemble an ``eventDDPGAgent`` from freshly built actor and critic networks.

    Args:
        a_shape: Action-space shape tuple; ``a_shape[0]`` is passed to the
            agent as its first argument.
        s_shape: Observation-space shape tuple, forwarded to both network builders.

    Returns:
        The constructed ``eventDDPGAgent`` (not yet compiled).
    """
    actor = actor_net(a_shape, s_shape)
    critic, critic_action_input = critic_net(a_shape, s_shape)
    # window_length=1 pairs with the leading (1,) axis on the networks' observation inputs.
    memory = SequentialMemory(limit=50000, window_length=1)
    return eventDDPGAgent(a_shape[0], actor, critic, critic_action_input, memory)

In [20]:
# Bind the instance to its own name instead of overwriting the `agent` factory
# function; otherwise re-running this cell fails because `agent` is no longer callable.
ddpg_agent = agent(env.action_space.shape, env.observation_space.shape)
print(env.action_space.shape, env.observation_space.shape)
ddpg_agent.compile(Adam(lr=0.001, clipnorm=1.), metrics=["mae"])
# Train for 10 steps, then run 5 test episodes; every episode is capped at 200 steps.
# NOTE(review): the recorded output ends in a TypeError raised partway through
# training; its origin is not visible in this notebook — check eventDDPGAgent.
ddpg_agent.fit(env, nb_steps=10, visualize=False, verbose=1, nb_max_episode_steps=200)
ddpg_agent.test(env, nb_episodes=5, visualize=False, nb_max_episode_steps=200)

Tensor("dense_21/Tanh:0", shape=(?, 3), dtype=float32)
(1,) (3,)
Training for 10 steps ...
Interval 1 (0 steps performed)
-------------------
step =  0
<class 'numpy.ndarray'> [-0.17931418]
    1/10000 [..............................] - ETA: 1:12:15 - reward: -3.7088-------------------
step =  1
-0.094371684 [-0.03114451 -0.03381133]
action =  <class 'numpy.ndarray'> [-0.09437168]
-------------------
step =  2
-0.07497559 [-0.09999412 -0.06890906]
action =  <class 'numpy.ndarray'> [-0.09437168]
-------------------
step =  3
-0.048379198 [-0.12123152 -0.05374011]
action =  <class 'numpy.ndarray'> [-0.09437168]
-------------------
step =  4
-0.008305179 [-0.14234318 -0.02842122]
action =  <class 'numpy.ndarray'> [-0.09437168]
-------------------
step =  5
0.036968943 [-0.16094398  0.00161956]
action =  <class 'numpy.ndarray'> [-0.09437168]
-------------------
step =  6
0.085923634 [-0.19547744  0.05659249]
action =  <class 'numpy.ndarray'> [-0.09437168]
-------------------
step =  7
0.13

TypeError: '<=' not supported between instances of 'NoneType' and 'float'

In [7]:
# The .shape of a 1-D array is a one-element tuple, so "+" joins the tuples
# end to end instead of adding the lengths.
a, b, c = (np.arange(1, length + 1).shape for length in (2, 3, 4))
print(a + b + c)

(2, 3, 4)


In [8]:
# An array built from a bare scalar is 0-dimensional, so its shape is the empty tuple.
a = np.array(2)
print(np.shape(a))

()
