In [5]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import keras
from keras.models import Model
from keras.layers import concatenate, Dense, Input, Flatten
from keras.optimizers import Adam
import gym
from rl.agents import eventDDPGAgent
from rl.memory import SequentialMemory

Using TensorFlow backend.


In [6]:
# Create the Gym Pendulum environment.
env = gym.make("Pendulum-v0")

# Discrete action table: the value each action id maps to, and how many ids exist.
# Neither name is referenced by the other cells visible in this notebook.
ACT_ID_TO_VALUE = {0: [-1], 1: [+1]}
nb_actions = 2

# Show the action space, then the length of the observation vector.
# The recorded run reports a 1-D Box action space and 3 observation components.
print("Action Space: %s" % (env.action_space,))
print(env.observation_space.shape[0])

Action Space: Box(1,)
3


In [7]:
def actor_net(a_shape, s_shape):
    """Build the actor network as a Keras functional ``Model``.

    Args:
        a_shape: Shape tuple of the action space. Not used here: the width of
            the output layer is hard-coded to 3.
        s_shape: Shape tuple of the observation space. The network input has
            shape ``(1,) + s_shape``.

    Returns:
        A Keras ``Model`` that flattens its input, applies two 16-unit ReLU
        layers and ends in a 3-unit tanh layer.
    """
    # Despite the actor's role, its input is the observation window, not an action.
    # The leading axis of 1 presumably mirrors window_length=1 of the replay
    # memory -- TODO confirm.
    observation_window = Input(shape=(1,) + s_shape)

    hidden = Flatten()(observation_window)
    for _ in range(2):
        hidden = Dense(16, activation="relu")(hidden)

    # NOTE(review): 3 outputs although the environment's action is 1-D. The run
    # log prints one scalar plus a 2-vector per step, so eventDDPGAgent
    # presumably splits this vector itself -- confirm. A conventional actor
    # head would be a_shape[0] linear units.
    head = Dense(3, activation="tanh")(hidden)

    # Debug output: shows the symbolic output tensor (name, shape, dtype).
    print(head)
    return Model(inputs=observation_window, outputs=head)

In [8]:
def critic_net(a_shape, s_shape):
    """Build the critic network as a Keras functional ``Model``.

    Args:
        a_shape: Shape tuple used for the action input layer.
        s_shape: Shape tuple of the observation space. The observation input
            has shape ``(1,) + s_shape`` and is flattened before use.

    Returns:
        A ``(critic, action_input)`` tuple: the two-input ``Model`` with a
        single linear output unit, and the action input tensor it was built
        with (the caller hands that tensor to the agent constructor).
    """
    act_in = Input(a_shape)
    obs_in = Input(shape=(1,) + s_shape)

    # Join the action with the flattened observation into one feature vector.
    features = concatenate([act_in, Flatten()(obs_in)])
    for _ in range(2):
        features = Dense(32, activation="relu")(features)

    # Single linear unit: one scalar estimate per (action, observation) pair.
    value = Dense(1, activation="linear")(features)

    return (Model(inputs=[act_in, obs_in], outputs=value), act_in)


In [11]:
def agent(a_shape, s_shape):
    """Assemble an ``eventDDPGAgent`` from freshly built networks and memory.

    Args:
        a_shape: Shape tuple of the action space; ``a_shape[0]`` is passed to
            the agent as its first argument.
        s_shape: Shape tuple of the observation space, forwarded to both
            network builders.

    Returns:
        The constructed (not yet compiled) ``eventDDPGAgent`` instance.
    """
    policy_model = actor_net(a_shape, s_shape)
    q_model, q_action_input = critic_net(a_shape, s_shape)
    replay_buffer = SequentialMemory(limit=50000, window_length=1)

    # Arguments are passed positionally, in the same order as before:
    # action count, actor, critic, critic action input, memory.
    return eventDDPGAgent(
        a_shape[0],
        policy_model,
        q_model,
        q_action_input,
        replay_buffer,
    )

In [12]:
# Keep the instance under a name different from the `agent` factory function.
# Binding the result back to `agent` would replace the function with the
# instance, and running this cell a second time would fail because the
# instance is not callable.
ddpg_agent = agent(env.action_space.shape, env.observation_space.shape)
print(env.action_space.shape, env.observation_space.shape)

# Adam with gradient-norm clipping; "mae" is tracked as an additional metric.
ddpg_agent.compile(Adam(lr=0.001, clipnorm=1.), metrics=["mae"])

# NOTE(review): 200 steps is one episode at most (nb_max_episode_steps=200).
# The stock keras-rl DDPGAgent defaults to 1000 warm-up steps for actor and
# critic; if eventDDPGAgent inherits those defaults, no gradient update happens
# in this run -- confirm and raise nb_steps if real training is intended.
ddpg_agent.fit(env, nb_steps=200, visualize=True, verbose=1, nb_max_episode_steps=200)

# NOTE(review): the recorded output of this cell ends with
# "TypeError: '<=' not supported between instances of 'NoneType' and 'float'"
# after the test episodes were printed; the cause is not visible in this
# notebook and should be traced inside eventDDPGAgent / the environment.
ddpg_agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=200)

Tensor("dense_9/Tanh:0", shape=(?, 3), dtype=float32)
(1,) (3,)
Training for 200 steps ...
Interval 1 (0 steps performed)
-------------------
step =  0
<class 'numpy.ndarray'> [-0.11109197]
    1/10000 [..............................] - ETA: 41:31 - reward: -0.1607-------------------
step =  1
-0.062671416 [ 0.00227584 -0.02956191]
action =  <class 'numpy.ndarray'> [-0.06267142]
-------------------
step =  2
-0.012063989 [-0.04571781 -0.03165216]
action =  <class 'numpy.ndarray'> [-0.06267142]
-------------------
step =  3
0.028882788 [-0.08377926 -0.02591506]
action =  <class 'numpy.ndarray'> [-0.06267142]
-------------------
step =  4
0.068344444 [-0.11744134 -0.02069923]
action =  <class 'numpy.ndarray'> [-0.06267142]
-------------------
step =  5
0.09834901 [-0.1439236  -0.01433904]
action =  <class 'numpy.ndarray'> [-0.06267142]
-------------------
step =  6
0.10578304 [-0.15327038 -0.03002047]
action =  <class 'numpy.ndarray'> [-0.06267142]
-------------------
step =  7
0.1076470

-------------------
step =  66
-0.31949508 [ 0.07386719 -0.3896931 ]
action =  <class 'numpy.ndarray'> [-0.31949508]
-------------------
step =  67
-0.36695164 [ 0.23814882 -0.5040139 ]
action =  <class 'numpy.ndarray'> [-0.36695164]
-------------------
step =  68
-0.4136989 [ 0.37111545 -0.6083771 ]
action =  <class 'numpy.ndarray'> [-0.4136989]
   69/10000 [..............................] - ETA: 1:36 - reward: -4.3040-------------------
step =  69
-0.43447864 [ 0.48658362 -0.6763323 ]
action =  <class 'numpy.ndarray'> [-0.43447864]
-------------------
step =  70
-0.42057112 [ 0.5460127 -0.7029314]
action =  <class 'numpy.ndarray'> [-0.42057112]
-------------------
step =  71
-0.38231808 [ 0.55813134 -0.7105847 ]
action =  <class 'numpy.ndarray'> [-0.38231808]
-------------------
step =  72
-0.34463668 [ 0.4897293 -0.6972288]
action =  <class 'numpy.ndarray'> [-0.34463668]
-------------------
step =  73
-0.2862764 [ 0.3955774 -0.6414335]
action =  <class 'numpy.ndarray'> [-0.2862764]


  195/10000 [..............................] - ETA: 1:29 - reward: -5.2125-------------------
step =  195
-0.23581226 [ 0.22330241 -0.5640472 ]
action =  <class 'numpy.ndarray'> [-0.23581226]
-------------------
step =  196
-0.21668981 [ 0.1158529 -0.5191988]
action =  <class 'numpy.ndarray'> [-0.21668981]
-------------------
step =  197
-0.19425476 [ 0.01291945 -0.47383937]
action =  <class 'numpy.ndarray'> [-0.19425476]
-------------------
step =  198
-0.16266358 [-0.08316185 -0.44334722]
action =  <class 'numpy.ndarray'> [-0.16266358]
-------------------
step =  199
-0.12511688 [-0.16046281 -0.4023932 ]
action =  <class 'numpy.ndarray'> [-0.12511688]
-0.079676546 [-0.21463957 -0.3533491 ]
action =  <class 'numpy.ndarray'> [-0.07967655]
done, took 1.830 seconds
Testing for 5 episodes ...
-0.015249409 [ 0.22416297 -0.17907888]
action =  <class 'numpy.ndarray'> [-0.01524941]
-0.13625225 [ 0.19313054 -0.07388971]
action =  <class 'numpy.ndarray'> [-0.13625225]
-0.27703902 [ 0.19177204 -

-0.17373244 [ 0.47126728 -0.19985126]
action =  <class 'numpy.ndarray'> [-0.17373244]
-0.17750736 [ 0.4722209  -0.23655996]
action =  <class 'numpy.ndarray'> [-0.17750736]
-0.1768802 [ 0.4738347  -0.26580665]
action =  <class 'numpy.ndarray'> [-0.1768802]
-0.17170072 [ 0.47508007 -0.28645363]
action =  <class 'numpy.ndarray'> [-0.17170072]
-0.1621185 [ 0.475517   -0.29773843]
action =  <class 'numpy.ndarray'> [-0.1621185]
-0.148577 [ 0.4753956  -0.29928565]
action =  <class 'numpy.ndarray'> [-0.148577]
-0.13176616 [ 0.47560248 -0.2910952 ]
action =  <class 'numpy.ndarray'> [-0.13176616]
-0.11254429 [ 0.47746724 -0.27351823]
action =  <class 'numpy.ndarray'> [-0.11254429]
-0.08683423 [ 0.47863728 -0.2486806 ]
action =  <class 'numpy.ndarray'> [-0.08683423]
-0.060273502 [ 0.44914672 -0.23315535]
action =  <class 'numpy.ndarray'> [-0.0602735]
-0.036440052 [ 0.4180248 -0.2150988]
action =  <class 'numpy.ndarray'> [-0.03644005]
-0.02467968 [ 0.34483343 -0.19213445]
action =  <class 'numpy.n

-0.042934168 [ 0.41192114 -0.07789791]
action =  <class 'numpy.ndarray'> [-0.04293417]
-0.10883488 [ 0.4565409  -0.12428974]
action =  <class 'numpy.ndarray'> [-0.10883488]
-0.13558486 [ 0.486489   -0.15460509]
action =  <class 'numpy.ndarray'> [-0.13558486]
-0.1416188 [ 0.49950367 -0.1773932 ]
action =  <class 'numpy.ndarray'> [-0.1416188]
-0.14347495 [ 0.5085695  -0.19607165]
action =  <class 'numpy.ndarray'> [-0.14347495]
-0.14100794 [ 0.51313525 -0.20984584]
action =  <class 'numpy.ndarray'> [-0.14100794]
-0.13426158 [ 0.51294136 -0.21813887]
action =  <class 'numpy.ndarray'> [-0.13426158]
-0.12346868 [ 0.5080328  -0.22061825]
action =  <class 'numpy.ndarray'> [-0.12346868]
-0.11014509 [ 0.49854752 -0.21904792]
action =  <class 'numpy.ndarray'> [-0.11014509]
-0.09731894 [ 0.48464066 -0.21769385]
action =  <class 'numpy.ndarray'> [-0.09731894]
-0.08296151 [ 0.46765637 -0.21246666]
action =  <class 'numpy.ndarray'> [-0.08296151]
-0.06648741 [ 0.44722492 -0.20359126]
action =  <class 

-0.8215213 [ 0.74300855 -0.29059577]
action =  <class 'numpy.ndarray'> [-0.8215213]
-0.8012388 [ 0.75838083 -0.2730475 ]
action =  <class 'numpy.ndarray'> [-0.8012388]
-0.76073265 [ 0.7541655  -0.24912737]
action =  <class 'numpy.ndarray'> [-0.76073265]
-0.6929515 [ 0.7368352  -0.20763732]
action =  <class 'numpy.ndarray'> [-0.6929515]
-0.5967595 [ 0.70396805 -0.1574575 ]
action =  <class 'numpy.ndarray'> [-0.5967595]
-0.47790593 [ 0.651306   -0.11047952]
action =  <class 'numpy.ndarray'> [-0.47790593]
-0.34283614 [ 0.58162737 -0.06523507]
action =  <class 'numpy.ndarray'> [-0.34283614]
-0.19980639 [ 0.4996586 -0.0198904]
action =  <class 'numpy.ndarray'> [-0.19980639]
-0.043958202 [0.4255472  0.04508716]
action =  <class 'numpy.ndarray'> [-0.0439582]
0.10775174 [0.3243661  0.09859549]
action =  <class 'numpy.ndarray'> [0.10775174]
0.087510906 [0.19482855 0.05938957]
action =  <class 'numpy.ndarray'> [0.08751091]
0.045354355 [ 0.00539854 -0.02901653]
action =  <class 'numpy.ndarray'> [

-0.13426442 [ 0.40545312 -0.44200897]
action =  <class 'numpy.ndarray'> [-0.13426442]
-0.10588942 [ 0.3357549 -0.397087 ]
action =  <class 'numpy.ndarray'> [-0.10588942]
-0.06659452 [ 0.28226236 -0.3310431 ]
action =  <class 'numpy.ndarray'> [-0.06659452]
-0.033249214 [ 0.24155657 -0.27441317]
action =  <class 'numpy.ndarray'> [-0.03324921]
-0.033320893 [ 0.18211652 -0.24373399]
action =  <class 'numpy.ndarray'> [-0.03332089]
-0.011087646 [ 0.20179643 -0.25033972]
action =  <class 'numpy.ndarray'> [-0.01108765]
0.070828125 [ 0.23154983 -0.15644316]
action =  <class 'numpy.ndarray'> [0.07082812]
-0.043714717 [ 0.29573387 -0.00851548]
action =  <class 'numpy.ndarray'> [-0.04371472]
-0.24982408 [0.27712885 0.03937816]
action =  <class 'numpy.ndarray'> [-0.24982408]
-0.41386753 [ 0.3307474  -0.03437861]
action =  <class 'numpy.ndarray'> [-0.41386753]
-0.55102247 [ 0.38572398 -0.14650814]
action =  <class 'numpy.ndarray'> [-0.55102247]
-0.64366734 [ 0.48339373 -0.17335273]
action =  <class 

TypeError: '<=' not supported between instances of 'NoneType' and 'float'

In [14]:
# Array shapes are plain tuples, so "+" concatenates them rather than adding
# element-wise. This is what makes `(1,) + s_shape` prepend an axis.
a = np.shape([1, 2])
b = np.shape([1, 2, 3])
c = np.shape([1, 2, 3, 4])
print(a + b + c)

(2, 3, 4)


In [11]:
# A 0-dimensional array built from a scalar has the empty tuple as its shape.
a = np.array(2)
print(np.shape(a))

()
