In [1]:
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import matplotlib.pyplot as plt

import sys
sys.path.append('../../module/')

import keras2
from keras2.models import Model
from keras2.layers import concatenate, Dense, Input, Flatten
from keras2.optimizers import Adam
from util import moving_average, dlqr, discretized_system, lqr
import gym2
from rl2.agents import selfDDPGAgent
from rl2.memory import SequentialMemory

Using TensorFlow backend.
Using TensorFlow backend.


In [2]:
# Create the Pendulum environment through gym2.
env = gym2.make("Pendulum-v2")

# Define the number of possible actions ("moves") and the value each action id maps to.
# NOTE(review): `nb_actios` looks like a typo for `nb_actions`; it is left unchanged
# because other cells may reference this exact name — confirm before renaming.
nb_actios = 2
ACT_ID_TO_VALUE = {0: [-1], 1: [+1]}

In [74]:
def sigmoid(x):
    """Return the logistic function 1 / (1 + e^(-x)) of ``x``.

    ``x`` may be anything ``np.exp`` accepts after negation (a number or a
    NumPy array); arrays are processed element-wise.
    """
    decay = np.exp(-x)
    return 1 / (1 + decay)

def tau(x):
    """Map ``x`` onto the range 0.001 .. 0.1 with a scaled, shifted sigmoid.

    Computes ``0.099 * sigmoid(x) + 0.001``: very negative inputs approach
    0.001 and very positive inputs approach 0.1 (and round to exactly 0.1 in
    floating point once the sigmoid saturates).
    """
    squashed = sigmoid(x)
    return 0.099*squashed + 0.001

def actor_net(a_shape, s_shape):
    """Build the actor model.

    Architecture: input of shape ``(1,) + s_shape`` -> Flatten ->
    Dense(16, relu) -> Dense(16, relu) -> Dense(2, "self_trigger_output").

    Args:
        a_shape: accepted for signature symmetry with ``critic_net``; not used
            here (the output width is fixed at 2).
        s_shape: tuple giving the shape of a single observation.

    Returns:
        The (uncompiled) ``Model`` mapping an observation window to 2 outputs.
    """
    state_in = Input(shape=(1,)+s_shape)
    hidden = Flatten()(state_in)
    # Two identical hidden layers, created in the same order as before.
    for _ in range(2):
        hidden = Dense(16, activation="relu")(hidden)
    # "self_trigger_output" is not a stock activation name; presumably it is
    # registered by the keras2 fork — confirm there.
    out = Dense(2, activation="self_trigger_output")(hidden)
    return Model(inputs=state_in, outputs=out)

def critic_net(a_shape , s_shape):
    """Build the critic model and expose its action input tensor.

    Architecture: the action input and the flattened observation input are
    concatenated, then passed through Dense(16, relu) -> Dense(16, relu) ->
    Dense(1, linear).

    Args:
        a_shape: shape tuple handed to ``Input`` for the action input.
        s_shape: tuple giving the shape of a single observation; the
            observation input has shape ``(1,) + s_shape``.

    Returns:
        A ``(critic_model, action_input_tensor)`` tuple.
    """
    act_in = Input(a_shape)
    obs_in = Input(shape=(1,)+s_shape)
    merged = concatenate([act_in, Flatten()(obs_in)])
    hidden = Dense(16, activation="relu")(merged)
    hidden = Dense(16, activation="relu")(hidden)
    q_value = Dense(1, activation="linear")(hidden)
    return (Model(inputs=[act_in, obs_in], outputs=q_value), act_in)

def agent(a_shape, s_shape):
    """Assemble a ``selfDDPGAgent`` from freshly built actor and critic models.

    Args:
        a_shape: action shape tuple; ``a_shape[0]`` is passed to the agent as
            its first argument and the full tuple goes to the network builders.
        s_shape: observation shape tuple forwarded to the network builders.

    Returns:
        The constructed (not yet compiled) ``selfDDPGAgent``.
    """
    policy = actor_net(a_shape, s_shape)
    q_net, q_action_in = critic_net(a_shape, s_shape)
    # Replay buffer holding up to 50000 entries, one observation per window.
    replay = SequentialMemory(limit=50000, window_length=1)
    # NOTE(review): the meaning of clip_com is defined by selfDDPGAgent and is
    # not visible here — confirm in rl2.agents.
    return selfDDPGAgent(
        a_shape[0],
        policy,
        q_net,
        q_action_in,
        replay,
        clip_com=0.01,
    )

class input_checker():
    """Inspect the pre-activation input of an actor's output-layer neurons.

    A truncated copy of the actor (everything up to, but excluding, the output
    layer) is built once and loaded with the actor's weights. ``check_input``
    then feeds a state through that copy and applies the output layer's weights
    and bias by hand, yielding the value a chosen output neuron receives
    *before* its activation function.
    """

    def __init__(self, actor):
        """Store ``actor`` and build the truncated model from its weights."""
        self.actor = actor
        self._construct_output_model()

    def _output_setweights(self, NN):
        """Copy weights layer-by-layer from the actor into ``NN`` and return it.

        Layers are paired by position, so ``NN`` must have the same leading
        layer structure as the actor.
        """
        for target_layer, source_layer in zip(NN.layers, self.actor.layers):
            target_layer.set_weights(source_layer.get_weights())
        return NN

    def _construct_output_model(self, s_shape=(2,)):
        """Build the actor's hidden stack and store it as ``self.output_model``.

        The layer stack (Input, Flatten, Dense(16, relu), Dense(16, relu)) is
        hard-coded and has to mirror the leading layers of the actor passed to
        ``__init__``; ``s_shape`` is the shape of a single observation.
        """
        action_input = Input(shape=(1,)+s_shape)
        x = Flatten()(action_input)
        x = Dense(16, activation="relu")(x)
        x = Dense(16, activation="relu")(x)
        model = Model(inputs=action_input, outputs=x)
        model.compile(loss='mean_squared_error',optimizer='adam')
        model = self._output_setweights(model)
        self.output_model = model

    def check_input(self, x, output_num):
        """Return the pre-activation input of output neuron ``output_num``.

        Args:
            x: a single observation; it is wrapped as ``np.array([[x]])`` to add
                the batch and window axes before prediction.
            output_num: column index of the output-layer neuron to inspect.

        Returns:
            The scalar ``hidden(x) . W[:, output_num] + b[output_num]``.
        """
        last_layer_output_vector = self.output_model.predict(np.array([[x]]))
        # Read the output layer from the wrapped actor rather than from a
        # notebook-level global, so the checker works for whichever actor it
        # was constructed with and does not depend on cell execution order.
        weights, bias = self.actor.layers[-1].get_weights()
        weights, bias = weights[:,output_num], bias[output_num]
        neuron_input_signal = np.dot(last_layer_output_vector, weights) + bias
        return neuron_input_signal[0]

In [76]:
# Build the agent with action shape (2,) and observation shape (2,).
a = agent((2,),(2,))
# Compile with Adam (lr=0.001, clipnorm=1.) and report mean absolute error as a metric.
a.compile(Adam(lr=0.001, clipnorm=1.), metrics=["mae"])
# Load stored actor weights from disk; no training happens in this cell.
a.actor.load_weights('./saved_agent/self_trigger_05_1.h5')

# Wrap the loaded actor so the raw input to its output neurons can be inspected.
checker = input_checker(a.actor)

In [77]:
# Probe the actor on 100 random inputs. Each x holds two standard-normal samples
# scaled by 0.5. No random seed is set in the visible cells, so the printed
# values are expected to differ between runs.
for i in range(100):
    x = np.random.randn(2,)/2.
    # Pre-activation input of output neuron 1 (presumably the neuron whose
    # activation is the tau() mapping, since tau is applied below — confirm).
    neuron_input = checker.check_input(x, 1)
    # Print the raw value next to tau(value) to show how saturated the sigmoid is.
    print(neuron_input, tau(neuron_input))

25.497437 0.09999999999916395
40.640587 0.1
26.812382 0.09999999999977552
45.38944 0.1
27.183151 0.09999999999984507
83.31393 0.1
26.4567 0.09999999999967965
49.57977 0.1
28.11481 0.09999999999993898
25.859768 0.09999999999941807
34.19526 0.09999999999999987
30.82706 0.09999999999999597
66.396484 0.1
31.906704 0.09999999999999862
25.875343 0.09999999999942706
28.584343 0.09999999999996184
31.920765 0.09999999999999865
26.843521 0.09999999999978243
27.133907 0.09999999999983725
61.171894 0.1
43.185715 0.1
29.833145 0.09999999999998906
71.347496 0.1
28.738419 0.0999999999999673
32.511093 0.09999999999999926
93.17734 0.1
50.792187 0.1
46.051754 0.1
54.388943 0.1
32.334583 0.0999999999999991
25.981083 0.09999999999948454
46.31402 0.1
29.357502 0.0999999999999824
74.20269 0.1
31.049637 0.09999999999999676
28.011948 0.09999999999993237
56.30703 0.1
72.444374 0.1
28.463032 0.09999999999995692
33.438255 0.0999999999999997
34.050743 0.09999999999999985
57.65442 0.1
34.688076 0.09999999999999992