In [31]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

from processing import ObsProcessor
from core.algorithm.DDPG import DDPGAgent
from saida_gym.starcraft.avoidObservers import AvoidObservers

In [2]:
# Settings forwarded unchanged to the environment constructor below.
frames_per_step = 4
no_gui = False

# bot_runner is a relative, Windows-style path to the SAIDA executable.
env = AvoidObservers(
    action_type=0,
    verbose=0,
    frames_per_step=frames_per_step,
    no_gui=no_gui,
    bot_runner=r"..\SAIDA_RL\cpp\Release\SAIDA\SAIDA.exe",
)

Initialize...
Shared Memory create 
SAIDA_AO9968 Shared memory found.


In [3]:
# Reset the environment, then read the current state from its `state`
# attribute (whatever reset() returns is discarded here).
env.reset()
s = env.state

In [5]:
# Run the raw state through the observation processor, which returns two maps.
# NOTE(review): judging by the names, one map describes the agent's own unit and
# the other the observers — confirm against the `processing` module.
processor = ObsProcessor()
map_of_scurge, map_of_observer = processor.process_observation(s)

In [6]:
# Sanity check: print the shape of each processed array, one per line.
print(
    map_of_scurge.shape,
    map_of_observer.shape,
    processor.last_action.shape,
    sep="\n",
)

(64, 64, 1)
(21, 21, 1)
(64, 64)


In [None]:
# Assemble the DDPG agent from the actor and critic networks.
# NOTE(review): this cell has no execution count. `memory`, `dict_args`, `OPS`,
# `BATCH_SIZE`, `policy` and `test_policy` are not defined in the visible cells,
# and `actor` / `critic` are only built further down — confirm where these come
# from before relying on a top-to-bottom run.
agent = DDPGAgent(
    actor,
    critic,
    ACTION_SIZE,
    memory,
    critic_action_input=action_input,
    train_interval=dict_args[OPS.TARGET_NETWORK_UPDATE()],
    processor=ObsProcessor(),
    batch_size=BATCH_SIZE,
    tau_for_actor=1e-3,
    tau_for_critic=1e-2,
    policy=policy,
    test_policy=test_policy,
)

---

In [23]:
from keras.layers import Dense, Input, Concatenate, Conv2D, Flatten, TimeDistributed, LSTM
from keras import regularizers
from keras.models import Model

In [15]:
# Number of entries along the leading (time) axis of every observation input.
TIME_WINDOW = 2

# Observation input shapes: the time axis followed by the per-frame shape.
# If the Keras image data format is never set, it is "channels_last".
STATE1_SIZE = (TIME_WINDOW,) + (64, 64, 1)
# NOTE(review): the processor output printed earlier in this notebook is
# (21, 21, 1) for the observer map, not (20, 20, 1) — confirm which is intended.
STATE2_SIZE = (TIME_WINDOW,) + (20, 20, 1)

ACTION_SIZE = 1       # width of the action input and of the actor's output layer
CRITIC_L2_REG = 0.01  # factor handed to regularizers.l2 for kernel regularisation

In [16]:
# Symbolic model inputs: the two observation streams (kept in this order in a
# list) and the action vector.
observation_input = [
    Input(shape=STATE1_SIZE, name='scurge_observation_input'),
    Input(shape=STATE2_SIZE, name='observer_observation_input'),
]
action_input = Input(shape=(ACTION_SIZE,), name='action_input')

In [17]:
# Display the two observation input tensors to check their shapes.
observation_input

[<tf.Tensor 'scurge_observation_input_1:0' shape=(?, 2, 64, 64, 1) dtype=float32>,
 <tf.Tensor 'observer_observation_input_1:0' shape=(?, 2, 20, 20, 1) dtype=float32>]

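- Each observation input tensor has shape `(n_samples, time_window, shape[0], shape[1], shape[2])`: the batch axis, then `TIME_WINDOW` frames, then the shape of a single processed observation.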

In [28]:
# Display the action input tensor to check its shape.
action_input

<tf.Tensor 'action_input_1:0' shape=(?, 1) dtype=float32>

In [27]:
def _shared_conv(filters, kernel_size):
    """Return a TimeDistributed ReLU Conv2D (stride 1, 'SAME' padding, L2 kernel penalty)."""
    return TimeDistributed(
        Conv2D(
            filters,
            kernel_size=kernel_size,
            strides=1,
            activation='relu',
            padding='SAME',
            kernel_regularizer=regularizers.l2(CRITIC_L2_REG),
        )
    )


# Layer instances are created once at module level; both model builders call
# these same objects instead of creating their own.

# Stack applied to the first observation input.
SHARED_CONV2D_1_1 = _shared_conv(10, 5)
SHARED_CONV2D_1_2 = _shared_conv(5, 3)
SHARED_FLATTEN_1 = TimeDistributed(Flatten())

# Stack applied to the second observation input.
SHARED_CONV2D_2_1 = _shared_conv(10, 4)
SHARED_CONV2D_2_2 = _shared_conv(5, 3)
SHARED_FLATTEN_2 = TimeDistributed(Flatten())

# Joins the two flattened per-timestep feature sequences.
SHARED_CONCATENATED = Concatenate()

In [26]:
def build_critic_model():
    """Build the critic network and print its layer summary.

    Each observation input is passed through its module-level shared
    convolution stack and flattened per timestep; the two feature sequences
    are concatenated and reduced by an LSTM. The action input goes through two
    dense layers, is concatenated with the LSTM output, and the result is
    mapped through two more dense layers to a single linear unit.

    Uses the module-level ``observation_input``, ``action_input``,
    ``SHARED_*`` layers and ``CRITIC_L2_REG``.

    Returns:
        A Keras ``Model`` whose inputs are the two observation inputs followed
        by the action input, and whose only output is the final dense layer.
    """
    def dense(units, activation='relu'):
        # Every dense layer in the critic carries the same L2 kernel penalty.
        return Dense(units, activation=activation,
                     kernel_regularizer=regularizers.l2(CRITIC_L2_REG))

    first_obs, second_obs = observation_input

    # Observation branch: conv -> conv -> flatten for each input, then merge.
    first_feat = SHARED_FLATTEN_1(SHARED_CONV2D_1_2(SHARED_CONV2D_1_1(first_obs)))
    second_feat = SHARED_FLATTEN_2(SHARED_CONV2D_2_2(SHARED_CONV2D_2_1(second_obs)))
    obs_seq = SHARED_CONCATENATED([first_feat, second_feat])
    obs_feat = LSTM(512)(obs_seq)

    # Action branch.
    act_feat = dense(30)(action_input)
    act_feat = dense(20)(act_feat)

    # Joint head over observation and action features.
    joint = Concatenate()([obs_feat, act_feat])
    joint = dense(30)(joint)
    joint = dense(20)(joint)
    critic_out = dense(1, activation='linear')(joint)

    model = Model(inputs=[first_obs, second_obs, action_input], outputs=[critic_out])
    model.summary()

    return model

In [25]:
# Build the critic; the builder also prints the layer summary.
critic = build_critic_model()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
scurge_observation_input (Input (None, 2, 64, 64, 1) 0                                            
__________________________________________________________________________________________________
observer_observation_input (Inp (None, 2, 20, 20, 1) 0                                            
__________________________________________________________________________________________________
time_distributed_1 (TimeDistrib (None, 2, 64, 64, 10 260         scurge_observation_input[0][0]   
__________________________________________________________________________________________________
time_distributed_4 (TimeDistrib (None, 2, 20, 20, 10 170         observer_observation_input[0][0] 
__________________________________________________________________________________________________
time_distr

In [29]:
def build_actor_model():
    """Build the actor network and print its layer summary.

    Each observation input is passed through its module-level shared
    convolution stack and flattened per timestep; the two feature sequences
    are concatenated, projected per timestep by a dense layer, reduced by an
    LSTM, and mapped through a dense layer to ``ACTION_SIZE`` sigmoid units.

    Uses the module-level ``observation_input`` and ``SHARED_*`` layers.

    Returns:
        A Keras ``Model`` taking ``observation_input`` and producing the
        sigmoid output layer.
    """
    first_obs, second_obs = observation_input

    # Observation features: conv -> conv -> flatten for each input, then merge.
    first_feat = SHARED_FLATTEN_1(SHARED_CONV2D_1_2(SHARED_CONV2D_1_1(first_obs)))
    second_feat = SHARED_FLATTEN_2(SHARED_CONV2D_2_2(SHARED_CONV2D_2_1(second_obs)))
    obs_seq = SHARED_CONCATENATED([first_feat, second_feat])

    # Per-timestep projection, then reduction over the time axis.
    hidden = TimeDistributed(Dense(30, activation='relu'))(obs_seq)
    hidden = LSTM(512)(hidden)
    hidden = Dense(20, activation='relu')(hidden)

    action_out = Dense(ACTION_SIZE, activation='sigmoid')(hidden)
    model = Model(inputs=observation_input, outputs=[action_out])
    model.summary()

    return model

In [30]:
# Build the actor; the builder also prints the layer summary.
actor = build_actor_model()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
scurge_observation_input (Input (None, 2, 64, 64, 1) 0                                            
__________________________________________________________________________________________________
observer_observation_input (Inp (None, 2, 20, 20, 1) 0                                            
__________________________________________________________________________________________________
time_distributed_7 (TimeDistrib (None, 2, 64, 64, 10 260         scurge_observation_input[0][0]   
__________________________________________________________________________________________________
time_distributed_10 (TimeDistri (None, 2, 20, 20, 10 170         observer_observation_input[0][0] 
__________________________________________________________________________________________________
time_distr