## Install the dependencies

In [83]:
#!pip install tensorflow==2.3.0
#!pip install keras
#!pip install keras-rl2
#!pip install stable-baselines[mpi]

In [84]:
import os
import gym
import slimevolleygym
from slimevolleygym import SurvivalRewardEnv

from stable_baselines.ppo1 import PPO1
from stable_baselines.common.policies import MlpPolicy
from stable_baselines import logger
from stable_baselines.common.callbacks import EvalCallback

NUM_TIMESTEPS = int(2e7)
SEED = 721
EVAL_FREQ = 250000
EVAL_EPISODES = 1000
LOGDIR = "ppo1" # moved to zoo afterwards.

logger.configure(folder=LOGDIR)

env = gym.make("SlimeVolley-v0")
env.seed(SEED)

# take mujoco hyperparams (but doubled timesteps_per_actorbatch to cover more steps.)
model = PPO1(MlpPolicy, env, timesteps_per_actorbatch=4096, clip_param=0.2, entcoeff=0.0, optim_epochs=10,
                 optim_stepsize=3e-4, optim_batchsize=64, gamma=0.99, lam=0.95, schedule='linear', verbose=2)

eval_callback = EvalCallback(env, best_model_save_path=LOGDIR, log_path=LOGDIR, eval_freq=EVAL_FREQ, n_eval_episodes=EVAL_EPISODES)

model.learn(total_timesteps=NUM_TIMESTEPS, callback=eval_callback)

model.save(os.path.join(LOGDIR, "final_model")) # probably never get to this point.

env.close()

ModuleNotFoundError: No module named 'tensorflow.contrib'

## Make the "SlimeVolley" Environment

In [70]:
import gym
import slimevolleygym

In [71]:
env = gym.make("SlimeVolley-v0")
state = env.observation_space.shape[0]
actions = env.action_space.n

In [72]:
print(state)
print(actions)
print(env.unwrapped.get_action_meanings())
type(actions)

12
3
['NOOP', 'UP', 'RIGHT', 'LEFT', 'UPRIGHT', 'UPLEFT']


int

In [73]:
env.reset()
env.step([0,1,0])

(array([1.258, 0.15 , 1.75 , 0.   , 0.   , 1.2  , 0.867, 2.152, 1.2  , 0.15 , 0.   , 0.   ]),
 0,
 False,
 {'ale.lives': 5,
  'ale.otherLives': 5,
  'otherObs': array([ 1.2  ,  0.15 ,  0.   ,  0.   ,  0.   ,  1.2  , -0.867,  2.152,  1.258,  0.15 ,  1.75 ,  0.   ]),
  'state': array([1.258, 0.15 , 1.75 , 0.   , 0.   , 1.2  , 0.867, 2.152, 1.2  , 0.15 , 0.   , 0.   ]),
  'otherState': array([ 1.2  ,  0.15 ,  0.   ,  0.   ,  0.   ,  1.2  , -0.867,  2.152,  1.258,  0.15 ,  1.75 ,  0.   ])})

## Creating the Deep Learning Model

In [74]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam

In [75]:
def creat_model(state, actions):
    model = Sequential()
    model.add(Flatten(input_shape=(1, state)))
    model.add(Dense(24, activation='relu'))
    model.add(Dense(24, activation='relu'))
    model.add(Dense(actions, activation='linear'))
    return model

In [76]:
model = creat_model(state, actions)
model.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_5 (Flatten)          (None, 12)                0         
_________________________________________________________________
dense_15 (Dense)             (None, 24)                312       
_________________________________________________________________
dense_16 (Dense)             (None, 24)                600       
_________________________________________________________________
dense_17 (Dense)             (None, 3)                 75        
Total params: 987
Trainable params: 987
Non-trainable params: 0
_________________________________________________________________


## Creating the DQN Agent

In [77]:
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

In [78]:
def creat_agent(model, actions):
    memory = SequentialMemory(limit=50000, window_length=1)
    policy = BoltzmannQPolicy()
    DQN_Agent = DQNAgent(model=model, memory=memory, policy=policy,
                        nb_actions=actions, nb_steps_warmup=10, target_model_update=1e-2)
    return DQN_Agent

In [79]:
#actions = env.action_space
DQN_Agent = creat_agent(model, actions)
DQN_Agent.compile(Adam(lr=1e-2), metrics=['mae'])
DQN_Agent.fit(env, nb_steps=50000, visualize=False, verbose=1)

Training for 50000 steps ...
Interval 1 (0 steps performed)


IndexError: invalid index to scalar variable.