<a href="https://colab.research.google.com/github/YamenHabib/ReinforcementLearning/blob/main/SpaceInvaders_v0_using_keras_rl2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
try:
    from google.colab import drive
    %tensorflow_version 2.x
    COLAB = True
    print("Note: using Google CoLab")
except:
    print("Note: not using Google CoLab")
    COLAB = False

Note: using Google CoLab


In [2]:
if COLAB:
  !sudo apt-get install -y xvfb ffmpeg x11-utils
  !pip install -q 'gym==0.17.0'
  !pip install -q 'imageio==2.4.0'
  !pip install -q PILLOW
  !pip install -q 'pyglet==1.4.0'
  !pip install -q pyvirtualdisplay
  !pip install -q tf-agents
  !pip install keras-rl2

Reading package lists... Done
Building dependency tree       
Reading state information... Done
ffmpeg is already the newest version (7:3.4.8-0ubuntu0.2).
Suggested packages:
  mesa-utils
The following NEW packages will be installed:
  libxxf86dga1 x11-utils xvfb
0 upgraded, 3 newly installed, 0 to remove and 30 not upgraded.
Need to get 993 kB of archives.
After this operation, 2,981 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu bionic/main amd64 libxxf86dga1 amd64 2:1.1.4-1 [13.7 kB]
Get:2 http://archive.ubuntu.com/ubuntu bionic/main amd64 x11-utils amd64 7.7+3build1 [196 kB]
Get:3 http://archive.ubuntu.com/ubuntu bionic-updates/universe amd64 xvfb amd64 2:1.19.6-1ubuntu4.8 [784 kB]
Fetched 993 kB in 1s (777 kB/s)
debconf: unable to initialize frontend: Dialog
debconf: (No usable dialog-like program is installed, so the dialog based frontend cannot be used. at /usr/share/perl5/Debconf/FrontEnd/Dialog.pm line 76, <> line 3.)
debconf: falling back to 

In [3]:
import gym
import random
from gym.wrappers import Monitor
import glob
import io
import base64
from IPython.display import HTML
from pyvirtualdisplay import Display
from IPython import display as ipythondisplay

display = Display(visible=0, size=(1400, 900))
display.start()

def show_video():
  mp4list = glob.glob('video/*.mp4')
  if len(mp4list) > 0:
    mp4 = mp4list[0]
    video = io.open(mp4, 'r+b').read()
    encoded = base64.b64encode(video)
    ipythondisplay.display(HTML(data='''<video alt="test" autoplay 
                loop controls style="height: 400px;">
                <source src="data:video/mp4;base64,{0}" type="video/mp4" />
             </video>'''.format(encoded.decode('ascii'))))
  else: 
    print("Could not find video")
    

def wrap_env(env):
  env = Monitor(env, './video', force=True)
  return env

In [4]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Convolution2D
from tensorflow.keras.optimizers import Adam

In [5]:
env_name = 'SpaceInvaders-v0'
env = gym.make(env_name)
height, width, channels = env.observation_space.shape
actions = env.action_space.n
print("Height: {}, Width: {}, channels: {}".format(height, width, channels))
print(actions)

Height: 210, Width: 160, channels: 3
6


In [6]:
env.unwrapped.get_action_meanings()

['NOOP', 'FIRE', 'RIGHT', 'LEFT', 'RIGHTFIRE', 'LEFTFIRE']

In [7]:
episodes = 5
for episode in range(episodes):
  state = env.reset()
  done = False
  rewards = 0

  while not done:
    env.render()
    action = random.choice([0,2,3,4,5])
    new_state, reward, done, _ = env.step(action)
    rewards += reward

  print("Episode: {}, Reward: {}".format(episode, rewards))

env.close()

Episode: 0, Reward: 210.0
Episode: 1, Reward: 90.0
Episode: 2, Reward: 30.0
Episode: 3, Reward: 380.0
Episode: 4, Reward: 80.0


In [8]:
if COLAB:
  env = wrap_env(gym.make(env_name))
else:
  env = gym.make(env_name)

env.reset()
done = False

i = 0
while not done:
    i += 1
    action = action = random.choice([i for i in range(actions)])
    state, reward, done, _ = env.step(action)
    env.render()
    #print(f"Step {i}: Reward={reward}. done={done}")
    
env.close()

In [9]:
show_video()

In [10]:
def build_model(height, width, channels, actions):
  model = Sequential()
  model.add(Convolution2D(32, (8,8), strides=(4,4), activation='relu', input_shape=(3, height, width, channels)))
  model.add(Convolution2D(64, (4,4), strides=(2,2), activation='relu'))
  model.add(Convolution2D(64, (3,3), strides=(1,1), activation='relu'))
  model.add(Convolution2D(128, (3,3), activation='relu'))
  model.add(Flatten())
  model.add(Dense(512, activation='relu'))
  model.add(Dense(256, activation='relu'))
  model.add(Dense(128, activation='relu'))
  model.add(Dense(actions, activation='linear'))
  return model

In [11]:
model = build_model(height, width, channels, actions)

In [12]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 3, 51, 39, 32)     6176      
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 3, 24, 18, 64)     32832     
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 3, 22, 16, 64)     36928     
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 3, 20, 14, 128)    73856     
_________________________________________________________________
flatten (Flatten)            (None, 107520)            0         
_________________________________________________________________
dense (Dense)                (None, 512)               55050752  
_________________________________________________________________
dense_1 (Dense)              (None, 256)               1

In [13]:
from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy

In [14]:
def build_agent(model, actions):
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1, value_test=.2, nb_steps=10000)
    memory = SequentialMemory(limit=1000, window_length=3)
    dqn = DQNAgent(model=model, memory=memory, policy=policy,
                  enable_dueling_network=True, dueling_type='avg', 
                   nb_actions=actions, nb_steps_warmup=1000
                  )
    return dqn

In [15]:
del model

In [16]:
model = build_model(height, width, channels, actions)

In [17]:
agent = build_agent(model, actions)
agent.compile(Adam(lr=1e-4))
agent.fit(env, nb_steps = 10000, visualize=False, verbose=2)

Training for 10000 steps ...




  545/10000: episode: 1, duration: 33.708s, episode steps: 545, steps per second:  16, episode reward: 125.000, mean reward:  0.229 [ 0.000, 25.000], mean action: 2.545 [0.000, 5.000],  loss: --, mean_q: --, mean_eps: --




 1112/10000: episode: 2, duration: 15.434s, episode steps: 567, steps per second:  37, episode reward: 55.000, mean reward:  0.097 [ 0.000, 20.000], mean action: 2.541 [0.000, 5.000],  loss: 6.420318, mean_q: 3.470096, mean_eps: 0.904960
 2405/10000: episode: 3, duration: 135.249s, episode steps: 1293, steps per second:  10, episode reward: 285.000, mean reward:  0.220 [ 0.000, 30.000], mean action: 2.477 [0.000, 5.000],  loss: 0.525623, mean_q: 2.391806, mean_eps: 0.841780
 3522/10000: episode: 4, duration: 116.769s, episode steps: 1117, steps per second:  10, episode reward: 460.000, mean reward:  0.412 [ 0.000, 200.000], mean action: 2.495 [0.000, 5.000],  loss: 1.221056, mean_q: 3.171260, mean_eps: 0.733330
 4099/10000: episode: 5, duration: 60.365s, episode steps: 577, steps per second:  10, episode reward: 105.000, mean reward:  0.182 [ 0.000, 30.000], mean action: 2.674 [0.000, 5.000],  loss: 7.477869, mean_q: 3.508515, mean_eps: 0.657100
 4499/10000: episode: 6, duration: 42.45

<tensorflow.python.keras.callbacks.History at 0x7f74201a0690>

In [18]:
scores = agent.test(env, nb_episodes=10, visualize=True)
print(np.mean(scores.history['episode_reward']))

Testing for 10 episodes ...
Episode 1: reward: 380.000, steps: 837
Episode 2: reward: 160.000, steps: 813
Episode 3: reward: 40.000, steps: 382
Episode 4: reward: 120.000, steps: 800
Episode 5: reward: 35.000, steps: 368
Episode 6: reward: 105.000, steps: 699
Episode 7: reward: 125.000, steps: 653
Episode 8: reward: 110.000, steps: 678
Episode 9: reward: 460.000, steps: 1567
Episode 10: reward: 75.000, steps: 714
161.0


In [19]:
agent.save_weights('/content/drive/MyDrive/models/SpaceInvaders_v0_agents_weights.h5f')

[TIP] Next time specify overwrite=True!


In [27]:
if COLAB:
  env = wrap_env(gym.make(env_name))
else:
  env = gym.make(env_name)

env.reset()
done = False
rewards = 0
step = 0
steps = 0
while not done:
    step += 1
    action = agent.forward(state)
    state, reward, done, _ = env.step(action)
    env.render()
    rewards += reward

print(f"Total Steps: {i}: Rewards={rewards}.")
    
env.close()

Total Steps: 386: Rewards=415.0.


In [28]:
show_video()