In [1]:
import os
import sys
import json
import glob
import gym
import numpy as np
import matplotlib.pyplot as plt
import cv2
from PIL import Image, ImageFont, ImageDraw
import imageio

sys.path.append('../')
from bnp_options import *
from env.atari_env import AtariEnv
from env.augmented_atari_env import AugmentedAtariEnv

sys.path.append('../../stable-baselines3')
from stable_baselines3.common.env_util import make_vec_env, make_atari_env
from stable_baselines3.common.vec_env import VecTransposeImage
from stable_baselines3.common.atari_wrappers import AtariWrapper
from stable_baselines3 import PPO

In [2]:
# Root directory that holds every BNP-options training run.
runs_dir = '/data/bnp_options_runs'

# All experiments this notebook can visualise, keyed by the short name that is
# also used for the output folder. Each entry is
# (job id, run tag, Gym environment id); the pretrained and augmented run
# folders are derived from the first two. Keeping everything in one table means
# `name`, the run paths and `env_name` can never get out of sync.
EXPERIMENTS = {
    'montezumaRevenge': ('6777783', 'montezumaRevenge_0', 'MontezumaRevengeNoFrameskip-v4'),
    'chopperCommand': ('6777803', 'chopperCommand_0', 'ChopperCommandNoFrameskip-v4'),
    'breakout': ('6777804', 'breakout_2', 'BreakoutNoFrameskip-v4'),
    'spaceInvaders': ('6777813', 'spaceInvaders_2', 'SpaceInvadersNoFrameskip-v4'),
}

# Change only this line to switch experiment.
name = 'spaceInvaders'

_job_id, _run_tag, env_name = EXPERIMENTS[name]
pretrained_path = f'{runs_dir}/{_job_id}_bnp_{_run_tag}'
trained_path = f'{runs_dir}/{_job_id}_augmented_bnp_{_run_tag}'

In [3]:
# Seed both the legacy global NumPy RNG and a dedicated generator:
# there were some issues with reproducibility when only one of them was seeded.
random_seed = 0
np.random.seed(random_seed)
rng_master = np.random.RandomState(random_seed)

In [4]:
# Fixed settings handed to the options model.
device = 'cuda'
state_dim = 1024

# A plain (unwrapped-by-us) environment, used to read the size of the
# primitive action space and, later, the action names for labelling.
env = gym.make(env_name)
action_dim = env.action_space.n

In [5]:
# Rebuild the options model from the hyper-parameters stored next to its
# checkpoint, then restore the trained weights.
config_path = os.path.join(pretrained_path, 'config.json')
with open(config_path, 'r') as fh:
    config = json.load(fh)

model = BNPOptions(None, state_dim, action_dim, device, rng=rng_master, **config)

checkpoint_path = os.path.join(pretrained_path, 'checkpoint.pth')
model.load(checkpoint_path)


In [6]:
def augmented_atari_wrapper(env, model, max_steps=15):
    """Wrap a raw Atari env so that it also exposes the model's options.

    The environment is first passed through ``AtariWrapper`` and the result is
    then wrapped in ``AugmentedAtariEnv`` together with ``model``.

    Args:
        env: The environment to wrap.
        model: The options model handed to ``AugmentedAtariEnv``.
        max_steps: Forwarded to ``AugmentedAtariEnv`` as ``max_steps``
            (presumably the maximum number of steps a single option may run;
            TODO confirm against ``AugmentedAtariEnv``). Defaults to 15, the
            value that used to be hard-coded here.

    Returns:
        The ``AugmentedAtariEnv`` instance wrapping ``env``.
    """
    env = AtariWrapper(env)
    env = AugmentedAtariEnv(env, model, max_steps=max_steps)
    return env

n_envs = 10


def _wrap_with_options(env):
    """Apply the option-augmented Atari wrapper using the loaded options model."""
    return augmented_atari_wrapper(env, model)


# Build the vectorised environment first, then transpose its image observations.
_vec_env = make_vec_env(
    env_name,
    n_envs=n_envs,
    seed=random_seed,
    wrapper_class=_wrap_with_options,
)
augmented_env = VecTransposeImage(_vec_env)

In [7]:
# Policy trained on the option-augmented environment; this is the agent that
# is rolled out and visualised in the cells below.
ppo = PPO.load(os.path.join(trained_path, "ppo_augmented"))
# ppo.__dict__
# Fresh, untrained PPO on the same environment. It is only referenced by the
# commented-out `learn` call in the next cell.
# NOTE(review): `custom_buffer` is not a documented upstream stable-baselines3
# argument - presumably it comes from the local checkout added to sys.path
# ('../../stable-baselines3'); confirm before running against stock SB3.
ppo2 = PPO('CnnPolicy', augmented_env, n_steps=512, verbose=1, custom_buffer=True)

Using cuda device


In [8]:
# trained_ppo = ppo2.learn(total_timesteps=20000)

In [9]:
# Roll out the loaded policy in all environments in parallel, recording the
# chosen actions, the rewards and the frames reported by each environment.
actions = []      # one array of per-env actions for every vectorised step
all_rewards = []  # per-step rewards, zeroed for envs that were already flagged as done
frames = [[] for _ in range(n_envs)]  # frames collected per environment

obs = augmented_env.reset()
dones_flag = np.zeros(n_envs, dtype=bool)  # True once an env's info contained 'episode'

# Stop when every env has been flagged or after 500 vectorised steps.
while not (dones_flag.all() or len(actions) >= 500):
    action, _states = ppo.predict(obs, deterministic=False)
    actions.append(action)
    obs, rewards, dones, info = augmented_env.step(action)

    # Mask with the flags from *before* this step, so the reward of the step
    # that ends the episode is still counted.
    all_rewards.append(rewards * np.logical_not(dones_flag))
    finished_now = np.array(['episode' in env_info for env_info in info])
    dones_flag = finished_now | dones_flag

    for env_frames, env_info in zip(frames, info):
        env_frames.extend(env_info['frames'])

    if len(actions) % 100 == 0:
        print(len(actions))

100


In [10]:
# Number of vectorised steps recorded by the rollout loop (one entry per step).
len(actions)

132

In [11]:
# Sum the (masked) per-step rewards over the step axis: one total per environment.
np.sum(all_rewards, axis=0)

array([29.188013, 27.765678, 32.975166, 28.140633, 32.34302 , 18.03625 ,
       31.603065, 29.496185, 31.670149, 39.105423], dtype=float32)

In [23]:
# Per-experiment output folder; the saved frames/actions arrays, the per-run
# image folders and the GIF written by the cells below all go under it.
images_dir = f'/data/bnp_options_viz/{name}'
os.makedirs(images_dir, exist_ok=True)

In [24]:
# `frames` is a nested list (one list of frames per environment) and the
# per-environment lists may differ in length. Handing such a ragged list
# straight to np.save makes NumPy try to build a rectangular array, which
# raises ValueError on NumPy >= 1.24. Store one Python list per environment in
# an explicit 1-D object array instead; `frames[j][i]` indexing is unchanged
# after reloading with allow_pickle=True.
frames_to_save = np.empty(len(frames), dtype=object)
for env_idx, env_frames in enumerate(frames):
    frames_to_save[env_idx] = list(env_frames)
np.save(os.path.join(images_dir, 'frames.npy'), frames_to_save, allow_pickle=True)

In [25]:
# Reload the frames written by the previous cell (lets the cells below run
# without repeating the rollout). Pickle support is requested because the
# array may hold Python objects; only load files this notebook produced.
frames_path = os.path.join(images_dir, 'frames.npy')
frames = np.load(frames_path, allow_pickle=True)

In [26]:
# Persist the recorded actions next to the frames.
actions_path = os.path.join(images_dir, 'actions.npy')
np.save(actions_path, actions, allow_pickle=True)

In [27]:
# Reload the actions as an array; the cells below index it as [step, env].
actions_path = os.path.join(images_dir, 'actions.npy')
actions = np.load(actions_path, allow_pickle=True)

In [28]:
# Dump the first frames of every environment as JPEGs, one folder per run.
# The loop bounds follow the data actually recorded instead of assuming
# exactly 10 environments with at least 500 frames each, which raised
# IndexError for shorter rollouts.
max_frames_per_run = 500
for j in range(len(frames)):
    run_dir = os.path.join(images_dir, f'images{j}')
    os.makedirs(run_dir, exist_ok=True)
    for i in range(min(max_frames_per_run, len(frames[j]))):
        # Reverse the channel axis before writing: cv2.imwrite expects BGR
        # (frames are presumably RGB - TODO confirm against the env wrapper).
        cv2.imwrite(os.path.join(run_dir, f'frame_{i}.jpg'), frames[j][i][..., ::-1])

In [29]:
# Index of the environment (column of `actions`, folder `images{run}`) to label and turn into a GIF.
run = 9

In [30]:
# Output folder for the labelled copies (created on demand) and the input
# folder holding the raw frames of the selected run.
labelled_images_folder = os.path.join(images_dir, f'labelled_images{run}')
os.makedirs(labelled_images_folder, exist_ok=True)
images_folder = os.path.join(images_dir, f'images{run}')

In [31]:
# Expand the per-step actions of the selected run into one entry per frame:
# an option (index >= action_dim) is repeated 15 times, a primitive action
# appears once.
temp_actions = actions[:, run]
duplicated_actions = np.array([
    a
    for a in temp_actions
    for _ in range(15 if a >= action_dim else 1)
])

In [32]:
# Stamp every saved frame of the selected run with the action or option
# assigned to it in `duplicated_actions`.
action_meanings = env.unwrapped.get_action_meanings()

# ImageDraw.text has no `fontsize` argument; the text size has to come from a
# font object passed via `font=`.
try:
    label_font = ImageFont.load_default(size=15)  # `size` needs Pillow >= 10.1
except (TypeError, OSError, ImportError):
    label_font = ImageFont.load_default()  # older Pillow / no FreeType: fixed-size font

# Never read past the labels we actually have, and stop at the first missing frame.
for i in range(min(500, len(duplicated_actions))):
    source_path = f"{images_folder}/frame_{i}.jpg"
    if not os.path.exists(source_path):
        break

    action = duplicated_actions[i]
    if action >= action_dim:
        # Indices past the primitive action space denote options.
        label = f"Option {action - action_dim}"
    else:
        label = f"Action {action_meanings[action]}"

    # Open one image at a time and close it again, rather than keeping
    # hundreds of file handles open at once.
    with Image.open(source_path) as image:
        draw = ImageDraw.Draw(image)
        draw.text((1, 1), label, fill=(255, 255, 0), font=label_font)
        image.save(os.path.join(images_dir, f'labelled_images{run}/frame_{i}.jpg'))

In [33]:
# Collect the labelled frames that actually exist (at most 500, in order)
# before opening the writer, so a short run produces a complete, shorter GIF
# instead of failing halfway through and leaving a truncated file behind.
gif_frame_paths = []
for i in range(500):
    frame_path = f"{labelled_images_folder}/frame_{i}.jpg"
    if not os.path.exists(frame_path):
        break
    gif_frame_paths.append(frame_path)

with imageio.get_writer(os.path.join(images_dir, 'test.gif'), mode='I') as writer:
    for filename in gif_frame_paths:
        image = imageio.imread(filename)
        writer.append_data(image)

In [None]:
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import time
%matplotlib notebook

In [None]:
# Interactive preview of one environment's frames with the active action shown on top.
anim_run = 3
anim_frames = frames[anim_run]

# `actions` is indexed [step, env], while the animation advances per frame.
# Expand this run's per-step actions into per-frame labels with the same rule
# used for `duplicated_actions` (option -> 15 entries, primitive action -> 1).
# NOTE(review): this assumes every option contributes exactly 15 frames - confirm.
anim_labels = [
    a
    for a in np.asarray(actions)[:, anim_run]
    for _ in range(15 if a >= action_dim else 1)
]

# Never animate past the frames that were actually recorded.
n_anim_frames = min(1000, len(anim_frames))

fig = plt.figure()
ax = plt.axes()
im = ax.imshow(anim_frames[0])
label = ax.text(10, 10, "NA", ha='center', va='center', fontsize=20, color="Red")

def animate(n, *args, **kwargs):
    """Show frame ``n`` and its action label by updating the existing artists."""
    # Update the image in place; calling ax.imshow here would add a new
    # image artist on every frame.
    im.set_data(anim_frames[n])
    label.set_text(str(anim_labels[n]) if n < len(anim_labels) else "NA")
    return

ani = animation.FuncAnimation(fig, animate, frames=n_anim_frames, interval=30, blit=False, repeat=False)
fig.show()