In [1]:
def pxlocal(line, cell):
    """Cell magic body: run a cell through ``%%px`` and then in this kernel.

    Parameters
    ----------
    line : str
        Text following the magic name; forwarded unchanged to ``%%px``.
    cell : str
        Source of the cell to execute.

    The cell is first handed to the ``px`` cell magic and afterwards executed
    by the local interactive shell, so names it defines exist in both places.
    """
    shell = get_ipython()

    # Remote execution first, local execution second (same order as the
    # statements appear; a failure in %%px still lets the local run happen
    # only if run_cell_magic itself does not raise).
    shell.run_cell_magic("px", line, cell)
    shell.run_cell(cell)
# Register pxlocal as a cell magic so later cells can start with %%pxlocal.
get_ipython().register_magic_function(pxlocal, "cell")

In [2]:
import subprocess
import ipyparallel as ipp
import time
from IPython.display import clear_output

# Ask any cluster left over from an earlier session to stop, then pause so it
# has time to exit before a new one is launched.
subprocess.Popen(["ipcluster", "stop"])
time.sleep(10)

num_agents = 16
num_engines = 8
subprocess.Popen(["ipcluster", "start", "-n=%d" % num_engines])

# `ipcluster start` is launched in the background, so count down a fixed delay
# before connecting a client to it.
wait_time = 40
for remaining in reversed(range(wait_time)):
    clear_output(wait=True)
    print('Waiting for', remaining, 'seconds')
    time.sleep(1)

rc = ipp.Client()

Waiting for 0 seconds


In [3]:
%%pxlocal

from nes_py.wrappers import JoypadSpace
import gym_super_mario_bros
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT
from gym import wrappers
from IPython.display import Video
import io
import base64
from IPython.display import HTML
import numpy as np

class Agent:
    """Agent that plays one Super Mario Bros episode with random actions.

    An episode is played by :meth:`run`; the quantities named in ``update``
    are accumulated step by step (currently only ``'reward'`` is recorded).
    """

    def __init__(self, update=None):
        """Create an agent that has not played yet.

        Parameters
        ----------
        update : container of str, optional
            Names of the per-step quantities to record. Defaults to
            ``['reward']``. A fresh list is created per instance so agents
            never share a mutable default.
        """
        self.state = None
        self.reward = None
        self.done = None
        self.info = None
        self.next_state = None
        self.update = ['reward'] if update is None else update

    def take_action(self, env, mode=None):
        """Return an action sampled uniformly from ``env.action_space``."""
        return env.action_space.sample()

    def make_env(self, mode=None, directory='./gym-results/'):
        """Build the SuperMarioBros-v0 environment with the SIMPLE_MOVEMENT joypad.

        Parameters
        ----------
        mode : str or None
            When ``'monitor'``, the environment is wrapped in
            ``wrappers.Monitor`` writing into ``directory``.
        directory : str
            Output directory used by the monitor wrapper.
        """
        env = gym_super_mario_bros.make('SuperMarioBros-v0')
        env = JoypadSpace(env, SIMPLE_MOVEMENT)
        if mode == 'monitor':
            env = wrappers.Monitor(env, directory, force=True)
        return env

    def reset_data(self):
        """Clear everything recorded by a previous episode."""
        self.state = []
        self.reward = []
        self.done = []
        self.info = dict()
        self.next_state = []

    def gather_data(self, state, reward, done, info, next_state):
        """Record one transition; only ``reward`` is stored, and only if requested."""
        if 'reward' in self.update:
            self.reward.append(reward)

    def run(self, max_steps=500, mode=None, directory='./gym-results/'):
        """Play one episode of at most ``max_steps`` steps.

        Parameters
        ----------
        max_steps : int
            Upper bound on the number of environment steps.
        mode : str or None
            ``'render'`` renders every step; ``'monitor'`` records the episode
            and displays the resulting video afterwards.
        directory : str
            Directory the monitor wrapper writes to (``'monitor'`` mode only).

        The environment is always closed, even if stepping raises.
        """
        # Only forward `directory` when it is actually needed, so subclasses
        # that override make_env(self, mode=None) keep working.
        if mode == 'monitor':
            env = self.make_env(mode=mode, directory=directory)
        else:
            env = self.make_env(mode=mode)
        self.reset_data()

        video_path = None
        try:
            state = env.reset()
            for _ in range(max_steps):
                action = self.take_action(env)
                next_state, reward, done, info = env.step(action)
                self.gather_data(state, reward, done, info, next_state)
                # Advance the observation so the next transition starts from it.
                state = next_state

                if mode == 'render':
                    env.render()

                if done:
                    break

            if mode == 'monitor':
                # Read the infix while the wrapper is still open.
                video_path = (directory
                              + 'openaigym.video.%s.video000000.mp4' % env.file_infix)
        finally:
            # Closing releases the environment in every mode; in monitor mode
            # it happens before the recorded file is displayed.
            env.close()

        if video_path is not None:
            mp4 = Video(video_path, width=600, height=450)
            display(mp4)

    def get_reward(self):
        """Return the recorded rewards, playing an episode first if none exist."""
        if self.reward is None:
            self.run()
        return self.reward

    def itsame(self):
        """Return the greeting string ``'Mario!'``."""
        return 'Mario!'

In [4]:
# Connect a client to the running cluster and build the population of agents
# (one Agent per slot, num_agents in total).
rc = ipp.Client()
generation = [Agent() for _ in range(num_agents)]

In [5]:
def parallel_run(rc, generation):
    """Run every agent's episode on the cluster engines and return the agents.

    Parameters
    ----------
    rc : ipyparallel.Client
        Client connected to the cluster; all of its engines are used.
    generation : list
        Agents to evaluate. They are split across the engines.

    Returns
    -------
    list
        The agents gathered back from the engines after ``agent.run()`` has
        been called on each of them remotely.
    """
    dview = rc[:]

    # Block so the engines hold their share before the run is issued.
    dview.scatter('generation', generation, block=True)

    # Execute on the view built from `rc` rather than through the %px magic,
    # which is IPython-only syntax and targets whichever view is globally
    # active instead of the client passed in.
    result = dview.execute(
        'partial_ans = [agent.run() for agent in generation]', block=True)
    # Re-emit engine stdout/stderr in the notebook, as %px would.
    result.display_outputs()

    return dview.gather('generation', block=True)

In [6]:
# Wall-clock timing of one generation evaluated on the cluster engines.
start_time = time.time()
generation = parallel_run(rc, generation)
end_time = time.time()

# Total reward each gathered agent collected during its episode.
ans = [sum(member.reward) for member in generation]

print('Parallel time:', round(end_time - start_time, 3))
print('Ans:', ans)

Parallel time: 73.462
Ans: [369, 529, 529, 528, 529, 529, 369, 529, 391, 529, 529, 529, 706, 700, 504, 526, 529, 698, 472, 528, 529, 529, 639, 657, 528, 529, 369, 529, 529, 527, 529, 528, 474, 529, 374, 778, 672, 373, 676, 529, 529, 529, 529, 767, 725, 371, 369, 428, 771, 529, 617, 529, 529, 368, 529, 763, 401, 529, 529, 529, 529, 529, 698, 529, 529, 625, 529, 774, 529, 529, 529, 529, 529, 653, 529, 529, 415, 457, 622, 529, 529, 529, 529, 748, 699, 744, 369, 529, 529, 529, 529, 529, 528, 529, 529, 783, 529, 528, 529, 718]


[stderr:0] 
  return (self.ram[0x86] - self.ram[0x071c]) % 256
[stderr:1] 
  return (self.ram[0x86] - self.ram[0x071c]) % 256
[stderr:2] 
  return (self.ram[0x86] - self.ram[0x071c]) % 256
[stderr:3] 
  return (self.ram[0x86] - self.ram[0x071c]) % 256


In [7]:
# Wall-clock timing of the same number of episodes played one after another
# in the local kernel.
start_time = time.time()

# A brand-new Agent is built per slot; `generation` only supplies the count.
# Presumably intentional: agents gathered from the engines already carry a
# reward list, so get_reward() on them would return it without playing.
ans = [sum(Agent().get_reward()) for _ in generation]

end_time = time.time()

print('Sequential time:', round(end_time - start_time, 3))
print('Ans:', ans)

  return (self.ram[0x86] - self.ram[0x071c]) % 256


Sequential time: 266.293
Ans: [782, 787, 787, 529, 529, 529, 529, 657, 528, 368, 674, 529, 528, 529, 529, 526, 529, 648, 615, 773, 686, 529, 529, 688, 529, 528, 529, 529, 529, 529, 529, 699, 787, 529, 691, 753, 529, 529, 529, 775, 529, 368, 529, 369, 529, 529, 529, 529, 528, 774, 634, 529, 442, 529, 768, 680, 529, 529, 529, 529, 529, 369, 372, 529, 528, 612, 736, 421, 529, 529, 620, 531, 529, 529, 369, 529, 529, 675, 369, 529, 529, 528, 772, 529, 657, 529, 529, 529, 528, 529, 483, 784, 528, 608, 626, 529, 529, 529, 529, 529]
