<a href="https://colab.research.google.com/github/eisbetterthanpi/pytorch/blob/main/curiousity_lstm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#### setup

In [1]:
# # https://github.com/kimhc6028/pytorch-noreward-rl
# https://stackoverflow.com/questions/67808779/running-gym-atari-in-google-colab
%pip install -U gym
%pip install -U gym[atari,accept-rom-license]
# !pip install gym[box2d]
# import gym

!pip install gym-super-mario-bros nes-py
# https://github.com/Kautenja/gym-super-mario-bros
from nes_py.wrappers import JoypadSpace
import gym_super_mario_bros
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT, COMPLEX_MOVEMENT
# env = gym_super_mario_bros.make('SuperMarioBros-v0')
# env = JoypadSpace(env, SIMPLE_MOVEMENT)

!pip install colabgymrender
!pip install perceiver-pytorch

import gym
class SparseEnv(gym.Wrapper): # https://alexandervandekleut.github.io/gym-wrappers/
    """Gym wrapper that withholds all reward until the episode ends.

    Every non-terminal step reports a reward of 0. The step on which the
    wrapped environment reports ``done`` returns the sum of every reward
    collected since the last ``reset()``.
    """
    def __init__(self, env):
        super().__init__(env)
        self.env = env
        self.total_rewards = 0  # running sum of rewards for the current episode

    def step(self, action):
        """Step the wrapped env and return ``(observation, reward, done, info)``.

        ``reward`` is the accumulated episode reward when ``done`` is true and
        0 otherwise.
        """
        observation, reward, done, info = self.env.step(action)
        self.total_rewards += reward
        if done:
            return observation, self.total_rewards, done, info
        # Do NOT clear the running sum here: zeroing it on every intermediate
        # step would leave only the final step's reward to be returned at the end.
        return observation, 0, done, info

    def reset(self):
        """Clear the accumulated reward and reset the wrapped environment."""
        self.total_rewards = 0
        return self.env.reset()
# env = SparseEnv(gym.make("LunarLander-v2"))

class MarioSparse(gym.Wrapper):
    """Wrapper for the Mario env that ends the episode when ``info['life']`` drops below 2.

    The reward handed to the caller is ``info['score']`` as reported by the
    wrapped environment; the environment's own reward value is discarded.
    """
    def __init__(self, env):
        super().__init__(env)
        self.env = env
        # Running sum of info['score'] over all steps. It is only written by
        # this wrapper (never read) and is not cleared by reset().
        self.total_score = 0

    def step(self, action):
        """Step the wrapped env and return ``(observation, score, done, info)``.

        ``done`` is true when the wrapped environment reports the episode as
        finished OR when a life has been lost (``info['life'] < 2``).
        """
        observation, reward, done, info = self.env.step(action)
        life = info['life']
        score = info['score']
        self.total_score += score
        # Propagate the wrapped env's own termination as well as the lost-life
        # rule. Reporting False for an env that is already done would make the
        # caller keep stepping a finished episode.
        done = bool(done or life < 2)
        return observation, score, done, info

    def reset(self):
        """Reset the wrapped environment and return its initial observation."""
        return self.env.reset()
# env = MarioSparse(env)

import torch
# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Weights & Biases logging switch; stays off unless the wandb lines below are enabled.
log = False
# !pip install wandb
# import wandb
# wandb.login() # 
# wandb.init(project="curiousity_simple", entity="bobdole")
# log=True


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting gym
  Downloading gym-0.24.1.tar.gz (696 kB)
[K     |████████████████████████████████| 696 kB 5.2 MB/s 
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone
Collecting gym-notices>=0.0.4
  Downloading gym_notices-0.0.7-py3-none-any.whl (2.7 kB)
Building wheels for collected packages: gym
  Building wheel for gym (PEP 517) ... [?25l[?25hdone
  Created wheel for gym: filename=gym-0.24.1-py3-none-any.whl size=793151 sha256=186fed457d657f0635a3a1e141a35bc84ca692e519eb5a0ecbaf3c2c7e579c10
  Stored in directory: /root/.cache/pip/wheels/18/0e/54/63d9f3d16ddf0fec1622e90d28140df5e6016bcf8ea920037d
Successfully built gym
Installing collected packages: gym-notices, gym
  Attempting uninstall: gym
    Found existing installation: gym 0.17.3
    Uninstalling gym-0.17.3:
    

#### model (simpler)

In [7]:
# model.py
# https://github.com/kimhc6028/pytorch-noreward-rl/blob/master/model.py
import math
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

class ActorCritic(torch.nn.Module):
    """A3C actor-critic with an ICM (curiosity) head, each with its own LSTM cell.

    ``forward`` runs one of two sub-networks, selected by its ``icm`` flag:

    * ``icm`` falsy  -> policy path: conv -> LSTMCell -> (value, action logits)
    * ``icm`` truthy -> ICM path: conv embedding of the next state, ICM
      LSTMCell, inverse model (predicts the action) and forward model
      (predicts the next-state embedding).

    Args:
        in_shape: observation shape. ``in_shape[0]`` is used as the Conv2d
            input-channel count and ``in_shape[1]`` as the row length the
            conv output is flattened to, so the conv output must flatten to a
            multiple of ``in_shape[1]``.
        action_space: object exposing ``n``, the number of discrete actions.
    """

    HIDDEN = 256  # LSTMCell hidden size; callers create (1, 256) hidden states

    @staticmethod
    def _conv_stack(in_channels):
        """Five stride-2 3x3 convolutions (32 output channels), each followed by ELU."""
        layers = []
        channels = in_channels
        for _ in range(5):
            layers += [nn.Conv2d(channels, 32, 3, stride=2, padding=1), nn.ELU()]
            channels = 32
        return nn.Sequential(*layers)

    def __init__(self, in_shape, action_space):
        super().__init__()
        self.in_dim = in_shape # mario (240, 256)
        feat_dim = in_shape[1]
        hidden = self.HIDDEN
        num_outputs = action_space.n

        # --- A3C policy/value path ---
        self.conv = self._conv_stack(in_shape[0])
        self.lstm = nn.LSTMCell(feat_dim, hidden)
        self.critic_linear = nn.Linear(hidden, 1) # -> value
        self.actor_linear = nn.Linear(hidden, num_outputs) # -> action

        # --- ICM path ---
        self.icm_conv = self._conv_stack(in_shape[0]) # ICM embed phi
        self.icm_lstm = nn.LSTMCell(feat_dim, hidden)
        # Inverse model: cat(icm hidden state, phi(st+1)) -> action probabilities.
        # The first operand is the LSTM hidden state, hence `hidden + feat_dim`.
        # dim=1 is given explicitly; the implicit choice is deprecated and warns.
        self.inv_linear = nn.Sequential(
            nn.Linear(hidden + feat_dim, 256), nn.ReLU(),
            nn.Linear(256, num_outputs), nn.Softmax(dim=1)
            )
        # Forward model: cat(icm hidden state, one-hot action) -> predicted phi(st+1).
        self.fwd_linear = nn.Sequential(
            nn.Linear(hidden + num_outputs, 256), nn.ReLU(),
            nn.Linear(256, feat_dim)
            )

    def forward(self, inputs, icm):
        """Run either the policy path or the ICM path.

        Args:
            inputs: when ``icm`` is falsy, ``(st, (hx, cx))``; when truthy,
                ``((icm_hx, icm_cx), st1, at)`` where ``at`` is concatenated
                with the hidden state along dim 1 (the caller passes a one-hot
                action).
            icm: selects the ICM path when truthy.

        Returns:
            Policy path: ``(value, action_logits, (hx1, cx1))``.
            ICM path: ``(vec_st1, inverse, forward, (icm_hx1, icm_cx1))`` where
            ``inverse`` is the softmax over actions and ``forward`` is the
            predicted embedding of ``st1``.
        """
        if not icm: # A3C
            st, (a3c_hx, a3c_cx) = inputs
            vec_st = self.conv(st).view(-1, self.in_dim[1])
            a3c_hx1, a3c_cx1 = self.lstm(vec_st, (a3c_hx, a3c_cx))
            critic = self.critic_linear(a3c_hx1)
            actor = self.actor_linear(a3c_hx1)
            return critic, actor, (a3c_hx1, a3c_cx1)

        # ICM: the incoming hidden state stands in for the embedding of the
        # previous state; only the next state is passed through the conv stack.
        (icm_hx, icm_cx), st1, at = inputs
        vec_st1 = self.icm_conv(st1).view(-1, self.in_dim[1])
        icm_hx1, icm_cx1 = self.icm_lstm(vec_st1, (icm_hx, icm_cx))

        inverse_vec = torch.cat((icm_hx, vec_st1), 1) # predict at
        forward_vec = torch.cat((icm_hx, at), 1) # predict vec_st1
        inverse = self.inv_linear(inverse_vec)
        forward = self.fwd_linear(forward_vec)
        return vec_st1, inverse, forward, (icm_hx1, icm_cx1)
# vec_st1, inverse, forward, (icm_hx, icm_cx) = model(((vec_st, (icm_hx, icm_cx)), st1.unsqueeze(0).to(device), at.to(device)), icm = True)            


#### perceiverio

In [None]:
from perceiver_pytorch import PerceiverIO
# https://github.com/lucidrains/perceiver-pytorch
# https://github.com/lucidrains/perceiver-pytorch
def _make_perceiver(dim, queries_dim):
    """Build a PerceiverIO on `device` with the hyperparameters shared by all three nets.

    Args:
        dim: dimension of the sequence to be encoded.
        queries_dim: dimension of the decoder queries (must be an integer).
    """
    return PerceiverIO(
        dim = dim,                   # dimension of sequence to be encoded
        queries_dim = queries_dim,   # dimension of decoder queries
        logits_dim = None,           # dimension of final logits
        depth = 6,                   # depth of net
        num_latents = 64,            # number of latents, or induced set points, or centroids. different papers giving it different names
        latent_dim = 64,             # latent dimension
        cross_heads = 1,             # number of heads for cross attention. paper said 1
        latent_heads = 4,            # number of heads for latent self attention, 8
        cross_dim_head = 16,         # number of dimensions per cross attention head
        latent_dim_head = 16,        # number of dimensions per latent self attention head
        weight_tie_layers = False    # whether to weight tie layers (optional, as indicated in the diagram)
    ).to(device)

# NOTE(review): `env` must already exist when this cell runs; in this notebook
# it is only created in the later "main" cell.
obs_dim = env.observation_space.shape[0]*env.observation_space.shape[1]
# queries_dim has to be a number, so use the action count rather than the
# action-space object itself.
n_actions = env.action_space.n

actor = _make_perceiver(obs_dim, n_actions)
critic = _make_perceiver(obs_dim, n_actions)
lstm = _make_perceiver(256, 256)



    # NOTE(review): orphaned draft of a `forward` method. No enclosing class
    # header is present in this cell, and the body is incomplete as written:
    #   * `x` is read by `self.conv4(x)` before anything in this function assigns it;
    #   * `critic` / `actor` are returned without being computed here;
    #   * the ICM branch returns `vec_st1`, `inverse`, `forward` without computing them.
    # It looks like an abbreviated sketch kept for reference, not runnable code
    # -- TODO confirm and either complete it inside a class or delete it.
    def forward(self, inputs, icm):
        if icm == False: #A3C
            # Unpack the observation and the recurrent (hidden, cell) state.
            inputs, (a3c_hx, a3c_cx) = inputs # [1, 210, 160, 3], ([1, 256], [1, 256])


            x = F.elu(self.conv4(x)) # [1, 32, 10, 1]
            # x = x.view(-1, 32 * 3 * 3)
            # Flatten to rows of 32 * 10 features before the LSTM cell.
            x = x.view(-1, 32 * 10)

            a3c_hx, a3c_cx = self.lstm(x, (a3c_hx, a3c_cx))

            return critic, actor, (a3c_hx, a3c_cx) # [1, 1], [1, 18], ([1, 256], [1, 256])

        else: #icm
            # ICM inputs: state, next state, action.
            s_t, s_t1, a_t = inputs
            # s_t1=s_t1.float()
            # print("###s t###",s_t.dtype) # [1, 210, 160, 3] torch.float32


            return vec_st1, inverse, forward # [1, 320], [1, 18], [1, 320]



NameError: ignored

#### train simple

In [8]:
# train.py
# https://github.com/kimhc6028/pytorch-noreward-rl/blob/master/train.py
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import time

# def train(rank, args, shared_model, optimizer=None):
def train(env, args, model, optimizer=None):
    """Train `model` with A3C + ICM for `num_episodes` episodes, one update per episode.

    Each episode is rolled out until the environment reports done or
    `max_episode_length` steps have elapsed. The extrinsic reward is augmented
    with the ICM forward-model error, then a single optimizer step is taken on
    the combined ICM and policy/value loss.

    Args:
        env: environment with the 4-tuple `step` API and a discrete
            `action_space` exposing `n`.
        args: unused; kept for signature compatibility.
        model: module callable as `model(inputs, icm=...)` (see ActorCritic).
        optimizer: optimizer over `model`'s parameters. When None, an Adam
            optimizer with learning rate `lr` is created for `model`.

    Reads notebook-level globals: device, num_episodes, max_episode_length,
    lr, gamma, tau, eta, beta, lmbda, log (and wandb when log is true).
    """
    if optimizer is None:
        # Build the optimizer for the model that was passed in, not for a global.
        optimizer = torch.optim.Adam(model.parameters(), lr)
    model.train()
    for _ in range(num_episodes):
        # Fresh recurrent state for the policy LSTM and the ICM LSTM.
        hx = torch.zeros(1, 256).to(device)
        cx = torch.zeros(1, 256).to(device)
        icm_hx = torch.zeros(1, 256).to(device)
        icm_cx = torch.zeros(1, 256).to(device)
        values = []
        log_probs = []
        rewards = []
        entropies = []
        inverses = []
        forwards = []
        actions = []
        vec_st1s = []
        episode_length = 0

        state = env.reset()
        # .copy() gives torch a contiguous array; cast from int to float.
        state = torch.from_numpy(state.copy()).type(torch.float)
        while True:
            episode_length += 1
            value, logit, (hx, cx) = model((state.unsqueeze(0).to(device), (hx, cx)), icm = False)
            prob = F.softmax(logit, dim=1)
            log_prob = F.log_softmax(logit, dim=1)
            entropy = -(log_prob * prob).sum(1)
            entropies.append(entropy.cpu())
            # Sample an action from the policy and keep its log-probability.
            action = prob.multinomial(1).data
            log_prob = log_prob.gather(1, action)
            # One-hot encoding of the action, consumed by the ICM models.
            oh_action = torch.zeros(1, env.action_space.n)
            oh_action[0][action.item()] = 1.0
            actions.append(oh_action)

            state, reward, done, _ = env.step(action.item())
            state = torch.from_numpy(state.copy()).type(torch.float).to(device)
            done = done or episode_length >= max_episode_length

            # ICM step on the state just reached, carrying the ICM LSTM state.
            vec_st1, inverse, forward, (icm_hx, icm_cx) = model(
                ((icm_hx, icm_cx), state.unsqueeze(0).to(device), oh_action.to(device)), icm = True)

            # Curiosity bonus: scaled squared error of the forward model.
            reward_intrinsic = eta * ((vec_st1 - forward).pow(2)).sum(1) / 2.
            reward += reward_intrinsic.item()

            values.append(value.cpu())
            log_probs.append(log_prob.cpu())
            rewards.append(reward)
            vec_st1s.append(vec_st1.cpu())
            inverses.append(inverse.cpu())
            forwards.append(forward.cpu())

            if done:
                print(episode_length)
                break

        # The rollout only ever stops once `done` is set, so the return is
        # bootstrapped from zero.
        R = torch.zeros(1, 1)
        values.append(R)
        policy_loss = 0
        value_loss = 0
        inverse_loss = 0
        forward_loss = 0
        gae = torch.zeros(1, 1)
        for i in reversed(range(len(rewards))):
            R = gamma * R + rewards[i]
            advantage = R - values[i]
            value_loss = value_loss + 0.5 * advantage.pow(2)
            # Generalized Advantage Estimation
            delta_t = torch.tensor(rewards[i]) + gamma * values[i + 1].data - values[i].data
            gae = gae * gamma * tau + delta_t
            policy_loss = policy_loss - log_probs[i] * gae - 0.01 * entropies[i]
            # Inverse model: cross-entropy between the taken action and its prediction.
            cross_entropy = - (actions[i] * torch.log(inverses[i] + 1e-15)).sum(1)
            inverse_loss = inverse_loss + cross_entropy
            # Forward model: squared error against the embedded next state.
            forward_err = forwards[i] - vec_st1s[i]
            forward_loss = forward_loss + 0.5 * (forward_err.pow(2)).sum(1)

        optimizer.zero_grad()
        inv_loss = (1-beta) * inverse_loss + beta * forward_loss
        pol_loss = lmbda * (policy_loss + 0.5 * value_loss)
        (inv_loss + pol_loss).backward()
        print(''.join([str(torch.argmax(a).item()) for a in actions]))
        print("inv_loss: ", inv_loss.item(), " ,pol_loss: ", pol_loss.item())
        if log:
            wandb.log({"inv_loss": inv_loss.item(), "pol_loss": pol_loss.item()})
        # In-place variant; the un-suffixed clip_grad_norm is deprecated.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 40)
        optimizer.step()


#### test

In [4]:
# test.py
# https://github.com/kimhc6028/pytorch-noreward-rl/blob/master/test.py
import numpy as np
import torch
import torch.nn.functional as F
import time

# def test(rank, args, shared_model):
def test(env, args, model):
    """Play one episode with `model` in eval mode, print a summary and save the weights.

    Actions are sampled from the policy distribution (as in training). The
    episode ends when the environment reports done or after
    `max_episode_length` steps. On completion the elapsed time, total reward,
    episode length and the list of chosen actions are printed, and the model's
    state_dict is written to 'model.pth' in the working directory.

    Args:
        env: environment with the 4-tuple `step` API.
        args: unused; kept for signature compatibility.
        model: module callable as `model((state, (hx, cx)), icm=False)`.

    Reads notebook-level globals: device, max_episode_length.
    """
    model.eval()
    state = env.reset()
    state = torch.from_numpy(state.copy()).type(torch.float)
    reward_sum = 0
    start_time = time.time()
    actions = []
    episode_length = 0
    cx = torch.zeros(1, 256).to(device)
    hx = torch.zeros(1, 256).to(device)
    # No gradients are needed for evaluation; without no_grad the autograd
    # graph would keep growing through (hx, cx) for the whole episode.
    with torch.no_grad():
        while True:
            episode_length += 1
            value, logit, (hx, cx) = model((state.unsqueeze(0).to(device), (hx, cx)), icm = False)
            prob = F.softmax(logit, dim=1) #from train
            action = prob.multinomial(1).data
            state, reward, done, _ = env.step(action.item())
            state = torch.from_numpy(state.copy()).type(torch.float)

            done = done or episode_length >= max_episode_length
            reward_sum += reward
            actions.append(action.item())
            if done:
                end_time = time.time()
                print("Time {}, episode reward {}, episode length {}".format(
                    time.strftime("%Hh %Mm %Ss", time.gmtime(end_time - start_time)), reward_sum, episode_length))
                torch.save(model.state_dict(), 'model.pth')
                print(actions)
                break


#### main

In [11]:
# main.py
# https://github.com/kimhc6028/pytorch-noreward-rl/blob/master/main.py
# import os, sys, cv2
import torch
import torch.nn as nn
import torch.nn.functional as F
import gym

# --- optimisation / return estimation ---
lr = 0.001              # Adam learning rate
gamma = 0.99            # discount factor used for returns and GAE in train()
tau = 1.00              # GAE decay factor in train()
seed = 1                # passed to torch.manual_seed

# --- rollout sizing ---
num_processes = 4       # only referenced by the commented-out multiprocessing code
num_steps = 20          # only referenced by a commented-out loop in train()
max_episode_length = 500 # 10000
num_episodes = 10#100

# env_name='PongDeterministic-v4'
# env_name='LunarLander-v2'
# env_name='MontezumaRevengeDeterministic-v4'
# env_name='MontezumaRevengeDeterministic-ram-v4'

# --- ICM / loss weighting ---
no_shared = False       # when True, no optimizer is created in the setup below
eta = 0.01              # scale of the intrinsic (curiosity) reward
beta = 0.2              # weight of forward loss vs. inverse loss
lmbda = 0.1             # weight of the policy/value loss

# --- leftovers from the original command-line script ---
outdir = "output"
record = 'store_true'

torch.manual_seed(seed)

# Alternative (non-Mario) environments:
# env = gym.make(env_name)
# env = SparseEnv(env)
# query_environment("MountainCar-v0")

# Mario env, restricted to the COMPLEX_MOVEMENT action set (SIMPLE_MOVEMENT is
# the other option), then wrapped so that losing a life ends the episode.
env = MarioSparse(
    JoypadSpace(gym_super_mario_bros.make('SuperMarioBros-v0'), COMPLEX_MOVEMENT)
)

# print(env.observation_space.shape, env.action_space) # (210, 160, 3) Discrete(18)

shared_model = ActorCritic(env.observation_space.shape, env.action_space)
shared_model = shared_model.to(device)
# shared_model.share_memory()

# With no_shared set, train() is left to build its own optimizer.
optimizer = None if no_shared else torch.optim.Adam(shared_model.parameters(), lr=lr)
# optimizer.share_memory()

args = None
# train(0, args, shared_model, optimizer)

# processes = []
# import torch.multiprocessing as mp
# p = mp.Process(target=test, args=(num_processes, args, shared_model))
# p.start()
# processes.append(p)
# for rank in range(0, num_processes):
#     p = mp.Process(target=train, args=(rank, args, shared_model, optimizer))
#     p.start()
#     processes.append(p)
# for p in processes:
#     p.join()



8020


#### run training and test

In [None]:
# Raise the per-episode step cap read by train() and test().
max_episode_length = 1000  # 10000


In [None]:
# train(env, args, shared_model)

# 25 rounds of training (each round runs num_episodes episodes), then one
# evaluation episode.
for x in range(25):
    train(env=env, args=args, model=shared_model, optimizer=optimizer)
test(env=env, args=args, model=shared_model)


500
9801904950324791100114019043114041134044101949983059011641086391844146110516401190061101000091410990046496066392649048654510119164182183911044974490144674400394673405804794811067614011991413499523109561911047908115114690114996791014100441103486941849091619960991070311001100147990114411147010431163710426091046614400996642115984110441011007102610160949076490103511060009696899110307374941841161140669559094141031040100111779706935111071304414209318114498006400596011491109107607043011074652090411130894610599511479101004701419430473484434984936404
inv_loss:  1092.3968505859375  ,pol_loss:  -247.0916748046875
500
0641491427010331080310800610907090303604093500803150104919469100700104010850910045487117104042101017101056091111183401110107550011118189971306000700033111001011341369131109198111104375113065877000610070038353297554399483636910868101047824364262114138411031831404471111524166638449019611111974977501171411115830119403949114011011300495843940001640116001104081011910411030097254111031

#### save

In [10]:

# Checkpoint save / restore. Mount Google Drive first when saving there:
# from google.colab import drive
# drive.mount('/content/gdrive')
name = 'model_mario_lstm.pth'
PATH = "/content/gdrive/MyDrive/curious/" # for saving to google drive
# PATH="/content" # for saving on colab only
model = shared_model

# Save:
# torch.save(model.state_dict(), PATH+name)

# Restore:
# model.load_state_dict(torch.load(PATH+name))
# shared_model=model


#### video

In [9]:

import gym
from colabgymrender.recorder import Recorder
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT, COMPLEX_MOVEMENT

# Recorded evaluation environment, built innermost wrapper first:
# raw Mario env -> joypad action set -> MarioSparse -> Recorder writing to ./video.
# Alternatives tried earlier: gym.make("MontezumaRevengeDeterministic-v4") wrapped in SparseEnv,
# and SIMPLE_MOVEMENT in place of COMPLEX_MOVEMENT.
env = Recorder(
    MarioSparse(
        JoypadSpace(gym_super_mario_bros.make('SuperMarioBros-v0'), COMPLEX_MOVEMENT)
    ),
    './video',
)
state = env.reset()

# Separate ActorCritic instance loaded with the trained weights and put in eval mode.
model = ActorCritic(env.observation_space.shape, env.action_space)
model.load_state_dict(shared_model.state_dict())
model.eval()

# Zero-initialised LSTM hidden and cell state (two distinct tensors).
hx, cx = torch.zeros(1, 256), torch.zeros(1, 256)
# torch.manual_seed(6)
x = 0  # step counter, incremented once per env.step in the rollout loop
# acts1=[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 1, 1, 11, 2, 9, 9, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 11, 1, 1, 3, 3, 0, 1, 6, 1, 0, 1, 1, 3, 1, 1, 7, 1, 2, 1, 1, 7, 3, 3, 1, 1, 1, 10, 1, 1, 1, 7, 1, 1, 1, 1, 6, 9, 1, 1, 1, 11, 1, 2, 1, 1, 3, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 0, 7, 1, 1, 1, 2, 7, 11, 1, 10, 1, 0, 1, 1, 3, 1, 1, 1, 1, 1, 4, 1, 3, 1, 1, 3, 1, 1, 1, 1, 7, 1, 1, 1, 1, 7, 1, 6, 1, 1, 1, 1, 6, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, 5, 1, 7, 1, 1, 7, 1, 0, 11, 2, 2, 1, 1, 3, 1, 7, 1, 1, 1, 1, 1, 4, 1, 11, 1, 1, 1, 1, 4, 1, 1, 1, 1, 3, 1, 1, 1, 3, 1, 1, 4, 1, 1, 1, 1, 9, 3, 1, 3, 4, 10, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 4, 1, 1, 0, 8, 1, 1, 3, 1, 1, 1, 3, 11, 1, 1, 1, 3, 7, 3, 3, 10, 1, 1, 1, 1, 1, 0, 1, 1, 4, 1, 4, 1, 1, 1, 1, 1, 10, 0, 1, 7, 7, 2, 1, 6, 10, 0, 1, 1, 1, 11, 1, 1, 3, 1, 1, 1, 11, 1, 1, 1, 1, 1, 1, 7, 1, 1, 6, 1, 3, 1, 1, 1, 4, 1, 1, 2, 10, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 7, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 6, 3, 1, 1, 1, 1, 7, 1, 1, 1, 1, 1, 1, 2, 1, 3, 1, 6, 1, 1, 1, 1, 1, 1, 10, 1, 1, 2, 2, 4, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 3, 0, 1, 0, 1, 7, 3, 3, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 6, 1, 1, 1, 1, 1, 0, 1, 1, 5, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 7, 1, 1, 11, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 3, 3, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 3, 1, 9, 1, 3, 1, 1, 1, 10, 1, 1, 1, 1, 1, 4, 9, 1, 1, 1, 1, 3, 10, 1, 1, 1, 1, 1, 10, 1, 1, 1, 1, 8, 1, 1, 1, 3, 1, 1, 7, 1, 1, 0, 1, 1, 1, 1, 6, 1, 1, 2, 1, 4, 0, 1, 1, 3, 1]
# acts2=[4, 10, 10, 9, 10, 10, 1, 10, 10, 10, 1, 3, 0, 10, 10, 3, 2, 1, 0, 1, 4, 2, 2, 10, 2, 10, 10, 1, 10, 6, 1, 7, 1, 5, 1, 1, 10, 9, 1, 1, 1, 7, 10, 7, 1, 1, 1, 10, 1, 10, 1, 6, 2, 3, 2, 2, 11, 3, 4, 2, 2, 1, 10, 1, 9, 3, 9, 7, 10, 0, 10, 1, 10, 0, 0, 1, 11, 7, 10, 10, 10, 10, 7, 1, 9, 0, 0, 10, 1, 0, 1, 1, 10, 7, 1, 10, 2, 5, 0, 0, 0, 4, 9, 10, 10, 11, 1, 10, 4, 1, 10, 7, 1, 10, 1, 7, 3, 0, 11, 7, 1, 7, 1, 10, 6, 10, 7, 10, 10, 1, 10, 10, 10, 1, 1, 9, 1, 3, 3, 10, 1, 3, 2, 7, 10, 10, 10, 1, 10, 1, 10, 10, 1, 9, 9, 4, 1, 1, 11, 3, 10, 10, 10, 5, 1, 10, 1, 9, 9, 6, 10, 0, 10, 9, 7, 11, 3, 0, 0, 1, 10, 10, 10, 7, 2, 1, 7, 1, 10, 10, 1, 9, 7, 1, 1, 2, 10, 9, 10, 9, 4, 10, 10, 10, 3, 1, 1, 1, 4, 1, 1, 10, 3, 7, 10, 0, 6, 1, 11, 10, 9, 1, 7, 10, 2, 10, 3, 1, 10, 9, 1, 11, 1, 3, 0, 1, 1, 3, 9, 1, 4, 0, 2, 1, 1, 10, 4, 9, 4, 1, 1, 6, 10, 0, 1, 10, 0, 10, 0, 2, 3, 6, 7, 3, 10, 1, 10, 1, 1, 10, 1, 0, 9, 10, 4, 7, 1, 7, 7, 6, 7, 1, 1, 10, 1, 10, 7, 6, 9, 10, 7, 3, 1, 1, 1, 7, 1, 3, 3, 10, 3, 7, 7, 10, 11, 10, 1, 10, 10, 1, 2, 10, 7, 2, 2, 1, 7, 1, 1, 10, 10, 10, 7, 10, 1, 4, 3, 2, 3, 0, 3, 10, 10, 7, 7, 1, 1, 10, 0, 1, 10, 0, 8, 1, 1, 1, 0, 0, 7, 6, 10, 3, 7, 1, 0, 10, 4, 2, 1, 10, 1, 10, 2, 10, 6, 0, 4, 2, 6, 3, 1, 7, 2, 3, 10, 10, 10, 1, 3, 1, 3, 3, 10, 1, 10, 3, 2, 2, 6, 11, 1, 9, 0, 9, 1, 7, 11, 3, 3, 10, 9, 1, 10, 2, 7, 10, 3, 2, 7, 1, 10, 10, 7, 1, 2, 0, 7, 3, 2, 1, 0, 11, 10, 2, 10, 6, 10, 10, 2, 10, 4, 10, 10, 6, 10, 10, 9, 1, 1, 1, 7, 1, 10, 1, 5, 0, 0, 2, 1, 0, 3, 10, 10, 11, 2, 10, 10, 7, 10, 6, 0, 10, 3, 1, 10, 1, 1, 10, 0, 3, 1, 9, 1, 11, 3, 2, 10, 1, 0, 0, 9, 1, 1, 1, 0, 3, 9, 10, 10, 9, 10, 1, 11, 3, 2, 4, 7, 3, 0, 10]
# acts3=[2, 0, 7, 1, 2, 0, 2, 7, 1, 0, 0, 9, 9, 2, 2, 0, 0, 1, 7, 0, 0, 2, 10, 7, 2, 2, 0, 2, 2, 2, 2, 0, 0, 0, 2, 2, 2, 2, 3, 1, 2, 3, 0, 0, 2, 0, 2, 7, 2, 0, 3, 0, 2, 2, 9, 2, 2, 7, 9, 0, 2, 0, 2, 1, 2, 7, 2, 0, 1, 0, 2, 6, 2, 2, 1, 0, 2, 2, 10, 0, 7, 2, 0, 2, 2, 2, 1, 3, 0, 0, 2, 9, 3, 0, 7, 2, 2, 0, 2, 2, 2, 0, 0, 0, 7, 2, 3, 2, 2, 3, 2, 2, 0, 0, 2, 2, 7, 2, 2, 10, 7, 0, 0, 0, 0, 7, 2, 7, 0, 2, 2, 0, 2, 2, 1, 2, 1, 2, 0, 7, 10, 7, 2, 0, 1, 2, 0, 2, 0, 0, 2, 7, 3, 3, 0, 7, 10, 10, 0, 0, 2, 3, 0, 7, 1, 0, 2, 7, 2, 1, 1, 6, 0, 2, 2, 0, 0, 0, 2, 0, 2, 0, 7, 2, 2, 3, 3, 0, 5, 3, 3, 11, 0, 3, 0, 3, 2, 0, 2, 1, 0, 0, 2, 9, 2, 2, 0, 10, 7, 2, 3, 7, 1, 10, 3, 1, 2, 0, 0, 9, 0, 2, 0, 2, 1, 0, 2, 0, 3, 0, 1, 0, 7, 1, 9, 2, 2, 2, 0, 0, 2, 2, 2, 1, 7, 10, 0, 2, 2, 2, 7, 2, 7, 0, 11, 7, 5, 9, 10, 0, 0, 2, 1, 2, 7, 7, 0, 10, 3, 0, 10, 4, 2, 3, 1, 7, 3, 0, 2, 7, 0, 0, 0, 2, 3, 2, 0, 0, 1, 0, 2, 2, 0, 2, 7, 0, 2, 2, 0, 2, 2, 2, 0, 7, 2, 2, 7, 2, 3, 11, 7, 3, 7, 10, 0, 0, 0, 11, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 9, 7, 2, 3, 2, 1, 10, 2, 2, 2, 7, 3, 0, 0, 6, 2, 0, 1, 2, 2, 2, 2, 0, 0, 0, 10, 1, 0, 0, 0, 2, 3, 2, 2, 10, 7, 10, 2, 7, 0, 2, 0, 2, 0, 10, 0, 1, 0, 0, 2, 0, 0, 7, 1, 2, 0, 11, 7, 0, 0, 2, 2, 3, 2, 7, 0, 7, 2, 3, 7, 7, 2, 0, 0, 2, 2, 2, 9, 2, 0, 0, 0, 0, 2, 0, 7, 7, 0, 2, 0, 7, 10, 7, 2, 7, 0, 11, 0, 2, 2, 7, 2, 3, 3, 10, 1, 1, 1, 2, 2, 0, 0, 2, 0, 7, 3, 2, 0, 10, 0, 2, 3, 1, 2, 2, 11, 10, 0, 0, 7, 7, 0, 2, 7, 6, 1, 2, 0, 9, 5, 7, 2, 7, 1, 0, 0, 2, 0, 3, 0, 2, 3, 2, 7, 2, 1, 0, 2, 0, 3, 2, 10, 2, 7, 0, 11, 2, 0, 1, 2]
# acts4=[5, 3, 2, 1, 2, 2, 2, 0, 2, 3, 3, 0, 2, 2, 2, 0, 7, 0, 7, 7, 10, 1, 0, 0, 2, 0, 7, 7, 7, 3, 7, 7, 0, 7, 10, 10, 7, 3, 0, 0, 1, 7, 7, 1, 2, 2, 2, 1, 0, 7, 2, 2, 7, 10, 2, 1, 0, 7, 7, 2, 7, 2, 2, 7, 3, 2, 3, 7, 2, 3, 0, 2, 3, 7, 10, 3, 1, 1, 2, 7, 2, 2, 0, 3, 3, 2, 2, 2, 5, 2, 7, 0, 2, 7, 1, 2, 0, 2, 0, 2, 2, 10, 10, 2, 2, 3, 3, 10, 7, 2, 1, 3, 10, 2, 2, 0, 9, 0, 3, 7, 2, 0, 2, 2, 0, 2, 2, 1, 2, 2, 2, 1, 0, 7, 3, 0, 3, 1, 2, 3, 2, 2, 2, 2, 10, 5, 7, 7, 7, 2, 3, 3, 2, 0, 0, 2, 3, 2, 2, 11, 2, 2, 1, 7, 1, 1, 3, 1, 3, 2, 2, 2, 3, 7, 7, 3, 2, 2, 3, 0, 2, 7, 2, 0, 2, 10, 3, 3, 0, 2, 10, 7, 0, 3, 2, 7, 2, 7, 2, 7, 2, 1, 2, 3, 2, 2, 2, 2, 2, 3, 2, 2, 3, 2, 2, 7, 3, 2, 7, 7, 3, 0, 11, 7, 5, 2, 7, 2, 2, 2, 7, 7, 2, 3, 2, 7, 1, 2, 7, 2, 9, 2, 2, 2, 3, 1, 7, 7, 3, 2, 10, 7, 2, 2, 2, 2, 2, 7, 2, 7, 0, 0, 2, 2, 2, 2, 3, 2, 2, 3, 2, 0, 2, 0, 2, 7, 7, 10, 3, 0, 2, 2, 9, 2, 2, 7, 7, 3, 7, 0, 2, 2, 2, 7, 2, 3, 7, 0, 2, 7, 2, 2, 7, 0, 2, 2, 10, 2, 7, 2, 0, 3, 7, 2, 3, 3, 0, 0, 7, 2, 3, 3, 7, 2, 2, 0, 2, 7, 2, 0, 3, 0, 7, 2, 3, 2, 2, 3, 2, 2, 3, 7, 2, 2, 7, 2, 7, 10, 7, 7, 7, 7, 10, 7, 2, 7, 0, 2, 2, 2, 2, 2, 1, 2, 2, 2, 7, 7, 7, 7, 2, 7, 2, 3, 0, 2, 0, 7, 2, 7, 3, 3, 2, 7, 10, 7, 0, 0, 2, 3, 2, 7, 1, 0, 2, 7, 2, 1, 1, 6, 2, 3, 7, 3, 0, 2, 2, 2, 2, 7, 7, 2, 2, 3, 3, 0, 3, 3, 3, 3, 0, 3, 0, 3, 2, 0, 2, 7, 0, 3, 2, 7, 2, 2, 7, 10, 7, 2, 3, 7, 7, 2, 3, 1, 2, 0, 0, 7, 3, 2, 3, 2, 1, 2, 2, 0, 3, 0, 1, 7, 7, 1, 3, 2, 2, 2, 7, 0, 2, 7, 2, 1, 7, 10, 0, 3, 2, 7, 7, 7, 7, 2, 2, 7, 5, 3, 7, 0, 2, 2, 7, 2, 7, 7, 3, 10, 3, 0, 10, 7]
# acts5=[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 7, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 0, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 11, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 7, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 0, 2, 10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 7, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 10, 7, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 0, 7, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 7, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 10]
# acts6.0=[0, 0, 2, 2, 2, 0, 2, 2, 2, 2, 0, 2, 7, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 2, 2, 2, 2, 7, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 0, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2]
# acts=[2, 2, 2, 2, 2, 2, 2, 2, 1, 7, 3, 2, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 11, 2, 2, 2, 2, 0, 2, 0, 2, 2, 2, 2, 2, 1, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 7, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 7, 2, 2, 2, 2, 2, 0, 2, 7, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 0, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 0, 0, 2, 2, 2, 0, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 0, 2, 2, 2, 2, 0, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 7, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 0, 2, 0, 2, 2, 2, 2, 0, 2, 0, 0, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 2, 2, 0, 2, 2, 2, 2, 7, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2, 0, 4, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 7, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 0, 2, 0, 2, 2, 0, 2, 2, 2, 2, 9, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0, 9, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 7, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 7, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 7, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 7, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, 0, 
# 2, 2, 2, 2, 2, 2, 2, 2, 11, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 10, 2]
# acts=[0, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 7, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 0, 3, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 0, 2, 2, 2, 0, 0, 2, 2, 0, 2, 2, 0, 0, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 0, 2, 2, 2, 2, 2, 0, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 7, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 0, 7, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 0, 2, 0, 0, 2, 2, 2, 0, 2, 2, 2, 2, 2, 0, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 5, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 0, 2, 0, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 0, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 9, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 0, 2, 2, 0, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 
# 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 0, 2, 2, 7, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 0, 2, 0, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 2, 2, 2, 2, 0, 2, 2, 2, 0, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 7, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 0, 3, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2]
# acts=[2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 2, 0, 0, 0, 9, 0, 0, 0, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 0, 2, 2, 0, 0, 0, 2, 2, 0, 2, 2, 3, 2, 0, 2, 0, 2, 2, 0, 2, 2, 0, 7, 2, 2, 2, 7, 2, 2, 1, 0, 0, 2, 2, 0, 2, 0, 2, 0, 2, 2, 2, 0, 2, 2, 2, 2, 2, 0, 0, 7, 2, 2, 2, 2, 2, 2, 2, 7, 2, 2, 0, 2, 10, 2, 2, 7, 0, 2, 3, 0, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 9, 0, 2, 2, 2, 2, 2, 2, 0, 7, 0, 2, 2, 2, 0, 0, 0, 0, 2, 2, 0, 2, 2, 0, 0, 0, 0, 0, 0, 2, 2, 2, 0, 0, 2, 0, 10, 0, 2, 0, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 0, 2, 0, 2, 2, 2, 0, 0, 2, 8, 2, 0, 3, 2, 2, 0, 2, 2, 0, 2, 0, 2, 2, 0, 2, 0, 2, 0, 2, 2, 0, 7, 0, 2, 2, 0, 0, 0, 2, 1, 2, 2, 0, 2, 2, 0, 0, 2, 2, 2, 3, 0, 0, 2, 2, 0, 7, 0, 0, 0, 0, 2, 2, 1, 2, 0, 2, 0, 0, 2, 0, 0, 0, 2, 2, 9, 0, 2, 2, 2, 2, 2, 0, 2, 0, 2, 2, 0, 0, 0, 2, 2, 0, 2, 2, 2, 9, 2, 0, 2, 2, 0, 7, 0, 2, 0, 2, 2, 0, 2, 2, 2, 2, 0, 0, 3, 0, 1, 0, 0, 0, 2, 0, 0, 0, 2, 2, 2, 0, 2, 0, 0, 2, 2, 3, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 0, 2, 2, 2, 2, 0, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 0, 2, 2, 2, 2, 2, 1, 7, 2, 0, 2, 2, 2, 2, 0, 0, 2, 0, 2, 2, 0, 10, 2, 2, 2, 0, 2, 2, 2, 0, 0, 0, 2, 2, 2, 2, 0, 2, 0, 2, 0, 0, 2, 2, 2, 2, 2, 0, 2, 2, 0, 2, 0, 2, 0, 2, 2, 2, 0, 0, 0, 2, 2, 3, 2, 0, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 0, 7, 0, 2, 2, 2, 0, 2, 2, 2, 2, 0, 0, 2, 2, 0, 2, 2, 0, 0, 7, 2, 0, 0, 2, 0, 0, 0, 2, 3, 0, 2, 2, 2, 0, 7, 2, 2, 0, 2, 2, 2, 0, 0, 2, 2, 0, 2, 2, 2, 2, 0, 2, 2, 0, 2, 0, 0, 2, 0, 0, 2, 0, 2, 2, 2, 0, 9, 2, 0, 0, 2, 2, 0, 2, 2, 2, 0, 2, 2, 2, 0, 0, 2, 3, 2, 2, 0, 2, 2, 1, 2, 0, 2, 2, 2, 0, 0, 2, 2, 2, 2, 0, 7, 0, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 0, 2, 2, 2, 0, 2, 0, 2, 3, 7, 0, 2, 2, 0, 2, 2, 2, 0, 2, 2, 2, 0, 0, 2, 3, 2, 2, 7, 7, 0, 10, 2, 0, 2, 7, 2, 0, 0, 2, 2, 0, 2, 2, 0, 0, 7, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 0, 0, 2, 10, 0, 0, 0, 2, 2, 2, 1, 0, 2, 0, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 0, 0, 7, 2, 2, 0, 0, 2, 2, 2, 2, 2, 0, 0, 1, 9, 0, 
# 7, 2, 0, 0, 2, 2, 2, 2, 0, 0, 0, 2, 2, 2, 2, 0, 2, 2, 0, 2, 0, 2, 2, 2, 2, 0, 10, 2, 2, 0, 0, 7, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 0, 7, 2, 2, 2, 0, 0, 0, 0, 2, 0, 2, 2, 2, 0, 2, 2, 0, 2, 2, 2, 2, 2, 7, 2, 2, 0, 0, 0, 0, 2, 2, 2, 9, 3, 2, 2, 0, 0, 0, 0, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 0, 2, 2, 0, 0, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 2, 2, 0, 2, 2, 2, 2, 2, 3, 7, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 0, 2, 0, 0, 2, 7, 2, 2, 2, 2, 2, 0, 0, 0, 7, 0, 2, 0, 3, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 7, 2, 2, 2, 0, 2, 2, 0, 2, 2, 2, 2, 0, 2, 7, 0, 1, 2, 7, 2, 2, 2, 2, 2, 2, 1, 0, 2, 0, 0, 0, 2, 2, 0, 2, 0, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 3, 0, 0, 2, 2, 0, 7, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 0, 0, 2, 0, 2, 2, 0, 0, 0, 2, 2, 0, 2, 7, 2, 2, 2, 0, 2, 2, 0, 2, 7, 2, 2, 2, 2, 3, 2, 0, 0, 2, 2, 0, 2, 2, 0, 2, 0, 2, 0, 2, 2, 2, 2, 2, 0, 0, 2, 0, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 1, 2, 0, 7, 2, 2, 2, 2, 0, 2, 2, 2, 0, 2, 0, 2, 2, 2, 2, 2, 0, 2, 2, 0, 2, 2, 0, 2]

# Roll out one episode with the trained policy while the Recorder captures frames,
# then play the recording and print the number of steps taken.
#
# Inference runs under torch.no_grad(): the LSTM state (hx, cx) returned by each step
# is fed straight back into the next call, so with autograd enabled every step would
# extend a single graph spanning the whole episode and keep all of its intermediate
# tensors alive until the loop ends.
with torch.no_grad():
    while True:
        # env returns a numpy frame; copy before wrapping, then cast to float for the model.
        state = torch.from_numpy(state.copy()).type(torch.float)
        # Called with icm=False the model yields (value, logit, (hx, cx)); value is unused here.
        value, logit, (hx, cx) = model((state.unsqueeze(0), (hx, cx)), icm=False)
        prob = F.softmax(logit, dim=1) #from train
        # Sample the action from the policy distribution; no .data needed under no_grad.
        action = prob.multinomial(1)
        state, reward, done, _ = env.step(action.item())
        # Alternative: replay one of the recorded `acts` lists instead of sampling.
        # try:
        #     action=int(acts[x])
        # except IndexError:
        #     action = 10
        # # print("action",action)
        # # action = env.action_space.sample()
        # state, reward, done, info = env.step(action)
        x+=1
        if done: break
env.play()
print(x)



Imageio: 'ffmpeg-linux64-v3.3.1' was not found on your computer; downloading it now.
Try 1. Download from https://github.com/imageio/imageio-binaries/raw/master/ffmpeg/ffmpeg-linux64-v3.3.1 (43.8 MB)
Downloading: 8192/45929032 bytes (0.0%)3014656/45929032 bytes (6.6%)6758400/45929032 bytes (14.7%)10526720/45929032 bytes (22.9%)14598144/45929032 bytes (31.8%)18677760/45929032 bytes (40.7%)22306816/45929032 bytes (48.6%)26411008/45929032 bytes (57.5%)30482432/45929032 bytes (66.4%)34521088/45929032 bytes (75.2%)38666240/45929032 bytes (84.2%)42696704/45929032 bytes (93.0%)45929032/45929032 bytes (100.0%)
  Done
File saved as /root

In [None]:
# https://stackoverflow.com/questions/57377185/how-play-mp4-video-in-google-colab
# Embed a recorded episode in the notebook by base64-encoding the mp4 into a data URL.
from IPython.display import HTML
from base64 import b64encode
# NOTE(review): the file name looks like a per-recording timestamp — point this at the
# file produced by the current run before executing the cell.
mp4_path='/content/video/1656241889.873849.mp4'
# mp4 = open('video.mp4','rb').read()
# Read through a context manager so the file handle is closed immediately instead of
# lingering until garbage collection.
with open(mp4_path,'rb') as video_file:
    mp4 = video_file.read()
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
# Last expression of the cell: the HTML object is what the notebook renders.
HTML("""
<video width=400 controls>
      <source src="%s" type="video/mp4">
</video>
""" % data_url)

In [None]:
# @title video base

import gym
from colabgymrender.recorder import Recorder
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT, COMPLEX_MOVEMENT

# Baseline recording: one episode driven by randomly sampled actions.
# Earlier alternative: env = gym.make("MontezumaRevengeDeterministic-v4")
env = Recorder(
    JoypadSpace(gym_super_mario_bros.make('SuperMarioBros-v0'), COMPLEX_MOVEMENT),
    './video',
)
state = env.reset()

# Keep stepping until the env reports done; per the author's note, stopping earlier errors.
done = False
while not done:
    action = env.action_space.sample()
    state, reward, done, info = env.step(action)
env.play()



In [None]:
# @title og video
!pip install colabgymrender
import gym
from colabgymrender.recorder import Recorder

env = gym.make("MontezumaRevengeDeterministic-v4")
env = Recorder(env, './video')
observation = env.reset()
terminal= False
while not done:
    action =env.action_space.sample()
    observation, reward, done, info = env.step(action)
env.play()

