<a href="https://colab.research.google.com/github/eisbetterthanpi/pytorch/blob/main/curiousity_perceiverio.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#### setup

In [1]:
# # https://github.com/kimhc6028/pytorch-noreward-rl
# https://stackoverflow.com/questions/67808779/running-gym-atari-in-google-colab
%pip install -U gym
%pip install -U gym[atari,accept-rom-license]
# !pip install gym[box2d]
import gym

!pip install gym-super-mario-bros nes-py
# https://github.com/Kautenja/gym-super-mario-bros
from nes_py.wrappers import JoypadSpace
import gym_super_mario_bros
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT, COMPLEX_MOVEMENT
# env = gym_super_mario_bros.make('SuperMarioBros-v0')
# env = JoypadSpace(env, SIMPLE_MOVEMENT)

!pip install colabgymrender
!pip install perceiver-pytorch

import torch
device = "cuda" if torch.cuda.is_available() else "cpu"

log=False
# !pip install wandb
# import wandb
# wandb.login() # 
# wandb.init(project="curiousity_simple", entity="bobdole")
# log=True

!pip install einops
from math import pi, log
from functools import wraps
import torch
from torch import nn, einsum
import torch.nn.functional as F
from einops import rearrange, repeat



Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting gym
  Downloading gym-0.24.1.tar.gz (696 kB)
[K     |████████████████████████████████| 696 kB 34.7 MB/s 
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone
Collecting gym-notices>=0.0.4
  Downloading gym_notices-0.0.7-py3-none-any.whl (2.7 kB)
Building wheels for collected packages: gym
  Building wheel for gym (PEP 517) ... [?25l[?25hdone
  Created wheel for gym: filename=gym-0.24.1-py3-none-any.whl size=793152 sha256=2d99fbf6769bc517a56006634c11695bc793a818e5241826f7116f1738198a3c
  Stored in directory: /root/.cache/pip/wheels/18/0e/54/63d9f3d16ddf0fec1622e90d28140df5e6016bcf8ea920037d
Successfully built gym
Installing collected packages: gym-notices, gym
  Attempting uninstall: gym
    Found existing installation: gym 0.17.3
    Uninstalling gym-0.17.3:
   

# functions

#### gym wrappers

In [None]:

import gym
class SparseEnv(gym.Wrapper): #https://alexandervandekleut.github.io/gym-wrappers/
    """Turn a dense-reward environment into a sparse-reward one.

    Every intermediate step reports a reward of 0; the step on which the wrapped
    environment reports ``done`` returns the sum of all rewards collected since
    the last ``reset``.
    """
    def __init__(self, env):
        super().__init__(env)
        self.env = env
        self.total_rewards = 0 # running sum of rewards for the current episode

    def step(self, action):
        """Step the wrapped env; return (observation, sparse_reward, done, info)."""
        observation, reward, done, info = self.env.step(action)
        self.total_rewards += reward
        if done:
            return observation, self.total_rewards, done, info
        # Keep accumulating: the previous version zeroed the sum here, so the
        # "total" handed back at episode end was only the final step's reward.
        return observation, 0, done, info

    def reset(self, **kwargs):
        """Clear the accumulated reward and reset the wrapped env (kwargs are forwarded)."""
        self.total_rewards = 0
        return self.env.reset(**kwargs)
# env = SparseEnv(gym.make("LunarLander-v2"))

class MarioSparse(gym.Wrapper):
    """Mario wrapper that uses ``info['score']`` as the reward and ends the episode on a lost life.

    ``step`` reads ``info['life']`` and ``info['score']`` from the wrapped env.
    The episode is reported as done when ``life < 2`` or when the wrapped env
    itself reports ``done``.
    """
    def __init__(self, env):
        super().__init__(env)
        self.env = env
        self.total_score = 0 # sum of the per-step info['score'] values; never reset here

    def step(self, action):
        """Step the wrapped env; return (observation, score, done, info)."""
        observation, reward, done, info = self.env.step(action)
        life = info['life']
        score = info['score']
        self.total_score += score
        # print("MarioSparse",life,score)
        if life<2:
            print("MarioSparse: died")
            done = True # lost one life, end env
        # Propagate the wrapped env's own `done` as well. The previous version
        # returned False whenever life >= 2, hiding a finished episode from the caller.
        return observation, score, done, info

    def reset(self):
        """Reset the wrapped env (``total_score`` is intentionally left untouched)."""
        # self.total_score = 0
        return self.env.reset()
# env = MarioSparse(env)

class MarioEarlyStop(gym.Wrapper):
    """Mario wrapper that ends the episode when no new maximum ``x_pos`` is reached for a while.

    ``step`` reads ``info['x_pos']``. Each step that does not exceed the best
    position seen so far increments a counter; once the counter exceeds 30 the
    episode is reported as done. A ``done`` from the wrapped env is passed through.
    """
    def __init__(self, env):
        super().__init__(env)
        self.env = env
        self.max_pos = 0 # furthest x position reached this episode
        self.count_step = 0 # consecutive steps without a new max_pos

    def step(self, action):
        """Step the wrapped env; return (observation, reward, done, info)."""
        observation, reward, done, info = self.env.step(action)
        x_pos = info['x_pos']
        if x_pos <= self.max_pos:
            self.count_step += 1
        else:
            self.max_pos = x_pos
            self.count_step = 0
        if self.count_step > 30:
            print("MarioEarlyStop: early stop ", self.max_pos)
            done = True # early stop
        # Return the wrapped env's `done` when not stopping early; the previous
        # version always returned False here and discarded it.
        return observation, reward, done, info

    def reset(self):
        """Clear the progress trackers and reset the wrapped env."""
        self.max_pos = 0
        self.count_step = 0
        return self.env.reset()
# env = MarioEarlyStop(env)


#### helpers

In [2]:
# helpers
def exists(val):
    """Return True for anything other than ``None`` (falsy values such as 0 or '' count as existing)."""
    return not (val is None)

def default(val, d):
    """Return ``val`` unless it is ``None``, in which case return the fallback ``d``."""
    if exists(val):
        return val
    return d

def cache_fn(f):
    """Wrap ``f`` so that its first non-None result is remembered and reused.

    The returned callable accepts an extra keyword ``_cache`` (default True).
    With ``_cache=False`` the call goes straight to ``f`` and the stored value is
    neither read nor written. The stored value is shared across all argument
    combinations; arguments are not part of any cache key.
    """
    memo = [None] # one-slot store; None means "nothing cached yet"
    # @wraps(f)
    def cached_fn(*args, _cache = True, **kwargs):
        if _cache:
            if memo[0] is None:
                memo[0] = f(*args, **kwargs)
            return memo[0]
        return f(*args, **kwargs)
    return cached_fn

# helper classes
class PreNorm(nn.Module):
    """Apply LayerNorm to the input (and optionally to a ``context`` kwarg) before calling ``fn``.

    Args:
        dim: feature size normalized on ``x``.
        fn: module invoked as ``fn(normed_x, **kwargs)``.
        context_dim: when given, ``kwargs['context']`` is also layer-normalized
            (over this feature size) before being forwarded to ``fn``.
    """
    def __init__(self, dim, fn, context_dim = None):
        super().__init__()
        self.fn = fn
        self.norm = nn.LayerNorm(dim)
        self.norm_context = None if context_dim is None else nn.LayerNorm(context_dim)

    def forward(self, x, **kwargs):
        normed = self.norm(x)
        if self.norm_context is not None:
            # a 'context' keyword is required whenever context_dim was configured
            kwargs['context'] = self.norm_context(kwargs['context'])
        return self.fn(normed, **kwargs)

class GEGLU(nn.Module):
    """GELU-gated linear unit: split the last dim in two halves and gate the first with GELU of the second."""
    def forward(self, x):
        value, gate = torch.chunk(x, 2, dim = -1)
        return value * F.gelu(gate)

class FeedForward(nn.Module):
    """Two-layer feed-forward block with a GEGLU activation.

    The first linear layer expands ``dim`` to ``2 * dim * mult`` (value and gate
    halves for GEGLU); the second projects the gated ``dim * mult`` features
    back to ``dim``.
    """
    def __init__(self, dim, mult = 4):
        super().__init__()
        hidden = dim * mult
        expand = nn.Linear(dim, hidden * 2)
        project = nn.Linear(hidden, dim)
        self.net = nn.Sequential(expand, GEGLU(), project)

    def forward(self, x):
        return self.net(x)

class Attention(nn.Module):
    """Multi-head scaled dot-product attention; self-attention when no context is given.

    Args:
        query_dim: feature size of ``x`` (and of the output).
        context_dim: feature size of ``context``; falls back to ``query_dim``.
        heads: number of attention heads.
        dim_head: feature size of each head.
    """
    def __init__(self, query_dim, context_dim = None, heads = 8, dim_head = 64):
        super().__init__()
        inner_dim = heads * dim_head
        kv_in_dim = default(context_dim, query_dim)
        self.scale = dim_head ** -0.5
        self.heads = heads
        self.to_q = nn.Linear(query_dim, inner_dim, bias = False)
        self.to_kv = nn.Linear(kv_in_dim, inner_dim * 2, bias = False) # keys and values in one projection
        self.to_out = nn.Linear(inner_dim, query_dim)

    def forward(self, x, context = None, mask = None):
        """Attend from ``x`` to ``context`` (or to ``x`` itself).

        ``mask``, when given, is flattened to (batch, keys); positions where it is
        False are filled with the most negative finite value before the softmax.
        """
        n_heads = self.heads

        def split_heads(t):
            # fold the head axis into the batch axis
            return rearrange(t, 'b n (h d) -> (b h) n d', h = n_heads)

        queries = self.to_q(x)
        source = default(context, x)
        keys, values = self.to_kv(source).chunk(2, dim = -1)
        queries, keys, values = split_heads(queries), split_heads(keys), split_heads(values)

        sim = einsum('b i d, b j d -> b i j', queries, keys) * self.scale
        if mask is not None:
            flat_mask = rearrange(mask, 'b ... -> b (...)')
            fill_value = -torch.finfo(sim.dtype).max
            flat_mask = repeat(flat_mask, 'b j -> (b h) () j', h = n_heads)
            sim.masked_fill_(~flat_mask, fill_value)

        weights = sim.softmax(dim = -1)
        mixed = einsum('b i j, b j d -> b i d', weights, values)
        merged = rearrange(mixed, '(b h) n d -> b n (h d)', h = n_heads)
        return self.to_out(merged)



#### PerceiverIO

In [3]:
# PerceiverIO class save
class PerceiverIO(nn.Module):
    """Perceiver IO: cross-attend the input into learned latents, process them, decode with queries.

    Args (keyword-only):
        depth: number of (self-attention, feed-forward) latent layers.
        dim: feature size of the input ``data``.
        queries_dim: feature size of the decoder queries.
        logits_dim: output feature size; ``None`` leaves the decoder output unprojected.
        num_latents, latent_dim: shape of the learned latent array.
        cross_heads, cross_dim_head: head configuration of both cross-attentions.
        latent_heads, latent_dim_head: head configuration of the latent self-attention.
        weight_tie_layers: reuse one attention/feed-forward pair for every latent layer.
        decoder_ff: add a residual feed-forward block after the decoder cross-attention.
    """
    def __init__(
        self,
        *,
        depth,
        dim,
        queries_dim,
        logits_dim = None,
        num_latents = 512,
        latent_dim = 512,
        cross_heads = 1,
        latent_heads = 8,
        cross_dim_head = 64,
        latent_dim_head = 64,
        weight_tie_layers = False,
        decoder_ff = False
    ):
        super().__init__()
        self.latents = nn.Parameter(torch.randn(num_latents, latent_dim))

        # encoder: latents attend to the input once, followed by a feed-forward block
        encoder_attn = PreNorm(latent_dim, Attention(latent_dim, dim, heads = cross_heads, dim_head = cross_dim_head), context_dim = dim)
        encoder_ff = PreNorm(latent_dim, FeedForward(latent_dim))
        self.cross_attend_blocks = nn.ModuleList([encoder_attn, encoder_ff])

        def build_latent_attn():
            return PreNorm(latent_dim, Attention(latent_dim, heads = latent_heads, dim_head = latent_dim_head))

        def build_latent_ff():
            return PreNorm(latent_dim, FeedForward(latent_dim))

        # cache_fn hands back the same module on every call when _cache is True,
        # which is how weight tying across layers is realised
        make_attn = cache_fn(build_latent_attn)
        make_ff = cache_fn(build_latent_ff)
        self.layers = nn.ModuleList([
            nn.ModuleList([make_attn(_cache = weight_tie_layers), make_ff(_cache = weight_tie_layers)])
            for _ in range(depth)
        ])

        # decoder: queries attend to the processed latents
        self.decoder_cross_attn = PreNorm(queries_dim, Attention(queries_dim, latent_dim, heads = cross_heads, dim_head = cross_dim_head), context_dim = latent_dim)
        self.decoder_ff = PreNorm(queries_dim, FeedForward(queries_dim)) if decoder_ff else None
        self.to_logits = nn.Identity() if logits_dim is None else nn.Linear(queries_dim, logits_dim)

    def forward(self, data, mask = None, queries = None):
        """Encode ``data`` and, when ``queries`` is given, decode.

        Returns the processed latents if ``queries`` is None, otherwise the
        decoder output passed through ``to_logits``. Two-dimensional queries are
        repeated across the batch.
        """
        batch, *_ = data.shape
        hidden = repeat(self.latents, 'n d -> b n d', b = batch)

        encoder_attn, encoder_ff = self.cross_attend_blocks
        # cross attention only happens once for Perceiver IO
        hidden = encoder_attn(hidden, context = data, mask = mask) + hidden
        hidden = encoder_ff(hidden) + hidden

        for latent_attn, latent_ff in self.layers:
            hidden = latent_attn(hidden) + hidden
            hidden = latent_ff(hidden) + hidden

        if queries is None:
            return hidden

        # make sure queries contains batch dimension
        if queries.ndim == 2:
            queries = repeat(queries, 'n d -> b n d', b = batch)

        # cross attend from decoder queries to latents
        decoded = self.decoder_cross_attn(queries, context = hidden)
        if self.decoder_ff is not None:
            decoded = decoded + self.decoder_ff(decoded)
        return self.to_logits(decoded)

def preprocess(X):
    """Reshape ``X`` to (batch, 1, features).

    A 1-D tensor is treated as a single sample; otherwise everything after the
    first dimension is flattened into one feature axis.
    """
    batched = X.unsqueeze(dim=0) if X.dim() == 1 else X
    flat = batched.flatten(start_dim=1, end_dim=-1)
    return flat.unsqueeze(dim=1)

def postprocess(logits):
    """Drop axis 1 from a 3-D tensor when that axis has size 1; other inputs pass through unchanged."""
    return logits.squeeze(dim=1) if logits.dim() == 3 else logits


#### PerceiverIOrnn

In [4]:

class PerceiverIOrnn(nn.Module):
    """Perceiver IO variant whose latent array is passed in and returned, like a recurrent state.

    Differences from ``PerceiverIO`` visible in this class:
      * the initial latents are zeros rather than a learned parameter;
      * ``forward`` accepts the latents of a previous step via ``x``;
      * when ``queries`` is given, ``forward`` returns ``(latents, output)``.

    Args (keyword-only):
        depth: number of (self-attention, feed-forward) latent layers.
        dim: feature size of the input ``data``.
        queries_dim: feature size of the decoder queries.
        logits_dim: output feature size; ``None`` leaves the decoder output unprojected.
        num_latents, latent_dim: shape of the latent array.
        cross_heads, cross_dim_head: head configuration of both cross-attentions.
        latent_heads, latent_dim_head: head configuration of the latent self-attention.
        weight_tie_layers: reuse one attention/feed-forward pair for every latent layer.
        decoder_ff: add a residual feed-forward block after the decoder cross-attention.
    """
    def __init__(
        self,
        *,
        depth,
        dim,
        queries_dim,
        logits_dim = None,
        num_latents = 512,
        latent_dim = 512,
        cross_heads = 1,
        latent_heads = 8,
        cross_dim_head = 64,
        latent_dim_head = 64,
        weight_tie_layers = False,
        decoder_ff = False
    ):
        super().__init__()
        # self.latents = nn.Parameter(torch.randn(num_latents, latent_dim))
        # Registered as a buffer so .to()/.cuda()/.half() on the module also move
        # and cast the initial state. persistent=False keeps it out of state_dict,
        # so checkpoints stay compatible with the earlier plain-attribute version.
        self.register_buffer('latents', torch.zeros(num_latents, latent_dim), persistent = False)
        self.cross_attend_blocks = nn.ModuleList([
            PreNorm(latent_dim, Attention(latent_dim, dim, heads = cross_heads, dim_head = cross_dim_head), context_dim = dim),
            PreNorm(latent_dim, FeedForward(latent_dim))
        ])
        get_latent_attn = lambda: PreNorm(latent_dim, Attention(latent_dim, heads = latent_heads, dim_head = latent_dim_head))
        get_latent_ff = lambda: PreNorm(latent_dim, FeedForward(latent_dim))
        # with _cache=True the cached factories return the same module every time (weight tying)
        get_latent_attn, get_latent_ff = map(cache_fn, (get_latent_attn, get_latent_ff))
        self.layers = nn.ModuleList([])
        cache_args = {'_cache': weight_tie_layers}
        for _ in range(depth):
            self.layers.append(nn.ModuleList([get_latent_attn(**cache_args), get_latent_ff(**cache_args)]))
        self.decoder_cross_attn = PreNorm(queries_dim, Attention(queries_dim, latent_dim, heads = cross_heads, dim_head = cross_dim_head), context_dim = latent_dim)
        self.decoder_ff = PreNorm(queries_dim, FeedForward(queries_dim)) if decoder_ff else None
        self.to_logits = nn.Linear(queries_dim, logits_dim) if exists(logits_dim) else nn.Identity()

    def forward(self, data, mask = None, queries = None, x = None):
        """Advance the latent state with ``data`` and optionally decode.

        Args:
            data: input to cross-attend to; its first dimension is the batch size.
            mask: optional mask forwarded to the encoder cross-attention.
            queries: decoder queries; a 2-D tensor is repeated across the batch.
            x: latents from a previous call; when None the zero latents are used.

        Returns:
            The updated latents alone when ``queries`` is None, otherwise the
            tuple ``(updated_latents, decoder_output)``.
        """
        b, *_, device = *data.shape, data.device
        # identity check: `x == None` on a tensor relies on tensor comparison semantics
        if x is None:
            x = repeat(self.latents, 'n d -> b n d', b = b).to(device)
        cross_attn, cross_ff = self.cross_attend_blocks
        # cross attention only happens once for Perceiver IO
        x = cross_attn(x, context = data, mask = mask) + x
        x = cross_ff(x) + x
        # layers
        for self_attn, self_ff in self.layers:
            x = self_attn(x) + x
            x = self_ff(x) + x
        if not exists(queries):
            return x
        # make sure queries contains batch dimension
        if queries.ndim == 2:
            queries = repeat(queries, 'n d -> b n d', b = b)
        # cross attend from decoder queries to latents
        latents = self.decoder_cross_attn(queries, context = x)
        if exists(self.decoder_ff):
            latents = latents + self.decoder_ff(latents)
        # return self.to_logits(latents)
        return x, self.to_logits(latents)


#### cnn

In [5]:

class Conv(nn.Module):
    """Single-channel convolutional downsampler.

    Pipeline: 5x5 convolution (stride 1, padding 3) to one channel, ReLU, then a
    3x3 max-pool with stride 2. Each spatial size ``n`` becomes
    ``(n + 2 - 3) // 2 + 1``; e.g. 240x256 -> 120x128.

    Args:
        in_channels: number of channels in the input image.
    """
    def __init__(self, in_channels=1):
        super().__init__()
        stages = [
            nn.Conv2d(in_channels, 1, kernel_size=5, stride=1, padding=3),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=0),
        ]
        self.conv = nn.Sequential(*stages)

    def forward(self, x):
        """Run the pipeline on ``x``; the output has a single channel."""
        return self.conv(x)

class Conv_Encoder(nn.Module):
    """Convolutional encoder that reduces an image to a 1 x 16 x 16 map (256 values when flattened).

    Two stride-2 convolutions are followed by an adaptive average pool to 64x64,
    which fixes the spatial size regardless of the input resolution; two further
    stride-2 convolutions bring it to 16x16 with a single channel. Every
    convolution is followed by ELU.

    Args:
        in_channels: number of channels in the input image.
    """
    def __init__(self, in_channels=1):
        super().__init__()
        stages = [
            nn.Conv2d(in_channels, 8, kernel_size=3, stride=2, padding=1), nn.ELU(),
            nn.Conv2d(8, 16, kernel_size=5, stride=2, padding=2), nn.ELU(),
            nn.AdaptiveAvgPool2d((64, 64)),
            nn.Conv2d(16, 8, kernel_size=7, stride=2, padding=3), nn.ELU(),
            nn.Conv2d(8, 1, kernel_size=5, stride=2, padding=2), nn.ELU(),
        ]
        self.conv_encoder = nn.Sequential(*stages)

    def forward(self, x):
        """Encode ``x``; the output has one channel and 16x16 spatial size."""
        return self.conv_encoder(x)


#### perceiverio + fourier

In [6]:

from einops import rearrange, repeat
import numpy as np
# Settings for the Fourier positional encoding used by `fourier` below.
num_freq_bands = 6  # frequency bands per axis (each yields one sine and one cosine feature)
max_freq = 10       # fourier_encode spaces its scales linearly from 1 to max_freq / 2
# freq_base = 2,

input_axis = 2      # 2 for images, 3 for video
input_channels = 3
# fourier_encode_data True
# per axis: sin + cos for every band, plus the raw coordinate
fourier_channels = input_axis * (2 * num_freq_bands + 1)  # 26
input_dim = input_channels + fourier_channels  # 29
# print("input_dim",input_dim)
# print("input_dim",input_dim)


def fourier_encode(x, max_freq = 10, num_bands = 6):
    """Append sine/cosine features of ``x`` at several frequencies.

    A trailing axis is added to ``x``; it holds ``num_bands`` sines, then
    ``num_bands`` cosines, then the original value (``2 * num_bands + 1`` entries).
    The frequencies are ``pi`` times ``num_bands`` scales spaced linearly from 1
    to ``max_freq / 2``.
    """
    raw = x.unsqueeze(-1)
    freqs = torch.linspace(1., max_freq / 2, num_bands, device = raw.device, dtype = raw.dtype)
    # give the scales leading singleton axes so they broadcast against `raw`
    freqs = freqs.view(*((1,) * (raw.dim() - 1)), -1)
    angles = raw * freqs * np.pi
    return torch.cat((angles.sin(), angles.cos(), raw), dim = -1)

def fourier(data): # https://github.com/lucidrains/perceiver-pytorch/blob/main/perceiver_pytorch/perceiver_pytorch.py
    """Concatenate Fourier position features to ``data`` and flatten its positional axes.

    ``data`` is unpacked as (batch, *axes, channels). A coordinate grid in
    [-1, 1] is built over ``axes``, encoded with ``fourier_encode`` using the
    module-level ``max_freq`` and ``num_freq_bands``, appended to the channel
    axis, and all positional axes are merged into one.

    Returns a tensor of shape (batch, prod(axes), channels + encoding_channels).
    """
    batch, *axes, _ = data.shape
    coords = [
        torch.linspace(-1., 1., steps=size, device=data.device, dtype=data.dtype)
        for size in axes
    ]
    grid = torch.stack(torch.meshgrid(*coords, indexing = 'ij'), dim = -1) # (*axes, n_axes)
    encoded = fourier_encode(grid, max_freq, num_freq_bands) # (*axes, n_axes, 2 * bands + 1)
    encoded = rearrange(encoded, '... n d -> ... (n d)') # merge per-axis encodings into one feature axis
    encoded = repeat(encoded, '... -> b ...', b = batch) # same positions for every batch element
    combined = torch.cat((data, encoded), dim = -1)
    return rearrange(combined, 'b ... d -> b (...) d')




In [7]:

# 224 -> CNN -> 32
# images = torch.randn(4, 3, 32, 32, device=device) # batch, rgb, dim_x, dim_y
# seq = np.transpose(images, (0, 2, 3, 1)) # [4, 32, 32, 3] batch, dim_x, dim_y, rgb

# Smoke test: Fourier-encode one random frame, flatten it into a single token and
# run it through a PerceiverIO with one decoder query of size 10.
# NOTE(review): `fourier` unpacks its input as (batch, *axes, channels). This tensor
# has no batch axis, so 240 is read as the batch and only the 256-axis is encoded.
images = torch.randn(240, 256, 3, device=device) # dim_x, dim_y, rgb
seq = fourier(images) # recorded run prints [240, 256, 16]: 3 channels + 13 encodings of one axis
print(seq.shape)
# batch,_,h,w= seq.shape
# batch,axis,input_dim=seq.shape # [240, 256, 16]
# NOTE(review): this rebinds the module-level `input_dim` defined with the fourier settings.
h,w,input_dim=seq.shape # [240, 256, 16]

model = PerceiverIO(
    dim = h*w*input_dim,         # feature size of the single flattened token (240*256*16)
    queries_dim = 10,            # dimension of decoder queries
    logits_dim = None,           # dimension of final logits
    depth = 6,                   # depth of net
    num_latents = 128,           # number of latents, or induced set points, or centroids. different papers giving it different names
    latent_dim = 128,            # latent dimension
    cross_heads = 1,             # number of heads for cross attention. paper said 1
    latent_heads = 8,            # number of heads for latent self attention, 8
    cross_dim_head = 64,         # number of dimensions per cross attention head
    latent_dim_head = 64,        # number of dimensions per latent self attention head
    weight_tie_layers = False    # whether to weight tie layers (optional, as indicated in the diagram)
).to(device)

seq = seq.flatten() # 1-D, h*w*input_dim values
seq = preprocess(seq) # [1, 1, h*w*input_dim]: one batch element, one token
# print(seq.shape)
queries = torch.randn(1, 10, device=device) # one decoder query; PerceiverIO repeats 2-D queries over the batch
logits = model(seq, queries = queries) # [1, 1, 10] (logits_dim is None, so no output projection)
pred = postprocess(logits) # [1, 10]
# print(pred.shape)
# print(pred)
pred_probab = nn.Softmax(dim=1)(pred)
outputs = pred_probab

y_pred = pred_probab.argmax(1)
print(y_pred)


torch.Size([240, 256, 16])
tensor([3], device='cuda:0')


# models

#### model simpler

In [16]:
# model.py
# https://github.com/kimhc6028/pytorch-noreward-rl/blob/master/model.py
import math
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

class ActorCritic(torch.nn.Module):
    """Recurrent actor-critic with an intrinsic-curiosity module (inverse and forward models).

    Args:
        in_shape: ``in_shape[0]`` is the number of input channels of the conv
            stack; ``in_shape[1]`` is the flattened feature size the conv output
            is reshaped to and the input size of every LSTM cell.
        action_space: object whose ``n`` attribute is the number of actions.

    NOTE(review): the ICM branch concatenates a 256-wide LSTM hidden state where
    the linear heads expect ``in_shape[1]`` features, so it only works when
    ``in_shape[1] == 256`` — confirm against callers.
    """
    def __init__(self, in_shape, action_space):
        super(ActorCritic, self).__init__()
        self.in_dim = in_shape # mario (240, 256)
        self.conv = nn.Sequential( # embed pi
            nn.Conv2d(in_shape[0], 32, 3, stride=2, padding=1), nn.ELU(),
            nn.Conv2d(32, 32, 3, stride=2, padding=1), nn.ELU(),
            nn.Conv2d(32, 32, 3, stride=2, padding=1), nn.ELU(),
            nn.Conv2d(32, 32, 3, stride=2, padding=1), nn.ELU(),
            nn.Conv2d(32, 32, 3, stride=2, padding=1), nn.ELU(), # added for RuntimeError: Input batch size 2 doesn't match hidden0 batch size 1
            )
        self.lstm = nn.LSTMCell(in_shape[1], 256)
        num_outputs = action_space.n
        self.critic_linear = nn.Linear(256, 1) # -> value
        self.actor_linear = nn.Linear(256, num_outputs) # -> action

        self.inv_lstm = nn.LSTMCell(in_shape[1], 256)
        self.fwd_lstm = nn.LSTMCell(in_shape[1], 256)
        self.inv_linear = nn.Sequential( # inv learning, predict at
            nn.Linear(in_shape[1] + in_shape[1], 256), nn.ReLU(),
            # explicit dim: Softmax() without it relies on a deprecated implicit choice
            nn.Linear(256, num_outputs), nn.Softmax(dim=-1)
            ) # cat(inv hidden, phi(st+1)) -> athat
        self.fwd_linear = nn.Sequential( # predict phi st+1
            nn.Linear(in_shape[1] + num_outputs, 256), nn.ReLU(),
            nn.Linear(256, in_shape[1])
            ) # cat(fwd hidden, at) -> phihat(st+1)

    def forward(self, inputs, icm):
        """Run either the A3C head or the curiosity module.

        With ``icm == False``: ``inputs`` is ``(st, (hx, cx))`` and the result is
        ``(critic, actor, (hx1, cx1))``.

        Otherwise: ``inputs`` is ``((inv_hx, inv_cx), (fwd_hx, fwd_cx), st1, at)``
        and the result is ``(vec_st1, inverse, forward, (inv_hx1, inv_cx1),
        (fwd_hx1, fwd_cx1))`` where ``inverse`` is the predicted action
        distribution and ``forward`` the predicted feature vector.
        """
        if icm == False: #A3C
            st, (a3c_hx, a3c_cx) = inputs
            vec_st = self.conv(st).view(-1, self.in_dim[1])
            a3c_hx1, a3c_cx1 = self.lstm(vec_st, (a3c_hx, a3c_cx))
            critic = self.critic_linear(a3c_hx1)
            actor = self.actor_linear(a3c_hx1)
            return critic, actor, (a3c_hx1, a3c_cx1)

        else: #icm
            (inv_hx, inv_cx), (fwd_hx, fwd_cx), st1, at = inputs
            vec_st1 = self.conv(st1).view(-1, self.in_dim[1])
            # Each recurrent model uses its own incoming state. The previous code
            # referenced undefined names `icm_hx` / `icm_cx` and raised NameError.
            inv_hx1, inv_cx1 = self.inv_lstm(vec_st1, (inv_hx, inv_cx)) # inv model
            fwd_hx1, fwd_cx1 = self.fwd_lstm(vec_st1, (fwd_hx, fwd_cx)) # world model

            inv_vec = torch.cat((inv_hx, vec_st1), 1) # predict at
            fwd_vec = torch.cat((fwd_hx, at), 1) # predict vec_st1
            inverse = self.inv_linear(inv_vec)
            forward = self.fwd_linear(fwd_vec)
            return vec_st1, inverse, forward, (inv_hx1, inv_cx1), (fwd_hx1, fwd_cx1)


#### curiosity PerceiverIO

In [None]:

class ActorCritic(torch.nn.Module):
    """Actor-critic with curiosity module where every recurrent part is a PerceiverIOrnn.

    A frame is reduced by ``Conv``, encoded to a 256-wide vector by a
    ``PerceiverIO`` encoder, and then fed to one of three ``PerceiverIOrnn``
    models whose latent arrays act as the recurrent state: the policy/value
    model (``lstm``), the inverse model (``inv_lstm``) and the forward model
    (``fwd_lstm``). All decoder queries are fixed zero tensors.

    Args:
        in_shape: (height, width, channels) of a frame, e.g. mario (240, 256, 3).
            The encoder input size is ``height * width / 4``, which is assumed to
            equal the flattened ``Conv`` output — TODO confirm for other sizes.
        action_space: object whose ``n`` attribute is the number of actions.
    """
    def __init__(self, in_shape, action_space):
        super(ActorCritic, self).__init__()
        self.in_dim = in_shape # mario (240, 256, 3)
        self.conv = Conv(in_shape[2]).to(device) # embed pi
        # self.lstm = nn.LSTMCell(in_shape[1], 256)
        self.encoder = PerceiverIO( # conv -> PerceiverIO ;encodes state for everyone
            dim = int(in_shape[0]*in_shape[1]/4), # feature size of the flattened conv output
            queries_dim = 256,           # dimension of decoder queries
            logits_dim = None,           # no output projection
            depth = 2,
            num_latents = 16,
            latent_dim = 16,
            cross_heads = 1,             # number of heads for cross attention. paper said 1
            latent_heads = 4,
            cross_dim_head = 8,
            latent_dim_head = 8,
            weight_tie_layers = False
        ).to(device) # st(240*256) -zeros> phist(256)
        self.encoder_query = torch.zeros(1, 256, device=device)
        num_outputs = action_space.n
        self.lstm = PerceiverIOrnn( # latent + phist1(256) -zeros> latent1 + vec_st(256)
            dim = 256,
            queries_dim = 256,
            logits_dim = None,
            depth = 2,
            num_latents = 32,
            latent_dim = 32,
            cross_heads = 1,
            latent_heads = 4,
            cross_dim_head = 8,
            latent_dim_head = 8,
            weight_tie_layers = False
        ).to(device)
        self.lstm_query = torch.zeros(1, 256, device=device)
        self.actor_linear = nn.Linear(256, num_outputs) # vec_st -> action
        self.critic_linear = nn.Linear(256, 1) # vec_st -> value

        self.inv_lstm = PerceiverIOrnn( # inverse model, predict taken action
            dim = 256,
            queries_dim = num_outputs,
            logits_dim = None,
            depth = 1,
            num_latents = 64,
            latent_dim = 64,
            cross_heads = 1,
            latent_heads = 4,
            cross_dim_head = 16,
            latent_dim_head = 16,
            weight_tie_layers = False
        ).to(device) # inv_latent + phi(st+1) -zeros> at +inv_latent1
        self.inv_query = torch.zeros(1, num_outputs, device=device)
        self.fwd_lstm = PerceiverIOrnn( # world model
            dim = 256 + num_outputs,     # encoded state concatenated with the action
            queries_dim = 256,
            logits_dim = None,
            depth = 1,
            num_latents = 128,
            latent_dim = 128,
            cross_heads = 1,
            latent_heads = 8,
            cross_dim_head = 32,
            latent_dim_head = 32,
            weight_tie_layers = False
        ).to(device) # fwd_latent + phi(st) cat at -zeros> phi(st1) +fwd_latent1
        self.fwd_query = torch.zeros(1, 256, device=device)

    def encode(self, st):
        """Encode one channels-last frame ``st`` into the encoder's decoded output (one 256-wide query)."""
        # channels-last -> channels-first; same result as transpose(1, 2) followed by transpose(0, 1)
        st = st.permute(2, 0, 1) # [3, 240, 256] rgb, dim_x, dim_y
        cst = self.conv(st).flatten() # [120*128]
        cst = cst.view(1,1,-1) # [1, 1, 120*128]: one batch element, one token
        phist = self.encoder(cst, queries = self.encoder_query)
        return phist

    def forward(self, inputs, icm):
        """Run either the policy/value head or the curiosity module.

        With ``icm == False``: ``inputs`` is ``(st, latent)`` and the result is
        ``(critic, actor, latent1)``; pass ``latent=None`` to start from the
        model's zero latents.

        Otherwise: ``inputs`` is ``(inv_latent, fwd_latent, st1, at)`` and the
        result is ``(phist, inverse, forward, inv_latent1, fwd_latent1)``, with
        ``inverse`` and ``forward`` softmax-normalized over their last axis.
        """
        if icm == False: #A3C
            st, latent = inputs # [240, 256, 3]
            phist = self.encode(st)
            latent1, vec_st = self.lstm(phist, queries = self.lstm_query, x=latent)
            critic = self.critic_linear(vec_st)
            actor = self.actor_linear(vec_st)
            return critic[0], actor[0], latent1
        else: #icm
            inv_latent, fwd_latent, st1, at = inputs
            phist = self.encode(st1)
            # inv model; inv_latent + phi(st+1) -> at + inv_latent1
            inv_latent1, inverse = self.inv_lstm(phist, queries = self.inv_query, x=inv_latent)
            # world model; fwd_latent + phi(st) cat at -> phi(st1) + fwd_latent1
            fwd_input = torch.cat((phist, at.unsqueeze(0)), -1)
            fwd_latent1, forward = self.fwd_lstm(fwd_input, queries = self.fwd_query, x=fwd_latent)
            # Explicit dim=-1 matches what the implicit-dim nn.Softmax() picked for
            # these 2-D tensors, without the deprecation warning or a new module per call.
            inverse = F.softmax(inverse[0], dim=-1)
            forward = F.softmax(forward[0], dim=-1)
            return phist[0], inverse, forward, inv_latent1, fwd_latent1


#### alternate light

In [51]:

class ActorCritic(torch.nn.Module):
    """Lightweight actor-critic with ICM-style inverse and forward heads.

    The policy path used by ``forward`` is ``conv_encode`` (CNN) ->
    ``nn.LSTMCell`` -> linear actor / critic heads. The curiosity path feeds
    the CNN feature through two ``PerceiverIOrnn`` modules: an inverse model
    and a world (forward) model.

    ``self.conv``, ``self.encoder`` and ``self.lstm`` are still constructed
    (``encode`` uses the first two) but ``forward`` does not call them; they
    remain part of the module's parameters and state dict.
    """

    def __init__(self, in_shape, action_space):
        """Build the encoders, recurrent cores and output heads.

        Args:
            in_shape: observation shape indexed as ``[0], [1], [2]``;
                for Mario this is (240, 256, 3).
            action_space: object exposing ``n``, the number of discrete actions.
        """
        super().__init__()
        self.in_dim = in_shape # mario (240, 256, 3)
        self.conv = Conv(in_shape[2]).to(device) # embed pi
        phist_size= 512 # hidden size of self.lstmcell and input width of the actor / critic heads
        # self.lstm = nn.LSTMCell(in_shape[1], 256)
        self.encoder = PerceiverIO( # conv -> PerceiverIO ;encodes state for everyone
            dim = int(in_shape[0]*in_shape[1]/4),# dimension of sequence to be encoded
            queries_dim = 256,            # dimension of decoder queries
            logits_dim = None,            # dimension of final logits
            depth = 2,                   # depth of net
            num_latents = 16,           # number of latents, or induced set points, or centroids. different papers giving it different names
            latent_dim = 16,            # latent dimension
            cross_heads = 1,             # number of heads for cross attention. paper said 1
            latent_heads = 4,            # number of heads for latent self attention, 8
            cross_dim_head = 8,         # number of dimensions per cross attention head
            latent_dim_head = 8,        # number of dimensions per latent self attention head
            weight_tie_layers = False    # whether to weight tie layers (optional, as indicated in the diagram)
        ).to(device) # st(240*256) -zeros> phist(256)
        # Query tensors are plain attributes (neither parameters nor registered
        # buffers), created directly on ``device``.
        self.encoder_query = torch.zeros(1, 256, device=device)

        self.conv_encoder = Conv_Encoder(in_shape[2]).to(device)
        num_outputs = action_space.n
        self.lstm = PerceiverIOrnn( # latent + phist1(256) -zeros> latent1 + vec_st(256)
            dim = 256,                  # dimension of sequence to be encoded
            queries_dim = 256,            # dimension of decoder queries
            logits_dim = None,            # dimension of final logits
            depth = 2,                   # depth of net
            num_latents = 32,           # number of latents, or induced set points, or centroids. different papers giving it different names
            latent_dim = 32,            # latent dimension
            cross_heads = 1,             # number of heads for cross attention. paper said 1
            latent_heads = 4,            # number of heads for latent self attention, 8
            cross_dim_head = 8,         # number of dimensions per cross attention head
            latent_dim_head = 8,        # number of dimensions per latent self attention head
            weight_tie_layers = False    # whether to weight tie layers (optional, as indicated in the diagram)
        ).to(device) # st-> phist ; 240*256 -zeros> 256
        self.lstm_query = torch.zeros(1, 256, device=device)
        self.lstmcell = nn.LSTMCell(256, phist_size)

        self.actor_linear = nn.Linear(phist_size, num_outputs) # vec_st -> action
        self.critic_linear = nn.Linear(phist_size, 1) # vec_st -> value

        self.inv_lstm = PerceiverIOrnn( # inverse model, predict taken action
            dim = 256,                   # dimension of sequence to be encoded
            queries_dim = num_outputs,   # dimension of decoder queries
            logits_dim = None,           # dimension of final logits
            depth = 1,                   # depth of net
            num_latents = 64,           # number of latents, or induced set points, or centroids. different papers giving it different names
            latent_dim = 64,            # latent dimension
            cross_heads = 1,             # number of heads for cross attention. paper said 1
            latent_heads = 4,            # number of heads for latent self attention, 8
            cross_dim_head = 16,         # number of dimensions per cross attention head
            latent_dim_head = 16,        # number of dimensions per latent self attention head
            weight_tie_layers = False    # whether to weight tie layers (optional, as indicated in the diagram)
        ).to(device) # inv_latent + phi(st+1) -zeros> at +inv_latent1
        self.inv_query = torch.zeros(1,1, num_outputs, device=device)
        self.fwd_lstm = PerceiverIOrnn( # world model
            dim = 256 + num_outputs,     # dimension of sequence to be encoded
            queries_dim = 256,           # dimension of decoder queries
            logits_dim = None,           # dimension of final logits
            depth = 1,                   # depth of net
            num_latents = 128,           # number of latents, or induced set points, or centroids. different papers giving it different names
            latent_dim = 128,            # latent dimension
            cross_heads = 1,             # number of heads for cross attention. paper said 1
            latent_heads = 8,            # number of heads for latent self attention, 8
            cross_dim_head = 32,         # number of dimensions per cross attention head
            latent_dim_head = 32,        # number of dimensions per latent self attention head
            weight_tie_layers = False    # whether to weight tie layers (optional, as indicated in the diagram)
        ).to(device) # fwd_latent + phi(st) cat at -zeros> phi(st1) +fwd_latent1
        self.fwd_query = torch.zeros(1, 256, device=device)

    def encode(self, st):
        """Embed a frame with the conv stem followed by the PerceiverIO encoder.

        Alternative to ``conv_encode``; ``forward`` does not call it.
        """
        st = torch.transpose(st, 1,2)
        st = torch.transpose(st, 0,1) # [3, 240, 256] rgb, dim_x, dim_y
        cst = self.conv(st).flatten() # [120*128]
        cst = cst.view(1,1,-1) # [1, 1, 120*128]
        phist = self.encoder(cst, queries = self.encoder_query)
        return phist # 256

    def conv_encode(self, st):
        """Embed a frame with the CNN encoder only; returns a ``[1, numel]`` tensor."""
        st = torch.transpose(st, 1,2)
        st = torch.transpose(st, 0,1) # [3, 240, 256] rgb, dim_x, dim_y
        phist = self.conv_encoder(st).flatten() # [256]
        phist = phist.view(1,-1)
        return phist # 256

    def forward(self, inputs, icm):
        """Run either the actor-critic (A3C) path or the curiosity (ICM) path.

        Args:
            inputs: when ``icm`` is falsy, ``(st, (a3c_hx, a3c_cx))``; otherwise
                ``(inv_latent, fwd_latent, st1, at)``.
            icm: branch selector; falsy -> A3C path, truthy -> ICM path.

        Returns:
            A3C path: ``(critic, actor, (hx1, cx1))`` where ``(hx1, cx1)`` is
            the new LSTMCell state.
            ICM path: ``(phist[0], inverse, forward, inv_latent1, fwd_latent1)``
            with ``inverse`` and ``forward`` softmax-normalised along dim 1.
        """
        if not icm: #A3C
            # PerceiverIO alternative kept for reference:
            # st, latent = inputs # [240, 256, 3]
            # phist = self.encode(st) # using perceiverio to encode
            # latent1, vec_st = self.lstm(phist, queries = self.lstm_query, x=latent)
            st, (a3c_hx, a3c_cx) = inputs # [240, 256, 3]
            phist = self.conv_encode(st) # using cnn to encode
            # the LSTMCell hidden state doubles as the feature fed to both heads
            vec_st, a3c_cx1 = self.lstmcell(phist, (a3c_hx, a3c_cx))

            critic = self.critic_linear(vec_st)
            actor = self.actor_linear(vec_st)
            # return critic[0], actor[0], latent1 # for perceiverio encode [1, 1], [1, 18],
            return critic, actor, (vec_st, a3c_cx1) # for cnn encode [1, 1], [1, 18],

        # icm
        inv_latent, fwd_latent, st1, at = inputs
        # phist = self.encode(st1) # perceiverio
        phist = self.conv_encode(st1).unsqueeze(0) # cnn [1, 1, 256]
        # inverse model: inv_latent + phi(st1) -> action scores + inv_latent1
        inv_latent1, inverse = self.inv_lstm(phist, queries = self.inv_query, x=inv_latent)
        # world model: fwd_latent + (feature cat action) -> predicted feature + fwd_latent1
        # NOTE(review): the feature concatenated here is the encoding of st1, which is
        # also returned as phist[0]; the original note describes phi(st) as the
        # input -- confirm this is intended.
        fwd_input = torch.cat((phist, at.unsqueeze(0)), -1)
        fwd_latent1, forward = self.fwd_lstm(fwd_input, queries = self.fwd_query, x=fwd_latent)
        # nn.Softmax() without ``dim`` is deprecated and warns on every call.
        # dim=1 is the axis the implicit rule selects for the 2-D tensors
        # produced by indexing [0] here.
        inverse = nn.Softmax(dim=1)(inverse[0])
        forward = nn.Softmax(dim=1)(forward[0])
        return phist[0], inverse, forward, inv_latent1, fwd_latent1 # [1, 320], [1, 18], [1, 320], ()


# wwwwwwwwwwwww

#### train

In [50]:
# train.py
# https://github.com/kimhc6028/pytorch-noreward-rl/blob/master/train.py
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import time

def train(env, args, model, optimizer=None):
    """Train ``model`` for ``num_episodes`` episodes with the A3C + curiosity loss.

    Each episode is rolled out until the environment reports done or
    ``max_episode_length`` steps have elapsed; one optimiser step is then
    taken on ``inv_loss + pol_loss``.

    Reads the notebook-level globals ``device``, ``lr``, ``num_episodes``,
    ``max_episode_length``, ``eta``, ``gamma``, ``tau``, ``beta`` and ``lmbda``.

    Args:
        env: gym-style environment; ``reset()``, ``step(action)`` and
            ``action_space.n`` are used.
        args: unused; kept so existing call sites keep working.
        model: network called as ``model(inputs, icm=False)`` for the policy
            and ``model(inputs, icm=True)`` for the curiosity heads.
        optimizer: optimiser to step. When None, an Adam optimiser over
            ``model``'s parameters is created with learning rate ``lr``.
    """
    if optimizer is None:
        # Optimise the model that was passed in; this previously reached for the
        # global ``shared_model`` regardless of the ``model`` argument.
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    model.train()
    for _episode in range(num_episodes):
        # fresh recurrent state at the start of every episode
        a3c_hx = torch.zeros(1, 512).to(device)
        a3c_cx = torch.zeros(1, 512).to(device)
        inv_latent = None
        fwd_latent = None
        values = []
        log_probs = []
        rewards = []
        entropies = []
        inverses = []
        forwards = []
        actions = []
        vec_st1s = []
        episode_length = 0

        state = env.reset()
        state = torch.from_numpy(state.copy()).type(torch.float).to(device) # i added, change from int to float
        while True:
            episode_length += 1
            value, logit, (a3c_hx, a3c_cx) = model((state, (a3c_hx, a3c_cx)), icm = False)

            prob = F.softmax(logit, dim=1)
            log_prob = F.log_softmax(logit, dim=1)
            entropy = -(log_prob * prob).sum(1)
            entropies.append(entropy.cpu())
            action = prob.multinomial(1).data
            log_prob = log_prob.gather(1, action)
            # one-hot copy of the sampled action: inverse-model target and world-model input
            oh_action = torch.zeros(1, env.action_space.n)
            oh_action[0][action.item()] = 1.0
            actions.append(oh_action)
            state, reward, done, _ = env.step(action.item())
            state = torch.from_numpy(state.copy()).type(torch.float).to(device)
            done = done or episode_length >= max_episode_length
            # reward = max(min(reward, 1), -1) #why clip rewards?
            vec_st1, inverse, forward, inv_latent, fwd_latent = model((inv_latent, fwd_latent, state, oh_action.to(device)), icm = True)
            # curiosity bonus: scaled squared error between the encoded next state and the world-model output
            reward_intrinsic = eta * ((vec_st1 - forward).pow(2)).sum(1) / 2.
            #reward_intrinsic = eta * ((vec_st1 - forward).pow(2)).sum(1).sqrt() / 2.
            reward_intrinsic = reward_intrinsic.item()
            reward += reward_intrinsic
            values.append(value.cpu())
            log_probs.append(log_prob.cpu())
            rewards.append(reward)
            vec_st1s.append(vec_st1.cpu())
            inverses.append(inverse.cpu())
            forwards.append(forward.cpu())
            if done:
                print(episode_length)
                break
        R = torch.zeros(1, 1)
        # NOTE(review): the rollout loop above only exits once ``done`` is true, so
        # this bootstrap branch never runs as written; episodes cut off by
        # max_episode_length are therefore treated as terminal (R = 0).
        if not done:
            value, _, _ = model((state, (a3c_hx, a3c_cx)), icm = False)
            R = value.data
        values.append(R)
        policy_loss = 0
        value_loss = 0
        inverse_loss = 0
        forward_loss = 0
        gae = torch.zeros(1, 1)
        for i in reversed(range(len(rewards))):
            R = gamma * R + rewards[i]
            advantage = R - values[i]
            value_loss = value_loss + 0.5 * advantage.pow(2)
            # Generalized Advantage Estimation
            delta_t = torch.tensor(rewards[i]) + gamma * values[i + 1].data - values[i].data
            gae = gae * gamma * tau + delta_t
            policy_loss = policy_loss - log_probs[i] * gae - 0.01 * entropies[i]
            # inverse model: cross entropy between the taken action and the predicted distribution
            cross_entropy = - (actions[i] * torch.log(inverses[i] + 1e-15)).sum(1)
            inverse_loss = inverse_loss + cross_entropy
            # world model: squared error against the encoded next state
            forward_err = forwards[i] - vec_st1s[i]
            forward_loss = forward_loss + 0.5 * (forward_err.pow(2)).sum(1)
        optimizer.zero_grad()
        inv_loss = (1-beta) * inverse_loss + beta * forward_loss
        pol_loss = lmbda * (policy_loss + 0.5 * value_loss)
        (inv_loss + pol_loss).backward()
        print(''.join([str(torch.argmax(a).item()) for a in actions]))
        print("inv_loss: ", inv_loss.item(), " ,pol_loss: ", pol_loss.item())
        # if log:
        #     wandb.log({"inv_loss": inv_loss.item(), "pol_loss": pol_loss.item()})
        # clip_grad_norm (no trailing underscore) is deprecated in favour of the in-place clip_grad_norm_
        torch.nn.utils.clip_grad_norm_(model.parameters(), 40)
        optimizer.step()


#### test

In [52]:
# test.py
# https://github.com/kimhc6028/pytorch-noreward-rl/blob/master/test.py
import numpy as np
import torch
import torch.nn.functional as F
import time

def test(env, args, model):
    # torch.manual_seed(seed)
    # model = ActorCritic(env.observation_space.shape, env.action_space)
    # model.load_state_dict(shared_model.state_dict())
    model.eval()
    state = env.reset()
    state = torch.from_numpy(state.copy()).type(torch.float).to(device)
    reward_sum = 0
    start_time = time.time()
    actions = []
    episode_length = 0
    result = []
    latent = None
    a3c_hx = torch.zeros(1, 512).to(device)
    a3c_cx = torch.zeros(1, 512).to(device)
    while True:
        episode_length += 1
        # value, logit, latent = model((state, latent), icm = False)
        value, logit, (a3c_hx, a3c_cx) = model((state, (a3c_hx, a3c_cx)), icm = False)
        prob = F.softmax(logit, dim=1) #from train
        action = prob.multinomial(1).data
        state, reward, done, _ = env.step(action.item())
        state = torch.from_numpy(state.copy()).type(torch.float).to(device)

        done = done or episode_length >= max_episode_length
        # print("rwd ext: ", reward)
        reward_sum += reward
        actions.append(action[0])
        if done:
            end_time = time.time()
            print("Time {}, episode reward {}, episode length {}".format(
                time.strftime("%Hh %Mm %Ss", time.gmtime(end_time - start_time)), reward_sum, episode_length))
            result.append((reward_sum, end_time - start_time))
            torch.save(model.state_dict(), 'model.pth')
            # print(''.join([str(a.item()) for a in actions]))
            print([a.item() for a in actions])
            break


#### main

In [53]:
# main.py
# https://github.com/kimhc6028/pytorch-noreward-rl/blob/master/main.py
# import os, sys, cv2
import torch
import torch.nn as nn
import torch.nn.functional as F
import gym

# Notebook-level hyperparameters. train() and test() read these as globals,
# so the names must stay as they are.
# Adam learning rate
lr=0.001
# discount factor applied to returns and to the GAE recursion in train()
gamma=0.99
# GAE decay: train() computes gae = gae * gamma * tau + delta_t
tau=1.00
# passed to torch.manual_seed below
seed=1
# num_processes is only referenced by the commented-out multiprocessing code
# below; num_steps is not referenced by any code visible in this section.
num_processes=4
num_steps=20
# per-episode step cap used by train() and test()
max_episode_length=500 # 10000
# env_name='PongDeterministic-v4'
# env_name='LunarLander-v2'
# env_name='MontezumaRevengeDeterministic-v4'
# env_name='MontezumaRevengeDeterministic-ram-v4'

# when True no optimizer is built here and train() creates its own
no_shared=False
# scale of the intrinsic (curiosity) reward in train()
eta=0.01
# train() weights the losses as (1-beta) * inverse_loss + beta * forward_loss
beta=0.2
# train() scales the policy/value loss by lmbda
lmbda=0.1
# outdir and record are not referenced by any code visible in this section
outdir="output"
record='store_true'
# episodes run by each call to train()
num_episodes=10#100

# Seed before the model is constructed so weight initialisation is repeatable.
torch.manual_seed(seed)
# env = gym.make(env_name)
# env = SparseEnv(env)
env = gym_super_mario_bros.make('SuperMarioBros-v0')
env = JoypadSpace(env, COMPLEX_MOVEMENT) # SIMPLE_MOVEMENT COMPLEX_MOVEMENT
env = MarioSparse(env)
env = MarioEarlyStop(env)
# query_environment("MountainCar-v0")

print(env.observation_space.shape, env.action_space) # (210, 160, 3) Discrete(18); mario complex (240, 256, 3) Discrete(12)

shared_model = ActorCritic(env.observation_space.shape, env.action_space).to(device)
# shared_model.share_memory()
if no_shared:
    optimizer = None
else:
    optimizer = torch.optim.Adam(shared_model.parameters(), lr=lr)
    # optimizer.share_memory()
# train() and test() accept args but do not use it
args=None
# train(0, args, shared_model, optimizer)
# train(0, args, shared_model, optimizer)

# processes = []
# import torch.multiprocessing as mp
# p = mp.Process(target=test, args=(num_processes, args, shared_model))
# p.start()
# processes.append(p)
# for rank in range(0, num_processes):
#     p = mp.Process(target=train, args=(rank, args, shared_model, optimizer))
#     p.start()
#     processes.append(p)
# for p in processes:
#     p.join()



  f"The environment {id} is out of date. You should consider "


(240, 256, 3) Discrete(12)


  "We recommend you to use a symmetric and normalized Box action space (range=[-1, 1]) "


#### run

In [None]:
# Raise the per-episode step cap; train() and test() read this global at call time.
max_episode_length=1000 # 10000


In [None]:
# train(env, args, shared_model)

# 20 calls to train(); each call runs num_episodes episodes and takes one
# optimiser step per episode, reusing the optimizer built in the main cell.
for x in range(20):
    train(env, args, shared_model, optimizer)
# One evaluation episode; test() also writes the weights to 'model.pth'.
test(env, args, shared_model)


500
7790122261111133764654663646105018413103769461165341994745521121821013731007691154103855790130514311424621110295107101135351187081111191174999128059491112920628966611106369110111040040751026772100180612110661287394711615101485049210810111111102111052441001014000104980065107410511610611411119641010102115472411331993108537205911111116610816321329367938342401015971040164117344261073883110290024215929443101030659920100111034107197067105201036111767181114013876197109249911211797338836843310847796042944110210254110251101034117348692859040651100855612820651010116104741411777546411010
inv_loss:  999.70068359375  ,pol_loss:  -5.3031005859375
500
545837809244901092871017442677811343111189100415371144357310103231110628611171726111198104110245291111115324424259509099661072261045101010705633679107386107136101074010214311311105844831015511048261125895891109114171011312461010282188101611204878109113251782118089901766101047358811011128810471102091470117451831474103628111165619346388511493101152435

#### save

In [57]:

import os

from google.colab import drive

# Mount Google Drive so the checkpoint is written under MyDrive.
drive.mount('/content/gdrive')
PATH="/content/gdrive/MyDrive/curious/" # for saving to google drive
name='model_mario_perceiverio_fwdinv.pth'
# PATH="/content/" # for saving on colab only
# name='model.pth'

# torch.save does not create missing folders; make sure the target directory
# exists so a fresh Drive does not make the save fail.
os.makedirs(PATH, exist_ok=True)

model=shared_model
torch.save(model.state_dict(), os.path.join(PATH, name))

# To restore instead of save:
# model.load_state_dict(torch.load(PATH+name))
# shared_model=model


Mounted at /content/gdrive


#### video

In [58]:

import gym
from colabgymrender.recorder import Recorder
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT, COMPLEX_MOVEMENT

# # env = gym.make("MontezumaRevengeDeterministic-v4")
# env = SparseEnv(env)
# Wrapped Mario environment whose episodes are recorded under ./video.
env = Recorder(
    MarioSparse(
        JoypadSpace(
            gym_super_mario_bros.make('SuperMarioBros-v0'),
            COMPLEX_MOVEMENT, # SIMPLE_MOVEMENT COMPLEX_MOVEMENT
        )
    ),
    './video',
)

state = env.reset()
# Optional: drive the replay with a policy instead of a stored action list.
# device='cpu'
# model = ActorCritic(env.observation_space.shape, env.action_space)#.to(device)
# model.load_state_dict(shared_model.state_dict())
# model.eval()
# torch.manual_seed(6)
# x counts the environment steps taken by the replay loop below
x, latent = 0, None

# acts=[8, 0, 3, 5, 8, 3, 8, 7, 3, 6, 11, 8, 9, 6, 3, 8, 7, 4, 4, 7, 6, 0, 5, 6, 10, 4, 8, 11, 4, 10, 7, 10, 8, 7, 7, 0, 3, 7, 7, 9, 4, 7, 0, 4, 10, 4, 5, 0, 6, 3, 7, 1, 10, 7, 8, 4, 4, 6, 7, 4, 2, 1, 3, 7, 10, 9, 7, 5, 1, 6, 2, 10, 0, 11, 7, 2, 6, 10, 10, 0, 9, 1, 9, 7, 5, 10, 8, 11, 11, 6, 1, 2, 5, 11, 7, 4, 9, 1, 8, 3, 7, 0, 1, 0, 4, 7, 2, 3, 2, 7, 7, 6, 6, 0, 9, 7, 7, 3, 11, 7, 0, 10, 4, 4, 3, 3, 6, 7, 11, 8, 1, 9, 2, 7, 11, 1, 9, 3, 1, 11, 0, 0, 6, 1, 5, 5, 6, 7, 5, 8, 2, 10, 8, 8, 4, 7, 3, 1, 10, 4, 10, 1, 10, 4, 8, 7, 0, 6, 6, 2, 7, 1, 4, 6, 6, 10, 11, 7, 0, 9, 0, 7, 5, 11, 11, 1, 0, 8, 8, 2, 0, 0, 9, 4, 0, 5, 6, 6, 10, 6, 10, 10, 9, 8, 10, 9, 1, 6, 7, 4, 7, 4, 9, 11, 7, 4, 3, 10, 5, 3, 7, 10, 8, 4, 1, 0, 2, 1, 11, 5, 10, 6, 9, 6, 5, 11, 5, 8, 0, 6, 11, 8, 6, 11, 7, 1, 7, 0, 8, 6, 9, 7, 9, 8, 6, 2, 7, 7, 4, 10, 9, 8, 11, 2, 4, 9, 2, 6, 10, 8, 8, 10, 10, 4, 7, 6, 0, 10, 10, 10, 0, 11, 8, 7, 7, 10, 3, 3, 10, 9, 6, 6, 7, 8, 3, 0, 0, 2, 0, 0, 9, 2, 2, 10, 4, 6, 5, 3, 8, 10, 10, 9, 3, 6, 3, 7, 6, 10, 10, 6, 10, 0, 1, 10, 1, 10, 0, 9, 3, 7, 4, 7, 3, 7, 8, 1, 10, 7, 10, 4, 6, 7, 10, 2, 9, 0, 1, 0, 7, 2, 1, 2, 11, 3, 9, 4, 6, 0, 7, 10, 10, 7, 0, 6, 7, 1, 2, 7, 10, 10, 9, 6, 8, 8, 10, 6, 4, 2, 0, 1, 7, 6, 4, 7, 3, 6, 2, 10, 7, 9, 11, 8, 3, 2, 9, 7, 7, 6, 1, 1, 11, 10, 3, 10, 1, 4, 2, 4, 4, 4, 6, 9, 6, 10, 7, 8, 10, 10, 10, 8, 2, 4, 0, 4, 1, 10, 5, 0, 7, 5, 1, 6, 2, 2, 3, 7, 5, 4, 8, 0, 5, 9, 11, 5, 3, 10, 10, 7, 0, 0, 2, 7, 1, 0, 2, 2, 4, 8, 7, 6, 10, 10, 8, 9, 6, 11, 1, 7, 0, 0, 7, 10, 2, 4, 6, 6, 8, 2, 0, 7, 10, 10, 11, 0, 7, 10, 9, 3, 9, 7, 8, 9, 11, 3, 11, 0, 11, 2, 0, 2]
# acts=[0, 5, 3, 8, 7, 9, 10, 11, 3, 0, 8, 4, 9, 4, 8, 9, 7, 8, 9, 10, 3, 4, 1, 10, 6, 9, 9, 10, 5, 7, 1, 6, 0, 1, 2, 9, 5, 1, 6, 9, 1, 0, 0, 4, 3, 9, 9, 7, 7, 9, 2, 1, 9, 8, 7, 3, 4, 4, 2, 9, 1, 4, 6, 0, 0, 0, 1, 10, 3, 10, 4, 7, 0, 6, 3, 0, 9, 10, 3, 0, 0, 2, 6, 7, 4, 4, 9, 0, 1, 11, 3, 11, 10, 6, 10, 11, 4, 11, 3, 9, 10, 0, 1, 10, 1, 9, 0, 10, 6, 2, 8, 10, 2, 4, 2, 3, 9, 4, 8, 1, 9, 1, 0, 1, 0, 8, 10, 9, 3, 4, 1, 0, 1, 1, 0, 10, 10, 4, 0, 9, 9, 9, 0, 0, 7, 2, 8, 3, 2, 8, 1, 6, 7, 9, 10, 3, 11, 3, 7, 3, 3, 8, 8, 9, 2, 9, 10, 3, 3, 0, 1, 3, 5, 9, 1, 3, 8, 2, 2, 4, 10, 3, 1, 10, 4, 6, 6, 6, 3, 9, 8, 3, 7, 7, 9, 7, 4, 7, 11, 4, 0, 2, 3, 7, 1, 4, 2, 11, 7, 7, 1, 4, 9, 4, 0, 1, 6, 4, 9, 0, 0, 8, 4, 11, 2, 10, 1, 4, 0, 0, 3, 7, 1, 7, 11, 6, 4, 0, 6, 6, 6, 3, 10, 5, 1, 1, 6, 1, 6, 6, 8, 10, 6, 11, 2, 7, 9, 4, 1, 7, 5, 6, 4, 1, 3, 9, 7, 0, 7, 7, 9, 1, 1, 1, 10, 0, 1, 7, 0, 6, 8, 1, 8, 7, 1, 4, 9, 4, 5, 10, 3, 0, 3, 5, 10, 4, 10, 1, 7, 7, 4, 3, 3, 0, 11, 10, 4, 1, 10, 2, 7, 10, 1, 9, 11, 7, 0, 0, 8, 0, 0, 6, 6, 3, 7, 6, 2, 4, 9, 6, 2, 7, 9, 11, 9, 1, 9, 3, 3, 10, 7, 7, 11, 6, 1, 7, 7, 3, 7, 9, 10, 3, 9, 11, 1, 4, 11, 10, 3, 8, 8, 5, 7, 10, 7, 1, 7, 8, 6, 6, 4, 6, 2, 6, 1, 1, 1, 4, 3, 1, 6, 3, 1, 9, 7, 3, 1, 1, 9, 6, 0, 3, 5, 10, 1, 10, 3, 4, 8, 2, 11, 9, 7, 7, 2, 6, 8, 11, 8, 10, 3, 1, 0, 1, 9, 0, 8, 0, 9, 5, 10, 2, 1, 6, 4, 5, 7, 10, 3, 1, 6, 7, 4, 9, 1, 10, 10, 8, 11, 3, 4, 9, 7, 6, 3, 6, 4, 9, 11, 7, 3, 10, 5, 4, 10, 0, 9, 10, 9, 5, 3, 2, 3, 9, 0, 4, 4, 9, 4, 3, 1, 6, 3, 11, 10, 6, 9, 10, 10, 6, 1, 1, 7, 2, 3, 10, 4, 4, 11, 6, 4, 1, 1, 4, 9, 4, 6, 3, 6, 11]
# acts=[6, 11, 5, 1, 2, 7, 4, 1, 7, 5, 3, 2, 2, 3, 1, 11, 11, 11, 2, 1, 5, 10, 3, 2, 5, 1, 2, 9, 10, 8, 3, 1, 2, 2, 2, 1, 1, 10, 3, 7, 1, 6, 1, 8, 1, 4, 1, 9, 0, 8, 1, 0, 11, 0, 4, 2, 1, 3, 3, 2, 5, 4, 3, 0, 0, 7, 9, 9, 0, 0, 7, 1, 1, 2, 8, 0, 4, 8, 1, 3, 3, 2, 0, 1, 8, 4, 1, 10, 3, 3, 3, 0, 1, 7, 6, 9, 1, 10, 1, 2, 10, 10, 2, 1, 0, 9, 3, 6, 11, 9, 10, 6, 8, 11, 7, 9, 7, 1, 10, 3, 1, 0, 10, 3, 7, 10, 5, 3, 4, 1, 4, 6, 11, 11, 1, 11, 3, 4, 5, 10, 10, 3, 5, 11, 1, 3, 11, 8, 4, 6, 4, 5, 1, 4, 1, 8, 3, 4, 11, 7, 10, 4, 0, 1, 0, 10, 7, 10, 11, 10, 0, 0, 1]
# acts=[3, 3, 3, 3, 4, 3, 3, 1, 3, 5, 6, 10, 3, 10, 4, 3, 10, 1, 10, 1, 3, 3, 3, 1, 3, 9, 10, 11, 11, 3, 10, 3, 3, 4, 7, 10, 3, 1, 3, 1, 2, 3, 8, 11, 3, 3, 8, 1, 1, 4, 3, 3, 10, 1, 3, 3, 3, 8, 9, 10, 3, 3, 3, 3, 5, 3, 3, 1, 3, 2, 3, 1, 3, 3, 3, 1, 10, 10, 3, 1, 8, 9, 1, 5, 3, 3, 3, 3, 11, 1, 5, 3, 10, 3, 11, 3, 9, 3, 8, 4, 3, 4, 7, 5, 8, 1, 3, 3, 5, 11, 3, 7, 1, 5, 3, 7, 3, 1, 10, 5, 1, 3, 3, 3, 11, 3, 10, 8, 3, 10, 3, 4, 10, 3, 3, 6, 3, 3, 3, 3, 3, 3, 0, 4, 11, 3, 3, 3, 3, 3, 4, 3, 3, 1, 10, 4, 1, 3, 3, 7, 3, 2, 2, 4, 8, 3, 3, 1, 1, 1, 3, 10, 3, 0, 9, 4, 4, 3, 3, 3, 8, 1, 3, 3, 3, 3, 8, 3, 3, 3, 1, 11, 3, 4, 5, 3, 0, 1, 7, 1, 7, 3, 7, 3, 10, 3, 3, 3, 3, 4, 3, 1, 9, 3, 3, 8, 1, 3, 1, 1, 8, 7, 4, 4, 3, 3, 3, 3, 3, 10, 3, 2, 5, 7, 3, 1, 1, 10, 8, 3, 8, 3, 8, 4, 10, 9, 1, 4, 9, 8, 3, 7, 3, 1, 3, 3, 4, 10, 4, 4, 1, 0, 3, 4, 3, 3, 3, 3, 1, 3, 5, 3, 3, 4, 4, 3, 3, 4, 3, 3, 3, 3, 1, 3, 1, 1, 3, 3, 3, 3, 4, 4, 0, 4, 1, 3, 7, 3, 9, 3, 3, 3, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 6, 11, 10, 1, 3, 3, 3, 3, 0, 4, 4, 3, 10, 3, 3, 3, 9, 2, 3, 10, 11, 4, 3, 1, 3, 1, 2, 3, 3, 5, 6, 3, 1, 3, 4, 1, 5, 4, 3, 3, 10, 10, 3, 1, 1, 9, 8, 8, 9, 5, 4, 6, 1, 1, 3, 3, 3, 3, 3, 3, 8, 3, 4, 4, 3, 3, 3, 11, 1, 1, 3, 3, 3, 3, 3, 11, 3, 8, 4, 3, 1, 3, 1, 9, 3, 10, 3, 3, 3, 1, 1, 4, 4, 1, 3, 3, 3, 3, 10, 10, 1, 4, 2, 5, 1, 4, 3, 4, 3, 4, 3, 10, 3, 11, 3, 4, 3, 10, 10, 3, 5, 3, 3, 3, 6, 3, 3, 11, 3, 3, 3, 3, 3, 3, 10, 3, 3, 3, 3, 10, 3, 3, 10, 3, 4, 5, 3, 3, 11, 4, 4, 4, 1, 10, 3, 7, 4, 3, 3, 8, 10, 11, 3, 10, 4, 1, 0, 6, 3, 3, 3, 10, 1, 3, 3, 4, 4, 1, 1, 1, 6, 3, 1, 3, 3, 1, 10, 4]
# acts=[4, 3, 10, 4, 4, 10, 4, 4, 4, 4, 4, 4, 4, 9, 3, 4, 3, 10, 4, 10, 4, 4, 8, 9, 10, 4, 9, 10, 4, 4, 4, 4, 4, 4, 9, 9, 10, 7, 3, 4, 2, 3, 3, 9, 4, 4, 10, 4, 4, 10, 4, 4, 4, 4, 1, 4, 4, 4, 4, 3, 10, 4, 3, 4, 4, 9, 4, 4, 3, 9, 4, 4, 9, 9, 6, 3, 4, 10, 4, 7, 10, 4, 3, 4, 4, 4, 10, 3, 7, 9, 4, 4, 3, 10, 4, 4, 9, 9, 2, 4, 4, 4, 7, 9, 9, 9, 4, 7, 3, 10, 7, 3, 10, 9, 10, 9, 9, 4, 10, 4, 4, 10, 9, 9, 4, 4, 9, 10, 4, 4, 3, 3, 10, 9, 9, 4, 9, 4, 4, 9, 9, 4, 3, 3, 4, 7, 10, 4, 9, 4, 4, 7, 9, 4, 4, 4, 4, 10, 4, 4, 9, 4, 9, 4, 10, 9, 3, 3, 4, 4, 4, 4, 4, 4, 4, 10, 4, 11, 4, 10, 9, 4, 4, 4, 4, 3, 4, 3, 9, 10, 10, 4, 4, 4, 4, 4, 10, 8, 9, 3, 4, 4, 4, 4, 11, 3, 10, 4, 10, 11, 9, 4, 4, 4, 10, 4, 4, 4, 10, 4, 4, 9, 4, 9, 10, 10, 3, 3, 4, 9, 3, 3, 9, 7, 4, 4, 9, 10, 4, 10, 3, 9, 4, 11, 4, 9, 4, 3, 4, 4, 4, 8, 4, 10, 10, 10, 10, 10, 3, 6, 4, 4, 9, 4, 9, 4, 3, 4, 9, 7, 11, 9, 4, 9, 7, 3, 10, 4, 10, 4, 4, 4, 4, 4, 4, 3, 4, 4, 3, 4, 9, 4, 9, 4, 10, 10, 4, 3, 9, 10, 4, 8, 3, 10, 10, 3, 3, 4, 10, 10, 9, 4, 10, 4, 8, 4, 4, 4, 4, 4, 4, 10, 3, 10, 4, 3, 9, 10, 4, 4, 10, 4, 6, 4, 9, 3, 4, 10, 4, 3, 6, 4, 4, 2, 1, 4, 9, 4, 4, 9, 4, 8, 4, 4, 4, 3, 4, 4, 10, 4, 11, 7, 4, 8, 4, 4, 4, 9, 4, 4, 9, 4, 4, 11, 4, 4, 9, 0, 4, 8, 9, 3, 4, 10, 3, 4, 4, 10, 4, 4, 11, 8, 9, 9, 3, 4, 3, 9, 9, 10, 4, 4, 3, 4, 4, 4, 4, 4, 10, 9, 10, 4, 4, 4, 10, 4, 9, 3, 3, 9, 9, 8, 4, 4, 4, 4, 9, 4, 4, 4, 3, 9, 3, 3, 4, 4, 9, 4, 9, 4, 3, 4, 4, 4, 10, 4, 9, 4, 10, 9, 9, 4, 4, 9, 4, 9, 10, 4, 4, 4, 3, 4, 3, 8, 9, 3, 4, 9, 4, 10, 4, 9, 9, 4, 9, 3, 4, 3, 4, 4, 9, 9, 4, 4, 3, 9, 6, 4, 4, 8, 4, 4, 4, 9, 9, 4, 3, 8, 3, 10]
# acts=[7, 9, 4, 11, 5, 6, 0, 8, 3, 2, 7, 0, 1, 0, 2, 10, 8, 2, 4, 2, 6, 1, 2, 6, 7, 3, 1, 8, 2, 7, 11, 1, 2, 1, 6, 11, 5, 6, 3, 11, 7, 3, 7, 0, 7, 3, 0, 7, 10, 11, 11, 7, 5, 2, 2, 11, 1, 0, 5, 7, 2, 2, 2, 2, 6, 2, 1, 2, 10, 2, 1, 8, 5, 6, 1, 1, 6, 10, 5, 11, 6, 7, 1, 9, 3, 2, 3, 1, 7, 11, 2, 11, 8, 3, 1, 2, 0, 6, 2, 2, 5, 1, 11, 6, 2, 1, 8, 9, 4, 9, 7, 0, 3, 5, 6, 2, 0, 3, 2, 9, 1, 2, 7, 5, 0, 11, 6, 11, 5, 8, 0, 1, 1, 11, 6, 2, 11, 11, 6, 6, 2, 8, 11, 7, 5, 3, 3, 7, 5, 6, 4, 1, 2, 5, 7, 1, 3, 1, 6, 4, 6, 1, 3, 7, 8, 1, 1, 1, 5, 2, 11, 5, 8, 5, 6, 10, 1, 7, 2, 2, 6, 7, 0, 0, 5, 2, 0, 8, 4, 1, 10, 7, 1, 6, 2, 3, 7, 6, 6, 10, 0, 6, 11, 6, 11, 8, 6, 6, 0, 8, 2, 2, 2, 5, 6, 6, 3, 1, 7, 11, 3, 3, 8, 11, 8, 10, 0, 1, 2, 10, 2, 3, 5, 1, 6, 8, 9, 1, 5, 6, 7, 3, 2, 5, 11, 2, 7, 11, 8, 1, 11, 11, 2, 3, 6, 2, 8, 1, 5, 11, 7, 6, 1, 4, 3, 3, 8, 6, 6, 2, 1, 2, 6, 5, 6, 7, 6, 4, 8, 8, 6, 8, 6, 11, 7, 6, 2, 2, 8, 7, 1, 0, 8, 2, 2, 8, 9, 3, 7, 6, 1, 0, 11, 11, 5, 7, 3, 0, 8, 7, 8, 2, 7, 5, 2, 7, 9, 4, 3, 6, 6, 7, 5, 11, 7, 8, 2, 0, 5, 1, 1, 5, 1, 6, 2, 7, 11, 6, 4, 6, 2, 6, 8, 2, 1, 3, 8, 1, 2, 7, 2, 2, 1, 4, 11, 10, 1, 1, 3, 5, 6, 10, 1, 7, 6, 11, 7, 5, 6, 2, 3, 1, 8, 4, 6, 1, 7, 3, 6, 11, 7, 0, 6, 8, 5, 11, 2, 11, 6, 8, 6, 7, 3, 1, 10, 4, 10, 2, 10, 6, 6, 8, 7, 8, 6, 7, 0, 2, 6, 1, 7, 6, 1, 2, 7, 7, 2, 7, 2, 2, 0, 1, 11, 7, 3, 2, 0, 7, 0, 2, 2, 8, 7, 8, 1, 5, 8, 10, 1, 5, 2, 1, 7, 11, 0, 6, 2, 1, 11, 11, 1, 1, 5, 8, 10, 2, 2, 1, 0, 4, 2, 6, 0, 7, 1, 7, 7, 8, 2, 2, 10, 3, 1, 8, 6, 2, 1, 7, 7, 3, 2, 7, 4, 3, 0, 8, 10, 5, 1, 3, 3, 0, 5, 8, 11, 8, 8, 1, 2, 1]

# acts=[8, 9, 9, 2, 3, 3, 1, 1, 5, 1, 8, 10, 10, 8, 9, 0, 9, 3, 9, 6, 9, 0, 8, 10, 5, 11, 2, 2, 3, 2, 5, 3, 9, 10, 4, 5, 8, 0, 6, 6, 11, 0, 1, 10, 8, 4, 1, 1, 0, 8, 1, 7, 9, 2, 4, 4, 6, 3, 4, 6, 8, 4, 11, 6, 0, 0, 6, 3, 0, 4, 6, 10, 0, 4, 1, 3, 0, 5, 8, 6, 2, 8, 4, 1, 7, 1, 8, 5, 1, 2, 6, 6, 11, 4, 1, 1, 0, 1, 5, 8, 4, 4, 4, 1, 6, 10, 4, 8, 5, 5, 9, 3, 11, 6, 2, 0, 4, 0, 4, 4, 1, 4, 6, 3, 1, 3, 1, 1, 0, 5, 1, 3, 8, 8, 8, 1, 0, 5, 8, 0, 8, 8, 4, 3, 1, 11, 11, 6, 2, 2, 8, 8, 1, 3, 5, 10, 4, 1, 5, 3, 10, 5, 11, 8, 4, 1, 8, 0, 9, 1, 7, 3, 4, 0, 8, 10, 7, 8, 5, 1, 2, 8, 0, 4, 7, 10, 9, 5, 3, 7, 4, 10, 1, 0, 5, 9, 6, 8, 10, 6, 3, 1, 4, 6, 11, 4, 1, 5, 6, 4, 4, 8, 4, 4, 0, 0, 9, 4, 8, 4, 4, 10, 3, 10, 8, 4, 4, 6, 0, 6, 4, 0, 5, 10, 1, 6, 4, 3, 3, 0, 6, 1, 1, 0, 11, 8, 7, 5, 9, 6, 5, 2, 2, 1, 2, 5, 0, 4, 9, 3, 6, 6, 4, 1, 4, 4, 5, 2, 11, 9, 10, 9, 4, 9, 2, 1, 0, 10, 0, 10, 3, 5, 0, 11, 3, 10, 6, 1, 6, 9, 3, 4, 8, 10, 1, 10, 1, 1, 0, 4, 1, 1, 4, 10, 4, 9, 0, 5, 6, 4, 10, 8, 5, 5, 3, 1, 0, 11, 11, 5, 0, 3, 4, 0, 1, 5, 8, 9, 1, 6, 7, 1, 2, 11, 8, 11, 8, 8, 3, 8, 5, 1, 10, 0, 5, 5, 1, 8, 1, 9, 9, 8, 9, 9, 8, 4, 1, 5, 4, 0, 0, 4, 1, 1, 5, 4, 11, 5, 8, 9, 6, 0, 3, 8, 4, 11, 1, 4, 7, 2, 4, 4, 5, 4, 0, 0, 4, 8, 9, 1, 1, 10, 6, 6, 7, 0, 6, 4, 6, 3, 8, 4, 2, 0, 5, 4, 8, 8, 6, 0, 1, 1, 6, 11, 11, 2, 8, 0, 3, 5, 8, 4, 8, 9, 3, 6, 4, 8, 9, 1, 3, 8, 0, 4, 4, 1, 6, 0, 5, 6, 5, 4, 8, 11, 4, 1, 7, 5, 8, 1, 1, 4, 3, 7, 4, 5, 4, 2, 0, 4, 0, 4, 5, 0, 6, 3, 7, 1, 10, 11, 8, 4, 5, 6, 5, 4, 2, 1, 3, 5, 4, 9, 11, 5, 1, 6, 2, 4, 0, 11, 7, 2, 6, 10, 10, 0, 9, 1, 9, 5]
acts=[3, 0, 11, 3, 10, 11, 0, 5, 2, 0, 1, 2, 8, 5, 8, 8, 7, 0, 3, 7, 6, 11, 5, 0, 4, 9, 8, 11, 4, 6, 8, 7, 6, 11, 9, 1, 11, 1, 9, 1, 11, 3, 0, 8, 5, 1, 6, 3, 5, 3, 8, 9, 6, 3, 2, 2, 10, 11, 6, 10, 7, 1, 7, 0, 4, 8, 5, 7, 5, 8, 10, 5, 4, 11, 1, 0, 9, 8, 6, 2, 0, 8, 11, 9, 7, 4, 8, 6, 7, 9, 7, 10, 6, 7, 8, 9, 10, 4, 0, 2, 5, 11, 9, 1, 4, 5, 8, 4, 1, 10, 9, 2, 1, 0, 2, 7, 9, 3, 6, 7, 0, 6, 10, 6, 4, 6, 10, 8, 7, 1, 11, 0, 2, 9, 1, 4, 2, 11, 1, 2, 2, 0, 11, 11, 8, 4, 11, 0, 3, 0, 7, 9, 8, 11, 7, 9, 5, 1, 3, 7, 6, 4, 2, 4, 9, 4, 7, 2, 7, 8, 9, 9, 7, 7, 9, 11, 0, 1, 8, 9, 3, 9, 6, 3, 9, 8, 1, 9, 11, 11, 6, 11, 8, 5, 0, 11, 2, 9, 2, 4, 11, 6, 2, 0, 7, 10, 3, 5, 9, 11, 11, 4, 11, 7, 6, 8, 0, 5, 10, 9, 2, 10, 11, 3, 3, 8, 5, 1, 11, 6, 3, 11, 3, 8, 0, 9, 8, 1, 6, 9, 2, 9, 7, 10, 8, 9, 10, 2, 2, 4, 9, 8, 9, 4, 0, 7, 11, 1, 3, 5, 4, 0, 3, 11, 10, 5, 5, 11, 1, 6, 0, 5, 5, 8, 1, 6, 4, 9, 5, 3, 6, 10, 4, 8, 3, 10, 8, 8, 2, 9, 10, 0, 7, 11, 10, 11, 3, 6, 6, 10, 8, 2, 10, 2, 10, 0, 8, 3, 4, 10, 0, 10, 10, 11, 6, 0, 2, 9, 11, 5, 8, 4, 1, 4, 5, 0, 5, 0, 4, 1, 7, 7, 2, 4, 9, 6, 6, 0, 9, 9, 11, 10, 0, 5, 6, 8, 2, 0, 1, 5, 2, 11, 9, 11, 10, 8, 0, 1, 9, 8, 8, 11, 9, 11, 11, 10, 1, 10, 2, 2, 4, 8, 8, 9, 7, 9, 6, 9, 10, 3, 9, 3, 2, 7, 6, 2, 6, 7, 8, 10, 7, 6, 5, 5, 5, 0, 4, 10, 4, 5, 1, 3, 9, 8, 1, 11, 1, 6, 4, 11, 6, 5, 9, 7, 1, 5, 8, 9, 3, 7, 0, 0, 4, 3, 11, 10, 4, 8, 0, 2, 8, 3, 10, 11, 8, 1, 8, 5, 3, 10, 7, 4, 9, 0, 11, 0, 9, 8, 9, 9, 9, 10, 10, 2, 9, 4, 8, 3, 10, 5, 2, 0, 3, 11, 11, 0, 2, 3, 9, 1, 0, 1, 9, 6, 9, 4, 10, 5, 11, 11, 6, 8, 7, 9, 9, 8, 5, 10, 0, 5, 9, 4, 3, 8, 11, 2, 10, 6, 4, 10]



# Replay the stored action list; once it is exhausted keep sending a fixed
# fallback action until the environment reports done.
fallback_action = 10
while True:
    # Policy-driven alternative kept for reference:
    # state = torch.from_numpy(state.copy()).type(torch.float)#.to(device)
    # value, logit, latent = model((state, latent), icm = False)
    # prob = F.softmax(logit, dim=1) #from train
    # action = prob.multinomial(1).data
    # state, reward, done, _ = env.step(action.item())
    try:
        action = int(acts[x])
    except IndexError:
        # Ran past the end of acts. Only this case falls back; a bare except
        # would also swallow KeyboardInterrupt and genuine errors.
        action = fallback_action
    # action = env.action_space.sample()
    state, reward, done, info = env.step(action)
    x+=1
    if done: break
env.play()
print(x)



8020
