### 1. Create Vectorized Environment

In [1]:
%load_ext autoreload
%autoreload 2

import gym
import torch
import numpy as np

from parallelEnv import parallelEnv
from model import Policy
from ppo import ppo_agent
from storage import RolloutStorage
from gym.vector import SyncVectorEnv

# Set to False to skip the space-inspection printout below.
debug = True

print('gym version: ', gym.__version__)
print('torch version: ', torch.__version__)

# --- Run configuration -------------------------------------------------------
seed = 0
gamma = 0.99          # not used in this cell; presumably the discount factor for later cells
num_processes = 16    # number of environment copies passed to parallelEnv
device = torch.device("cpu")
print('device: ', device)

# Seed the global RNGs *before* anything else runs, so that any use of the
# global numpy / torch generators during environment construction is covered
# by the seed as well (previously the seeds were only set at the very end of
# this cell, after the environments had already been created).
# torch.cuda.manual_seed is safe to call on a CPU-only machine; it is ignored
# when CUDA is unavailable.
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
np.random.seed(seed)

# --- Vectorized environment --------------------------------------------------
envs = parallelEnv('Group24M4-v0', n=num_processes, seed=seed)

max_steps = envs.max_steps
print('max_steps: ', max_steps)

if debug:
    # Draw one sample from each space purely for inspection.
    action = envs.action_space.sample()
    observation = envs.observation_space.sample()
    # NOTE: despite its name, ac_size holds the action *space* object itself,
    # whereas ob_size holds the observation shape tuple.
    ac_size = envs.action_space
    ob_size = envs.observation_space.shape

    print("action", action)
    print("observation", observation)
    print("ac_size", ac_size)
    print("ob_size", ob_size)

gym version:  0.26.2
torch version:  2.0.0+cu117
device:  cpu


  logger.warn(f"Overriding environment {new_spec.id} already in registry.")


max_steps:  1600
action [-0.7477503  -0.7319027   0.6462268  -0.80477464]
observation [ 1.0646296e+00 -3.0330068e-01  1.3867885e+00 -2.0877299e+00
  7.2955656e+04  8.7424773e+04  1.8948064e+00 -4.9701424e+00
 -3.5884371e+00  1.5295662e-02 -2.2771413e+00 -4.4623909e+00
 -4.1829042e+00  4.7861049e-01  1.4865826e-01  6.6940141e-01
  4.4594008e-02 -4.4420522e-01 -3.1527346e-01  1.4136748e-01
  1.3274233e-01  9.2385018e-01  8.2369620e-01  2.0529243e-01
  6.7419827e-02  7.5690407e-01 -4.4302672e-01  9.9018633e-01
  4.4122100e-01  4.7702712e-01  4.5114204e-01  6.0846961e-01
  3.9013842e-01 -2.6143578e-01  4.2788804e-01]
ac_size Box(-1.0, 1.0, (4,), float32)
ob_size (35,)


### 2. Instantiate Model, Agent and Storage

In [2]:
# Actor-critic network built from the environment's observation shape and
# action space, with the recurrent base enabled.
policy = Policy(
    envs.observation_space.shape,
    envs.action_space,
    base_kwargs={'recurrent': True},
)
policy.to(device)

# PPO agent wrapping the policy.
agent = ppo_agent(
    actor_critic=policy,
    ppo_epoch=16,
    num_mini_batch=16,
    lr=0.001,
    eps=1e-5,
    max_grad_norm=0.5,
)

# Rollout buffer sized for max_steps steps across all parallel environments.
rollouts = RolloutStorage(
    num_steps=max_steps,
    num_processes=num_processes,
    obs_shape=envs.observation_space.shape,
    action_space=envs.action_space,
    recurrent_hidden_state_size=policy.recurrent_hidden_state_size,
)

# Reset the environments and convert the initial observations to a tensor.
obs = envs.reset()
print('type obs: ', type(obs), ', shape obs: ', obs.shape)
obs_t = torch.tensor(obs)
print('type obs_t: ', type(obs_t), ', shape obs_t: ', obs_t.shape)

# Store the initial observations in slot 0 of the buffer, then move the
# storage to the target device.
rollouts.obs[0].copy_(obs_t)
rollouts.to(device)


type obs:  <class 'numpy.ndarray'> , shape obs:  (16, 35)
type obs_t:  <class 'torch.Tensor'> , shape obs_t:  torch.Size([16, 35])
