In [1]:
import os
import time
import csv
from abc import ABC, abstractmethod
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.distributions import Categorical
from torch.utils.data.sampler import BatchSampler, SubsetRandomSampler
from collections import deque

import gym
from gym3 import ToBaselinesVecEnv
from procgen import ProcgenEnv, ProcgenGym3Env
from procgen_tools import visualization, maze
from procgen_tools.imports import *
import matplotlib.pyplot as plt

import imageio


building procgen...done


In [2]:
# Load models
# `load_model` is not imported by name in this notebook, so it presumably comes
# from the star import of `procgen_tools.imports`. Its result is unpacked into
# two names: `policy` (called on observations in later cells) and `hook`.
# NOTE(review): the meaning of the two arguments ('15', 15) is not visible
# here — confirm against `load_model`'s definition.
policy, hook = load_model('15', 15)

In [4]:
# Record behavior of model as gifs
#
# For each maze variant: reset the environment, roll the policy forward for at
# most MAX_RECORDED_STEPS steps (stopping early when the env reports `done`),
# collect one rendered frame per step, and write the frames out as a GIF under
# `recordings/`.

# Upper bound on the number of steps (and therefore frames) recorded per env.
MAX_RECORDED_STEPS = 100

# The GIFs are written into this directory; create it up front so that
# `imageio.mimwrite` does not fail on a fresh checkout.
os.makedirs('recordings', exist_ok=True)

# Load environments
# NOTE(review): the positional arguments (1, 0, 0) are presumably
# (num_envs, start_level, num_levels) — confirm against maze.create_venv.
venv_cheese = maze.create_venv(1, 0, 0, env_name='maze')
venv_redgem = maze.create_venv(1, 0, 0, env_name='maze_redgem_yellowstar')
venv_yellowstar = maze.create_venv(1, 0, 0, env_name='maze_yellowstar_redgem')
venv_test = maze.create_venv(1, 0, 0, env_name='maze_test')

venvs = [venv_cheese, venv_redgem, venv_yellowstar, venv_test]
venvs_names = ['cheese', 'redgem', 'yellowstar', 'test']

for i, (venv, venv_name) in enumerate(zip(venvs, venvs_names)):

    frames = []
    observation = venv.reset()
    observation = torch.tensor(observation, dtype=torch.float32)

    for j in range(MAX_RECORDED_STEPS):
        # Capture the frame *before* acting, so frame k shows the state the
        # k-th action was chosen from.
        frames.append(venv.render(mode='rgb_array'))

        # Pure inference: no gradients are needed, so skip autograd bookkeeping.
        with torch.no_grad():
            action_probs, _ = policy(observation)
        action = action_probs.sample().item()

        # The vectorised env is stepped with an array holding one action.
        action_np = np.array([action], dtype=np.int32)
        observation, reward, done, info = venv.step(action_np)
        observation = torch.tensor(observation, dtype=torch.float32)

        # Assumes `done` holds a single flag (one env per venv), so its
        # truthiness is well defined — TODO confirm.
        if done:
            break

    # Note: this line is also printed when the step cap was hit without the
    # env reporting `done`; in that case j + 1 == MAX_RECORDED_STEPS.
    print(f"Env: {venv_name}. Episode finished after {j+1} timesteps")

    imageio.mimwrite(f'recordings/output_{venv_name}.gif', frames, format='.gif')


Env: cheese. Episode finished after 18 timesteps
Env: redgem. Episode finished after 100 timesteps
Env: yellowstar. Episode finished after 100 timesteps
Env: test. Episode finished after 81 timesteps


In [12]:
# Calculate proportion of episodes that reach the goal, reach the variant goal, or neither
# Could parallelize with venv and summing done to keep track of num_episodes, and counting reward types at each step.


def _outcome_summary(rewards, num_episodes):
    """Return the one-line progress report for the episodes finished so far.

    Args:
        rewards: list with one entry per finished episode, holding the reward
            received on the step that ended it.
        num_episodes: number of finished episodes; must be > 0 because it is
            used as the denominator.

    Returns:
        A string giving the fraction of episodes whose final reward was 10
        (reported as "Reached goal"), -10 ("Reached goal variant") and
        0 ("Neither").
    """
    return f"Num episodes = {num_episodes}, Reached goal = {rewards.count(10)/num_episodes}, Reached goal variant = {rewards.count(-10)/num_episodes}, Neither = {rewards.count(0)/num_episodes}"


# Load model
policy, hook = load_model('15', 15)

# Load environments
venv = maze.create_venv(1, 0, 0, env_name='maze_test')

# Parameters
total_episodes = 100   # number of finished episodes to evaluate
report_every = 10      # print a running summary every this many episodes
num_episodes = 0
counter = 0            # steps taken in the current episode (not read in this cell)
rewards = []
observation = venv.reset()
observation = torch.tensor(observation, dtype=torch.float32)

while num_episodes < total_episodes:

    # Pure inference: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        action_probs, _ = policy(observation)
    action = action_probs.sample().item()
    action_np = np.array([action], dtype=np.int32)
    observation, reward, done, info = venv.step(action_np)
    observation = torch.tensor(observation, dtype=torch.float32)

    counter += 1

    # There is no explicit reset here: the loop keeps stepping the same venv
    # after `done`, so it presumably auto-resets — TODO confirm.
    if done:
        num_episodes += 1
        # The reward on the terminating step is what classifies the episode.
        rewards.append(reward.item())
        if num_episodes % report_every == 0:
            print(_outcome_summary(rewards, num_episodes))
        counter = 0

# Final summary; skipped when the last in-loop report already covered every
# episode, so the same line is not printed twice.
if num_episodes % report_every != 0:
    print(_outcome_summary(rewards, num_episodes))


Num episodes = 10, Reached goal = 0.6, Reached goal variant = 0.4, Neither = 0.0
Num episodes = 20, Reached goal = 0.5, Reached goal variant = 0.5, Neither = 0.0
Num episodes = 30, Reached goal = 0.43333333333333335, Reached goal variant = 0.5666666666666667, Neither = 0.0
Num episodes = 40, Reached goal = 0.425, Reached goal variant = 0.55, Neither = 0.025
Num episodes = 50, Reached goal = 0.4, Reached goal variant = 0.56, Neither = 0.04
Num episodes = 60, Reached goal = 0.38333333333333336, Reached goal variant = 0.5666666666666667, Neither = 0.05
Num episodes = 70, Reached goal = 0.38571428571428573, Reached goal variant = 0.5428571428571428, Neither = 0.07142857142857142
Num episodes = 80, Reached goal = 0.3625, Reached goal variant = 0.55, Neither = 0.0875
Num episodes = 90, Reached goal = 0.3333333333333333, Reached goal variant = 0.5888888888888889, Neither = 0.07777777777777778
Num episodes = 100, Reached goal = 0.32, Reached goal variant = 0.6, Neither = 0.08
Num episodes = 11

KeyboardInterrupt: 