In [1]:
from __future__ import division
import argparse
import os
import sys
import numpy as np
import torch

from agent import Agent
from minecraft import DummyMinecraft, Env, test_policy
from dataset import Dataset, Transition

import pickle
import time
from os.path import join as p_join
from os.path import exists as p_exists

from data_manager import StateManager, ActionManager

from get_dataset import put_data_into_dataset

import minerl
import gym



In [2]:
# Prefer the SummaryWriter that ships with PyTorch; if that module cannot be
# imported, fall back to the SummaryWriter from the tensorboardX package.
try:
    from torch.utils.tensorboard import SummaryWriter
except ModuleNotFoundError:
    from tensorboardX import SummaryWriter

In [3]:
def str2bool(v):
    """Convert a command-line style truthy/falsy token into a ``bool``.

    Args:
        v: Either a real ``bool`` (returned unchanged) or a string. Accepted
            strings, compared case-insensitively and ignoring surrounding
            whitespace, are ``yes/true/t/y/1`` and ``no/false/f/n/0``.

    Returns:
        ``True`` or ``False``.

    Raises:
        argparse.ArgumentTypeError: if ``v`` is neither a ``bool`` nor one of
            the accepted strings.
    """
    # Already a boolean (e.g. a default value passed straight through).
    if isinstance(v, bool):
        return v

    # Anything that is not text cannot be one of the accepted tokens; report it
    # with the documented error type instead of failing on ``.lower()``.
    if not isinstance(v, str):
        raise argparse.ArgumentTypeError('Boolean value expected.')

    token = v.strip().lower()
    if token in ('yes', 'true', 't', 'y', '1'):
        return True
    if token in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')

In [4]:
# ---- Run configuration (part 1): paths, seeding, optimiser and network ----

# Directory passed to SummaryWriter and used for the status.txt marker file.
OUTPUT_DIR = '/home/ankitagarg/minerl/minerl_imitation_learning/test_output/'
# NOTE(review): not referenced by any visible cell; presumably the MineRL data
# root -- confirm before removing.
DATASET_DIR = '/home/ankitagarg/minerl/data/'

enable_cudnn = True  # assigned to torch.backends.cudnn.enabled in the setup cell
train = True  # NOTE(review): not read by any visible cell -- confirm it is still needed
c_action_magnitude = 22.5  # magnitude of discretized camera action; passed to ActionManager
seed = 123  # seeds NumPy's global RNG; the torch seeds are then drawn from that RNG
scale_rewards = True  # NOTE(review): not read by any visible cell

# The values below are forwarded positionally to Agent(...) in a later cell.
learning_rate = 0.0000625
adam_eps = 1.5e-4

batch_size = 32

# NOTE(review): the next line looks like a leftover from an argparse-based
# version of this code; there is no `parser` object in the visible cells.
# parser.add_argument("--logdir", default=".", type=str, help="used for logging and to save network snapshots")
net = 'deep_resnet'  # forwarded to Agent
hidden_size = 1024  # forwarded to Agent
dataset_path = None  # NOTE(review): not read by any visible cell
               
# ---- Run configuration (part 2): training length, dataset options, evaluation ----

trainsteps = 3000000  # NOTE(review): not read by any visible cell
augment_flip = True  # forwarded to Agent

# NOTE(review): none of the dataset_* / save_dataset_* options below are read by
# the visible cells; presumably they configure dataset construction -- confirm.
dataset_only_successful = False
dataset_use_max_duration_steps = True
dataset_continuous_action_stacking = 3
dataset_max_reward = 256

save_dataset_path = '/home/ankitagarg/minerl/minerl_imitation_learning/data/saved_dataset'
quit_after_saving_dataset = False

dueling = True  # forwarded to Agent

add_treechop_data = False  # NOTE(review): not read by any visible cell

stop_time = None  # NOTE(review): not read by any visible cell
test = False  # checked in the evaluation cell right before test_policy is called

# Evaluation settings: the first two are passed to agent.load, the third to test_policy.
eval_policy_path='/home/ankitagarg/minerl/minerl_imitation_learning/output_2/'
eval_policy_model_id="last"
eval_policy_episodes=100

In [5]:
# Setup: seed the RNGs, require CUDA, and build the state/action managers.
# The statement order matters: NumPy is seeded first, and each torch seed is
# then taken from the next NumPy draw.
np.random.seed(seed)
torch.manual_seed(np.random.randint(1, 10000))

# This notebook only runs on a GPU; fail immediately if CUDA is unavailable.
assert torch.cuda.is_available()
torch.cuda.manual_seed(np.random.randint(1, 10000))
torch.backends.cudnn.enabled = enable_cudnn
device = torch.device('cuda')

print(f"Running on {device}")

# Project helpers from data_manager; both are later handed to Env.
state_manager = StateManager(device)
action_manager = ActionManager(device, c_action_magnitude)

Running on cuda


In [6]:
# Start a hidden (visible=0) 400x300 virtual display before the environment is
# created. NOTE(review): presumably required so the MineRL env can render on a
# headless machine -- confirm.
# NOTE(review): the name `display` shadows IPython's built-in display() helper
# for the rest of the notebook.
from pyvirtualdisplay import Display
display = Display(visible=0, size=(400, 300))
# The trailing ';' suppresses the notebook echo of start()'s return value.
display.start();

In [7]:
# Recorder is only referenced by the commented-out wrapper in the next cell.
from colabgymrender.recorder import Recorder
# Raw gym environment; it is wrapped by the project's Env class in the next cell.
env_ = gym.make('MineRLObtainIronPickaxe-v0')
# The environment is seeded with the literal 0, independently of the `seed` setting.
env_.seed(0)

In [8]:
# Wrap the raw gym env with the project's Env adapter, which receives the
# state and action managers built in the setup cell.
env = Env(env_, state_manager, action_manager)
# Optional video recording wrapper (disabled).
# env = Recorder(env, './video', fps=60)

print("started env")

# The first observation is a pair (img, vec); its shapes are used below to size
# the network. The recorded run printed [1, 3, 64, 64] and [1, 216].
img, vec = env.reset()

print("env reset")

print("img, vec shapes: ", img.shape, vec.shape)

started env
env reset
img, vec shapes:  torch.Size([1, 3, 64, 64]) torch.Size([1, 216])


In [9]:
# Sizes derived from the action manager and from the first observation;
# they parameterise the Agent constructed further down.

# Number of discrete action ids (first entry of the manager's list).
num_actions = action_manager.num_action_ids_list[0]

# Axis 0 of both tensors is skipped; axis 1 is the channel / feature axis.
image_channels = img.shape[1]

vec_size = vec.shape[1]
vec_shape = vec.shape[1:]

# Per-sample image shape as a plain list, with the leading entry forced to int.
img_shape = [
    int(dim) if position == 0 else dim
    for position, dim in enumerate(img.shape[1:])
]

In [10]:
# TensorBoard writer; OUTPUT_DIR is passed as its first (log directory) argument.
writer = SummaryWriter(OUTPUT_DIR)

# status.txt is a small marker file holding the current run state.
with open(p_join(OUTPUT_DIR, "status.txt"), 'w') as status_file:
    status_file.write('running')


def handle_exception(exc_type, exc_value, exc_traceback):
    """Uncaught-exception hook: record the failure, release resources, report.

    Writes ``'error'`` to ``status.txt``, flushes and closes the TensorBoard
    writer, closes the environment, and finally delegates to the interpreter's
    default hook so the traceback is still printed.

    The steps are chained with ``try``/``finally`` so that a failure in one
    cleanup step (for example the status file cannot be written) does not
    prevent the later steps or the default traceback from running. Errors
    raised during cleanup are not swallowed; they propagate after every step
    has been attempted.

    Args:
        exc_type: class of the uncaught exception.
        exc_value: the exception instance.
        exc_traceback: traceback object associated with the exception.

    NOTE(review): under an IPython/Jupyter kernel, exceptions raised in a cell
    are normally reported by the shell itself and may never reach
    ``sys.excepthook`` -- confirm this hook actually fires in the environment
    the notebook is run in.
    """
    try:
        with open(p_join(OUTPUT_DIR, "status.txt"), 'w') as status_file_:
            status_file_.write('error')
    finally:
        try:
            writer.flush()
            writer.close()
        finally:
            try:
                env.close()
            finally:
                # Always hand over to the default hook so the traceback is shown.
                sys.__excepthook__(exc_type, exc_value, exc_traceback)


sys.excepthook = handle_exception

In [11]:
# Build the agent. All arguments are positional, so their order must match
# Agent's signature exactly: sizes first, then the writer, then the network and
# optimiser settings from the configuration cell, and finally the device.
agent = Agent(
    num_actions,
    image_channels,
    vec_size,
    writer,
    net,
    batch_size,
    augment_flip,
    hidden_size,
    dueling,
    learning_rate,
    adam_eps,
    device,
)

In [12]:
# Evaluate a previously saved policy: load the weights, then run test_policy
# for the configured number of episodes.
assert eval_policy_path is not None

agent.load(eval_policy_path, eval_policy_model_id)

print(f"loaded network {eval_policy_path} {eval_policy_model_id}")

# The policy handed to test_policy is the agent's act method.
policy = agent.act

with open(p_join(OUTPUT_DIR, "status.txt"), 'w') as status_file:
    status_file.write('running test_policy')

# In test mode only 2 episodes are run. The override is kept in a separate
# name so the configured eval_policy_episodes is not mutated and re-running
# this cell is idempotent. (Previously the override was written to
# `args.eval_policy_episodes`; no `args` object exists in this notebook, so
# enabling `test` raised NameError.)
num_eval_episodes = 2 if test else eval_policy_episodes

test_policy(writer, env, policy, img, vec, num_eval_episodes)

loaded network /home/ankitagarg/minerl/minerl_imitation_learning/output_2/ last
episode 0
episode reward: 0.0 , episode terminated after 2036 env steps
avg_reward after 1 (out of 100) episodes: 0.0
episode 1
episode reward: 0.0 , episode terminated after 1124 env steps
avg_reward after 2 (out of 100) episodes: 0.0
episode 2
episode reward: 1.0 , episode terminated after 5990 env steps
avg_reward after 3 (out of 100) episodes: 0.3333333333333333
episode 3
episode reward: 0.0 , episode terminated after 5989 env steps
avg_reward after 4 (out of 100) episodes: 0.25
episode 4
episode reward: 1.0 , episode terminated after 5987 env steps
avg_reward after 5 (out of 100) episodes: 0.4
episode 5
episode reward: 0.0 , episode terminated after 5989 env steps
avg_reward after 6 (out of 100) episodes: 0.3333333333333333
episode 6
episode reward: 0.0 , episode terminated after 942 env steps
avg_reward after 7 (out of 100) episodes: 0.2857142857142857
episode 7
episode reward: 7.0 , episode terminate

episode 63
episode reward: 0.0 , episode terminated after 3748 env steps
avg_reward after 64 (out of 100) episodes: 0.4375
episode 64
episode reward: 0.0 , episode terminated after 5984 env steps
avg_reward after 65 (out of 100) episodes: 0.4307692307692308
episode 65
episode reward: 0.0 , episode terminated after 1044 env steps
avg_reward after 66 (out of 100) episodes: 0.42424242424242425
episode 66
episode reward: 0.0 , episode terminated after 921 env steps
avg_reward after 67 (out of 100) episodes: 0.417910447761194
episode 67
episode reward: 0.0 , episode terminated after 5987 env steps
avg_reward after 68 (out of 100) episodes: 0.4117647058823529
episode 68
episode reward: 0.0 , episode terminated after 5982 env steps
avg_reward after 69 (out of 100) episodes: 0.4057971014492754
episode 69
episode reward: 0.0 , episode terminated after 875 env steps
avg_reward after 70 (out of 100) episodes: 0.4
episode 70
episode reward: 1.0 , episode terminated after 5992 env steps
avg_reward 

In [14]:
# env.release()