## Installations and imports

In [1]:
%%capture
# System setup: register the openjdk-r PPA, purge any installed OpenJDK
# packages, then install OpenJDK 8.
# NOTE(review): the apt-get lines below have no `-y` (unlike add-apt-repository);
# with %%capture hiding output, a confirmation prompt would be invisible — confirm
# this runs unattended on the target runtime.
!sudo add-apt-repository -y ppa:openjdk-r/ppa
!sudo apt-get purge openjdk-*
!sudo apt-get install openjdk-8-jdk
# Virtual display / rendering packages (Xvfb, Xephyr, VNC server, OpenGL bindings, ffmpeg).
!sudo apt-get install xvfb xserver-xephyr vnc4server python-opengl ffmpeg

In [2]:
%%capture
# Pin the packaging tools and gym to exact versions before MineRL is installed
# in the next cell.
# NOTE(review): presumably these pins are what lets gym 0.19.0 / minerl 0.4.4
# build cleanly — confirm before bumping any of them.
!pip install --upgrade 'setuptools==57.5.0'
!pip install 'gym==0.19.0'
!pip install --upgrade wheel==0.38.4

In [3]:
%%capture
# Python dependencies: MineRL (pinned to 0.4.4), a virtual-display wrapper,
# scikit-learn, and the Colab video recorder used by main().
# NOTE(review): pyvirtualdisplay, scikit-learn and colabgymrender are unpinned,
# so a fresh run may pick up different versions than the recorded outputs used.
!pip install --upgrade 'minerl==0.4.4'
!pip install pyvirtualdisplay
# !pip install pytorch
!pip install scikit-learn
!pip install -U colabgymrender

In [4]:
%%capture
# X server and window manager packages.
!apt-get install xorg openbox
# NOTE(review): xvfb is already installed by the first setup cell; this line
# looks redundant — confirm before removing.
!apt-get install xvfb

In [5]:
import random
import numpy as np
import pandas as pd
import torch as th
from torch import nn
import gym
import minerl
import torch
from tqdm.notebook import tqdm
from colabgymrender.recorder import Recorder
from pyvirtualdisplay import Display
from sklearn.cluster import KMeans
import logging
from sklearn.cluster import DBSCAN
from sklearn.metrics import pairwise_distances
import matplotlib.pyplot as plt
import json
from time import time

logging.disable(logging.ERROR) # reduce clutter, remove if something doesn't work to see the error logs.



In [6]:
# Download the MineRLObtainIronPickaxe-v0 demonstration dataset into ./data
# (the recorded run fetched a ~2.8 GB archive). The trailing `;` suppresses the cell's return value.
minerl.data.download(directory='data', environment='MineRLObtainIronPickaxe-v0');

Download: https://minerl.s3.amazonaws.com/v4/MineRLObtainIronPickaxe-v0.tar: 100%|██████████| 2802.0/2801.89952 [00:47<00:00, 58.60MB/s]


In [7]:
# Download the MineRLTreechop-v0 demonstration dataset into ./data
# (the recorded run fetched a ~1.5 GB archive). The trailing `;` suppresses the cell's return value.
minerl.data.download(directory='data', environment='MineRLTreechop-v0');

Download: https://minerl.s3.amazonaws.com/v4/MineRLTreechop-v0.tar: 100%|██████████| 1511.0/1510.73792 [00:25<00:00, 59.36MB/s]


In [8]:
# Open the dataset from the same directory the download cells wrote to.
# Passing data_dir explicitly avoids depending on the MINERL_DATA_ROOT
# environment variable, which this notebook never sets.
data = minerl.data.make("MineRLObtainIronPickaxe-v0", data_dir='data')

In [9]:
# run only once!!!
# Start a hidden (visible=0) 400x300 virtual display via pyvirtualdisplay.
# NOTE(review): presumably needed so the Minecraft client can render on a
# headless machine — confirm. To restart it, use the commented-out cell in the
# "Fully Scripted" section, which stops the display before creating a new one.
display = Display(visible=0, size=(400, 300))
display.start();

## Fully Scripted


In [None]:
# run this code ONLY if you want to RERUN the below code!

# display.stop()
# !rm -rf /content/logs
# !rm -rf /content/video
# display = Display(visible=0, size=(400, 300))
# display.start();

In [11]:
# Parameters:
TEST_EPISODES = 10  # number of episodes to test the agent for.
# Cap that main() applies when slicing the scripted action lists for each phase.
MAX_TEST_EPISODE_LEN = 15000  # 18k is the default for MineRLObtainDiamond.

def str_to_act(env, actions):
    """Convert a whitespace-separated action string into an action dict.

    The result starts as ``env.action_space.noop()`` and each token of
    ``actions`` then overrides one entry:

    * ``name``              -> ``act[name] = 1``
    * ``name:value``        -> ``act[name] = value`` (kept as a string)
    * ``camera:[a,b]``      -> ``act['camera']`` = the parsed Python literal

    Args:
        env: object exposing ``action_space.noop()`` that returns a mutable
            mapping of action names to values.
        actions: action string such as ``'attack sprint forward'`` or
            ``'camera:[10,0]'``. An empty string yields the no-op action.

    Returns:
        The no-op mapping with the requested entries overridden.

    Raises:
        ValueError / SyntaxError: if the camera value is not a plain Python
            literal.
    """
    import ast  # local import so the function does not rely on another cell

    act = env.action_space.noop()
    for token in actions.split():
        # Split on the first ':' only, so a value may itself contain ':'.
        key, sep, value = token.partition(':')
        if not sep:
            act[token] = 1
        elif key == 'camera':
            # literal_eval only accepts literals; unlike eval() it cannot run
            # arbitrary code embedded in the action string.
            act[key] = ast.literal_eval(value)
        else:
            act[key] = value
    return act


def search_sequence():
    """Build the scripted exploration sequence used by the search phases.

    The sequence waits 100 steps, pitches the camera down by 3 x 10 degrees,
    and then repeats 100 times: dig forward (100 steps), jump, dig, jump, dig,
    followed by a 90-degree turn (9 x 10 degrees) whose direction is chosen by
    one ``random.random()`` draw per repetition.

    Returns:
        list[str]: one action string per environment step.
    """
    dig_forward = ['attack sprint forward'] * 100  # dig forward for 5 secs
    hop = ['jump']
    one_leg = dig_forward + hop + dig_forward + hop + dig_forward

    # wait 5 secs, then look down 30 degrees
    steps = [''] * 100 + ['camera:[10,0]'] * 3

    for _ in range(100):
        steps.extend(one_leg)
        # turn 90 degrees left or right with equal probability
        turn = 'camera:[0,-10]' if random.random() < 0.5 else 'camera:[0,10]'
        steps.extend([turn] * 9)
    return steps


def get_action_sequence_log():
    """Scripted actions that spend the collected logs and then dig down.

    The script crafts planks, sticks and a crafting table, places the table,
    crafts and equips a wooden pickaxe, and finally holds ``attack`` for 600
    steps.

    Returns:
        list[str]: one action string per environment step, in playback order.
    """
    # (action, number of consecutive steps) in playback order.
    plan = (
        ('', 100),
        # make planks, sticks and a crafting table:
        ('craft:planks', 4),
        ('craft:stick', 2),
        ('craft:crafting_table', 1),
        ('camera:[10,0]', 18),
        ('attack', 20),
        ('', 10),
        ('jump', 1),
        ('', 5),
        ('place:crafting_table', 1),
        ('', 10),
        # craft and equip the wooden pickaxe:
        ('camera:[-1,0]', 1),
        ('nearbyCraft:wooden_pickaxe', 1),
        ('camera:[1,0]', 1),
        ('', 10),
        ('equip:wooden_pickaxe', 1),
        ('', 10),
        # dig down:
        ('attack', 600),
        ('', 10),
    )
    return [step for step, repeats in plan for _ in range(repeats)]

def get_action_sequence_cub():
    """Scripted actions that spend the collected cobblestone.

    The script crafts and places a crafting table, crafts a stone pickaxe and
    a furnace next to it, and equips the stone pickaxe.

    Returns:
        list[str]: one action string per environment step, in playback order.
    """
    idle = ['']

    # Pitch the camera with 18 camera steps, then attack for 20 steps.
    clear_spot = ['camera:[10,0]'] * 18 + ['attack'] * 20
    place_table = idle * 10 + ['jump'] + idle * 5 + ['place:crafting_table'] + idle * 10
    craft_tools = [
        'camera:[-1,0]',
        'nearbyCraft:stone_pickaxe',
        'nearbyCraft:furnace',
        'camera:[1,0]',
    ]
    equip_pickaxe = idle * 10 + ['equip:stone_pickaxe'] + idle * 10

    return (
        idle * 100
        + ['craft:crafting_table']
        + clear_spot
        + place_table
        + craft_tools
        + equip_pickaxe
    )

def get_action_sequence_ir_c():
    """Scripted actions that spend the collected iron ore and coal.

    The script places a furnace, issues three iron-ingot smelt actions,
    places a crafting table and crafts an iron pickaxe.

    Returns:
        list[str]: one action string per environment step, in playback order.
    """
    pause = [''] * 10

    sequence = [''] * 100
    sequence.append('place:furnace')
    sequence.extend(pause)

    sequence.append('camera:[-1,0]')
    sequence.extend('nearbySmelt:iron_ingot' for _ in range(3))
    sequence.append('camera:[1,0]')
    sequence.extend(pause)

    sequence.append('place:crafting_table')
    sequence.extend(pause)

    sequence.append('camera:[-1,0]')
    sequence.append('nearbyCraft:iron_pickaxe')
    sequence.extend(pause)

    return sequence

def _run_actions(env, actions, episode, stop_when=None, on_reward=None):
    """Play scripted ``actions`` in ``env``, updating ``episode`` in place.

    Args:
        env: environment whose ``step`` returns ``(obs, reward, done, info)``.
        actions: list of action strings understood by ``str_to_act``.
        episode: dict with keys ``'obs'``, ``'done'``, ``'steps'``,
            ``'total_reward'`` and ``'reward_at'``; all are updated here.
        stop_when: optional predicate called with ``obs['inventory']`` after
            every step; the phase ends as soon as it returns true.
        on_reward: optional callback called with each positive reward.

    Returns:
        bool: True if ``stop_when`` ended the phase, False otherwise
        (actions exhausted, step budget exhausted, or the episode finished).
    """
    # The budget is whatever is left of the whole episode, so the phases
    # together never exceed MAX_TEST_EPISODE_LEN steps.
    budget = max(MAX_TEST_EPISODE_LEN - episode['steps'], 0)
    for action in actions[:budget]:
        obs, reward, done, _ = env.step(str_to_act(env, action))
        episode['obs'] = obs
        episode['done'] = done
        episode['total_reward'] += reward
        episode['steps'] += 1
        if reward > 0:
            if on_reward is not None:
                on_reward(reward)
            # float(): the reward may be a NumPy scalar, which json.dump
            # cannot serialize — TODO confirm the env's reward type.
            episode['reward_at'].append((episode['steps'], float(reward)))
        if stop_when is not None and stop_when(obs['inventory']):
            return True
        if done:
            break
    return False


def main():
    """Run the fully scripted agent for ``TEST_EPISODES`` episodes.

    Each episode alternates "search" phases (dig around until the inventory
    holds enough of a resource) with "use" phases (a fixed crafting script):

    1. search until 3 logs             -> ``get_action_sequence_log``
    2. search until 11 cobblestone     -> ``get_action_sequence_cub``
    3. search until 3 iron ore + 3 coal -> ``get_action_sequence_ir_c``

    A phase is skipped once the environment reports ``done``. Per-episode
    runtime, total reward and the ``(step, reward)`` pairs of every positive
    reward are written to ``stats_full_scripted_roni.json``; videos are
    recorded to ``./video``.
    """
    env = gym.make('MineRLObtainIronPickaxe-v0')
    env._max_episode_steps = MAX_TEST_EPISODE_LEN
    env = Recorder(env, './video', fps=60) ### you can remove this line if you dont want to record the shorts videos. ###

    stats = {'runtime': [], 'reward': [], 'reward_at': []}

    try:
        for episode_idx in range(TEST_EPISODES):
            start = time()
            env.reset()
            episode = {
                'obs': None,
                'done': False,
                'steps': 0,
                'total_reward': 0,
                'reward_at': [],
            }

            # One random search script per episode, replayed by every search phase.
            action_sequence_search = search_sequence()

            # scripted part to get some logs:
            _run_actions(env, action_sequence_search, episode,
                         stop_when=lambda inv: inv['log'] >= 3)

            # print logs amount
            logs = episode['obs']['inventory']['log']
            if logs >= 1:
                print('GOT', logs, 'LOGS')

            # scripted part to use the logs:
            if not episode['done']:
                _run_actions(env, get_action_sequence_log(), episode)

            # scripted part to get some cobblestones:
            if not episode['done']:
                if _run_actions(env, action_sequence_search, episode,
                                stop_when=lambda inv: inv['cobblestone'] >= 11):
                    print("GOT", episode['obs']['inventory']['cobblestone'], "cobblestone")

            # scripted part to use the cobblestones:
            if not episode['done']:
                _run_actions(env, get_action_sequence_cub(), episode)

            # scripted part to get some iron_ores & coals:
            if not episode['done']:
                if _run_actions(env, action_sequence_search, episode,
                                stop_when=lambda inv: inv['iron_ore'] >= 3 and inv['coal'] >= 3):
                    print("GOT", episode['obs']['inventory']['iron_ore'], "iron_ores")
                    print("GOT", episode['obs']['inventory']['coal'], "coals")

            print("got", episode['obs']['inventory']['iron_ore'], "iron_ores")
            print("got", episode['obs']['inventory']['coal'], "coals")

            # scripted part to use the iron_ores & coals:
            if not episode['done']:
                # The script must be *called*; slicing the bare function
                # object would raise TypeError.
                _run_actions(env, get_action_sequence_ir_c(), episode,
                             on_reward=lambda _reward: print('WOW'))

            total_reward = episode['total_reward']
            steps = episode['steps']
            print(f'Episode #{episode_idx+1} reward: {total_reward}\t\t episode length: {steps}')

            stats['runtime'].append(time() - start)
            stats['reward'].append(float(total_reward))
            stats['reward_at'].append(episode['reward_at'])

        with open('stats_full_scripted_roni.json', 'w') as outfile:
            json.dump(stats, outfile)
    finally:
        # Always release the environment, even if an episode fails.
        env.close()


# Entry point: inside a notebook kernel __name__ is '__main__', so running
# this cell starts the full evaluation (TEST_EPISODES episodes).
if __name__ == '__main__':
    main()


Episode #1 reward: 0.0		 episode length: 376
GOT 3 LOGS
GOT 16 cobblestone
We at 163.0 REWARD
Episode #2 reward: 163.0		 episode length: 5961
Episode #3 reward: 0.0		 episode length: 4054
Episode #4 reward: 0.0		 episode length: 5895
Episode #5 reward: 0.0		 episode length: 5872
Episode #6 reward: 0.0		 episode length: 5911
Episode #7 reward: 0.0		 episode length: 2683
Episode #8 reward: 1.0		 episode length: 1853
Episode #9 reward: 0.0		 episode length: 849
GOT 3 LOGS
GOT 11 cobblestone
We at 99.0 REWARD
Episode #10 reward: 99.0		 episode length: 5841
