# Setup

In [None]:
%%capture
# System dependencies: OpenJDK 8 plus virtual-display / rendering packages.
# Every apt-get call passes -y: this cell's output is captured, so a
# confirmation prompt would be invisible and the cell would stall or abort.
!sudo add-apt-repository -y ppa:openjdk-r/ppa
# The pattern is quoted so apt-get receives it literally instead of the shell
# expanding it against files in the working directory.
!sudo apt-get purge -y 'openjdk-*'
!sudo apt-get install -y openjdk-8-jdk
!sudo apt-get install -y xvfb xserver-xephyr vnc4server python-opengl ffmpeg

In [None]:
%%capture
# %pip (rather than !pip) installs into the environment of the running kernel.
# NOTE(review): setuptools is presumably pinned so that gym 0.19.0 can be built
# from its source distribution - confirm before changing either version.
%pip install --upgrade 'setuptools==57.5.0'
%pip install 'gym==0.19.0'


In [None]:
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install --upgrade wheel==0.38.4

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting wheel==0.38.4
  Downloading wheel-0.38.4-py3-none-any.whl (36 kB)
Installing collected packages: wheel
  Attempting uninstall: wheel
    Found existing installation: wheel 0.40.0
    Uninstalling wheel-0.40.0:
      Successfully uninstalled wheel-0.40.0
Successfully installed wheel-0.38.4


In [None]:
#%%capture
# minerl is pinned to the release this notebook was recorded with (0.4.4, which
# requires gym<0.20); the code below relies on the `minerl.data` API of that
# release. %pip installs into the environment of the running kernel.
%pip install minerl==0.4.4
%pip install pyvirtualdisplay

%pip install scikit-learn
%pip install -U colabgymrender

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting minerl
  Downloading minerl-0.4.4.tar.gz (70.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m70.1/70.1 MB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting gym<0.20,>=0.13.1 (from minerl)
  Using cached gym-0.19.0.tar.gz (1.6 MB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting Pyro4>=4.76 (from minerl)
  Downloading Pyro4-4.82-py2.py3-none-any.whl (89 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m90.0/90.0 kB[0m [31m12.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting coloredlogs>=10.0 (from minerl)
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.0/46.0 kB[0m [31m261.1 kB/s[0m eta [36m0:00:00[0m
Collecting dill>=0.3.1.1 (from minerl)
  Downloading dill-0.3.6-py3-none-any.w

In [None]:
# download if needed
# # !apt update 
# !apt install -y python-numpy python-dev cmake zlib1g-dev libjpeg-dev xvfb ffmpeg xorg-dev python-opengl libboost-all-dev libsdl2-dev swig

# !pip3 install pyvirtualdisplay piglet gym torch torchvision
# # !pip3 install "gym[atari]"

In [None]:
# X server packages for headless rendering. -y answers the confirmation prompt
# that apt-get shows when additional packages have to be installed.
!apt-get install -y xorg openbox
!apt-get install -y xvfb

Reading package lists... Done
Building dependency tree       
Reading state information... Done
The following additional packages will be installed:
  apport apport-symptoms aspell aspell-en bubblewrap desktop-file-utils
  dictionaries-common dmidecode docbook-xml dosfstools eject emacsen-common
  enchant-2 gdisk giblib1 gnome-icon-theme gnome-terminal gnome-terminal-data
  gstreamer1.0-gl gstreamer1.0-plugins-base gstreamer1.0-plugins-good
  gstreamer1.0-pulseaudio gstreamer1.0-x gvfs gvfs-common gvfs-daemons
  gvfs-libs hunspell-en-us keyboard-configuration libaa1 libaspell15
  libatasmart4 libblockdev-crypto2 libblockdev-fs2 libblockdev-loop2
  libblockdev-part-err2 libblockdev-part2 libblockdev-swap2 libblockdev-utils2
  libblockdev2 libcdparanoia0 libdv4 libenchant-2-2 libgck-1-0 libgcr-base-3-1
  libgpgme11 libgraphene-1.0-0 libgstreamer-gl1.0-0
  libgstreamer-plugins-good1.0-0 libharfbuzz-icu0 libhunspell-1.7-0 libhyphen0
  libid3tag0 libimlib2 libjavascriptcoregtk-4.0-18 libloc

# Import Libraries

In [None]:
import random
import numpy as np
import torch as th
from torch import nn
import gym
import minerl
from tqdm.notebook import tqdm
from colabgymrender.recorder import Recorder
from gym.wrappers import Monitor
from time import time
import torch

from pyvirtualdisplay import Display
from sklearn.cluster import KMeans
import logging
logging.disable(logging.ERROR)



# Neural network

In [None]:
class NatureCNN(nn.Module):
    """Convolutional network that maps an image batch to ``output_dim`` logits.

    The feature extractor is three Conv2d+ReLU stages followed by a flatten;
    the head is a 512-unit hidden layer and a final linear layer.

    :param input_shape: shape of a single observation, channels first
        (channels, height, width). Only ``input_shape[0]`` is used to size the
        first convolution; the full shape is used to infer the flattened size.
    :param output_dim: number of output units (one logit per discrete action).
    """

    def __init__(self, input_shape, output_dim):
        # nn.Module.__init__ has to run before any sub-module is assigned;
        # the one-argument form `super(NatureCNN).__init__()` never calls it,
        # which makes the `self.cnn = ...` assignment below raise.
        super().__init__()
        n_input_channels = input_shape[0]
        self.cnn = nn.Sequential(
            nn.Conv2d(n_input_channels, 32, kernel_size=8, stride=4, padding=0),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2, padding=0),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=0),
            nn.ReLU(),
            nn.Flatten(),
        )

        # Infer the flattened feature size by pushing one dummy observation
        # through the convolutional stack (no gradients needed for this).
        with th.no_grad():
            n_flatten = self.cnn(th.zeros(1, *input_shape)).shape[1]

        self.linear = nn.Sequential(
            nn.Linear(n_flatten, 512),
            nn.ReLU(),
            nn.Linear(512, output_dim)
        )

    def forward(self, observations: th.Tensor) -> th.Tensor:
        """Return the logits for a batch of channels-first observations."""
        return self.linear(self.cnn(observations))

# Setup training

In [None]:
def train():
    """Train the behavioural-cloning policy and save it to disk.

    Steps:
      1. Load shuffled trajectories of "MineRLObtainIronPickaxeVectorObf-v0"
         from the ``data`` directory until at least ``DATA_SAMPLES`` samples
         are collected (the check runs after each whole trajectory).
      2. Cluster the action vectors into ``NUM_ACTION_CENTROIDS`` centroids
         with KMeans.
      3. Train a ``NatureCNN`` to predict, for each frame, the index of the
         centroid closest to the demonstrated action (cross-entropy loss).
      4. Save the centroids to ``TRAIN_KMEANS_MODEL_NAME`` and the weights to
         ``f"{EPOCHS}_" + TRAIN_MODEL_NAME``.

    Reads the module-level settings EPOCHS, LEARNING_RATE, BATCH_SIZE,
    NUM_ACTION_CENTROIDS, DATA_SAMPLES, TRAIN_MODEL_NAME and
    TRAIN_KMEANS_MODEL_NAME. Requires a CUDA device. Returns nothing.
    """
    data = minerl.data.make("MineRLObtainIronPickaxeVectorObf-v0",  data_dir='data', num_workers=1)

    all_actions = []
    all_pov_obs = []

    print("Loading data")
    trajectory_names = data.get_trajectory_names()
    random.shuffle(trajectory_names)

    # Add trajectories to the data until we reach the required DATA_SAMPLES.
    for trajectory_name in trajectory_names:
        trajectory = data.load_data(trajectory_name, skip_interval=0, include_metadata=False)
        for dataset_observation, dataset_action, _, _, _ in trajectory:
            all_actions.append(dataset_action["vector"])
            all_pov_obs.append(dataset_observation["pov"])
        if len(all_actions) >= DATA_SAMPLES:
            break

    all_actions = np.array(all_actions)
    all_pov_obs = np.array(all_pov_obs)

    # Run k-means clustering using scikit-learn.
    print("Running KMeans on the action vectors")
    kmeans = KMeans(n_clusters=NUM_ACTION_CENTROIDS)
    kmeans.fit(all_actions)
    action_centroids = kmeans.cluster_centers_
    print("KMeans done")

    network = NatureCNN((3, 64, 64), NUM_ACTION_CENTROIDS).cuda()
    optimizer = th.optim.Adam(network.parameters(), lr=LEARNING_RATE)
    loss_function = nn.CrossEntropyLoss()

    num_samples = all_actions.shape[0]
    update_count = 0
    losses = []

    print("Training")
    for _ in range(EPOCHS):
        # Randomize the order in which we go over the samples
        epoch_indices = np.arange(num_samples)
        np.random.shuffle(epoch_indices)
        for batch_i in range(0, num_samples, BATCH_SIZE):
            batch_indices = epoch_indices[batch_i:batch_i + BATCH_SIZE]

            # Scale pixels by 1/255 exactly as the evaluation loop does, so the
            # network sees the same input range at training and test time.
            # Done per batch to avoid holding a float copy of the whole dataset.
            obs = all_pov_obs[batch_indices].astype(np.float32) / 255.0
            # Channels-last (B, H, W, C) -> channels-first (B, C, H, W).
            obs = obs.transpose(0, 3, 1, 2)

            # Label each sample with the index of its nearest action centroid:
            # distances has shape (num_centroids, batch).
            action_vectors = all_actions[batch_indices]
            distances = np.sum((action_vectors - action_centroids[:, None]) ** 2, axis=2)
            actions = np.argmin(distances, axis=0)

            logits = network(th.from_numpy(obs).float().cuda())
            loss = loss_function(logits, th.from_numpy(actions).long().cuda())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            update_count += 1
            losses.append(loss.item())
            # Report the mean loss of the last 1000 updates, then start over.
            if (update_count % 1000) == 0:
                mean_loss = sum(losses) / len(losses)
                tqdm.write("Iteration {}. Loss {:<10.3f}".format(update_count, mean_loss))
                losses.clear()

    print("Training done")

    # Save network and the centroids into separate files.
    # The file name carries the number of epochs trained; EPOCHS equals the old
    # `E + 1` after a full run and is also defined when EPOCHS is 0.
    np.save(TRAIN_KMEANS_MODEL_NAME, action_centroids)
    th.save(network.state_dict(), f"{EPOCHS}_"+TRAIN_MODEL_NAME)

# Parameters

In [None]:
# ---- Training hyper-parameters ----
EPOCHS = 20
LEARNING_RATE = 1e-4
BATCH_SIZE = 32
# Number of KMeans centroids used to cluster the data.
NUM_ACTION_CENTROIDS = 100

# How many samples to use from the dataset. Impacts RAM usage.
DATA_SAMPLES = 300_000

# ---- File names for the network weights and the KMeans centroids ----
TRAIN_MODEL_NAME = "CNN_Model.pth"
TEST_MODEL_NAME = "CNN_Model.pth"
TRAIN_KMEANS_MODEL_NAME = "centroids_for_model.npy"
TEST_KMEANS_MODEL_NAME = "centroids_for_model.npy"

# ---- Evaluation ----
# Number of episodes to test the agent for.
TEST_EPISODES = 10
# NOTE: gym has its own limit.
MAX_TEST_EPISODE_LEN = 18_000

# Download the data

In [None]:
# Fetch the demonstration dataset into ./data (the directory train() reads from).
# The trailing semicolon suppresses the cell's return-value output.
minerl.data.download(
    environment="MineRLObtainIronPickaxeVectorObf-v0",
    directory="data",
);


Download: https://minerl.s3.amazonaws.com/v4/MineRLObtainDiamondVectorObf-v0.tar: 100%|██████████| 4017.0/4016.70144 [04:57<00:00, 13.50MB/s]


# Train

In [None]:
# Forget the environment state left behind by a previous (failed) run.
!rm -rf /content/logs
!rm -rf /content/video
# `display` only refers to a virtual display once the cell that creates it has
# been executed; on a fresh kernel there is nothing to stop, so skip the call
# instead of raising.
if hasattr(globals().get("display"), "stop"):
    display.stop()

<pyvirtualdisplay.display.Display at 0x7f18d015fa90>

In [None]:
# Create and start the headless virtual display (400x300, not visible).
# Run this cell only once per session!
display = Display(size=(400, 300), visible=0)
display.start();

In [None]:
# Runs the whole pipeline defined in train(): load data, fit KMeans, train the
# network, then write the centroids and the weights to disk.
train()  # only need to run this once.

# Start Minecraft

In [None]:
# Create the MineRL evaluation environment.
# NOTE(review): train() loads data for "MineRLObtainIronPickaxeVectorObf-v0"
# while evaluation runs in the Diamond environment - confirm this is intended.
env = gym.make('MineRLObtainDiamondVectorObf-v0')

# Optional: wrap the environment to record a video of the agent playing.
# env = Recorder(env, './video', fps=60)

In [None]:
# Per-episode evaluation results, filled in by the evaluation loop:
#   runtime   - wall-clock seconds per episode
#   reward    - total reward per episode
#   reward_at - list of (step, reward) pairs for every positive reward
stats = dict(runtime=[], reward=[], reward_at=[])

# Directory the evaluation loop loads the centroids and network weights from.
models_path = "/content/check/"

In [None]:
from time import time

# Load the action centroids and the network weights from `models_path`.
action_centroids = np.load(models_path+TEST_KMEANS_MODEL_NAME)
network = NatureCNN((3, 64, 64), NUM_ACTION_CENTROIDS).cuda()
network.load_state_dict(th.load(models_path+TEST_MODEL_NAME))

num_actions = action_centroids.shape[0]
action_list = np.arange(num_actions)

# Episode count and step cap come from the parameters cell instead of being
# hard-coded here, so changing TEST_EPISODES / MAX_TEST_EPISODE_LEN takes effect.
for episode in range(TEST_EPISODES):
    env.seed(episode)
    obs = env.reset()
    start = time()
    done = False
    rewards = []  # (step, reward) for every positive reward in this episode
    reward_sum = 0
    steps = 0

    while not done and steps < MAX_TEST_EPISODE_LEN:
        # HWC frame -> normalized float tensor of shape (1, C, H, W) on the GPU.
        obs = th.from_numpy(obs['pov'].transpose(2, 0, 1)[None].astype(np.float32) / 255).cuda()

        # Inference only: no autograd graph is needed, so do not build one.
        with th.no_grad():
            probabilities = th.softmax(network(obs), dim=1)[0]
        probabilities = probabilities.cpu().numpy()

        # Sample a discrete action according to the predicted probabilities.
        discrete_action = np.random.choice(action_list, p=probabilities)

        # Map the discrete action to the corresponding action centroid (vector)
        action = action_centroids[discrete_action]
        minerl_action = {"vector": action}

        obs, reward, done, info = env.step(minerl_action)

        steps += 1
        if steps%1000==0:
            print(f"step: {steps}")
        reward_sum += reward
        if reward > 0:
            rewards.append((steps,reward))
            print(reward_sum)
        # A single-step reward of 256 is treated as reaching the target.
        if reward==256:
            print("Met target")
            break

    stats['runtime'].append(time() - start)
    stats['reward'].append(reward_sum)
    stats['reward_at'].append(rewards)

    print(f'Episode #{episode + 1} reward: {reward_sum}\t\t episode length: {steps}\n')

1.0
step: 1000
step: 2000
step: 3000
step: 4000
step: 5000
step: 6000


In [None]:
# #save stats
# import json
# with open('basic_bc.json', 'w') as outfile:
#     json.dump(stats, outfile)