# Configururation

### Enviornment Configuration

In [1]:
ENV_USE_FRAME_STACK = False
ENV_FRAME_STACK_COUNT = 4
ENV_USE_GRAYSCALE_OBSERVATION = False

### Model Configuration

In [2]:
MODEL_USE_CNN = True
MODEL_DEVICE = "auto" # "auto", "cuda", "cpu", or "mps"

### Traning Configuration

In [8]:
TRAINING_NAME = "CNN-GRAYSCALE-PRI_REPLAY"                # Name for the current training (Used for checkpoint-name and log-name)
TRAINING_EVAL_FREQUENCY = 5000                      # How often the agent should be evaluated
TRAINING_EVAL_TIMES = 5                             # How many simulations to perform per evaluation

TRAINING_TOTAL_TIMESTEPS = 300000                   # Total number timesteps before training is complete

TRAINING_LEARNING_RATE = 1e-5                       # Learning rate
TRAINING_LEARNING_STARTS_AT = 10000                 # Timesteps to perform before learing begins

TRAINING_REPLAY_BUFFER_SIZE = 100000                # Number of episode-replays to save in the replay buffer
TRAINING_BATCH_SIZE = 50                            # Number of replays to pick each gradient
TRAINING_TARGET_UPDATE_INTERVAL = 1                 # How often the target network will be updated by online network
TRAINING_GRADIENT_STEPS = -1                        # How many gradient steps shall be performed per training

TRAINING_USE_PRIORITIZED_REPLAY = False             # Wether or not to use Prioritized Replay
TRAINING_PRIORITIZED_REPLAY_EPS = 1e-5              # Epsilon of prioritized replay
TRAINING_PRIORITIZED_REPLAY_BETA_START = 1.0        # Start beta of prioritized replay, beta controls the importance of prioritized replays
TRAINING_PRIORITIZED_REPLAY_BETA_END = 0.1          # End beta-value
TRAINING_PRIORITIZED_REPLAY_BETA_FRACTION = 0.55    # Fraction of total training time for beta-decrease

TRAINING_EPS_START = 1.0                            # Start value for epsilon, epsilon controls exploration rate
TRAINING_EPS_END = 0.0095                           # End value for epsilon
TRAINING_EPS_FRACTION = 0.6                         # Fraction of total training time for epsilon-decrease

TRAINING_GAMMA = 0.99                               # Static gamma, controls how importance uncertain rewards in far future (1.0) and the ones in the near future (0.0)
TRAINING_TAU = 0.975                                 # Static tau, controls how much of the online network shall be copied to the target network at each copy.

# Setup Minedojo Environment

In [4]:
from Environments import SkyRunner, MultithreadGym

# Multithreaded environment wrapper
env = MultithreadGym.MultithreadGym(thread_int=1, env_int=1,
    frame_stack=ENV_USE_FRAME_STACK,
    frames_int=ENV_FRAME_STACK_COUNT,
    use_grayscale=ENV_USE_GRAYSCALE_OBSERVATION
)

[INFO:minedojo.tasks] Loaded 1572 Programmatic tasks, 1558 Creative tasks, and 1 special task: "Playthrough". Totally 3131 tasks loaded.


starting Reloader 0
ThreadID: 0 has received an enviornment from queue. Reset of environement is being prepeared


# Load EVAL-Enviornment

In [5]:
# Evaluation environment
eval_env = SkyRunner.CustomEnv(
    frame_stack=ENV_USE_FRAME_STACK,
    frames_int=ENV_FRAME_STACK_COUNT,
    use_grayscale=ENV_USE_GRAYSCALE_OBSERVATION)
eval_env.reset()

[INFO:minedojo.tasks] Loaded 1572 Programmatic tasks, 1558 Creative tasks, and 1 special task: "Playthrough". Totally 3131 tasks loaded.
[INFO:minedojo.tasks] Loaded 1572 Programmatic tasks, 1558 Creative tasks, and 1 special task: "Playthrough". Totally 3131 tasks loaded.


array([[[22, 24, 26, ..., 29, 27, 24],
        [24, 25, 28, ..., 30, 29, 26],
        [26, 28, 30, ..., 33, 31, 28],
        ...,
        [47, 23, 34, ..., 46, 44, 42],
        [29, 39, 51, ..., 45, 43, 40],
        [20, 46, 22, ..., 44, 41, 38]]], dtype=uint8)

# Begin training

In [9]:
import train_openAI
import importlib

importlib.reload(train_openAI)

train_openAI.train(
    env=env,
    eval_env=eval_env,
    name=TRAINING_NAME,
    eval_freq=TRAINING_EVAL_FREQUENCY,                  
    n_eval_episodes=TRAINING_EVAL_TIMES,                           
    total_timesteps=TRAINING_TOTAL_TIMESTEPS,                 
    learning_rate=TRAINING_LEARNING_RATE,                     
    learning_starts=0,
    buffer_size=TRAINING_REPLAY_BUFFER_SIZE,              
    batch_size=TRAINING_BATCH_SIZE,                          
    target_update_interval=TRAINING_TARGET_UPDATE_INTERVAL,              
    gradient_steps=TRAINING_GRADIENT_STEPS,                       
    use_prioritized_replay=TRAINING_USE_PRIORITIZED_REPLAY,           
    prioritized_replay_eps=TRAINING_PRIORITIZED_REPLAY_EPS,           
    prioritized_replay_initial_beta=TRAINING_PRIORITIZED_REPLAY_BETA_START,      
    prioritized_replay_final_beta=TRAINING_PRIORITIZED_REPLAY_BETA_END,        
    prioritized_replay_beta_fraction=TRAINING_PRIORITIZED_REPLAY_BETA_FRACTION,  
    exploration_initial_eps=TRAINING_EPS_START,                          
    exploration_final_eps=TRAINING_EPS_END,                         
    exploration_fraction=TRAINING_EPS_FRACTION ,                       
    gamma=TRAINING_GAMMA,                            
    tau=TRAINING_TAU,
    use_cnn=MODEL_USE_CNN,
    device=MODEL_DEVICE                               
)

Inventory and weather cleared!
Inventory and weather cleared!
Broken block detected. Moving to location 82
Inventory and weather cleared!
Inventory and weather cleared!
Broken block detected. Moving to location 83
Inventory and weather cleared!
Inventory and weather cleared!
Broken block detected. Moving to location 84
Inventory and weather cleared!
Broken block detected. Moving to location 85
Inventory and weather cleared!
Broken block detected. Moving to location 86
Inventory and weather cleared!
Broken block detected. Moving to location 87
Inventory and weather cleared!
Inventory and weather cleared!
Inventory and weather cleared!
Inventory and weather cleared!
Inventory and weather cleared!
Inventory and weather cleared!
Inventory and weather cleared!
Inventory and weather cleared!
Broken block detected. Moving to location 88
Inventory and weather cleared!
Broken block detected. Moving to location 89
Inventory and weather cleared!
Inventory and weather cleared!
Broken block detecte

KeyboardInterrupt: 

# Preview Trained Model

In [None]:
from stable_baselines3 import DQN
from CustomBaselines3.DoubleDQN import DoubleDQN

model = DoubleDQN.load("./dDQN-checkpoints/" + TRAINING_NAME + "/final_model.zip")

obs = env.reset()
acc_r = 0
while True:
    act, st = model.predict(obs, deterministic=True)
    obs, reward, done, info = env.step(act)

    acc_r += reward

    env.render()

    if done:
        obs = env.reset()
        print("Finished with reward %d" % acc_r)
        acc_r = 0

# Shutdown Environments

In [None]:
env.close()
eval_env.close()