In [1]:
from obstacle_tower_env import ObstacleTowerEnv

import time as systime
import numpy as np
from collections import deque

import torch
# Run on the first CUDA GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [2]:
from Source.Agents import DoubleDQN_Agent
from Source.Buffer import Buffer, SortedBuffer
from Source.Utils  import Tracker, Converter

SyntaxError: invalid syntax (Models.py, line 109)

In [None]:
# --- Run configuration --------------------------------------------------------
# Upper bound of the episode range used by the training loop.
EPOCHS = 2500
# The training loop calls agent.step_update() once every UPDATE_EVERY environment steps.
UPDATE_EVERY = 4
# Progress is printed on a fresh line every DISPLAY_EVERY episodes (in-place otherwise).
DISPLAY_EVERY = 100

# Model identifier handed to the agent, plus the prefix/suffix passed to the agent's
# get_folder / load_weights / save calls.
MODEL_NAME = "DQN_3"
PREFIX, SUFFIX = "", "M3_sb"

tracker = Tracker(DISPLAY_EVERY)

----

In [None]:
# Create the Obstacle Tower environment from the local executable, with both the
# retro and realtime modes switched off.
env = ObstacleTowerEnv(
    './ObstacleTower/obstacletower',
    retro=False,
    realtime_mode=False,
)

In [None]:
# Total number of discrete action combinations: the product of the per-branch sizes
# listed in action_space.nvec, converted to a plain Python int.
action_size = np.prod(env.action_space.nvec.tolist()).item()
# Shape of the first element of the observation space, as a list.
state_size  = list(env.observation_space[0].shape)

# Replay memory. The capacity is given as an integer (10000) rather than the float
# literal 1e4: a capacity is a count, and integer-only consumers such as
# collections.deque(maxlen=...) reject floats.
# NOTE(review): the plain `Buffer` class takes the same arguments and can be swapped in here.
buffer  = SortedBuffer(buffer_size = 10000, batch_size = 64)

agent   = DoubleDQN_Agent(state_size=state_size, action_size=action_size, model_name=MODEL_NAME, buffer=buffer,
                        device=device, n_iter=1, learning_rate=2e-4)

# Restore previous progress for this PREFIX/SUFFIX run: the tracker's recorded
# levels and the agent's saved weights.
tracker.load_levels(agent.get_folder(prefix=PREFIX, suffix=SUFFIX))
agent.load_weights(prefix=PREFIX, suffix=SUFFIX)

# Holding area for (state, action, next_state, done) tuples whose reward is decided
# later by the training loop.
local_buffer = []

In [None]:
def _flush_local_buffer(numerator, time_budget):
    """Move every pending transition into the replay buffer and empty the holding list.

    Each entry of ``local_buffer`` is a ``(state, action, next_state, done)`` tuple
    whose reward was deferred until the outcome of the current floor was known.
    Every entry is stored with reward ``numerator / time_budget``. The division is
    evaluated per entry, so nothing is divided when there is nothing to flush.
    """
    for held_state, held_action, held_next_state, held_done in local_buffer:
        buffer.add(state=held_state, action=held_action, reward=numerator / time_budget,
                   next_state=held_next_state, done=held_done)
    local_buffer.clear()


training_clock = systime.time()

# Call the agent's begin/end hooks once for every episode already recorded in the tracker.
# NOTE(review): presumably this fast-forwards per-episode state inside the agent after a
# resume -- confirm against DoubleDQN_Agent.
for epoch in range(len(tracker.levels)):
    agent.step_begin()
    agent.step_end()

for epoch in range(len(tracker.levels), EPOCHS):
    env_info = env.reset()

    # Extract data from the reset observation: element 0 is the raw state,
    # element 2 the starting time and element 3 the starting level.
    state = Converter.ProcessState(env_info[0])
    initial_time, curr_level = env_info[2], env_info[3]

    # Per-episode bookkeeping
    last_time, time, done = initial_time, initial_time, False
    acc_reward = 0

    # Play a level
    count = 0
    agent.step_begin()
    while not done and time > 0:
        # Act
        action = agent.act(state=state)
        env_info = env.step(Converter.OneHot2Action(action[0]))
        next_state = Converter.ProcessState(env_info[0][0])
        level, time, done = env_info[-1]['current_floor'], env_info[-1]['time_remaining'], env_info[2]

        if done:
            # Terminal transition: stored immediately with a fixed -1 reward.
            buffer.add(state=state, action=action, reward=-1, next_state=next_state, done=done)
            break

        # Check if the player cleared a level
        if level > curr_level:
            # The held transitions led to a cleared floor: store them with a small positive reward.
            _flush_local_buffer(1, initial_time)

            # Add high reward for final step. The right-hand side is evaluated with the old
            # initial_time before initial_time is rebound to the current remaining time.
            reward, curr_level, initial_time = 1 + max(0, last_time) / initial_time, level, time
            buffer.add(state=state, action=action, reward=reward, next_state=next_state, done=done)
            acc_reward += reward
        else:
            # Outcome of this floor is not known yet: hold the transition back.
            local_buffer.append((state, action, next_state, done))

        # Advance to the next step
        state, last_time = next_state, time
        count += 1

        if count % UPDATE_EVERY == 0:
            agent.step_update()

    # Whatever is still held belongs to a floor that was never cleared, so it is stored with
    # a small *negative* reward. (Previously these were stored with the same +1/initial_time
    # as successful transitions, which made deferring the reward pointless and left the
    # computed -1/initial_time penalty unused.)
    _flush_local_buffer(-1, initial_time)

    tracker.add(reward=acc_reward, level=curr_level)
    tracker.save_levels(agent.get_folder(prefix=PREFIX, suffix=SUFFIX))
    agent.save(prefix=PREFIX, suffix=SUFFIX)

    # Display step info
    if (epoch+1) % DISPLAY_EVERY == 0:
        # End of a display window: print the elapsed time on its own line and restart the clock.
        tracker.display(epoch=epoch, total_epochs=EPOCHS, clock=systime.time()-training_clock, end="\n")
        training_clock = systime.time()
    else:
        # Inside a window: extrapolate the remaining time from the elapsed time and the
        # position in the window. The modulo is non-zero in this branch, so the division is safe.
        estimated_clock = (systime.time()-training_clock) * (DISPLAY_EVERY / ( (epoch+1) % DISPLAY_EVERY) - 1)
        tracker.display(epoch=epoch, total_epochs=EPOCHS, clock=estimated_clock, end="\r")

    buffer.update()
    agent.step_end()

In [None]:
# Release cached CUDA memory held by PyTorch, then close the environment.
torch.cuda.empty_cache()
env.close()

In [None]:
# Collect the reward of every entry currently held in the replay buffer into one array.
stored_rewards = [entry.reward for entry in buffer.memory]
X = np.array(stored_rewards)

In [None]:
# Average stored reward.
np.mean(X)

In [None]:
# Spread (standard deviation) of the stored rewards.
np.std(X)

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Box plot of the rewards stored in the replay buffer, drawn on an explicit Axes
# and labelled so the figure can be read on its own.
fig, ax = plt.subplots()
ax.boxplot(X)
ax.set(title="Replay buffer rewards", ylabel="reward");

In [None]:
# Number of stored rewards that are strictly positive. np.count_nonzero counts the
# mask in NumPy instead of iterating it element by element with the built-in sum.
np.count_nonzero(X > 0)

In [None]:
# Number of stored rewards at or below 0.1. np.count_nonzero counts the mask in
# NumPy instead of iterating it element by element with the built-in sum.
np.count_nonzero(X <= 0.1)

In [None]:
# Number of stored rewards above 0.1. np.count_nonzero counts the mask in NumPy
# instead of iterating it element by element with the built-in sum.
np.count_nonzero(X > 0.1)