In [1]:
import sys, os
# Make the directory two levels above the working directory importable;
# presumably that is the repository root holding the project-local packages
# imported in the next cell (environments/, networks/, utils/) — confirm.
# The relative path is resolved against the kernel's current working directory.
project_root = os.path.abspath('../..')
if project_root not in sys.path:  # keep the cell idempotent when it is re-run
  sys.path.append(project_root)

In [2]:
from tqdm.notebook import tqdm
import math
import torch
import torch.optim as optim 
from torch.utils.tensorboard import SummaryWriter
from collections import deque

from environments.simplified_KGW import KGW
from networks.dqn_KGW import DQN
from utils.KGW_memory import ReplayMemory
from utils.optimization import standard_optimization
from utils.KGW_utils import fp, ActionSelector, evaluate

In [3]:
# Build the environment and read the per-frame shape from one preprocessed
# observation.
env = KGW()
# fp(...) is applied to every raw observation in this notebook; its result
# exposes a 3-element .shape that is unpacked here as (c, h, w).
c, h, w = fp(env.reset()).shape
# Forwarded to the networks, the replay memory, the action selector and evaluate().
n_actions = env.n_actions

In [4]:
# --- Optimisation ---
BATCH_SIZE = 32             # batch_size handed to standard_optimization
LR = 6.25e-5                # learning rate of the Adam optimizer
GAMMA = 0.99                # NOTE(review): not referenced in the visible cells; presumably the discount factor

# --- Exploration (all three are forwarded to ActionSelector) ---
EPS_START = 1.0
EPS_END = 0.1
EPS_DECAY = 1_000_000

# --- Replay memory ---
M_SIZE = 500_000            # first argument of ReplayMemory (presumably its capacity — confirm)

# --- Schedule, counted in iterations of the training loop ---
NUM_STEPS = 30_000_000      # total number of loop iterations
POLICY_UPDATE = 4           # run an optimisation step every N iterations
TARGET_UPDATE = 10_000      # copy policy weights into the target network every N iterations
EVALUATE_FREQ = 100_000     # call evaluate() every N iterations
SAVE_FREQ = 100_000         # write the model to disk every N iterations

In [5]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = torch.device("cuda")
else:
  device = torch.device("cpu")

# Policy network and its target counterpart, both moved to `device`.
policy_net = DQN(n_actions).to(device)
target_net = DQN(n_actions).to(device)

# Initialise the policy network with its own init_weights hook, then start the
# target network from exactly the same weights.
policy_net.apply(policy_net.init_weights)
target_net.load_state_dict(policy_net.state_dict())
target_net.eval()  # the target network is put in eval mode

# Only the policy network's parameters are handed to the optimizer.
optimizer = optim.Adam(
  policy_net.parameters(),
  lr=LR,
  eps=1.5e-4,
)

In [6]:
# Replay memory. The shape argument is [5 * c, h, w]: five preprocessed frames
# concatenated along the first axis, matching what the training loop pushes
# from its 5-frame deque.
memory = ReplayMemory(
  M_SIZE,
  [5 * c, h, w],
  n_actions,
  device,
)

# Action selector built from the exploration constants and the policy network.
action_selector = ActionSelector(
  EPS_START,
  EPS_END,
  policy_net,
  EPS_DECAY,
  n_actions,
  device,
)

In [7]:
# TensorBoard writer; the training loop logs loss and reward scalars through it.
writer = SummaryWriter()
# Step counter initialised here; the visible training loop uses its own `step` index.
steps_done = 0

In [8]:
# Rolling window holding the 5 most recent preprocessed frames.
q = deque(maxlen=5)
# Starting with done=True makes the first loop iteration reset the environment.
done = True
# Placeholders; both are overwritten by the training loop.
eps, episode_len = 0, 0

In [9]:
# Main training loop: one environment step per iteration, with periodic
# optimisation, target-network sync, evaluation and checkpointing.

# Number of stored transitions required before `training=True` is passed on.
MIN_MEMORY_FOR_TRAINING = 50000
# torch.save does not create missing directories, so make sure the checkpoint
# directory exists before the first save at step 0.
os.makedirs("models", exist_ok=True)

progressive = tqdm(range(NUM_STEPS), total=NUM_STEPS, ncols=400, leave=False, unit='b')
for step in progressive:
  if done:
    # Start a new episode.
    env.reset()
    sum_reward = 0  # NOTE(review): reset here but never accumulated or logged in this loop
    episode_len = 0
    # NOTE(review): comment carried over from Breakout; confirm that action 1 is
    # the intended first action for KGW. The returned observation is discarded.
    env.step(1) # BREAKOUT specific !!!
    for i in range(10): # no-op
      # Fill the frame window with preprocessed frames from action 0.
      # NOTE(review): the `done` flag returned by these steps is ignored.
      n_frame, _, _, _ = env.step(0)
      n_frame = fp(n_frame)
      q.append(n_frame)

  # `train` is forwarded as `training=` to both the action selector and the
  # optimisation step.
  train = len(memory) > MIN_MEMORY_FOR_TRAINING
  # Select and perform an action
  # NOTE(review): the hard-coded 8 presumably skips the oldest frame's channels
  # (i.e. assumes c == 8) — confirm; `[c:]` would follow the frame shape.
  state = torch.cat(list(q))[8:].unsqueeze(0)
  action, eps = action_selector.select_action(state, training=train)
  n_frame, reward, done, info = env.step(action)
  n_frame = fp(n_frame)

  # 5 frame as memory
  q.append(n_frame)
  memory.push(torch.cat(list(q)).unsqueeze(0), action, reward, done) # here the n_frame means next frame from the previous time step
  episode_len += 1

  # Perform one step of the optimization (on the policy network; the optimizer
  # was built from policy_net.parameters())
  if step % POLICY_UPDATE == 0:
    loss = standard_optimization(policy_net, target_net, optimizer, memory, training=train, batch_size=BATCH_SIZE)
    if loss is not None:
      writer.add_scalar('Performance/loss', loss, step)

  # Update the target network, copying all weights and biases in DQN
  if step % TARGET_UPDATE == 0:
    target_net.load_state_dict(policy_net.state_dict())

  if step % EVALUATE_FREQ == 0:
    # NOTE(review): evaluate() receives the same `env` instance used for
    # training; if it resets or steps that env, the in-progress training
    # episode and the frame window `q` may go out of sync — confirm.
    evaluated_reward = evaluate(step, policy_net, device, env, n_actions, eps=0.05, num_episode=15)
    writer.add_scalar('Performance/reward', evaluated_reward, step)

  if step % SAVE_FREQ == 0:
    # Saves the whole module object (not just a state_dict); the load cell
    # below expects this format.
    torch.save(policy_net, "models/dqn_expert_KGW_model")

HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=30000000.0), HTML(value='')), layout=Layo…

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/home/tony/anaconda3/envs/ml/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3331, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-9-0435b7044196>", line 27, in <module>
    loss = standard_optimization(policy_net, target_net, optimizer, memory, training=train, batch_size=BATCH_SIZE)
  File "/mnt/hdd1/ml/Active-Actor-Mimic/utils/optimization.py", line 11, in standard_optimization
    batch_size)
  File "/mnt/hdd1/ml/Active-Actor-Mimic/utils/KGW_memory.py", line 31, in sample
    ba = self.m_actions[i].to(self.device)
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/tony/anaconda3/envs/ml/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 2044, in showtraceback
    stb = value._render_traceback_()
AttributeError: 'KeyboardInterrupt' object has no attribute '_render_traceback_'

Dur

KeyboardInterrupt: 

In [None]:
# Manual checkpoint of the whole policy network, written to the same path the
# training loop uses.
os.makedirs("models", exist_ok=True)  # torch.save does not create missing directories
torch.save(policy_net, "models/dqn_expert_KGW_model")

In [None]:
# Restore the whole pickled policy network from the checkpoint path used above.
# map_location=device lets a checkpoint written on a GPU machine load on a
# CPU-only one.
# NOTE: torch.load unpickles arbitrary objects — only load checkpoints you trust.
# NOTE(review): recent PyTorch releases default to weights_only=True, which
# rejects whole-module pickles; pass weights_only=False there if needed.
# NOTE(review): `optimizer` was built from the previous policy_net's parameters;
# rebinding the name here does not update it.
policy_net = torch.load("models/dqn_expert_KGW_model", map_location=device)