In [26]:
from collections import namedtuple
import os
import argparse

import torch
import torch.multiprocessing as mp

import my_optim
from fun import FeudalNet

%run fun_agent
%run ../hyper_params
%run ../preprocessor
%run ../game

parser = argparse.ArgumentParser(description='Feudal Net with A3C setup')
parser.add_argument('--lr', type=float, default=0.0003,  # try LogUniform(1e-4.5, 1e-3.5)
                    help='learning rate')
parser.add_argument('--alpha', type=float, default=0.8,
                    help='intrinsic reward multiplier')
parser.add_argument('--gamma-worker', type=float, default=0.95,
                    help='worker discount factor for rewards')
parser.add_argument('--gamma-manager', type=float, default=0.99,
                    help='manager discount factor for rewards')
parser.add_argument('--tau-worker', type=float, default=1.00,
                    help='parameter for GAE (worker only)')
parser.add_argument('--entropy-coef', type=float, default=0.01,
                    help='entropy term coefficient (also called beta)')
parser.add_argument('--value-worker-loss-coef', type=float, default=1,
                    help='worker value loss coefficient')
parser.add_argument('--value-manager-loss-coef', type=float, default=1,
                    help='manager value loss coefficient')
parser.add_argument('--max-grad-norm', type=float, default=40,
                    help='value loss coefficient')
parser.add_argument('--seed', type=int, default=123,
                    help='random seed')
parser.add_argument('--num-processes', type=int, default=4,
                    help='how many training processes to use')
parser.add_argument('--num-steps', type=int, default=400,
                    help='number of forward steps in A3C')
parser.add_argument('--max-episode-length', type=int, default=1000000,
                    help='maximum length of an episode')
parser.add_argument('--env-name', default='PongDeterministic-v4',
                    help='environment to train on (default: PongDeterministic-v4)')
parser.add_argument('--no-shared', default=False,
                    help='use an optimizer without shared momentum.')

"""
if __name__ == "__main__":
    os.environ['OMP_NUM_THREADS'] = '1'
    os.environ['CUDA_VISIBLE_DEVICES'] = ""
    mp.set_start_method('spawn')

    args = parser.parse_args()

    torch.manual_seed(args.seed)
    env = create_atari_env(args.env_name)
    shared_model = FeudalNet(env.observation_space, env.action_space, channel_first=True)
    shared_model.share_memory()

    if args.no_shared:
        optimizer = None
    else:
        optimizer = my_optim.SharedAdam(shared_model.parameters(), lr=args.lr)
        optimizer.share_memory()

    processes = []

    counter = mp.Value('i', 0)
    lock = mp.Lock()

    import socket
    from datetime import datetime
    current_time = datetime.now().strftime('%b%d_%H-%M-%S')
    log_dir = os.path.join('runs', current_time + '_' + socket.gethostname())

    p = mp.Process(target=test, args=(args.num_processes, shared_model, counter, log_dir, lock, args))
    p.start()
    processes.append(p)

    for rank in range(0, args.num_processes):
        p = mp.Process(target=train, args=(rank, shared_model, counter, log_dir, lock, optimizer, args))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()
"""

params = get_param_dict("../hyperparameters.json")


learning_rate = params["learning_rate"]
seed = params["seed"]
num_steps = params["num_steps"]
gamma = params["gamma"]
gamma_worker = params["gamma_worker"]
gamma_manager = params["gamma_manager"]
alpha = params["alpha"]
tau_worker = params["tau_worker"]
entropy_coef = params["entropy_coef"]
value_worker_loss_coef = params["value_worker_loss_coef"]
value_manager_loss_coef = params["value_manager_loss_coef"]
max_grad_norm = params["max_grad_norm"]
        
n_frames = params["n_frames"]
n_channels = params["n_channels"]
original_width = params["original_width"]
original_height = params["original_height"]
scaled_width = params["scaled_width"]
scaled_height = params["scaled_height"]
rgb = params["rgb"]

game_visible = params["game_visible"]
mario_scale = params["mario_scale"]
mario_state = params["mario_state"]
mario_timer = params["mario_timer"]
mario_fps = params["mario_fps"]
level_path = params["level_path"]
#level_path = "/levels/custom/flat.txt",
preprocess = Preprocessor(n_frames, n_channels, original_height, original_width, scaled_height, scaled_width)
game = Game(game_visible, mario_scale, mario_state, mario_timer, mario_fps, level_path, preprocess, rgb)

shared_model = FeudalNet(game.observation_space, game.action_space, channel_first=True)
print(type(shared_model))
agent = FunAgent(game, shared_model, None, seed, learning_rate, num_steps, gamma, gamma_worker, gamma_manager, alpha, tau_worker, entropy_coef, value_worker_loss_coef, value_manager_loss_coef, max_grad_norm, 1)
#agent.train()

shared_model = game.load_model("./models/epochs_100_gap_1_fun.pt")
shared_model.worker = game.load_model("./models/epochs_100_gap_1_worker.pt")
shared_model.manager = game.load_model("./models/epochs_100_gap_1_manager.pt")
shared_model.perception = game.load_model("./models/epochs_100_gap_1_perception.pt")

agent.play()

<class 'fun.FeudalNet'>
Loading model
Loading model
Loading model
Loading model
no shared optimizer
Episode 0
tensor([[0.1219, 0.1292, 0.1257, 0.1264, 0.1268, 0.1283, 0.1186, 0.1232]],
       grad_fn=<SoftmaxBackward>)
tensor([[0.1180, 0.1356, 0.1264, 0.1287, 0.1326, 0.1313, 0.1094, 0.1181]],
       grad_fn=<SoftmaxBackward>)
tensor([[0.1124, 0.1433, 0.1276, 0.1273, 0.1389, 0.1316, 0.0994, 0.1194]],
       grad_fn=<SoftmaxBackward>)
tensor([[0.1072, 0.1475, 0.1287, 0.1293, 0.1395, 0.1428, 0.0917, 0.1133]],
       grad_fn=<SoftmaxBackward>)
tensor([[0.1043, 0.1556, 0.1277, 0.1300, 0.1449, 0.1442, 0.0825, 0.1109]],
       grad_fn=<SoftmaxBackward>)
tensor([[0.0989, 0.1631, 0.1307, 0.1313, 0.1500, 0.1372, 0.0738, 0.1151]],
       grad_fn=<SoftmaxBackward>)
tensor([[0.0916, 0.1642, 0.1304, 0.1335, 0.1521, 0.1525, 0.0686, 0.1071]],
       grad_fn=<SoftmaxBackward>)
tensor([[0.0868, 0.1786, 0.1295, 0.1286, 0.1488, 0.1554, 0.0631, 0.1092]],
       grad_fn=<SoftmaxBackward>)
tensor([[0.0832, 0

tensor([[0.0602, 0.1519, 0.1277, 0.1277, 0.1546, 0.1870, 0.0396, 0.1513]],
       grad_fn=<SoftmaxBackward>)
tensor([[0.0592, 0.1493, 0.1268, 0.1302, 0.1452, 0.1946, 0.0404, 0.1542]],
       grad_fn=<SoftmaxBackward>)
tensor([[0.0619, 0.1470, 0.1277, 0.1252, 0.1597, 0.1716, 0.0425, 0.1643]],
       grad_fn=<SoftmaxBackward>)
tensor([[0.0615, 0.1466, 0.1315, 0.1384, 0.1711, 0.1662, 0.0383, 0.1464]],
       grad_fn=<SoftmaxBackward>)
tensor([[0.0608, 0.1487, 0.1263, 0.1248, 0.1612, 0.1897, 0.0390, 0.1495]],
       grad_fn=<SoftmaxBackward>)
tensor([[0.0595, 0.1474, 0.1269, 0.1290, 0.1457, 0.1973, 0.0405, 0.1536]],
       grad_fn=<SoftmaxBackward>)
tensor([[0.0590, 0.1455, 0.1248, 0.1294, 0.1470, 0.1963, 0.0406, 0.1573]],
       grad_fn=<SoftmaxBackward>)
tensor([[0.0590, 0.1443, 0.1247, 0.1285, 0.1413, 0.2090, 0.0404, 0.1527]],
       grad_fn=<SoftmaxBackward>)
tensor([[0.0593, 0.1431, 0.1251, 0.1264, 0.1376, 0.2193, 0.0400, 0.1491]],
       grad_fn=<SoftmaxBackward>)
tensor([[0.0596, 0.

KeyboardInterrupt: 