In [1]:
import gym
import time
import torch
import argparse
import numpy as np

from logger import Logger

import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.animation as animation

from copy import deepcopy
from argparse import Namespace

from option_critic import OptionCriticFeatures, OptionCriticConv
from option_critic import critic_loss as critic_loss_fn
from option_critic import actor_loss as actor_loss_fn

from train import run
from logger import Logger
from utils import to_tensor
from fourrooms import Fourrooms
from environment import make_env
from experience_replay import ReplayBuffer

## Parameters

In [2]:
ENV = "ltl_fourrooms"

In [3]:
env, is_atari = make_env("ltl_fourrooms", False)

In [4]:
env.spec.specs[0].ldba.delta

[{(): 0, ('phi',): 1, ('psi',): 0, ('phi', 'psi'): 1},
 {(): 2, ('phi',): 2, ('psi',): 0, ('phi', 'psi'): 1},
 {(): 2, ('phi',): 2, ('psi',): 2, ('phi', 'psi'): 2}]

In [5]:
env.spec.specs[0].ldba.state

0

In [6]:
env.spec.specs[0].ldba.reset()

(0, False)

In [7]:
env.spec.specs[0].ldba.step(('phi',))

(1, True)

In [8]:
env.spec.specs[0].ldba.acc

[{(): [None], ('phi',): [True], ('psi',): [True], ('phi', 'psi'): [True]},
 {(): [None], ('phi',): [None], ('psi',): [True], ('phi', 'psi'): [True]},
 {(): [None], ('phi',): [None], ('psi',): [None], ('phi', 'psi'): [None]}]

## Training

In [9]:
args = Namespace(batch_size=32, cuda=True, entropy_reg=0.01, env='ltl_fourrooms', 
                 epsilon_decay=20000, epsilon_min=0.1, epsilon_start=1.0, exp=None, 
                 frame_skip=4, freeze_interval=200, gamma=0.99, learning_rate=0.0005, 
                 logdir='runs', max_history=10000, max_steps_ep=18000, max_steps_total=4000000, 
                 num_options=4, optimal_eps=0.05, seed=0, switch_goal=True, temp=1, 
                 termination_reg=0.01, update_frequency=4, render=False, name="Standard")
# logger = Logger(
#         logdir=args.logdir, 
#         run_name=f"{OptionCriticFeatures.__name__}-{args.env}-{args.exp}-{time.ctime()}")

run(args)

Current goal {16: ('phi',), 17: ('psi',)}


2023/03/11 04:35:35 PM > ep 1 done. total_steps=1000 | reward=0 | episode_steps=1000 | hours=0.002 | epsilon=0.956
2023/03/11 04:35:39 PM > ep 2 done. total_steps=1936 | reward=1.0 | episode_steps=936 | hours=0.003 | epsilon=0.917
2023/03/11 04:35:40 PM > ep 3 done. total_steps=2397 | reward=8.0 | episode_steps=461 | hours=0.003 | epsilon=0.898
2023/03/11 04:35:44 PM > ep 4 done. total_steps=3397 | reward=10 | episode_steps=1000 | hours=0.004 | epsilon=0.859
2023/03/11 04:35:47 PM > ep 5 done. total_steps=4070 | reward=4.0 | episode_steps=673 | hours=0.005 | epsilon=0.834
2023/03/11 04:35:51 PM > ep 6 done. total_steps=4968 | reward=1.0 | episode_steps=898 | hours=0.006 | epsilon=0.802
2023/03/11 04:35:53 PM > ep 7 done. total_steps=5618 | reward=2.0 | episode_steps=650 | hours=0.007 | epsilon=0.780
2023/03/11 04:35:56 PM > ep 8 done. total_steps=6204 | reward=1.0 | episode_steps=586 | hours=0.008 | epsilon=0.760
2023/03/11 04:36:00 PM > ep 9 done. total_steps=7204 | reward=0 | episode

2023/03/11 04:36:54 PM > ep 71 done. total_steps=20470 | reward=1.0 | episode_steps=190 | hours=0.024 | epsilon=0.423
2023/03/11 04:36:54 PM > ep 72 done. total_steps=20496 | reward=1.0 | episode_steps=26 | hours=0.024 | epsilon=0.423
2023/03/11 04:36:54 PM > ep 73 done. total_steps=20540 | reward=1.0 | episode_steps=44 | hours=0.024 | epsilon=0.422
2023/03/11 04:36:57 PM > ep 74 done. total_steps=21112 | reward=6.0 | episode_steps=572 | hours=0.024 | epsilon=0.413
2023/03/11 04:36:59 PM > ep 75 done. total_steps=21627 | reward=12.0 | episode_steps=515 | hours=0.025 | epsilon=0.405
2023/03/11 04:36:59 PM > ep 76 done. total_steps=21716 | reward=4.0 | episode_steps=89 | hours=0.025 | epsilon=0.404
2023/03/11 04:37:00 PM > ep 77 done. total_steps=21835 | reward=3.0 | episode_steps=119 | hours=0.025 | epsilon=0.402
2023/03/11 04:37:00 PM > ep 78 done. total_steps=21866 | reward=2.0 | episode_steps=31 | hours=0.025 | epsilon=0.402
2023/03/11 04:37:01 PM > ep 79 done. total_steps=22094 | re

2023/03/11 04:37:38 PM > ep 141 done. total_steps=31241 | reward=14.0 | episode_steps=93 | hours=0.036 | epsilon=0.289
2023/03/11 04:37:38 PM > ep 142 done. total_steps=31248 | reward=3.0 | episode_steps=7 | hours=0.036 | epsilon=0.289
2023/03/11 04:37:38 PM > ep 143 done. total_steps=31347 | reward=9.0 | episode_steps=99 | hours=0.036 | epsilon=0.288
2023/03/11 04:37:38 PM > ep 144 done. total_steps=31361 | reward=1.0 | episode_steps=14 | hours=0.036 | epsilon=0.288
2023/03/11 04:37:38 PM > ep 145 done. total_steps=31408 | reward=2.0 | episode_steps=47 | hours=0.036 | epsilon=0.287
2023/03/11 04:37:39 PM > ep 146 done. total_steps=31544 | reward=32.0 | episode_steps=136 | hours=0.036 | epsilon=0.286
2023/03/11 04:37:40 PM > ep 147 done. total_steps=31674 | reward=7.0 | episode_steps=130 | hours=0.036 | epsilon=0.285
2023/03/11 04:37:40 PM > ep 148 done. total_steps=31824 | reward=21.0 | episode_steps=150 | hours=0.037 | epsilon=0.283
2023/03/11 04:37:41 PM > ep 149 done. total_steps=3

2023/03/11 04:38:10 PM > ep 210 done. total_steps=38858 | reward=8.0 | episode_steps=49 | hours=0.045 | epsilon=0.229
2023/03/11 04:38:11 PM > ep 211 done. total_steps=39138 | reward=32.0 | episode_steps=280 | hours=0.045 | epsilon=0.227
2023/03/11 04:38:11 PM > ep 212 done. total_steps=39257 | reward=34.0 | episode_steps=119 | hours=0.045 | epsilon=0.226
2023/03/11 04:38:12 PM > ep 213 done. total_steps=39309 | reward=1.0 | episode_steps=52 | hours=0.045 | epsilon=0.226
2023/03/11 04:38:12 PM > ep 214 done. total_steps=39319 | reward=3.0 | episode_steps=10 | hours=0.045 | epsilon=0.226
2023/03/11 04:38:13 PM > ep 215 done. total_steps=39522 | reward=54.0 | episode_steps=203 | hours=0.045 | epsilon=0.225
2023/03/11 04:38:13 PM > ep 216 done. total_steps=39643 | reward=14.0 | episode_steps=121 | hours=0.046 | epsilon=0.224
2023/03/11 04:38:13 PM > ep 217 done. total_steps=39687 | reward=4.0 | episode_steps=44 | hours=0.046 | epsilon=0.224
2023/03/11 04:38:13 PM > ep 218 done. total_step

2023/03/11 04:38:40 PM > ep 279 done. total_steps=45934 | reward=48.0 | episode_steps=143 | hours=0.053 | epsilon=0.191
2023/03/11 04:38:40 PM > ep 280 done. total_steps=45989 | reward=16.0 | episode_steps=55 | hours=0.053 | epsilon=0.190
2023/03/11 04:38:40 PM > ep 281 done. total_steps=46073 | reward=20.0 | episode_steps=84 | hours=0.053 | epsilon=0.190
2023/03/11 04:38:41 PM > ep 282 done. total_steps=46197 | reward=32.0 | episode_steps=124 | hours=0.053 | epsilon=0.189
2023/03/11 04:38:41 PM > ep 283 done. total_steps=46217 | reward=3.0 | episode_steps=20 | hours=0.053 | epsilon=0.189
2023/03/11 04:38:41 PM > ep 284 done. total_steps=46345 | reward=38.0 | episode_steps=128 | hours=0.053 | epsilon=0.189
2023/03/11 04:38:42 PM > ep 285 done. total_steps=46463 | reward=38.0 | episode_steps=118 | hours=0.054 | epsilon=0.188
2023/03/11 04:38:42 PM > ep 286 done. total_steps=46487 | reward=1.0 | episode_steps=24 | hours=0.054 | epsilon=0.188
2023/03/11 04:38:43 PM > ep 287 done. total_st

2023/03/11 04:39:14 PM > ep 348 done. total_steps=54863 | reward=9.0 | episode_steps=90 | hours=0.062 | epsilon=0.158
2023/03/11 04:39:14 PM > ep 349 done. total_steps=54970 | reward=13.0 | episode_steps=107 | hours=0.063 | epsilon=0.158
2023/03/11 04:39:14 PM > ep 350 done. total_steps=54989 | reward=4.0 | episode_steps=19 | hours=0.063 | epsilon=0.158
2023/03/11 04:39:14 PM > ep 351 done. total_steps=55021 | reward=2.0 | episode_steps=32 | hours=0.063 | epsilon=0.157
2023/03/11 04:39:14 PM > ep 352 done. total_steps=55045 | reward=3.0 | episode_steps=24 | hours=0.063 | epsilon=0.157
2023/03/11 04:39:15 PM > ep 353 done. total_steps=55234 | reward=45.0 | episode_steps=189 | hours=0.063 | epsilon=0.157
2023/03/11 04:39:15 PM > ep 354 done. total_steps=55283 | reward=5.0 | episode_steps=49 | hours=0.063 | epsilon=0.157
2023/03/11 04:39:16 PM > ep 355 done. total_steps=55420 | reward=15.0 | episode_steps=137 | hours=0.063 | epsilon=0.156
2023/03/11 04:39:17 PM > ep 356 done. total_steps=

2023/03/11 04:39:51 PM > ep 417 done. total_steps=65861 | reward=26.0 | episode_steps=264 | hours=0.073 | epsilon=0.133
2023/03/11 04:39:51 PM > ep 418 done. total_steps=65970 | reward=5.0 | episode_steps=109 | hours=0.073 | epsilon=0.133
2023/03/11 04:39:51 PM > ep 419 done. total_steps=65985 | reward=2.0 | episode_steps=15 | hours=0.073 | epsilon=0.133
2023/03/11 04:39:51 PM > ep 420 done. total_steps=66046 | reward=13.0 | episode_steps=61 | hours=0.073 | epsilon=0.133
2023/03/11 04:39:52 PM > ep 421 done. total_steps=66322 | reward=56.0 | episode_steps=276 | hours=0.073 | epsilon=0.133
2023/03/11 04:39:53 PM > ep 422 done. total_steps=66386 | reward=9.0 | episode_steps=64 | hours=0.073 | epsilon=0.133
2023/03/11 04:39:53 PM > ep 423 done. total_steps=66401 | reward=1.0 | episode_steps=15 | hours=0.073 | epsilon=0.133
2023/03/11 04:39:54 PM > ep 424 done. total_steps=66686 | reward=79.0 | episode_steps=285 | hours=0.074 | epsilon=0.132
2023/03/11 04:39:54 PM > ep 425 done. total_step

2023/03/11 04:40:24 PM > ep 486 done. total_steps=75626 | reward=8.0 | episode_steps=213 | hours=0.082 | epsilon=0.121
2023/03/11 04:40:25 PM > ep 487 done. total_steps=75842 | reward=79.0 | episode_steps=216 | hours=0.082 | epsilon=0.120
2023/03/11 04:40:25 PM > ep 488 done. total_steps=75903 | reward=14.0 | episode_steps=61 | hours=0.082 | epsilon=0.120
2023/03/11 04:40:26 PM > ep 489 done. total_steps=75996 | reward=2.0 | episode_steps=93 | hours=0.082 | epsilon=0.120
2023/03/11 04:40:26 PM > ep 490 done. total_steps=76131 | reward=39.0 | episode_steps=135 | hours=0.083 | epsilon=0.120
2023/03/11 04:40:26 PM > ep 491 done. total_steps=76151 | reward=2.0 | episode_steps=20 | hours=0.083 | epsilon=0.120
2023/03/11 04:40:26 PM > ep 492 done. total_steps=76242 | reward=17.0 | episode_steps=91 | hours=0.083 | epsilon=0.120
2023/03/11 04:40:27 PM > ep 493 done. total_steps=76422 | reward=48.0 | episode_steps=180 | hours=0.083 | epsilon=0.120
2023/03/11 04:40:28 PM > ep 494 done. total_ste

2023/03/11 04:41:10 PM > ep 555 done. total_steps=88071 | reward=15.0 | episode_steps=122 | hours=0.095 | epsilon=0.111
2023/03/11 04:41:10 PM > ep 556 done. total_steps=88125 | reward=3.0 | episode_steps=54 | hours=0.095 | epsilon=0.111
2023/03/11 04:41:12 PM > ep 557 done. total_steps=88637 | reward=90.0 | episode_steps=512 | hours=0.095 | epsilon=0.111
2023/03/11 04:41:12 PM > ep 558 done. total_steps=88644 | reward=3.0 | episode_steps=7 | hours=0.095 | epsilon=0.111
2023/03/11 04:41:13 PM > ep 559 done. total_steps=88956 | reward=75.0 | episode_steps=312 | hours=0.096 | epsilon=0.111
2023/03/11 04:41:14 PM > ep 560 done. total_steps=89033 | reward=18.0 | episode_steps=77 | hours=0.096 | epsilon=0.110
2023/03/11 04:41:14 PM > ep 561 done. total_steps=89104 | reward=22.0 | episode_steps=71 | hours=0.096 | epsilon=0.110
2023/03/11 04:41:15 PM > ep 562 done. total_steps=89325 | reward=54.0 | episode_steps=221 | hours=0.096 | epsilon=0.110
2023/03/11 04:41:15 PM > ep 563 done. total_ste

2023/03/11 04:41:57 PM > ep 624 done. total_steps=101013 | reward=27.0 | episode_steps=189 | hours=0.108 | epsilon=0.106
2023/03/11 04:41:57 PM > ep 625 done. total_steps=101073 | reward=6.0 | episode_steps=60 | hours=0.108 | epsilon=0.106
2023/03/11 04:41:58 PM > ep 626 done. total_steps=101254 | reward=24.0 | episode_steps=181 | hours=0.108 | epsilon=0.106
2023/03/11 04:42:00 PM > ep 627 done. total_steps=101762 | reward=74.0 | episode_steps=508 | hours=0.109 | epsilon=0.106
2023/03/11 04:42:00 PM > ep 628 done. total_steps=101880 | reward=13.0 | episode_steps=118 | hours=0.109 | epsilon=0.106
2023/03/11 04:42:01 PM > ep 629 done. total_steps=101949 | reward=10.0 | episode_steps=69 | hours=0.109 | epsilon=0.106
2023/03/11 04:42:01 PM > ep 630 done. total_steps=102019 | reward=5.0 | episode_steps=70 | hours=0.109 | epsilon=0.105
2023/03/11 04:42:01 PM > ep 631 done. total_steps=102057 | reward=6.0 | episode_steps=38 | hours=0.109 | epsilon=0.105
2023/03/11 04:42:01 PM > ep 632 done. t

2023/03/11 04:43:04 PM > ep 693 done. total_steps=119252 | reward=133.0 | episode_steps=557 | hours=0.126 | epsilon=0.102
2023/03/11 04:43:05 PM > ep 694 done. total_steps=119490 | reward=39.0 | episode_steps=238 | hours=0.127 | epsilon=0.102
2023/03/11 04:43:06 PM > ep 695 done. total_steps=119657 | reward=23.0 | episode_steps=167 | hours=0.127 | epsilon=0.102
2023/03/11 04:43:06 PM > ep 696 done. total_steps=119736 | reward=15.0 | episode_steps=79 | hours=0.127 | epsilon=0.102
2023/03/11 04:43:10 PM > ep 697 done. total_steps=120736 | reward=241 | episode_steps=1000 | hours=0.128 | epsilon=0.102
2023/03/11 04:43:10 PM > ep 698 done. total_steps=120807 | reward=6.0 | episode_steps=71 | hours=0.128 | epsilon=0.102
2023/03/11 04:43:10 PM > ep 699 done. total_steps=120971 | reward=34.0 | episode_steps=164 | hours=0.128 | epsilon=0.102
2023/03/11 04:43:13 PM > ep 700 done. total_steps=121554 | reward=150.0 | episode_steps=583 | hours=0.129 | epsilon=0.102
2023/03/11 04:43:13 PM > ep 701 d

2023/03/11 04:43:52 PM > ep 762 done. total_steps=132434 | reward=76.0 | episode_steps=272 | hours=0.140 | epsilon=0.101
2023/03/11 04:43:54 PM > ep 763 done. total_steps=132789 | reward=24.0 | episode_steps=355 | hours=0.140 | epsilon=0.101
2023/03/11 04:43:54 PM > ep 764 done. total_steps=133005 | reward=26.0 | episode_steps=216 | hours=0.140 | epsilon=0.101
2023/03/11 04:43:55 PM > ep 765 done. total_steps=133107 | reward=18.0 | episode_steps=102 | hours=0.141 | epsilon=0.101
2023/03/11 04:43:55 PM > ep 766 done. total_steps=133213 | reward=28.0 | episode_steps=106 | hours=0.141 | epsilon=0.101
2023/03/11 04:43:56 PM > ep 767 done. total_steps=133363 | reward=1.0 | episode_steps=150 | hours=0.141 | epsilon=0.101
2023/03/11 04:43:56 PM > ep 768 done. total_steps=133529 | reward=50.0 | episode_steps=166 | hours=0.141 | epsilon=0.101
2023/03/11 04:43:56 PM > ep 769 done. total_steps=133538 | reward=2.0 | episode_steps=9 | hours=0.141 | epsilon=0.101
2023/03/11 04:43:57 PM > ep 770 done

2023/03/11 04:44:31 PM > ep 831 done. total_steps=143157 | reward=24.0 | episode_steps=109 | hours=0.151 | epsilon=0.101
2023/03/11 04:44:31 PM > ep 832 done. total_steps=143201 | reward=12.0 | episode_steps=44 | hours=0.151 | epsilon=0.101
2023/03/11 04:44:32 PM > ep 833 done. total_steps=143471 | reward=61.0 | episode_steps=270 | hours=0.151 | epsilon=0.101
2023/03/11 04:44:34 PM > ep 834 done. total_steps=143818 | reward=73.0 | episode_steps=347 | hours=0.151 | epsilon=0.101
2023/03/11 04:44:34 PM > ep 835 done. total_steps=144005 | reward=35.0 | episode_steps=187 | hours=0.152 | epsilon=0.101
2023/03/11 04:44:35 PM > ep 836 done. total_steps=144235 | reward=55.0 | episode_steps=230 | hours=0.152 | epsilon=0.101
2023/03/11 04:44:35 PM > ep 837 done. total_steps=144330 | reward=14.0 | episode_steps=95 | hours=0.152 | epsilon=0.101
2023/03/11 04:44:36 PM > ep 838 done. total_steps=144441 | reward=31.0 | episode_steps=111 | hours=0.152 | epsilon=0.101
2023/03/11 04:44:36 PM > ep 839 do

2023/03/11 04:45:12 PM > ep 899 done. total_steps=154533 | reward=3.0 | episode_steps=12 | hours=0.162 | epsilon=0.100
2023/03/11 04:45:12 PM > ep 900 done. total_steps=154549 | reward=1.0 | episode_steps=16 | hours=0.162 | epsilon=0.100
2023/03/11 04:45:12 PM > ep 901 done. total_steps=154648 | reward=26.0 | episode_steps=99 | hours=0.162 | epsilon=0.100
2023/03/11 04:45:12 PM > ep 902 done. total_steps=154760 | reward=26.0 | episode_steps=112 | hours=0.162 | epsilon=0.100
2023/03/11 04:45:13 PM > ep 903 done. total_steps=154982 | reward=52.0 | episode_steps=222 | hours=0.162 | epsilon=0.100
2023/03/11 04:45:13 PM > ep 904 done. total_steps=155017 | reward=12.0 | episode_steps=35 | hours=0.162 | epsilon=0.100
2023/03/11 04:45:14 PM > ep 905 done. total_steps=155148 | reward=44.0 | episode_steps=131 | hours=0.163 | epsilon=0.100
2023/03/11 04:45:15 PM > ep 906 done. total_steps=155432 | reward=80.0 | episode_steps=284 | hours=0.163 | epsilon=0.100
2023/03/11 04:45:15 PM > ep 907 done. 

2023/03/11 04:45:49 PM > ep 968 done. total_steps=165142 | reward=21.0 | episode_steps=81 | hours=0.172 | epsilon=0.100
2023/03/11 04:45:49 PM > ep 969 done. total_steps=165171 | reward=5.0 | episode_steps=29 | hours=0.172 | epsilon=0.100
2023/03/11 04:45:50 PM > ep 970 done. total_steps=165524 | reward=97.0 | episode_steps=353 | hours=0.173 | epsilon=0.100
2023/03/11 04:45:51 PM > ep 971 done. total_steps=165647 | reward=40.0 | episode_steps=123 | hours=0.173 | epsilon=0.100
2023/03/11 04:45:51 PM > ep 972 done. total_steps=165710 | reward=17.0 | episode_steps=63 | hours=0.173 | epsilon=0.100
2023/03/11 04:45:51 PM > ep 973 done. total_steps=165913 | reward=64.0 | episode_steps=203 | hours=0.173 | epsilon=0.100
2023/03/11 04:45:52 PM > ep 974 done. total_steps=166080 | reward=47.0 | episode_steps=167 | hours=0.173 | epsilon=0.100
2023/03/11 04:45:53 PM > ep 975 done. total_steps=166291 | reward=47.0 | episode_steps=211 | hours=0.173 | epsilon=0.100
2023/03/11 04:45:53 PM > ep 976 done

New goal {52: ('phi',), 46: ('psi',)}


2023/03/11 04:46:10 PM > ep 1001 done. total_steps=171592 | reward=0 | episode_steps=1000 | hours=0.178 | epsilon=0.100
2023/03/11 04:46:12 PM > ep 1002 done. total_steps=172592 | reward=0 | episode_steps=1000 | hours=0.179 | epsilon=0.100
2023/03/11 04:46:14 PM > ep 1003 done. total_steps=173202 | reward=7.0 | episode_steps=610 | hours=0.179 | epsilon=0.100
2023/03/11 04:46:17 PM > ep 1004 done. total_steps=174202 | reward=0 | episode_steps=1000 | hours=0.180 | epsilon=0.100
2023/03/11 04:46:19 PM > ep 1005 done. total_steps=174898 | reward=1.0 | episode_steps=696 | hours=0.181 | epsilon=0.100
2023/03/11 04:46:21 PM > ep 1006 done. total_steps=175898 | reward=0 | episode_steps=1000 | hours=0.181 | epsilon=0.100
2023/03/11 04:46:22 PM > ep 1007 done. total_steps=176088 | reward=1.0 | episode_steps=190 | hours=0.181 | epsilon=0.100
2023/03/11 04:46:23 PM > ep 1008 done. total_steps=176489 | reward=1.0 | episode_steps=401 | hours=0.182 | epsilon=0.100
2023/03/11 04:46:26 PM > ep 1009 don

2023/03/11 04:47:05 PM > ep 1069 done. total_steps=192026 | reward=78.0 | episode_steps=117 | hours=0.194 | epsilon=0.100
2023/03/11 04:47:06 PM > ep 1070 done. total_steps=192334 | reward=161.0 | episode_steps=308 | hours=0.194 | epsilon=0.100
2023/03/11 04:47:07 PM > ep 1071 done. total_steps=192445 | reward=4.0 | episode_steps=111 | hours=0.194 | epsilon=0.100
2023/03/11 04:47:07 PM > ep 1072 done. total_steps=192496 | reward=31.0 | episode_steps=51 | hours=0.194 | epsilon=0.100
2023/03/11 04:47:07 PM > ep 1073 done. total_steps=192605 | reward=10.0 | episode_steps=109 | hours=0.194 | epsilon=0.100
2023/03/11 04:47:07 PM > ep 1074 done. total_steps=192677 | reward=3.0 | episode_steps=72 | hours=0.194 | epsilon=0.100
2023/03/11 04:47:10 PM > ep 1075 done. total_steps=193611 | reward=263.0 | episode_steps=934 | hours=0.195 | epsilon=0.100
2023/03/11 04:47:11 PM > ep 1076 done. total_steps=194132 | reward=55.0 | episode_steps=521 | hours=0.195 | epsilon=0.100
2023/03/11 04:47:12 PM > e

2023/03/11 04:48:17 PM > ep 1137 done. total_steps=218202 | reward=116.0 | episode_steps=141 | hours=0.213 | epsilon=0.100
2023/03/11 04:48:17 PM > ep 1138 done. total_steps=218255 | reward=3.0 | episode_steps=53 | hours=0.213 | epsilon=0.100
2023/03/11 04:48:18 PM > ep 1139 done. total_steps=218389 | reward=73.0 | episode_steps=134 | hours=0.214 | epsilon=0.100
2023/03/11 04:48:18 PM > ep 1140 done. total_steps=218600 | reward=100.0 | episode_steps=211 | hours=0.214 | epsilon=0.100
2023/03/11 04:48:19 PM > ep 1141 done. total_steps=218749 | reward=57.0 | episode_steps=149 | hours=0.214 | epsilon=0.100
2023/03/11 04:48:20 PM > ep 1142 done. total_steps=219041 | reward=163.0 | episode_steps=292 | hours=0.214 | epsilon=0.100
2023/03/11 04:48:21 PM > ep 1143 done. total_steps=219550 | reward=324.0 | episode_steps=509 | hours=0.215 | epsilon=0.100
2023/03/11 04:48:21 PM > ep 1144 done. total_steps=219608 | reward=23.0 | episode_steps=58 | hours=0.215 | epsilon=0.100
2023/03/11 04:48:22 PM 

2023/03/11 04:49:14 PM > ep 1205 done. total_steps=238818 | reward=47.0 | episode_steps=55 | hours=0.229 | epsilon=0.100
2023/03/11 04:49:14 PM > ep 1206 done. total_steps=239017 | reward=65.0 | episode_steps=199 | hours=0.229 | epsilon=0.100
2023/03/11 04:49:15 PM > ep 1207 done. total_steps=239345 | reward=198.0 | episode_steps=328 | hours=0.230 | epsilon=0.100
2023/03/11 04:49:16 PM > ep 1208 done. total_steps=239432 | reward=47.0 | episode_steps=87 | hours=0.230 | epsilon=0.100
2023/03/11 04:49:16 PM > ep 1209 done. total_steps=239548 | reward=65.0 | episode_steps=116 | hours=0.230 | epsilon=0.100
2023/03/11 04:49:16 PM > ep 1210 done. total_steps=239773 | reward=154.0 | episode_steps=225 | hours=0.230 | epsilon=0.100
2023/03/11 04:49:17 PM > ep 1211 done. total_steps=239924 | reward=54.0 | episode_steps=151 | hours=0.230 | epsilon=0.100
2023/03/11 04:49:18 PM > ep 1212 done. total_steps=240516 | reward=521.0 | episode_steps=592 | hours=0.230 | epsilon=0.100
2023/03/11 04:49:20 PM 

2023/03/11 04:50:09 PM > ep 1273 done. total_steps=258981 | reward=52.0 | episode_steps=55 | hours=0.244 | epsilon=0.100
2023/03/11 04:50:09 PM > ep 1274 done. total_steps=258988 | reward=3.0 | episode_steps=7 | hours=0.244 | epsilon=0.100
2023/03/11 04:50:09 PM > ep 1275 done. total_steps=259050 | reward=41.0 | episode_steps=62 | hours=0.244 | epsilon=0.100
2023/03/11 04:50:09 PM > ep 1276 done. total_steps=259221 | reward=75.0 | episode_steps=171 | hours=0.245 | epsilon=0.100
2023/03/11 04:50:10 PM > ep 1277 done. total_steps=259404 | reward=154.0 | episode_steps=183 | hours=0.245 | epsilon=0.100
2023/03/11 04:50:11 PM > ep 1278 done. total_steps=259656 | reward=91.0 | episode_steps=252 | hours=0.245 | epsilon=0.100
2023/03/11 04:50:11 PM > ep 1279 done. total_steps=259829 | reward=98.0 | episode_steps=173 | hours=0.245 | epsilon=0.100
2023/03/11 04:50:12 PM > ep 1280 done. total_steps=260052 | reward=120.0 | episode_steps=223 | hours=0.245 | epsilon=0.100
2023/03/11 04:50:13 PM > ep

2023/03/11 04:51:08 PM > ep 1341 done. total_steps=280632 | reward=461.0 | episode_steps=880 | hours=0.261 | epsilon=0.100
2023/03/11 04:51:11 PM > ep 1342 done. total_steps=281632 | reward=0 | episode_steps=1000 | hours=0.262 | epsilon=0.100
2023/03/11 04:51:11 PM > ep 1343 done. total_steps=281706 | reward=41.0 | episode_steps=74 | hours=0.262 | epsilon=0.100
2023/03/11 04:51:14 PM > ep 1344 done. total_steps=282706 | reward=268 | episode_steps=1000 | hours=0.263 | epsilon=0.100
2023/03/11 04:51:14 PM > ep 1345 done. total_steps=282730 | reward=14.0 | episode_steps=24 | hours=0.263 | epsilon=0.100
2023/03/11 04:51:14 PM > ep 1346 done. total_steps=282815 | reward=22.0 | episode_steps=85 | hours=0.263 | epsilon=0.100
2023/03/11 04:51:14 PM > ep 1347 done. total_steps=282882 | reward=46.0 | episode_steps=67 | hours=0.263 | epsilon=0.100
2023/03/11 04:51:17 PM > ep 1348 done. total_steps=283882 | reward=0 | episode_steps=1000 | hours=0.263 | epsilon=0.100
2023/03/11 04:51:20 PM > ep 134

2023/03/11 04:52:34 PM > ep 1409 done. total_steps=312207 | reward=91.0 | episode_steps=232 | hours=0.285 | epsilon=0.100
2023/03/11 04:52:35 PM > ep 1410 done. total_steps=312329 | reward=105.0 | episode_steps=122 | hours=0.285 | epsilon=0.100
2023/03/11 04:52:35 PM > ep 1411 done. total_steps=312362 | reward=2.0 | episode_steps=33 | hours=0.285 | epsilon=0.100
2023/03/11 04:52:35 PM > ep 1412 done. total_steps=312433 | reward=34.0 | episode_steps=71 | hours=0.285 | epsilon=0.100
2023/03/11 04:52:35 PM > ep 1413 done. total_steps=312474 | reward=31.0 | episode_steps=41 | hours=0.285 | epsilon=0.100
2023/03/11 04:52:36 PM > ep 1414 done. total_steps=312945 | reward=282.0 | episode_steps=471 | hours=0.285 | epsilon=0.100
2023/03/11 04:52:36 PM > ep 1415 done. total_steps=313052 | reward=61.0 | episode_steps=107 | hours=0.285 | epsilon=0.100
2023/03/11 04:52:37 PM > ep 1416 done. total_steps=313088 | reward=28.0 | episode_steps=36 | hours=0.285 | epsilon=0.100
2023/03/11 04:52:38 PM > ep

2023/03/11 04:53:14 PM > ep 1477 done. total_steps=326875 | reward=3.0 | episode_steps=10 | hours=0.296 | epsilon=0.100
2023/03/11 04:53:15 PM > ep 1478 done. total_steps=327161 | reward=193.0 | episode_steps=286 | hours=0.296 | epsilon=0.100
2023/03/11 04:53:15 PM > ep 1479 done. total_steps=327181 | reward=3.0 | episode_steps=20 | hours=0.296 | epsilon=0.100
2023/03/11 04:53:16 PM > ep 1480 done. total_steps=327329 | reward=55.0 | episode_steps=148 | hours=0.296 | epsilon=0.100
2023/03/11 04:53:16 PM > ep 1481 done. total_steps=327540 | reward=55.0 | episode_steps=211 | hours=0.296 | epsilon=0.100
2023/03/11 04:53:17 PM > ep 1482 done. total_steps=327698 | reward=119.0 | episode_steps=158 | hours=0.297 | epsilon=0.100
2023/03/11 04:53:17 PM > ep 1483 done. total_steps=327899 | reward=19.0 | episode_steps=201 | hours=0.297 | epsilon=0.100
2023/03/11 04:53:17 PM > ep 1484 done. total_steps=327984 | reward=1.0 | episode_steps=85 | hours=0.297 | epsilon=0.100
2023/03/11 04:53:18 PM > ep 

2023/03/11 04:54:07 PM > ep 1545 done. total_steps=346306 | reward=114 | episode_steps=1000 | hours=0.311 | epsilon=0.100
2023/03/11 04:54:08 PM > ep 1546 done. total_steps=346591 | reward=1.0 | episode_steps=285 | hours=0.311 | epsilon=0.100
2023/03/11 04:54:08 PM > ep 1547 done. total_steps=346634 | reward=10.0 | episode_steps=43 | hours=0.311 | epsilon=0.100
2023/03/11 04:54:08 PM > ep 1548 done. total_steps=346697 | reward=1.0 | episode_steps=63 | hours=0.311 | epsilon=0.100
2023/03/11 04:54:09 PM > ep 1549 done. total_steps=346829 | reward=90.0 | episode_steps=132 | hours=0.311 | epsilon=0.100
2023/03/11 04:54:10 PM > ep 1550 done. total_steps=347379 | reward=142.0 | episode_steps=550 | hours=0.311 | epsilon=0.100
2023/03/11 04:54:11 PM > ep 1551 done. total_steps=347561 | reward=15.0 | episode_steps=182 | hours=0.312 | epsilon=0.100
2023/03/11 04:54:11 PM > ep 1552 done. total_steps=347685 | reward=9.0 | episode_steps=124 | hours=0.312 | epsilon=0.100
2023/03/11 04:54:12 PM > ep 

2023/03/11 04:55:15 PM > ep 1613 done. total_steps=371162 | reward=115.0 | episode_steps=219 | hours=0.330 | epsilon=0.100
2023/03/11 04:55:16 PM > ep 1614 done. total_steps=371412 | reward=196.0 | episode_steps=250 | hours=0.330 | epsilon=0.100
2023/03/11 04:55:16 PM > ep 1615 done. total_steps=371468 | reward=38.0 | episode_steps=56 | hours=0.330 | epsilon=0.100
2023/03/11 04:55:19 PM > ep 1616 done. total_steps=372468 | reward=0 | episode_steps=1000 | hours=0.331 | epsilon=0.100
2023/03/11 04:55:22 PM > ep 1617 done. total_steps=373468 | reward=115 | episode_steps=1000 | hours=0.331 | epsilon=0.100
2023/03/11 04:55:24 PM > ep 1618 done. total_steps=374194 | reward=109.0 | episode_steps=726 | hours=0.332 | epsilon=0.100
2023/03/11 04:55:25 PM > ep 1619 done. total_steps=374610 | reward=34.0 | episode_steps=416 | hours=0.332 | epsilon=0.100
2023/03/11 04:55:27 PM > ep 1620 done. total_steps=375610 | reward=107 | episode_steps=1000 | hours=0.333 | epsilon=0.100
2023/03/11 04:55:30 PM >

2023/03/11 04:56:22 PM > ep 1681 done. total_steps=395342 | reward=1.0 | episode_steps=319 | hours=0.348 | epsilon=0.100
2023/03/11 04:56:22 PM > ep 1682 done. total_steps=395519 | reward=129.0 | episode_steps=177 | hours=0.348 | epsilon=0.100
2023/03/11 04:56:23 PM > ep 1683 done. total_steps=395864 | reward=38.0 | episode_steps=345 | hours=0.348 | epsilon=0.100
2023/03/11 04:56:24 PM > ep 1684 done. total_steps=396104 | reward=106.0 | episode_steps=240 | hours=0.349 | epsilon=0.100
2023/03/11 04:56:26 PM > ep 1685 done. total_steps=396653 | reward=339.0 | episode_steps=549 | hours=0.349 | epsilon=0.100
2023/03/11 04:56:26 PM > ep 1686 done. total_steps=397004 | reward=4.0 | episode_steps=351 | hours=0.349 | epsilon=0.100
2023/03/11 04:56:27 PM > ep 1687 done. total_steps=397275 | reward=89.0 | episode_steps=271 | hours=0.350 | epsilon=0.100
2023/03/11 04:56:28 PM > ep 1688 done. total_steps=397385 | reward=42.0 | episode_steps=110 | hours=0.350 | epsilon=0.100
2023/03/11 04:56:29 PM 

2023/03/11 04:57:22 PM > ep 1749 done. total_steps=417293 | reward=200.0 | episode_steps=251 | hours=0.365 | epsilon=0.100
2023/03/11 04:57:22 PM > ep 1750 done. total_steps=417412 | reward=46.0 | episode_steps=119 | hours=0.365 | epsilon=0.100
2023/03/11 04:57:24 PM > ep 1751 done. total_steps=417851 | reward=28.0 | episode_steps=439 | hours=0.365 | epsilon=0.100
2023/03/11 04:57:24 PM > ep 1752 done. total_steps=417902 | reward=26.0 | episode_steps=51 | hours=0.365 | epsilon=0.100
2023/03/11 04:57:24 PM > ep 1753 done. total_steps=418073 | reward=1.0 | episode_steps=171 | hours=0.365 | epsilon=0.100
2023/03/11 04:57:26 PM > ep 1754 done. total_steps=418629 | reward=52.0 | episode_steps=556 | hours=0.366 | epsilon=0.100
2023/03/11 04:57:27 PM > ep 1755 done. total_steps=418957 | reward=1.0 | episode_steps=328 | hours=0.366 | epsilon=0.100
2023/03/11 04:57:27 PM > ep 1756 done. total_steps=419257 | reward=4.0 | episode_steps=300 | hours=0.366 | epsilon=0.100
2023/03/11 04:57:28 PM > ep

2023/03/11 04:58:12 PM > ep 1817 done. total_steps=435661 | reward=6.0 | episode_steps=210 | hours=0.379 | epsilon=0.100
2023/03/11 04:58:12 PM > ep 1818 done. total_steps=435747 | reward=4.0 | episode_steps=86 | hours=0.379 | epsilon=0.100
2023/03/11 04:58:13 PM > ep 1819 done. total_steps=436244 | reward=92.0 | episode_steps=497 | hours=0.379 | epsilon=0.100
2023/03/11 04:58:14 PM > ep 1820 done. total_steps=436380 | reward=2.0 | episode_steps=136 | hours=0.379 | epsilon=0.100
2023/03/11 04:58:14 PM > ep 1821 done. total_steps=436506 | reward=16.0 | episode_steps=126 | hours=0.379 | epsilon=0.100
2023/03/11 04:58:16 PM > ep 1822 done. total_steps=437094 | reward=60.0 | episode_steps=588 | hours=0.380 | epsilon=0.100
2023/03/11 04:58:16 PM > ep 1823 done. total_steps=437142 | reward=8.0 | episode_steps=48 | hours=0.380 | epsilon=0.100
2023/03/11 04:58:18 PM > ep 1824 done. total_steps=437850 | reward=117.0 | episode_steps=708 | hours=0.380 | epsilon=0.100
2023/03/11 04:58:18 PM > ep 1

2023/03/11 05:00:03 PM > ep 1885 done. total_steps=476598 | reward=0 | episode_steps=1000 | hours=0.410 | epsilon=0.100
2023/03/11 05:00:03 PM > ep 1886 done. total_steps=476612 | reward=2.0 | episode_steps=14 | hours=0.410 | epsilon=0.100
2023/03/11 05:00:04 PM > ep 1887 done. total_steps=476894 | reward=2.0 | episode_steps=282 | hours=0.410 | epsilon=0.100
2023/03/11 05:00:07 PM > ep 1888 done. total_steps=477894 | reward=0 | episode_steps=1000 | hours=0.411 | epsilon=0.100
2023/03/11 05:00:08 PM > ep 1889 done. total_steps=478272 | reward=3.0 | episode_steps=378 | hours=0.411 | epsilon=0.100
2023/03/11 05:00:10 PM > ep 1890 done. total_steps=478969 | reward=1.0 | episode_steps=697 | hours=0.411 | epsilon=0.100
2023/03/11 05:00:13 PM > ep 1891 done. total_steps=479969 | reward=0 | episode_steps=1000 | hours=0.412 | epsilon=0.100
2023/03/11 05:00:15 PM > ep 1892 done. total_steps=480761 | reward=1.0 | episode_steps=792 | hours=0.413 | epsilon=0.100
2023/03/11 05:00:18 PM > ep 1893 don

2023/03/11 05:01:53 PM > ep 1953 done. total_steps=516769 | reward=5.0 | episode_steps=386 | hours=0.440 | epsilon=0.100
2023/03/11 05:01:54 PM > ep 1954 done. total_steps=517330 | reward=1.0 | episode_steps=561 | hours=0.440 | epsilon=0.100
2023/03/11 05:01:55 PM > ep 1955 done. total_steps=517688 | reward=1.0 | episode_steps=358 | hours=0.441 | epsilon=0.100
2023/03/11 05:01:58 PM > ep 1956 done. total_steps=518688 | reward=0 | episode_steps=1000 | hours=0.441 | epsilon=0.100
2023/03/11 05:02:01 PM > ep 1957 done. total_steps=519688 | reward=0 | episode_steps=1000 | hours=0.442 | epsilon=0.100
2023/03/11 05:02:01 PM > ep 1958 done. total_steps=519734 | reward=1.0 | episode_steps=46 | hours=0.442 | epsilon=0.100
2023/03/11 05:02:01 PM > ep 1959 done. total_steps=519804 | reward=2.0 | episode_steps=70 | hours=0.442 | epsilon=0.100
2023/03/11 05:02:04 PM > ep 1960 done. total_steps=520723 | reward=2.0 | episode_steps=919 | hours=0.443 | epsilon=0.100
2023/03/11 05:02:05 PM > ep 1961 don

## Loading

In [5]:
args = Namespace(batch_size=32, cuda=True, entropy_reg=0.01, env=ENV, 
                 epsilon_decay=20000, epsilon_min=0.1, epsilon_start=1.0, exp=None, 
                 frame_skip=4, freeze_interval=200, gamma=0.99, learning_rate=0.0005, 
                 logdir='runs', max_history=10000, max_steps_ep=18000, max_steps_total=4000000, 
                 num_options=4, optimal_eps=0.05, seed=0, switch_goal=True, temp=1, 
                 termination_reg=0.01, update_frequency=4, render=False, name="Not LTL")


## Instantiate Env
env, is_atari = make_env(ENV, 0, True)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
devide='cpu'
## Instantiate Model
option_critic = OptionCriticFeatures(
        in_features=env.observation_space.shape[0],
        num_actions=env.action_space.n,
        num_options=args.num_options,
        temperature=args.temp,
        eps_start=0,
        eps_min=0,
        eps_decay=0,
        eps_test=0,
        device=device
    )

## Load model params
params = torch.load("models/Standard_4_0_1k", map_location=torch.device('cpu'))
option_critic.load_state_dict(params["model_params"])
option_critic.eval()

## Load Env goal
env.init_states = list(env.tocell.keys())
env.goal = {}
env.switch_goal(params["goal_state"])
print("Goal State: {}".format(params["goal_state"]))

Goal State: 62


In [6]:
params["goal_state"]

62

## Policy Graphs

In [16]:
env_states = 104
env_spec = 3
            
for i in range(env_spec):
    
    fig, ax = plt.subplots(figsize=(15, 15))    
#     ax.set_title('State {}'.format(i))
    
    # Plot Grid
    matrice = ax.matshow(env.occupancy, vmin=0, vmax=1)

    # Annulus
    if ENV=="ltl_fourrooms":
        goal_pos = [list(env.tocell[key]) for key in env.goal]
    else:
        goal_pos = [list(env.tocell[env.goal])]
    
    for pos in goal_pos:
        pos.reverse()
    for pos in goal_pos:
        annulus = patches.Annulus(pos, r=0.25, width=0.01, color='red')
        ax.add_patch(annulus)
    
    for j in range(env_states):
        env_obs = torch.zeros(env_states)
        spec_obs[i] = 1
        
        if ENV=="ltl_fourrooms":
            spec_obs = torch.zeros(env_spec)
            env_obs[j] = 1
            obs = torch.concat([env_obs, spec_obs])
        else:
            obs = env_obs
        
        state = option_critic.get_state(to_tensor(obs))
        greedy_option  = option_critic.greedy_option(state)
        action, logp, entropy = option_critic.get_action(state, greedy_option)
        
        pos = list(env.tocell[j])
        pos.reverse()
        
        dir_ = env.directions[action] 
        
        arrow = patches.Arrow(pos[0], pos[1], dir_[0]*0.3, dir_[1]*0.3, width=0.25, color='green')
        ax.add_patch(arrow)
        if ENV=="ltl_fourrooms":
            goal = {env.goal[key][0]: env.tocell[key]  for key in env.goal}
            ax.set_title("{}. {}".format(i, goal))

    plt.show()

In [None]:
env.

In [13]:
goal_pos

[[9, 7]]

## Run

In [None]:
## Instantiate Env
env, is_atari = make_env("fourrooms", True)
device = torch.device('cuda' if torch.cuda.is_available() and args.cuda else 'cpu')
device='cpu'
## Instantiate Model
option_critic = OptionCriticFeatures(
        in_features=env.observation_space.shape[0],
        num_actions=env.action_space.n,
        num_options=args.num_options,
        temperature=args.temp,
        eps_start=0,
        eps_min=0,
        eps_decay=0,
        eps_test=0,
        device=device
    )

## Load model params
params = torch.load("models/Standard_4_0_1k", map_location=torch.device('cpu'))
option_critic.load_state_dict(params["model_params"])
option_critic.eval()

## Load Env goal
env.init_states = list(env.tocell.keys())
env.goal = {}
env.switch_goal(params["goal_state"])

obs   = env.reset()
state = option_critic.get_state(to_tensor(obs))
current_option = option_critic.greedy_option(state)
rewards = 0
while True:        
    
    action, logp, entropy = option_critic.get_action(state, current_option)
    next_obs, reward, done, _ = env.step(action)
    rewards += reward
    
    state = option_critic.get_state(to_tensor(next_obs))
    option_termination, current_option = option_critic.predict_option_termination(state, current_option)

    if done:
        obs   = env.reset()
        print(rewards)
        rewards = 0
#         break

#     logger.log_data(reward)

1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
