In [1]:
from absl import flags
import os, traceback
import run_bc

# Global absl flag registry; run_bc_job resets and re-parses it with a fresh argv for every run.
FLAGS = flags.FLAGS

# Expert demonstration file for each environment, passed to run_bc as --expert_path.
# Insertion order is significant: it is also the order in which ENVS lists the environments.
EXPERT_PATHS = {
    "HalfCheetah-v2": "experts/HalfCheetah-v2_25.pkl",
    "Hopper-v2": "experts/Hopper-v2_25.pkl",
    "Humanoid-v2": "experts/Humanoid-v2_25.pkl",
    "Ant-v2": "experts/Ant-v2_25.pkl",
    "Walker2d-v2": "experts/Walker2d-v2_25.pkl",
    "AdroitHandHammer-v1": "experts/hammer-expert-v1_top100.pkl",
}

# Sweep axes: every environment that has an expert file, crossed with DEMOS and SEEDS.
ENVS = list(EXPERT_PATHS)
DEMOS = [3, 10]           # values for --num_demonstrations
SEEDS = list(range(5))    # values for --seed (0..4)

LOGDIR = "logs"           # value for --logdir
EVAL_EPS = 10             # value for --evaluation_episodes

def run_bc_job(env_name: str, demos: int, seed: int):
    """Run a single ``run_bc`` job inside the current process.

    Builds a command-line style argv, loads it into the global absl ``FLAGS``
    object and then calls ``run_bc.main`` directly (there is no ``app.run``
    in a notebook, so flag parsing has to be done by hand).

    Args:
        env_name: Environment id; must be a key of ``EXPERT_PATHS``.
        demos: Value forwarded as ``--num_demonstrations``.
        seed: Value forwarded as ``--seed``.

    Raises:
        KeyError: If ``env_name`` has no entry in ``EXPERT_PATHS``.
        Exception / SystemExit: Anything raised by flag parsing or by
            ``run_bc.main`` propagates unchanged to the caller.
    """
    try:
        expert_path = EXPERT_PATHS[env_name]
    except KeyError:
        # Same exception type as before, but with a message that is readable
        # when a caller prints just the exception.
        raise KeyError(
            f"no expert path configured for env {env_name!r}; "
            f"known envs: {sorted(EXPERT_PATHS)}"
        ) from None

    argv = [
        "run_bc.py",
        f"--env_name={env_name}",
        f"--seed={seed}",
        f"--num_demonstrations={demos}",
        "--expert_backend=local",
        f"--expert_path={expert_path}",
        f"--evaluation_episodes={EVAL_EPS}",
        f"--logdir={LOGDIR}",
    ]

    # Reset flags left over from the previous run before parsing the new argv.
    # This stays best-effort (a failed reset must not abort the job), but the
    # failure is reported instead of being silently swallowed, because stale
    # flag values could otherwise leak into this run unnoticed.
    try:
        FLAGS.unparse_flags()
    except Exception as reset_error:
        print(
            f"[BC][warn] could not reset absl flags ({reset_error!r}); "
            "values from a previous run may persist"
        )
    FLAGS(argv)  # parse our argv for this run

    print(f"[BC] env={env_name} demos={demos} seed={seed}  -> starting")
    run_bc.main(None)  # call main() directly; its argv parameter is passed as None
    print(f"[BC] env={env_name} demos={demos} seed={seed}  -> done\n")

# Sweep every (env, demos, seed) combination. A failing job is reported and
# recorded, but never aborts the remaining jobs.
failed_jobs = []
for env in ENVS:
    for demos in DEMOS:
        for seed in SEEDS:
            try:
                run_bc_job(env, demos, seed)
            except SystemExit as exit_request:
                # absl may call sys.exit() in some pathways. Only a clean exit
                # (code None or 0) counts as completion; any other code is a failure.
                if exit_request.code in (None, 0):
                    print(f"[BC] env={env} demos={demos} seed={seed} -> SystemExit (treated as complete)")
                else:
                    print(f"[ERROR] env={env} demos={demos} seed={seed}: SystemExit with code {exit_request.code!r}")
                    failed_jobs.append((env, demos, seed))
            except Exception as e:
                print(f"[ERROR] env={env} demos={demos} seed={seed}: {e}")
                traceback.print_exc()
                failed_jobs.append((env, demos, seed))

# Only claim full success when nothing failed; otherwise list what needs re-running.
if failed_jobs:
    print(f"BC sweep finished with {len(failed_jobs)} failed run(s): {failed_jobs}")
else:
    print(f"All BC runs finished. CSVs are under {LOGDIR}/bc/<env>/d<demos>/s<seed>/progress.csv")


Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.
  if (distutils.version.LooseVersion(tf.__version__) <


[BC] env=HalfCheetah-v2 demos=3 seed=0  -> starting


  logger.warn(
  from distutils.dep_util import newer, newer_group
[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmpt69lhxih.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmpt69lhxih
[reverb/cc/platform/default/server.cc:71] Started replay server on port 33081
[reverb/cc/client.cc:165] Sampler and server are owned by the same process (45885) so Table priority_table is accessed directly without gRPC.
[reverb/cc/client.cc:165] Sampler and server are owned by the same process (45885) so Table priority_table is accessed directly without gRPC.
[reverb/cc/client.cc:165] Sampler and server are owned by the same process (45885) so Table priority_table is accessed directly without gRPC.
[reverb/cc/client.cc:165] Sampler and server are owned by the same process (45885) so Table priority_table is accessed directly without gRPC.
[reverb/cc/platform/default/server.cc:84] Shutting down replay server
[reverb/cc/pl

[BC] env=HalfCheetah-v2 demos=3 seed=0  -> done

[BC] env=HalfCheetah-v2 demos=3 seed=1  -> starting


[reverb/cc/platform/default/server.cc:84] Shutting down replay server
[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmpaee2h0jx.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmpaee2h0jx
[reverb/cc/platform/default/server.cc:71] Started replay server on port 33543
[reverb/cc/client.cc:165] Sampler and server are owned by the same process (45885) so Table priority_table is accessed directly without gRPC.


[BC] env=HalfCheetah-v2 demos=3 seed=1  -> done

[BC] env=HalfCheetah-v2 demos=3 seed=2  -> starting


[reverb/cc/platform/default/server.cc:84] Shutting down replay server
[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmpd3jp3tjw.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmpd3jp3tjw
[reverb/cc/platform/default/server.cc:71] Started replay server on port 40617


[BC] env=HalfCheetah-v2 demos=3 seed=2  -> done

[BC] env=HalfCheetah-v2 demos=3 seed=3  -> starting


[reverb/cc/platform/default/server.cc:84] Shutting down replay server
[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmp0kdq1noo.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmp0kdq1noo
[reverb/cc/platform/default/server.cc:71] Started replay server on port 41105


[BC] env=HalfCheetah-v2 demos=3 seed=3  -> done

[BC] env=HalfCheetah-v2 demos=3 seed=4  -> starting


[reverb/cc/platform/default/server.cc:84] Shutting down replay server
[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmp8uwe2c64.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmp8uwe2c64
[reverb/cc/platform/default/server.cc:71] Started replay server on port 44621
[reverb/cc/client.cc:165] Sampler and server are owned by the same process (45885) so Table priority_table is accessed directly without gRPC.


[BC] env=HalfCheetah-v2 demos=3 seed=4  -> done

[BC] env=HalfCheetah-v2 demos=10 seed=0  -> starting


[reverb/cc/platform/default/server.cc:84] Shutting down replay server
[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmpqwem9cvl.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmpqwem9cvl
[reverb/cc/platform/default/server.cc:71] Started replay server on port 36099


[BC] env=HalfCheetah-v2 demos=10 seed=0  -> done

[BC] env=HalfCheetah-v2 demos=10 seed=1  -> starting


[reverb/cc/platform/default/server.cc:84] Shutting down replay server
[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmp9gmo268s.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmp9gmo268s
[reverb/cc/platform/default/server.cc:71] Started replay server on port 43773


[BC] env=HalfCheetah-v2 demos=10 seed=1  -> done

[BC] env=HalfCheetah-v2 demos=10 seed=2  -> starting


[reverb/cc/platform/default/server.cc:84] Shutting down replay server
[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmp7klxa5e6.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmp7klxa5e6
[reverb/cc/platform/default/server.cc:71] Started replay server on port 39829


[BC] env=HalfCheetah-v2 demos=10 seed=2  -> done

[BC] env=HalfCheetah-v2 demos=10 seed=3  -> starting


[reverb/cc/platform/default/server.cc:84] Shutting down replay server
[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmp5je0iwvt.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmp5je0iwvt
[reverb/cc/platform/default/server.cc:71] Started replay server on port 34531


[BC] env=HalfCheetah-v2 demos=10 seed=3  -> done

[BC] env=HalfCheetah-v2 demos=10 seed=4  -> starting


[reverb/cc/platform/default/server.cc:84] Shutting down replay server


[BC] env=HalfCheetah-v2 demos=10 seed=4  -> done

[BC] env=Hopper-v2 demos=3 seed=0  -> starting


  logger.warn(
[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmpmh_scawb.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmpmh_scawb
[reverb/cc/platform/default/server.cc:71] Started replay server on port 43901
[reverb/cc/client.cc:165] Sampler and server are owned by the same process (45885) so Table priority_table is accessed directly without gRPC.
[reverb/cc/platform/default/server.cc:84] Shutting down replay server
[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmprpmjtvwr.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmprpmjtvwr
[reverb/cc/platform/default/server.cc:71] Started replay server on port 43619


[BC] env=Hopper-v2 demos=3 seed=0  -> done

[BC] env=Hopper-v2 demos=3 seed=1  -> starting


[reverb/cc/platform/default/server.cc:84] Shutting down replay server
[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmpagi4whib.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmpagi4whib
[reverb/cc/platform/default/server.cc:71] Started replay server on port 36659


[BC] env=Hopper-v2 demos=3 seed=1  -> done

[BC] env=Hopper-v2 demos=3 seed=2  -> starting


[reverb/cc/platform/default/server.cc:84] Shutting down replay server
[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmpdr5908_0.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmpdr5908_0
[reverb/cc/platform/default/server.cc:71] Started replay server on port 45587


[BC] env=Hopper-v2 demos=3 seed=2  -> done

[BC] env=Hopper-v2 demos=3 seed=3  -> starting


[reverb/cc/platform/default/server.cc:84] Shutting down replay server
[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmpmb5hwpgg.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmpmb5hwpgg
[reverb/cc/platform/default/server.cc:71] Started replay server on port 42981


[BC] env=Hopper-v2 demos=3 seed=3  -> done

[BC] env=Hopper-v2 demos=3 seed=4  -> starting


[reverb/cc/platform/default/server.cc:84] Shutting down replay server
[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmpikiw6j8r.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmpikiw6j8r
[reverb/cc/platform/default/server.cc:71] Started replay server on port 45971


[BC] env=Hopper-v2 demos=3 seed=4  -> done

[BC] env=Hopper-v2 demos=10 seed=0  -> starting


[reverb/cc/platform/default/server.cc:84] Shutting down replay server
[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmp4v9zq9ud.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmp4v9zq9ud
[reverb/cc/platform/default/server.cc:71] Started replay server on port 42987


[BC] env=Hopper-v2 demos=10 seed=0  -> done

[BC] env=Hopper-v2 demos=10 seed=1  -> starting


[reverb/cc/platform/default/server.cc:84] Shutting down replay server


[BC] env=Hopper-v2 demos=10 seed=1  -> done

[BC] env=Hopper-v2 demos=10 seed=2  -> starting


[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmp8cj7s566.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmp8cj7s566
[reverb/cc/platform/default/server.cc:71] Started replay server on port 45933
[reverb/cc/platform/default/server.cc:84] Shutting down replay server
[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmpsu65ow8w.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmpsu65ow8w
[reverb/cc/platform/default/server.cc:71] Started replay server on port 45661


[BC] env=Hopper-v2 demos=10 seed=2  -> done

[BC] env=Hopper-v2 demos=10 seed=3  -> starting


[reverb/cc/platform/default/server.cc:84] Shutting down replay server
[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmplv54k8_b.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmplv54k8_b
[reverb/cc/platform/default/server.cc:71] Started replay server on port 45383


[BC] env=Hopper-v2 demos=10 seed=3  -> done

[BC] env=Hopper-v2 demos=10 seed=4  -> starting


[reverb/cc/platform/default/server.cc:84] Shutting down replay server


[BC] env=Hopper-v2 demos=10 seed=4  -> done

[BC] env=Humanoid-v2 demos=3 seed=0  -> starting


  logger.warn(
[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmpb6diyl41.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmpb6diyl41
[reverb/cc/platform/default/server.cc:71] Started replay server on port 34789
[reverb/cc/platform/default/server.cc:84] Shutting down replay server


[BC] env=Humanoid-v2 demos=3 seed=0  -> done

[BC] env=Humanoid-v2 demos=3 seed=1  -> starting


[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmpavzizopf.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmpavzizopf
[reverb/cc/platform/default/server.cc:71] Started replay server on port 45755
[reverb/cc/client.cc:165] Sampler and server are owned by the same process (45885) so Table priority_table is accessed directly without gRPC.
[reverb/cc/platform/default/server.cc:84] Shutting down replay server


[BC] env=Humanoid-v2 demos=3 seed=1  -> done

[BC] env=Humanoid-v2 demos=3 seed=2  -> starting


[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmpl9l2kbg3.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmpl9l2kbg3
[reverb/cc/platform/default/server.cc:71] Started replay server on port 45193
[reverb/cc/platform/default/server.cc:84] Shutting down replay server


[BC] env=Humanoid-v2 demos=3 seed=2  -> done

[BC] env=Humanoid-v2 demos=3 seed=3  -> starting


[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmpzq_urchz.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmpzq_urchz
[reverb/cc/platform/default/server.cc:71] Started replay server on port 34009
[reverb/cc/platform/default/server.cc:84] Shutting down replay server


[BC] env=Humanoid-v2 demos=3 seed=3  -> done

[BC] env=Humanoid-v2 demos=3 seed=4  -> starting


[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmp2amhw_qb.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmp2amhw_qb
[reverb/cc/platform/default/server.cc:71] Started replay server on port 38917
[reverb/cc/platform/default/server.cc:84] Shutting down replay server


[BC] env=Humanoid-v2 demos=3 seed=4  -> done

[BC] env=Humanoid-v2 demos=10 seed=0  -> starting


[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmpd7ktagzi.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmpd7ktagzi
[reverb/cc/platform/default/server.cc:71] Started replay server on port 44877
[reverb/cc/platform/default/server.cc:84] Shutting down replay server


[BC] env=Humanoid-v2 demos=10 seed=0  -> done

[BC] env=Humanoid-v2 demos=10 seed=1  -> starting


[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmpm1joj589.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmpm1joj589
[reverb/cc/platform/default/server.cc:71] Started replay server on port 35181
[reverb/cc/platform/default/server.cc:84] Shutting down replay server


[BC] env=Humanoid-v2 demos=10 seed=1  -> done

[BC] env=Humanoid-v2 demos=10 seed=2  -> starting


[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmpp9h7zn8c.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmpp9h7zn8c
[reverb/cc/platform/default/server.cc:71] Started replay server on port 36743
[reverb/cc/platform/default/server.cc:84] Shutting down replay server


[BC] env=Humanoid-v2 demos=10 seed=2  -> done

[BC] env=Humanoid-v2 demos=10 seed=3  -> starting


[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmpt0ep0n2t.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmpt0ep0n2t
[reverb/cc/platform/default/server.cc:71] Started replay server on port 43203
[reverb/cc/platform/default/server.cc:84] Shutting down replay server


[BC] env=Humanoid-v2 demos=10 seed=3  -> done

[BC] env=Humanoid-v2 demos=10 seed=4  -> starting


[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmpj1txff9z.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmpj1txff9z
[reverb/cc/platform/default/server.cc:71] Started replay server on port 45689
[reverb/cc/platform/default/server.cc:84] Shutting down replay server


[BC] env=Humanoid-v2 demos=10 seed=4  -> done

[BC] env=Ant-v2 demos=3 seed=0  -> starting


  logger.warn(
[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmp9ix0ur5l.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmp9ix0ur5l
[reverb/cc/platform/default/server.cc:71] Started replay server on port 37945
[reverb/cc/platform/default/server.cc:84] Shutting down replay server


[BC] env=Ant-v2 demos=3 seed=0  -> done

[BC] env=Ant-v2 demos=3 seed=1  -> starting


[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmp5fl0qhup.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmp5fl0qhup
[reverb/cc/platform/default/server.cc:71] Started replay server on port 42491
[reverb/cc/platform/default/server.cc:84] Shutting down replay server


[BC] env=Ant-v2 demos=3 seed=1  -> done

[BC] env=Ant-v2 demos=3 seed=2  -> starting


[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmpvdnfpzo6.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmpvdnfpzo6
[reverb/cc/platform/default/server.cc:71] Started replay server on port 34709
[reverb/cc/platform/default/server.cc:84] Shutting down replay server


[BC] env=Ant-v2 demos=3 seed=2  -> done

[BC] env=Ant-v2 demos=3 seed=3  -> starting


[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmpaihbjf3l.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmpaihbjf3l
[reverb/cc/platform/default/server.cc:71] Started replay server on port 41603
[reverb/cc/platform/default/server.cc:84] Shutting down replay server


[BC] env=Ant-v2 demos=3 seed=3  -> done

[BC] env=Ant-v2 demos=3 seed=4  -> starting


[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmprv_5736l.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmprv_5736l
[reverb/cc/platform/default/server.cc:71] Started replay server on port 44059
[reverb/cc/platform/default/server.cc:84] Shutting down replay server


[BC] env=Ant-v2 demos=3 seed=4  -> done

[BC] env=Ant-v2 demos=10 seed=0  -> starting


[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmp8j8ns9ew.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmp8j8ns9ew
[reverb/cc/platform/default/server.cc:71] Started replay server on port 40661
[reverb/cc/platform/default/server.cc:84] Shutting down replay server


[BC] env=Ant-v2 demos=10 seed=0  -> done

[BC] env=Ant-v2 demos=10 seed=1  -> starting


[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmpqyolkmjh.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmpqyolkmjh
[reverb/cc/platform/default/server.cc:71] Started replay server on port 44049
[reverb/cc/platform/default/server.cc:84] Shutting down replay server


[BC] env=Ant-v2 demos=10 seed=1  -> done

[BC] env=Ant-v2 demos=10 seed=2  -> starting


[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmpufllpu33.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmpufllpu33
[reverb/cc/platform/default/server.cc:71] Started replay server on port 33383
[reverb/cc/platform/default/server.cc:84] Shutting down replay server


[BC] env=Ant-v2 demos=10 seed=2  -> done

[BC] env=Ant-v2 demos=10 seed=3  -> starting


[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmpbkea3w9j.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmpbkea3w9j
[reverb/cc/platform/default/server.cc:71] Started replay server on port 36653
[reverb/cc/platform/default/server.cc:84] Shutting down replay server


[BC] env=Ant-v2 demos=10 seed=3  -> done

[BC] env=Ant-v2 demos=10 seed=4  -> starting


[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmpvmmhpbqv.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmpvmmhpbqv
[reverb/cc/platform/default/server.cc:71] Started replay server on port 44275
[reverb/cc/platform/default/server.cc:84] Shutting down replay server


[BC] env=Ant-v2 demos=10 seed=4  -> done

[BC] env=Walker2d-v2 demos=3 seed=0  -> starting


  logger.warn(
[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmp09fcnhz2.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmp09fcnhz2
[reverb/cc/platform/default/server.cc:71] Started replay server on port 38859
[reverb/cc/platform/default/server.cc:84] Shutting down replay server


[BC] env=Walker2d-v2 demos=3 seed=0  -> done

[BC] env=Walker2d-v2 demos=3 seed=1  -> starting


[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmphw3ltkj3.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmphw3ltkj3
[reverb/cc/platform/default/server.cc:71] Started replay server on port 36159
[reverb/cc/platform/default/server.cc:84] Shutting down replay server


[BC] env=Walker2d-v2 demos=3 seed=1  -> done

[BC] env=Walker2d-v2 demos=3 seed=2  -> starting


[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmprjly7xnk.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmprjly7xnk
[reverb/cc/platform/default/server.cc:71] Started replay server on port 36939
[reverb/cc/client.cc:165] Sampler and server are owned by the same process (45885) so Table priority_table is accessed directly without gRPC.
[reverb/cc/platform/default/server.cc:84] Shutting down replay server


[BC] env=Walker2d-v2 demos=3 seed=2  -> done

[BC] env=Walker2d-v2 demos=3 seed=3  -> starting


[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmp4yv_ftg5.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmp4yv_ftg5
[reverb/cc/platform/default/server.cc:71] Started replay server on port 43691
[reverb/cc/platform/default/server.cc:84] Shutting down replay server


[BC] env=Walker2d-v2 demos=3 seed=3  -> done

[BC] env=Walker2d-v2 demos=3 seed=4  -> starting


[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmpom_tam5p.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmpom_tam5p
[reverb/cc/platform/default/server.cc:71] Started replay server on port 39981
[reverb/cc/platform/default/server.cc:84] Shutting down replay server


[BC] env=Walker2d-v2 demos=3 seed=4  -> done

[BC] env=Walker2d-v2 demos=10 seed=0  -> starting


[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmpo32760i0.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmpo32760i0
[reverb/cc/platform/default/server.cc:71] Started replay server on port 37345
[reverb/cc/platform/default/server.cc:84] Shutting down replay server


[BC] env=Walker2d-v2 demos=10 seed=0  -> done

[BC] env=Walker2d-v2 demos=10 seed=1  -> starting


[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmpp2trvkgq.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmpp2trvkgq
[reverb/cc/platform/default/server.cc:71] Started replay server on port 45413
[reverb/cc/platform/default/server.cc:84] Shutting down replay server


[BC] env=Walker2d-v2 demos=10 seed=1  -> done

[BC] env=Walker2d-v2 demos=10 seed=2  -> starting


[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmpetdo0pu7.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmpetdo0pu7
[reverb/cc/platform/default/server.cc:71] Started replay server on port 43225
[reverb/cc/platform/default/server.cc:84] Shutting down replay server


[BC] env=Walker2d-v2 demos=10 seed=2  -> done

[BC] env=Walker2d-v2 demos=10 seed=3  -> starting


[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmpm56wl070.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmpm56wl070
[reverb/cc/platform/default/server.cc:71] Started replay server on port 41775
[reverb/cc/platform/default/server.cc:84] Shutting down replay server


[BC] env=Walker2d-v2 demos=10 seed=3  -> done

[BC] env=Walker2d-v2 demos=10 seed=4  -> starting


[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmpvgvnneag.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmpvgvnneag
[reverb/cc/platform/default/server.cc:71] Started replay server on port 38215
[reverb/cc/platform/default/server.cc:84] Shutting down replay server


[BC] env=Walker2d-v2 demos=10 seed=4  -> done

[BC] env=AdroitHandHammer-v1 demos=3 seed=0  -> starting


[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmpag9r1se9.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmpag9r1se9
[reverb/cc/platform/default/server.cc:71] Started replay server on port 46559
[reverb/cc/platform/default/server.cc:84] Shutting down replay server


[BC] env=AdroitHandHammer-v1 demos=3 seed=0  -> done

[BC] env=AdroitHandHammer-v1 demos=3 seed=1  -> starting


[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmpnvf11v8k.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmpnvf11v8k
[reverb/cc/platform/default/server.cc:71] Started replay server on port 37157
[reverb/cc/platform/default/server.cc:84] Shutting down replay server


[BC] env=AdroitHandHammer-v1 demos=3 seed=1  -> done

[BC] env=AdroitHandHammer-v1 demos=3 seed=2  -> starting


[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmpnlwuc205.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmpnlwuc205
[reverb/cc/platform/default/server.cc:71] Started replay server on port 38913
[reverb/cc/platform/default/server.cc:84] Shutting down replay server


[BC] env=AdroitHandHammer-v1 demos=3 seed=2  -> done

[BC] env=AdroitHandHammer-v1 demos=3 seed=3  -> starting


[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmpy4t9u7ba.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmpy4t9u7ba
[reverb/cc/platform/default/server.cc:71] Started replay server on port 46505
[reverb/cc/platform/default/server.cc:84] Shutting down replay server


[BC] env=AdroitHandHammer-v1 demos=3 seed=3  -> done

[BC] env=AdroitHandHammer-v1 demos=3 seed=4  -> starting


[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmp1zign1gz.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmp1zign1gz
[reverb/cc/platform/default/server.cc:71] Started replay server on port 34835
[reverb/cc/platform/default/server.cc:84] Shutting down replay server


[BC] env=AdroitHandHammer-v1 demos=3 seed=4  -> done

[BC] env=AdroitHandHammer-v1 demos=10 seed=0  -> starting


[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmpa7jzxfmj.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmpa7jzxfmj
[reverb/cc/platform/default/server.cc:71] Started replay server on port 33079
[reverb/cc/platform/default/server.cc:84] Shutting down replay server


[BC] env=AdroitHandHammer-v1 demos=10 seed=0  -> done

[BC] env=AdroitHandHammer-v1 demos=10 seed=1  -> starting


[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmpeg4w731p.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmpeg4w731p
[reverb/cc/platform/default/server.cc:71] Started replay server on port 44655
[reverb/cc/platform/default/server.cc:84] Shutting down replay server


[BC] env=AdroitHandHammer-v1 demos=10 seed=1  -> done

[BC] env=AdroitHandHammer-v1 demos=10 seed=2  -> starting


[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmpy8sxo6bf.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmpy8sxo6bf
[reverb/cc/platform/default/server.cc:71] Started replay server on port 39751
[reverb/cc/platform/default/server.cc:84] Shutting down replay server


[BC] env=AdroitHandHammer-v1 demos=10 seed=2  -> done

[BC] env=AdroitHandHammer-v1 demos=10 seed=3  -> starting


[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmpfol0ssej.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmpfol0ssej
[reverb/cc/platform/default/server.cc:71] Started replay server on port 41087
[reverb/cc/platform/default/server.cc:84] Shutting down replay server


[BC] env=AdroitHandHammer-v1 demos=10 seed=3  -> done

[BC] env=AdroitHandHammer-v1 demos=10 seed=4  -> starting


[reverb/cc/platform/tfrecord_checkpointer.cc:162]  Initializing TFRecordCheckpointer in /tmp/tmp8wzu0e43.
[reverb/cc/platform/tfrecord_checkpointer.cc:552] Loading latest checkpoint from /tmp/tmp8wzu0e43
[reverb/cc/platform/default/server.cc:71] Started replay server on port 45725
[reverb/cc/platform/default/server.cc:84] Shutting down replay server


[BC] env=AdroitHandHammer-v1 demos=10 seed=4  -> done

All BC runs finished. CSVs are under logs/bc/<env>/d<demos>/s<seed>/progress.csv


In [None]:
# !python run_bc.py \
#   --env_name=HalfCheetah-v2 \
#   --seed=0 \
#   --num_demonstrations=3 \
#   --expert_backend=local \
#   --expert_path=experts/HalfCheetah-v2_25.pkl \
#   --evaluation_episodes=10 \
#   --logdir=logs


In [None]:
# !python run_csil.py \
#   --env_name=AdroitHandHammer-v1 \
#   --expert_path=experts/hammer-expert-v1_top100.pkl \
#   --entropy_coefficient=0.1 \
#   --policy_pretrain_steps=500 \
#   --policy_pretrain_lr=0.0001 \
#   --num_demonstrations=3 \
#   --seed=1 \
#   --num_steps=510000 \
#   --eval_every=10000 \
#   --evaluation_episodes=10

In [None]:
# !python run_csil.py \
#   --env_name=Humanoid-v2 \
#   --expert_path=experts/Humanoid-v2_25.pkl \
#   --entropy_coefficient=0.01 \
#   --policy_pretrain_steps=500 \
#   --policy_pretrain_lr=0.001 \
#   --num_demonstrations=3 \
#   --seed=0 \
#   --num_steps=310000 \
#   --eval_every=10000 \
#   --evaluation_episodes=10


In [None]:
# !python run_csil.py \
#   --env_name=Ant-v2 \
#   --expert_path=experts/Ant-v2_25.pkl \
#   --entropy_coefficient=0.01 \
#   --policy_pretrain_steps=25000 \
#   --policy_pretrain_lr=0.001 \
#   --num_demonstrations=3 \
#   --seed=0 \
#   --num_steps=310000 \
#   --eval_every=10000 \
#   --evaluation_episodes=10


In [None]:
# !python run_csil.py \
#   --env_name=Walker2d-v2 \
#   --expert_path=experts/Walker2d-v2_25.pkl \
#   --entropy_coefficient=0.01 \
#   --policy_pretrain_steps=25000 \
#   --policy_pretrain_lr=0.001 \
#   --num_demonstrations=3 \
#   --seed=0 \
#   --num_steps=310000 \
#   --eval_every=10000 \
#   --evaluation_episodes=10


In [None]:
# !python run_csil.py \
#   --env_name=HalfCheetah-v2 \
#   --expert_path=experts/HalfCheetah-v2_25.pkl \
#   --entropy_coefficient=0.01 \
#   --policy_pretrain_steps=25000 \
#   --policy_pretrain_lr=0.001 \
#   --num_demonstrations=3 \
#   --seed=0 \
#   --num_steps=310000 \
#   --eval_every=10000 \
#   --evaluation_episodes=10
