In [1]:
import numpy as np
import laserhockey.hockey_env as h_env
import gymnasium as gym
from importlib import reload
from TD3_helpers import *
import time
import torch
import DDPG
import TD3
import os
import matplotlib.pyplot as plt
from tqdm import tqdm
import pickle

In [2]:
# Baseline settings shared by the experiment cells in this notebook: each cell
# takes a copy of this dict and overrides individual entries before handing
# the result to init_train.
start_config = dict(
    # run identification / selection
    name="name",
    agent_type="TD3",
    env_type="hockey",
    test=False,
    render=False,
    # run length
    episodes=400,
    max_steps=50000,
    mode="normal",
    # agent hyperparameters
    eps=0.1,
    discount=0.99,
    update_target_every=100,
    update_policy_every=2,
    hidden_sizes_actor=[256, 256],
    hidden_sizes_critic=[256, 256],
    iter_fit=1,
    batch_size=256,
    smoothing_std=0.0001,
    smoothing_clip=0.0002,
    # optional agent files to load (None = nothing to load)
    checkpoint1=None,
    checkpoint2=None,
    learning_rate_critic=0.001,
    learning_rate_actor=0.001,
    buffer_size=int(1e6),
    theta=0.005,
    prio_replay=False,
    exp_phase=0,
    cdq=True,
)
# Tuning notes from earlier runs:
# - a learning rate of 0.0001 for both actor and critic seems to work best for hockey
# - learning rate 0.001 for pendulum
# - iter_fit 40 for walker, 20 for the rest
# - eps 0.1 seems to be best
# - discount 1 shows the best results (winning later isn't worse than winning earlier?)
# - 20 iterations with policy delay 2 work best for hockey
# fixedactionscomparefinal

In [None]:
### WEAK EXPERIMENT ###
# For every agent variant: call init_train once with test=False in hockey
# mode "weak", then call it again with test=True and "checkpoint1" set to the
# agent file under ./results (presumably written by the first call -- confirm
# in init_train).
for agent_type in ("TD3", "DDPG", "DPU", "TPS", "CDQ"):
    print("AGENT", agent_type)
    # Shared defaults first, then this experiment's overrides on top.
    config = {
        **start_config,
        "discount": 0.99,
        "ou": False,
        "learning_rate_critic": 0.001,
        "learning_rate_actor": 0.001,
        "smoothing_std": 0.0001,
        "smoothing_clip": 0.0002,
        "exp_phase": int(2e3),
        "max_steps": int(1e6),
        "agent_type": agent_type,
        "env_type": "hockey",
        "mode": "weak",
        "name": "no_ou_highlr_no_prio_long",
    }
    init_train(config)

    # Second pass with identical settings, flagged as a test run.
    config.update(
        checkpoint1=f'./results/{config["agent_type"]}_hockey_{config["name"]}_{config["mode"]}_agent.pth',
        test=True,
    )
    init_train(config)

AGENT TD3


Training...:   9%|[32m█████▏                                                 [0m| 94276/1000000 [54:14<10:32:25, 23.87steps/s][0m

In [None]:
### NORMAL EXPERIMENT ###
# Agent comparison in hockey mode "normal": one init_train call with
# test=False followed by one with test=True per agent variant.
for agent_type in ("TD3", "DDPG", "DPU", "TPS", "CDQ"):
    print("AGENT", agent_type)
    config = start_config.copy()
    config.update(
        discount=0.99,
        learning_rate_critic=0.001,
        learning_rate_actor=0.001,
        exp_phase=50,
        episodes=1000,
        agent_type=agent_type,
        env_type="hockey",
        mode="normal",
        name="agentcomparefinal",
    )
    init_train(config)

    # Point the test run at the agent file for this agent/name/mode combination.
    agent_file = f'./results/{config["agent_type"]}_hockey_{config["name"]}_{config["mode"]}_agent.pth'
    config.update(checkpoint1=agent_file, test=True)
    init_train(config)

In [None]:
### PENDULUM EXPERIMENT ###
# Short pendulum run with small [32, 32] actor/critic networks.
# Currently restricted to "CDQ"; the full sweep would be
# ["TD3", "DDPG", "DPU", "TPS", "CDQ"].
for agent_type in ("CDQ",):
    print("AGENT", agent_type)
    config = {
        **start_config,
        "discount": 0.99,
        "episodes": 50,
        "max_steps": int(1e4),
        "exp_phase": 0,
        "hidden_sizes_critic": [32, 32],
        "hidden_sizes_actor": [32, 32],
        "learning_rate_critic": 0.001,
        "learning_rate_actor": 0.001,
        "agent_type": agent_type,
        "env_type": "pendulum",
        "name": "pendulum new",
    }
    # "mode" is left at the start_config value; no test pass in this cell.
    init_train(config)

In [None]:
### CHEETAH EXPERIMENT ###
# Agent comparison on the "cheetah" environment; training call only, no
# test pass in this cell.
for agent_type in ("TD3", "DDPG", "DPU", "TPS", "CDQ"):
    print("AGENT", agent_type)
    config = start_config.copy()
    config.update(
        discount=0.99,
        episodes=200,
        exp_phase=20,
        max_steps=int(1e5),
        learning_rate_critic=0.001,
        learning_rate_actor=0.001,
        agent_type=agent_type,
        env_type="cheetah",
        name="agentcompare",
    )
    init_train(config)

In [None]:
### WALKER EXPERIMENT ###
# Agent comparison on the "walker" environment (TD3 runs last here).
for agent_type in ("DDPG", "DPU", "TPS", "CDQ", "TD3"):
    print("AGENT", agent_type)
    config = {
        **start_config,
        "episodes": 500,
        "exp_phase": 50,
        "max_steps": int(1e5),
        "learning_rate_critic": 0.001,
        "learning_rate_actor": 0.001,
        "agent_type": agent_type,
        "env_type": "walker",
        "name": "agentcompare_long",
    }
    init_train(config)
    # NOTE(review): nothing in this cell reads "checkpoint1" after this point
    # (config is rebuilt on the next iteration and no test call follows); the
    # value only remains visible on `config` after the final iteration.
    config["checkpoint1"] = (
        f'./results/{config["agent_type"]}_{config["env_type"]}_{config["name"]}_{config["mode"]}_agent.pth'
    )

In [None]:
### DEFENSE EXPERIMENT ###
# Agent comparison in hockey mode "defense": init_train with test=False,
# then again with test=True and "checkpoint1" set to the matching agent file.
for agent_type in ("TD3", "DDPG", "DPU", "TPS", "CDQ"):
    print("AGENT", agent_type)
    config = start_config.copy()
    config.update(
        episodes=1000,
        agent_type=agent_type,
        env_type="hockey",
        mode="defense",
        name="agentcompare_new",
    )
    init_train(config)

    config.update(
        checkpoint1=f'./results/{config["agent_type"]}_hockey_{config["name"]}_{config["mode"]}_agent.pth',
        test=True,
    )
    init_train(config)

In [None]:
### ATTACK EXPERIMENT ###
# Agent comparison in hockey mode "attack" (TD3 runs last here); training
# call only.
for agent_type in ("DDPG", "DPU", "TPS", "CDQ", "TD3"):
    print("AGENT", agent_type)
    config = {
        **start_config,
        "episodes": 1000,
        "agent_type": agent_type,
        "env_type": "hockey",
        "mode": "attack",
    }
    # The run name embeds the environment type and the agent variant.
    config["name"] = f'env_{config["env_type"]}_agent_{agent_type}'
    init_train(config)
    # NOTE(review): nothing in this cell reads "checkpoint1" after this point
    # (config is rebuilt on the next iteration and no test call follows).
    config["checkpoint1"] = (
        f'./results/{config["agent_type"]}_hockey_{config["name"]}_{config["mode"]}_agent.pth'
    )

In [None]:
### TRAINING CAMP ###
# One config object is carried through four consecutive stages. Each stage
# calls init_train with test=False in its training mode, then sets
# "checkpoint1" to the agent file for that mode and calls init_train again
# with test=True in the evaluation mode. Because "checkpoint1" stays set, it
# is still present in the config when the next stage's training call runs.
config = start_config.copy()
config["name"] = "traincamp_new"
config["env_type"] = "hockey"

# (training mode, episodes, evaluation mode), in execution order
stages = [
    ("defense", 500, "weak"),     # defense training, tested against "weak"
    ("attack", 500, "weak"),      # shoot training, tested against "weak"
    ("weak", 1000, "normal"),     # regular training, tested against "normal"
    ("normal", 1000, "normal"),   # regular training, tested against "normal"
]

for train_mode, n_episodes, eval_mode in stages:
    # training pass
    config["mode"] = train_mode
    config["episodes"] = n_episodes
    config["test"] = False
    init_train(config)

    # The path is built while "mode" still holds the training mode.
    config["checkpoint1"] = f'./results/{config["agent_type"]}_hockey_{config["name"]}_{config["mode"]}_agent.pth'

    # test pass
    config["mode"] = eval_mode
    config["test"] = True
    init_train(config)

In [None]:
### priority replay !!!NOT USED!!! ###
# Runs the TD3 agent on hockey mode "normal" twice, once with "prio_replay"
# switched on and once switched off, each followed by a test pass.
for pri in [True, False]:
    config = start_config.copy()
    config["episodes"] = 5000
    config["prio_replay"] = pri
    config["name"] = f"prio_{pri}"
    config["mode"] = "normal"
    # Agent and environment are passed through the config, matching every
    # other cell in this notebook, which calls init_train(config) with a
    # single argument (the previous three-argument call did not).
    config["agent_type"] = "TD3"
    config["env_type"] = "hockey"
    init_train(config)
    config["checkpoint1"] = f'./results/{config["agent_type"]}_hockey_{config["name"]}_{config["mode"]}_agent.pth'
    # test agent
    config["mode"] = "normal"
    config["test"] = True
    # Optional overrides for a short, rendered test run:
    #config["episodes"] = 10
    #config["render"] = True
    init_train(config)

In [None]:
### NOT USED ###
# Selfplay round(s): both checkpoint slots are set to the same agent file,
# init_train is called in mode "selfplay" with test=False, and then called
# again with test=True in mode "normal" with "checkpoint2" cleared.
config = {**start_config, "episodes": 1000, "name": "20ktest"}
for i in range(1):
    # With range(1) only i == 0 occurs, so this branch never executes as written.
    if i > 0:
        config["mode"] = "selfplay"
    # Both slots get the path built from the *current* name/mode, i.e. before
    # the name is switched to "selfplay{i}" below.
    # NOTE(review): this path uses the prefix "TD3Agent_hockey_" whereas the
    # other cells build "{agent_type}_hockey_..." -- confirm which file exists.
    config["checkpoint1"] = config["checkpoint2"] = (
        f'./results/TD3Agent_hockey_{config["name"]}_{config["mode"]}_agent.pth'
    )
    config.update(name=f"selfplay{i}", mode="selfplay", test=False)
    init_train(config)

    # "checkpoint1" is not reassigned here, so the test call still carries the
    # path that was set before the selfplay training call.
    config.update(test=True, mode="normal", checkpoint2=None)
    init_train(config)