## Test training method (Worm Domain).

@author: j-huthmacher

For testing and debugging!

In [None]:
# Notebook bootstrap: make the parent directory importable and switch the
# working directory to it so relative paths such as "models/..." resolve there.
# `sys` must be imported before use; the original cell relied on it already
# being present in the kernel and failed with NameError on a fresh start.
# NOTE: os.chdir("../") is relative, so re-running this cell without a kernel
# restart moves the working directory up one more level each time.
import os
import sys

sys.path.insert(0, "../")
os.chdir("../")

import pickle

import gym
from trainer import DDPGTrainer, TD3Trainer
from utils.mlagent_utils import get_env
from optuna.trial import FixedTrial
from config.config import log
from pathlib import Path
from datetime import datetime
from gym import wrappers

In [None]:
# win_env = "envs/worm_dynamic_one_agent/win/UnityEnvironment"
# env = get_env(win_env, False)

# print(env.action_space.low, env.action_space.high)
# env.close()

In [None]:
import pickle

# Replay a previously pickled DDPG agent in the Pendulum gym environment.
env_name = "Pendulum-v0"
name = f"DPPG-{env_name}-2"

env = gym.make(env_name)

# NOTE: pickle.load can execute arbitrary code; only load checkpoints that
# were produced by this project.
with open("models/2020-07-08/DPPG-Pendulum-v0-2/ddpg_agent_training.pickle", 'rb') as f:
    ddpg_agent = pickle.load(f)

# Run after the `with` block so the checkpoint file is closed before the
# rollout starts instead of being held open for its whole duration.
ddpg_agent.run(env)

In [None]:
from config.config import log, logFormatter
from pathlib import Path
from datetime import datetime
from gym import wrappers

# Train a DDPG agent on Pendulum and write the log file next to the results.
env_name = "Pendulum-v0"
name = f"DPPG-{env_name}-3"

# Resolve the date once: evaluating datetime.now() separately for the folder
# and the log file could produce two different dates around midnight, which
# would point the log file into a folder that was never created.
today = datetime.now().date()

folder = Path(f'models/{today}/{name}/')
folder.mkdir(parents=True, exist_ok=True)

env = gym.make(env_name)

fh = log.FileHandler(f'models/{today}/{name}/{today}.log')
fh.setFormatter(logFormatter)
log.getLogger().addHandler(fh)


trainer = DDPGTrainer()

log.info(f"Start DDPG training ({env_name})...")

trainer.config["episodes"] = 70
trainer.config["training_steps"] = 50
trainer.config["evaluation_steps"] = 50 # Evaluation enabled; set to 0 to disable evaluation
# trainer.config["evaluation_lim"] = 100

trainer.train(env, name=name, render=False)

log.info("Training done!")

In [None]:
# Show the trainer object (defined in an earlier cell) as the cell output.
trainer

In [None]:
from config.config import log

# Short TD3 test run on the Worm Domain build found under the `win` path.
# Linux alternative: "./envs/worm_dynamic_one_agent/linux/worm_dynamic"
env = get_env("envs/worm_dynamic_one_agent/win/UnityEnvironment", False)

trainer = TD3Trainer()

log.info("Start TD3 training (WormDomain)...")

# Override the trainer configuration for this run (applied in listed order).
td3_overrides = {
    "episodes": 5,
    "training_steps": 10,
    "training_episodes": 2,
    "batch_size": 2,
    "evaluation_lim": 5,
}
for option, value in td3_overrides.items():
    trainer.config[option] = value

trainer.train(env, name="TEST-TD3-WormDomain")

log.info("Training done!")

In [None]:
from config.config import log

# Short DDPG test run on the Worm Domain build found under the `win` path.
# Linux alternative: "./envs/worm_dynamic_one_agent/linux/worm_dynamic"
env = get_env("envs/worm_dynamic_one_agent/win/UnityEnvironment", False)

trainer = DDPGTrainer()

log.info("Start DDPG training (WormDomain)...")

# Override the trainer configuration for this run (applied in listed order).
ddpg_overrides = {
    "episodes": 3,
    "training_steps": 10,
    "evaluation_lim": 10,
    "batch_size": 3,
}
for option, value in ddpg_overrides.items():
    trainer.config[option] = value

trainer.train(env, name="TEST-DPPG-WormDomain")

log.info("Training done!")

In [None]:
# Close the environment created in an earlier cell.
env.close()

In [None]:
# Start a 2-trial training run on the Worm Domain build with default=True
# and keep the returned study for later inspection.
win_env = "envs/worm_dynamic_one_agent/win/UnityEnvironment"
env = get_env(win_env, True)

trainer = DDPGTrainer()

study = trainer.start_training(
    env,
    trials=2,
    render=False,
    name="WormDomain-5_Default",
    default=True,
)

In [None]:
# Plot the trainer's `training_rewards_df` attribute.
trainer.training_rewards_df.plot()

In [None]:
import pandas as pd

# Read the stored rewards CSV of the "WormDomain-5_Default" run and plot it.
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
rewards_csv = r"C:\Users\email\Documents\LMU\4_Semester\ASP\Project\AlphaWorm\dev\models\2020-06-26\WormDomain-5_Default\best_agent\rewards.csv"
pd.read_csv(rewards_csv).plot()

In [None]:
# Close the environment created in an earlier cell.
env.close()

In [None]:
import optuna
# Plot the optimization history of `study` (assigned in an earlier cell).
optuna.visualization.plot_optimization_history(study)

In [None]:
# Start a 2-trial training run on the Worm Domain build with default=True
# and keep the returned study for later inspection.
win_env = "envs/worm_dynamic_one_agent/win/UnityEnvironment"
env = get_env(win_env, True)

trainer = DDPGTrainer()

study = trainer.start_training(
    env,
    trials=2,
    render=False,
    name="WormDomain-4_HPO_Local",
    default=True,
)

In [None]:
# Close the environment created in an earlier cell.
env.close()

In [None]:
# Load a pickled agent from the 2020-06-17 run.
# NOTE(review): this path starts with "../" while other cells in this notebook
# read from "models/..." after os.chdir("../"); confirm the intended working directory.
with open("../models/2020-06-17/WormDomain-1.pickle", 'rb') as agent_file:
    best_agent = pickle.load(agent_file)

In [None]:
# Create the Worm Domain environment from the `win` build path.
win_env = "envs/worm_dynamic_one_agent/win/UnityEnvironment"
env = get_env(win_env, True)

# Run the agent loaded in an earlier cell, passing steps=1000.
best_agent.run(env, steps=1000)

In [None]:
# Close the environment created in an earlier cell.
env.close()

In [None]:
# Restore the pickled agent of the 2020-06-19 run ...
with open("models/2020-06-19/WormDomain-1.pickle", 'rb') as agent_file:
    best_agent = pickle.load(agent_file)

# ... and the study object that was stored alongside it.
with open("models/2020-06-19/WormDomain-1_study.pickle", 'rb') as study_file:
    study = pickle.load(study_file)


In [None]:
# Show the `best_params` attribute of the loaded study as the cell output.
study.best_params

In [None]:
# Retrain a DDPG agent using the best parameters stored in `study`.
win_env = "envs/worm_dynamic_one_agent/win/UnityEnvironment"
env = get_env(win_env, True)

try:
    # FixedTrial replays a fixed parameter set through the trial interface.
    trial = FixedTrial(study.best_params)
    trainer = DDPGTrainer()

    # NOTE(review): `train` is called here as (trial, env, ...) while other
    # cells call it as (env, ...); confirm this matches the current signature.
    trainer.train(trial, env, render=False, name="WormDomain-1x", training_steps=1000)
finally:
    # Close the environment even if setup or training raises, so it is not
    # left open after a failed run.
    env.close()

In [None]:
# Close the environment created in an earlier cell.
env.close()

In [None]:
win_env = "envs/worm_dynamic_one_agent/win/UnityEnvironment"
# NOTE(review): `win_env` is assigned above but None is passed to get_env here;
# confirm that this is intentional.
env = get_env(None, True)

# Run the agent held by the trainer; 1000 is passed positionally here, whereas
# other cells in this notebook pass steps=1000 by keyword.
trainer.ddpg_agent.run(env, 1000)

In [None]:
import optuna

In [None]:
# Plot the intermediate values recorded in `study`.
optuna.visualization.plot_intermediate_values(study)

In [None]:
# Plot the optimization history of `study`.
optuna.visualization.plot_optimization_history(study)

In [None]:
# Plot a parallel-coordinate view of `study`.
optuna.visualization.plot_parallel_coordinate(study)

## Training Gym Domains

@author: jhuthmacher

In [None]:
# Configure logging at DEBUG level.
import logging 
logging.basicConfig(level=logging.DEBUG)

# Put the parent directory on the import path and make it the working
# directory. The chdir is relative, so re-running this cell moves up again.
import sys
sys.path.insert(0, "../")
import os
os.chdir("../")

import pickle

import gym
from trainer.ddpg_trainer import DDPGTrainer
from config.config import log

## Pendulum

In [None]:
############
# Pendulum #
############
env = gym.make("Pendulum-v0")

trainer = DDPGTrainer()

log.info("Start DDPG training (Pendulum)...")

# Override the trainer configuration for this run (applied in listed order).
for option, value in (("training_steps", 200), ("episodes", 50)):
    trainer.config[option] = value

# Run the baseline training with the given epoch, rollout and step counts.
trainer.train_baseline(
    env,
    name="Test-File-Logging",
    render=False,
    nb_epochs=5,
    nb_epoch_cycles=1,
    nb_rollout_steps=1,
    nb_train_steps=1,
    nb_eval_steps=1,
)

log.info("Training done!")

In [None]:
import pickle

# Pickle the trainer's agent into a fixed checkpoint file.
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
checkpoint_path = r"C:\Users\email\Documents\LMU\4_Semester\ASP\Project\AlphaWorm\dev\models\2020-07-03\DDPG-Pendulum-2\ddpg_baseline_training.pickle"
with open(checkpoint_path, "wb+") as checkpoint_file:
    pickle.dump(trainer.ddpg_agent, checkpoint_file)

In [None]:
import pickle

import sys
sys.path.insert(0, "../")

# Restore the pickled agent from the fixed checkpoint file.
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
checkpoint_path = r"C:\Users\email\Documents\LMU\4_Semester\ASP\Project\AlphaWorm\dev\models\2020-07-03\DDPG-Pendulum-2\ddpg_baseline_training.pickle"
with open(checkpoint_path, "rb") as checkpoint_file:
    ddpg_agent = pickle.load(checkpoint_file)
    # ddpg_agent.run(env, steps=100, render=True)

In [None]:
# Show the loaded agent object as the cell output.
ddpg_agent

---

## Mountain Car

In [None]:
# Start a 10-trial training run on the continuous Mountain Car gym
# environment and keep the returned study.
env = gym.make("MountainCarContinuous-v0")

trainer = DDPGTrainer()

study = trainer.start_training(
    env,
    trials=10,
    render=False,
    name="MountainCar-1",
)

In [None]:
# Load a pickled agent from the 2020-06-17 run.
# NOTE(review): this path starts with "../" although the setup cell of this
# notebook calls os.chdir("../"); confirm the intended working directory.
with open("../models/2020-06-17/MountainCar-1.pickle", 'rb') as agent_file:
    best_agent = pickle.load(agent_file)

In [None]:
# Run the loaded agent in the current environment, passing steps=1000.
best_agent.run(env, steps=1000)

In [None]:
# Close the environment created in an earlier cell.
env.close()

In [None]:
import optuna

# Restore the pickled study of a specific run, then plot its intermediate values.
with open("../models/2020-06-17/MountainCar_study.pickle", 'rb') as study_file:
    study = pickle.load(study_file)

optuna.visualization.plot_intermediate_values(study)

In [None]:
# Plot the optimization history of `study`.
optuna.visualization.plot_optimization_history(study)

10.07.2020

In [None]:
import sys
sys.path.insert(0, "../")
# import os
# os.chdir("..")

import pickle
import optuna
import gym

env = gym.make("MountainCarContinuous-v0")

# trainer = DDPGTrainer()
# study = trainer.start_training(env, trials=10, render=False, name="MountainCar-1")

# Load the pickled study of the 2020-06-17 run.
with open("../models/2020-06-17/MountainCar_study.pickle", 'rb') as study_file:
    study = pickle.load(study_file)

# Load the pickled agent of the same run.
with open("../models/2020-06-17/MountainCar.pickle", 'rb') as agent_file:
    agent = pickle.load(agent_file)


In [None]:
# Move each listed component of the agent to the CPU, in this order, then run
# the agent in the environment.
cpu_targets = (
    agent.actor,
    agent.actor.input_layer,
    agent.critic,
    agent.critic_target,
    agent.actor_target,
)
for component in cpu_targets:
    component.to("cpu")
agent.run(env)

In [None]:
# Close the environment created in an earlier cell.
env.close()

In [None]:
import torch

# Pick the CUDA device when torch reports it as available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

In [None]:
# It is important to append the parent directory to the Python path to import modules from our package.
import sys
sys.path.insert(0, "../")
# To reference directories correctly (e.g. when saving files), the working directory of this notebook has to be changed.
# Important: if you re-run this cell you have to restart the kernel. Otherwise you would go up one more directory each time.
# (The cell content was previously pasted twice, which executed os.chdir("../") twice in a single run.)
import os
os.chdir("../")

####################
# Default Packages #
####################
import pickle
from pathlib import Path
from datetime import datetime

##################
# ML/RL Packages #
##################
import gym
from gym import wrappers

################
# Our Packages #
################
from trainer import DDPGTrainer, TD3Trainer
from utils.mlagent_utils import get_env
from config.config import log, logFormatter

In [None]:
######################
# Set Training Infos #
######################
env_name = "MountainCarContinuous-v0"
name = f"DPPG-{env_name}"

###########################################
# Ensure that the path exists for logging #
###########################################
# Resolve the date once: evaluating datetime.now() separately for the folder
# and the log file could produce two different dates around midnight, which
# would point the log file into a folder that was never created.
today = datetime.now().date()

folder = Path(f'models/{today}/{name}/')
folder.mkdir(parents=True, exist_ok=True)

# Store the logs right next to the results.
fh = log.FileHandler(f'models/{today}/{name}/{today}.log')
fh.setFormatter(logFormatter)
log.getLogger().addHandler(fh)

######################
# Create Environment #
######################
env = gym.make(env_name)

trainer = DDPGTrainer()

log.info(f"Start DDPG training ({env_name})...")

# If you want to customize the training.
trainer.config["episodes"] = 10
trainer.config["training_steps"] = 1000
trainer.config["evaluation_steps"] = 0 # To disable evaluation set to 0
trainer.config["explore_threshold"] = 0.1

trainer.train(env, name=name, render=True)

log.info("Training done!")

In [None]:
# Close the environment created in an earlier cell.
env.close()

In [None]:
# Keep a direct reference to the agent held by the trainer.
agent = trainer.ddpg_agent

In [None]:
# Run the agent in the current environment.
# NOTE(review): a preceding cell calls env.close(); confirm `env` is still
# usable here or recreate it before running.
agent.run(env)