In [1]:
# Change the kernel's working directory to the parent folder.
# NOTE(review): presumably the notebook lives one level below the repository
# root, and the relative `datasets/` path and the `environments.*` imports used
# later rely on this — confirm.
%cd ..

/Users/danorel/Workspace/Education/University/KMA/Research/aclarel


In [2]:
import functools
import pathlib
import os

from tqdm import tqdm

import environments.mountain_car.environment as mountain_car
import environments.mountain_car.experiments as experiments

## Curriculum Learning: Setup

### Data Frame

In [3]:
import pandas as pd

def data_frame_from_agent(agent):
    """Build a pandas DataFrame from the measurements recorded on an agent.

    Args:
        agent: Any object exposing a ``measurements`` attribute that
            ``pd.DataFrame`` accepts.

    Returns:
        A new ``pd.DataFrame`` constructed from ``agent.measurements``.
    """
    return pd.DataFrame(agent.measurements)

## Reinforcement Learning: Experiments

In [4]:
# Folder (relative to the current working directory) that holds the result
# CSVs; it is created together with any missing parents, and an already
# existing folder is not an error.
DATASETS_DIR = pathlib.Path("datasets", "mountain_car")
DATASETS_DIR.mkdir(exist_ok=True, parents=True)

In [5]:
# Field names, in order, of the header row written at the top of each results CSV.
COLUMNS = [
    'agent_name',
    'evaluation',
    'curriculum_name',
    'aar',
    'ses',
    'learning_stability',
    'mean_reward',
    'std_reward',
    'total_reward',
    'success_rate',
]

### PPO

In [None]:
from environments.mountain_car.rl_methods.ppo import PPOAgent

# Agent factory with the agent type pre-bound; only the curriculum name varies.
ppo_agent = functools.partial(experiments.get_agent, agent_name='ppo')
ppo_path = DATASETS_DIR / 'ppo.csv'

# One PPO agent per curriculum strategy under comparison.
agents = [
    ppo_agent(curriculum_name='baseline'),
    ppo_agent(curriculum_name='transfer-learning'),
    ppo_agent(curriculum_name='teacher-learning'),
    ppo_agent(curriculum_name='one-pass'),
    ppo_agent(curriculum_name='root-p'),
    ppo_agent(curriculum_name='hard'),
    ppo_agent(curriculum_name='linear'),
    ppo_agent(curriculum_name='logarithmic'),
    ppo_agent(curriculum_name='logistic'),
    ppo_agent(curriculum_name='mixture'),
    ppo_agent(curriculum_name='polynomial'),
    ppo_agent(curriculum_name='anti-curriculum')
]

# The CSV is opened in append mode so rows from earlier runs are kept; the
# header is therefore written only when the file is new or still empty.
file_exists_and_not_empty = os.path.exists(ppo_path) and os.path.getsize(ppo_path) > 0

with open(ppo_path, 'a') as f:
    if not file_exists_and_not_empty:
        f.write(f"{','.join(COLUMNS)}\n")
        f.flush()
    for agent in tqdm(agents):
        curriculum = experiments.get_curriculum(agent)
        mountain_car.train_evaluate(agent, curriculum)
        agent_df = data_frame_from_agent(agent)
        # The header row is guaranteed to exist at this point (either it was
        # already in the file or it was written just above), so rows are always
        # appended without a header. The former `f.tell() == 0` branch could
        # never run: in append mode the position is at end-of-file, which is
        # non-zero once the header is present.
        # `columns=COLUMNS` pins the field order to the header row; a missing
        # measurement column raises KeyError instead of producing rows that
        # silently do not line up with the header.
        agent_df.to_csv(f, columns=COLUMNS, header=False, index=False)
        # Flush after every agent so finished results survive an interrupted run.
        f.flush()

Device: cpu
Autocast gradients: False




Device: cpu
Autocast gradients: False
Device: cpu
Autocast gradients: False
Device: cpu
Autocast gradients: False
Device: cpu
Autocast gradients: False
Device: cpu
Autocast gradients: False
Device: cpu
Autocast gradients: False
Device: cpu
Autocast gradients: False
Device: cpu
Autocast gradients: False
Device: cpu
Autocast gradients: False
Device: cpu
Autocast gradients: False
Device: cpu
Autocast gradients: False


  0%|                                                                                                                   | 0/12 [00:00<?, ?it/s]
  if not isinstance(terminated, (bool, np.bool8)):
  self.measurements = pd.concat([self.measurements, measurement], ignore_index=True)

  4%|████                                                                                                       | 1/26 [01:39<41:28, 99.54s/it][A

Evaluation 0 (Epsilon=None):
	Training:
 		Gravity: 0.0025
 		Stability: 315.861
	Evalution:
 		AAR: -21.365
 		SES: 0
 		Mean Reward: -1000.0
 		Std Reward: 0.0



  8%|████████▏                                                                                                  | 2/26 [03:15<38:54, 97.27s/it][A

### DQN

#### Curriculum parameter: gravity

In [None]:
from environments.mountain_car.rl_methods.dqn import DQNAgent

# Agent factory with the agent type pre-bound; only the curriculum name varies.
dqn_agent = functools.partial(experiments.get_agent, agent_name='dqn')
dqn_path = DATASETS_DIR / 'dqn.csv'

# One DQN agent per curriculum strategy under comparison.
# NOTE(review): unlike the PPO and Q-Learning lists, there is no 'baseline'
# entry here — confirm this is intentional.
agents = [
    dqn_agent(curriculum_name='transfer-learning'),
    dqn_agent(curriculum_name='teacher-learning'),
    dqn_agent(curriculum_name='one-pass'),
    dqn_agent(curriculum_name='root-p'),
    dqn_agent(curriculum_name='hard'),
    dqn_agent(curriculum_name='linear'),
    dqn_agent(curriculum_name='logarithmic'),
    dqn_agent(curriculum_name='logistic'),
    dqn_agent(curriculum_name='mixture'),
    dqn_agent(curriculum_name='polynomial'),
    dqn_agent(curriculum_name='anti-curriculum')
]

# The CSV is opened in append mode so rows from earlier runs are kept; the
# header is therefore written only when the file is new or still empty.
file_exists_and_not_empty = os.path.exists(dqn_path) and os.path.getsize(dqn_path) > 0

with open(dqn_path, 'a') as f:
    if not file_exists_and_not_empty:
        f.write(f"{','.join(COLUMNS)}\n")
        f.flush()
    for agent in tqdm(agents):
        print(f"Training of '{agent.metadata['agent_name']}' via CL method '{agent.metadata['curriculum_name']}' has started")
        curriculum = experiments.get_curriculum(agent)
        mountain_car.train_evaluate(agent, curriculum)
        agent_df = data_frame_from_agent(agent)
        # The header row is guaranteed to exist at this point (either it was
        # already in the file or it was written just above), so rows are always
        # appended without a header. The former `f.tell() == 0` branch could
        # never run: in append mode the position is at end-of-file, which is
        # non-zero once the header is present.
        # `columns=COLUMNS` pins the field order to the header row; a missing
        # measurement column raises KeyError instead of producing rows that
        # silently do not line up with the header.
        agent_df.to_csv(f, columns=COLUMNS, header=False, index=False)
        # Flush after every agent so finished results survive an interrupted run.
        f.flush()

### Q-Learning

#### Curriculum parameter: gravity

In [None]:
from environments.mountain_car.rl_methods.q_learning import QLearningAgent

# Agent factory with the agent type pre-bound; only the curriculum name varies.
q_learning_agent = functools.partial(experiments.get_agent, agent_name='q-learning')
q_learning_path = DATASETS_DIR / 'q-learning.csv'

# One Q-Learning agent per curriculum strategy under comparison.
agents = [
    q_learning_agent(curriculum_name='baseline'),
    q_learning_agent(curriculum_name='transfer-learning'),
    q_learning_agent(curriculum_name='teacher-learning'),
    q_learning_agent(curriculum_name='one-pass'),
    q_learning_agent(curriculum_name='root-p'),
    q_learning_agent(curriculum_name='hard'),
    q_learning_agent(curriculum_name='linear'),
    q_learning_agent(curriculum_name='logarithmic'),
    q_learning_agent(curriculum_name='logistic'),
    q_learning_agent(curriculum_name='mixture'),
    q_learning_agent(curriculum_name='polynomial'),
    q_learning_agent(curriculum_name='anti-curriculum')
]

# The CSV is opened in append mode so rows from earlier runs are kept; the
# header is therefore written only when the file is new or still empty.
file_exists_and_not_empty = os.path.exists(q_learning_path) and os.path.getsize(q_learning_path) > 0

with open(q_learning_path, 'a') as f:
    if not file_exists_and_not_empty:
        f.write(f"{','.join(COLUMNS)}\n")
        f.flush()
    for agent in tqdm(agents):
        print(f"Training of '{agent.metadata['agent_name']}' via CL method '{agent.metadata['curriculum_name']}' has started")
        curriculum = experiments.get_curriculum(agent)
        mountain_car.train_evaluate(agent, curriculum)
        agent_df = data_frame_from_agent(agent)
        # The header row is guaranteed to exist at this point (either it was
        # already in the file or it was written just above), so rows are always
        # appended without a header. The former `f.tell() == 0` branch could
        # never run: in append mode the position is at end-of-file, which is
        # non-zero once the header is present.
        # `columns=COLUMNS` pins the field order to the header row; a missing
        # measurement column raises KeyError instead of producing rows that
        # silently do not line up with the header.
        agent_df.to_csv(f, columns=COLUMNS, header=False, index=False)
        # Flush after every agent so finished results survive an interrupted run.
        f.flush()