# Training

This section contains code that will train a number of models. 

In [1]:
import gymnasium as gym

import numpy as np

from ar_bot_gym.ar_bot_gym import ARBotGym
from ar_bot_pybullet.ar_bot_pybullet import ARBotPybullet
from stable_baselines_models.train_arbot import TrainARBot

from stable_baselines3 import PPO

# Passed as the first argument to every TrainARBot.train(...) call in this notebook.
NUM_EPISODES = 1000


pybullet build time: Nov 28 2023 23:51:11


## Boxed Action Space

In [2]:
# Continuous action space: a 2-element vector whose components are each
# bounded below by -0.5 and above by 0.5.
actions = gym.spaces.Box(low=np.full(2, -0.5), high=np.full(2, 0.5))

In [3]:
# PPO on the boxed action space; no `obstacle` flag is passed for this run.
simple_ppo_boxed = TrainARBot(ARBotPybullet, ARBotGym, actions, PPO)

# train() returns two values, unpacked here as the sum-of-rewards tracker and
# the timestep tracker.
# NOTE(review): the two string arguments look like the model output path and
# the training-data output file — confirm against TrainARBot.train.
(
    simple_ppo_boxed_total_sum_reward_tracker,
    simple_ppo_boxed_total_timestep_tracker,
) = simple_ppo_boxed.train(
    NUM_EPISODES,
    "stable_baselines_models/trained_models/simple_ppo_boxed",
    "stable_baselines_models/training_data/simple_ppo_boxed.npy",
)

argv[0]=


In [4]:
# PPO on the boxed action space, this time with `obstacle=True`.
complex_ppo_boxed = TrainARBot(ARBotPybullet, ARBotGym, actions, PPO)

# NOTE(review): the recorded output for this cell reports an unexpected keyword
# argument 'obstacles', while the call here uses `obstacle`. The output is
# presumably stale from an earlier run — confirm the keyword name against
# TrainARBot.train and re-run the cell.
(
    complex_ppo_boxed_total_sum_reward_tracker,
    complex_ppo_boxed_total_timestep_tracker,
) = complex_ppo_boxed.train(
    NUM_EPISODES,
    "stable_baselines_models/trained_models/complex_ppo_boxed",
    "stable_baselines_models/training_data/complex_ppo_boxed.npy",
    obstacle=True,
)

TypeError: train() got an unexpected keyword argument 'obstacles'

## Discrete Action Space

In [None]:
# Lookup table from each discrete action index to a pair of values.
# NOTE(review): presumably the pair is the 2-component command used by the
# boxed action space — confirm how TrainARBot consumes this mapping.
action_mapping = {
    0: (0.0, 0.5),
    1: (0.5, 0.0),
    2: (0.0, -0.5),
    3: (-0.5, 0.0),
}

# One discrete action per entry in the mapping (4 in total).
actions = gym.spaces.Discrete(len(action_mapping))

In [None]:
# PPO on the discrete action space; the index-to-value mapping is handed to
# the trainer as a fifth argument. No `obstacle` flag is passed for this run.
simple_ppo_discrete = TrainARBot(
    ARBotPybullet, ARBotGym, actions, PPO, action_mapping
)

# train() returns two values, unpacked here as the sum-of-rewards tracker and
# the timestep tracker.
(
    simple_ppo_discrete_total_sum_reward_tracker,
    simple_ppo_discrete_total_timestep_tracker,
) = simple_ppo_discrete.train(
    NUM_EPISODES,
    "stable_baselines_models/trained_models/simple_ppo_discrete",
    "stable_baselines_models/training_data/simple_ppo_discrete.npy",
)


In [None]:
# PPO on the discrete action space with `obstacle=True`.
complex_ppo_discrete = TrainARBot(
    ARBotPybullet, ARBotGym, actions, PPO, action_mapping
)

# train() returns two values, unpacked here as the sum-of-rewards tracker and
# the timestep tracker.
(
    complex_ppo_discrete_total_sum_reward_tracker,
    complex_ppo_discrete_total_timestep_tracker,
) = complex_ppo_discrete.train(
    NUM_EPISODES,
    "stable_baselines_models/trained_models/complex_ppo_discrete",
    "stable_baselines_models/training_data/complex_ppo_discrete.npy",
    obstacle=True,
)


## MultiDiscrete Action Space

In [None]:
# Lookup table from a per-component discrete index to a scalar value.
# NOTE(review): presumably applied independently to each of the two action
# components — confirm how TrainARBot consumes this mapping.
action_mapping = {
    0: -0.5,
    1: 0,
    2: 0.5,
}

# Two independent discrete components, each with one choice per mapping entry
# (3 choices each).
actions = gym.spaces.MultiDiscrete([len(action_mapping)] * 2)

In [None]:
# PPO on the multi-discrete action space; the index-to-value mapping is handed
# to the trainer as a fifth argument. No `obstacle` flag is passed for this run.
simple_ppo_multi_discrete = TrainARBot(
    ARBotPybullet, ARBotGym, actions, PPO, action_mapping
)

# train() returns two values, unpacked here as the sum-of-rewards tracker and
# the timestep tracker.
(
    simple_ppo_multi_discrete_total_sum_reward_tracker,
    simple_ppo_multi_discrete_total_timestep_tracker,
) = simple_ppo_multi_discrete.train(
    NUM_EPISODES,
    "stable_baselines_models/trained_models/simple_ppo_multi_discrete",
    "stable_baselines_models/training_data/simple_ppo_multi_discrete.npy",
)

In [None]:
# PPO on the multi-discrete action space with `obstacle=True`.
complex_ppo_multi_discrete = TrainARBot(
    ARBotPybullet, ARBotGym, actions, PPO, action_mapping
)

# train() returns two values, unpacked here as the sum-of-rewards tracker and
# the timestep tracker.
(
    complex_ppo_multi_discrete_total_sum_reward_tracker,
    complex_ppo_multi_discrete_total_timestep_tracker,
) = complex_ppo_multi_discrete.train(
    NUM_EPISODES,
    "stable_baselines_models/trained_models/complex_ppo_multi_discrete",
    "stable_baselines_models/training_data/complex_ppo_multi_discrete.npy",
    obstacle=True,
)

# Print Graphs



## Setup
Importing numpy again is redundant if running in the same kernel that was used for training.

In [None]:
import numpy as np
import matplotlib.pyplot as plt

## Parse

Parse the data contained in the npy files created during training above. 

This step can be safely skipped if running in the same kernel that training was completed in.

Note that the file locations will need to be tweaked if using a different training_data directory.

In [None]:
# Load the trackers written by the "simple_ppo_boxed" training run. The
# training cells in this notebook write files named after each run
# (simple_ppo_boxed.npy, complex_ppo_boxed.npy, ...); none of them writes a
# bare "ppo_boxed.npy", so that name fails on a fresh run.
# Change the file name below to inspect a different run.
with open("stable_baselines_models/training_data/simple_ppo_boxed.npy", "rb") as ppo_data:
    # Two arrays are read back-to-back from the same handle; each np.load call
    # consumes the next array in the file.
    # NOTE(review): assumes the file holds the sum-of-rewards array first and
    # the timestep array second — confirm against how TrainARBot.train saves.
    ppo_total_sum_reward_tracker = np.load(ppo_data)
    ppo_total_timestep_tracker = np.load(ppo_data)

print(ppo_total_timestep_tracker)

## Create Graphs

In [None]:
# Plot the timestep tracker against its index. The axis label and title now
# describe timesteps; they previously read "Sum of Rewards", which did not
# match the data being plotted.
# NOTE(review): presumably one tracker entry per episode — confirm against
# TrainARBot.train.
plt.plot(ppo_total_timestep_tracker, label="PPO")

plt.legend()
plt.ylabel("Number of Timesteps")
plt.xlabel("Episode Number")
plt.title("Timesteps Across Episodes")

plt.show()

In [None]:
# Plot the sum-of-rewards tracker, using one x position per entry along the
# tracker's second axis.
# NOTE(review): no yerr/xerr is passed, so no error bars are drawn; and since
# shape[1] is read, the tracker is expected to be 2-D — confirm that a 2-D `y`
# is what errorbar should receive here.
plt.errorbar(
    x=np.arange(ppo_total_sum_reward_tracker.shape[1]),
    y=ppo_total_sum_reward_tracker,
    label="PPO",
)

plt.legend()
plt.ylabel("Sum of Rewards")
plt.xlabel("Episode Number")
plt.title("Sum Of Rewards Across Episodes")

plt.show()