Collect environment and action metrics during reinforcement learning #182

Open · wants to merge 3 commits into base: master
759 changes: 759 additions & 0 deletions Imitation Learning Enjoy Metrics Dashboard.ipynb

Large diffs are not rendered by default.

599 changes: 599 additions & 0 deletions Reinforcement Learning Enjoy Metrics Dashboard.ipynb

Large diffs are not rendered by default.

709 changes: 709 additions & 0 deletions Reinforcement Learning Metrics Dashboard.ipynb

Large diffs are not rendered by default.

165 changes: 165 additions & 0 deletions Training Image Warping Tests.ipynb

Large diffs are not rendered by default.
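The four dashboard notebooks above are not rendered in this diff. As a rough sketch only (not taken from the notebooks themselves), a dashboard cell could load one of the per-step CSV files written by the new learning/utils/metrics.py and plot a few of its columns; the pandas/matplotlib usage and the metrics-*.csv glob pattern here are assumptions based on the filename format in that module.

# Hedged sketch of a dashboard-style cell; not part of this PR.
import glob

import pandas as pd
import matplotlib.pyplot as plt

# Metrics files are named metrics-<timestamp>.csv, so lexicographic order is chronological.
latest = sorted(glob.glob("metrics-*.csv"))[-1]
df = pd.read_csv(latest)

fig, axes = plt.subplots(3, 1, sharex=True, figsize=(10, 8))
axes[0].plot(df["step"], df["reward"])
axes[0].set_ylabel("reward")
axes[1].plot(df["step"], df["center_dist"])
axes[1].set_ylabel("center_dist")
axes[2].plot(df["step"], df["center_angle"])
axes[2].set_ylabel("center_angle")
axes[2].set_xlabel("step")
plt.tight_layout()
plt.show()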

7 changes: 7 additions & 0 deletions learning/enjoy_imitation.sh
@@ -0,0 +1,7 @@

ls -l ./reinforcement/pytorch/models/ddpg_actor.pth
ls -l ./reinforcement/pytorch/models/ddpg_critic.pth

python3 -m imitation.pytorch.enjoy_imitation | tee enjoy_imitation.log ; say ok


7 changes: 7 additions & 0 deletions learning/enjoy_reinforcement.sh
@@ -0,0 +1,7 @@

ls -l ./reinforcement/pytorch/models/ddpg_actor.pth
ls -l ./reinforcement/pytorch/models/ddpg_critic.pth

python3 -m reinforcement.pytorch.enjoy_reinforcement | tee enjoy.log ; say ok


2 changes: 2 additions & 0 deletions learning/imitation.sh
@@ -0,0 +1,2 @@
python3 -m imitation.pytorch.train_imitation

9 changes: 5 additions & 4 deletions learning/imitation/pytorch/train_imitation.py
@@ -18,7 +18,7 @@

from utils.env import launch_env
from utils.wrappers import NormalizeWrapper, ImgWrapper, \
-DtRewardWrapper, ActionWrapper, ResizeWrapper
+DtRewardWrapper, ActionWrapper, ResizeWrapper, MetricsWrapper
from utils.teacher import PurePursuitExpert

from imitation.pytorch.model import Model
@@ -29,10 +29,11 @@
def _train(args):
env = launch_env()
env = ResizeWrapper(env)
-env = NormalizeWrapper(env)
+env = NormalizeWrapper(env)
env = ImgWrapper(env)
-env = ActionWrapper(env)
env = DtRewardWrapper(env)
+env = MetricsWrapper(env)
+env = ActionWrapper(env)
print("Initialized Wrappers")

observation_shape = (None, ) + env.observation_space.shape
@@ -104,4 +105,4 @@ def _train(args):

args = parser.parse_args()

-_train(args)
+_train(args)
2 changes: 2 additions & 0 deletions learning/reinforcement.sh
@@ -0,0 +1,2 @@
python3 -m reinforcement.pytorch.train_reinforcement --batch_size=8

9 changes: 5 additions & 4 deletions learning/reinforcement/pytorch/enjoy_reinforcement.py
@@ -9,10 +9,10 @@
from reinforcement.pytorch.ddpg import DDPG
from utils.env import launch_env
from utils.wrappers import NormalizeWrapper, ImgWrapper, \
-DtRewardWrapper, ActionWrapper, ResizeWrapper
+DtRewardWrapper, ActionWrapper, ResizeWrapper, MetricsWrapper


-def _enjoy():
+def _enjoy():
# Launch the env with our helper function
env = launch_env()
print("Initialized environment")
@@ -21,8 +21,9 @@ def _enjoy():
env = ResizeWrapper(env)
env = NormalizeWrapper(env)
env = ImgWrapper(env) # to make the images from 160x120x3 into 3x160x120
-env = ActionWrapper(env)
env = DtRewardWrapper(env)
+env = MetricsWrapper(env)
+env = ActionWrapper(env)
print("Initialized Wrappers")

state_dim = env.observation_space.shape
@@ -43,7 +44,7 @@ def _enjoy():
obs, reward, done, _ = env.step(action)
env.render()
done = False
-obs = env.reset()
+obs = env.reset()

if __name__ == '__main__':
_enjoy()
Binary file not shown.
Binary file not shown.
25 changes: 13 additions & 12 deletions learning/reinforcement/pytorch/train_reinforcement.py
@@ -10,17 +10,17 @@
from reinforcement.pytorch.utils import seed, evaluate_policy, ReplayBuffer
from utils.env import launch_env
from utils.wrappers import NormalizeWrapper, ImgWrapper, \
-DtRewardWrapper, ActionWrapper, ResizeWrapper
+DtRewardWrapper, ActionWrapper, ResizeWrapper, MetricsWrapper

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

-def _train(args):
+def _train(args):
if not os.path.exists("./results"):
os.makedirs("./results")
if not os.path.exists(args.model_dir):
os.makedirs(args.model_dir)

# Launch the env with our helper function
env = launch_env()
print("Initialized environment")
@@ -29,10 +29,11 @@ def _train(args):
env = ResizeWrapper(env)
env = NormalizeWrapper(env)
env = ImgWrapper(env) # to make the images from 160x120x3 into 3x160x120
-env = ActionWrapper(env)
env = DtRewardWrapper(env)
+env = MetricsWrapper(env)
+env = ActionWrapper(env)
print("Initialized Wrappers")

# Set seeds
seed(args.seed)

@@ -44,10 +45,10 @@ def _train(args):
policy = DDPG(state_dim, action_dim, max_action, net_type="cnn")
replay_buffer = ReplayBuffer(args.replay_buffer_max_size)
print("Initialized DDPG")

# Evaluate untrained policy
evaluations= [evaluate_policy(env, policy)]

total_timesteps = 0
timesteps_since_eval = 0
episode_num = 0
@@ -56,12 +57,12 @@ def _train(args):
env_counter = 0
reward = 0
episode_timesteps = 0

print("Starting training")
while total_timesteps < args.max_timesteps:

print("timestep: {} | reward: {}".format(total_timesteps, reward))

if done:
if total_timesteps != 0:
print(("Total T: %d Episode Num: %d Episode T: %d Reward: %f") % (
@@ -115,14 +116,14 @@ def _train(args):
episode_timesteps += 1
total_timesteps += 1
timesteps_since_eval += 1

print("Training done, about to save..")
policy.save(filename='ddpg', directory=args.model_dir)
print("Finished saving..should return now!")

if __name__ == '__main__':
parser = argparse.ArgumentParser()

# DDPG Args
parser.add_argument("--seed", default=0, type=int) # Sets Gym, PyTorch and Numpy seeds
parser.add_argument("--start_timesteps", default=1e4, type=int) # How many time steps purely random policy is run for
20 changes: 20 additions & 0 deletions learning/utils/metrics.py
@@ -0,0 +1,20 @@
import csv
from os import path
from datetime import datetime

class Metrics:
    def __init__(self):
        self.filename = 'metrics-' + str(datetime.now()).replace(' ', '-').replace(':', '-') + '.csv'
        with open(self.filename, mode='w') as metrics_file:
            self.metrics_writer = csv.writer(metrics_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
            self.metrics_writer.writerow(['datetime', 'step', 'x', 'y', 'angle', 'speed', 'steering',
                                          'center_dist', 'center_angle', 'reward', 'total_reward'])

    def record(self, step, x, y, angle, speed, steering, center_dist, center_angle, reward, total_reward):
        now = str(datetime.now())
        #print({now, step, speed, steering, center_dist, center_angle, reward, total_reward})

        with open(self.filename, mode='a') as metrics_file:
            self.metrics_writer = csv.writer(metrics_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
            self.metrics_writer.writerow([now, step, x, y, angle, speed, steering,
                                          center_dist, center_angle, reward, total_reward])
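For context, a hedged usage sketch of the Metrics class above (this snippet is not part of the PR). The constructor writes the CSV header row; each record() call then reopens the file in append mode and rebuilds the csv.writer, so one row is flushed per step even if the run is interrupted, and the self.metrics_writer kept from __init__ is never reused after its file handle closes. The sample values below are made up.

from utils.metrics import Metrics

metrics = Metrics()  # creates e.g. metrics-2021-03-01-12-00-00.000000.csv with a header row
for step in range(1, 4):
    # step, x, y, angle, speed, steering, center_dist, center_angle, reward, total_reward
    metrics.record(step, 1.20, 3.40, 0.05, 0.50, 0.00, 0.02, 0.05, 19.15, 19.15 * step)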
41 changes: 30 additions & 11 deletions learning/utils/wrappers.py
@@ -1,7 +1,7 @@
import gym
from gym import spaces
import numpy as np

+from utils.metrics import Metrics

class ResizeWrapper(gym.ObservationWrapper):
def __init__(self, env=None, shape=(120, 160, 3)):
@@ -52,15 +52,10 @@ class DtRewardWrapper(gym.RewardWrapper):
def __init__(self, env):
super(DtRewardWrapper, self).__init__(env)

-def reward(self, reward):
-    if reward == -1000:
-        reward = -10
-    elif reward > 0:
-        reward += 10
-    else:
-        reward += 4
-
-    return reward
+def reward(self, rewardUnused):
+    lane_pose = self.env.get_lane_pos2(self.env.cur_pos, self.env.cur_angle)
+    reward = 20.0 - (10 * abs(lane_pose.angle_rad) * abs(lane_pose.angle_rad)) - (300 * abs(lane_pose.dist) * abs(lane_pose.dist))
+    return reward


# this is needed because at max speed the duckie can't turn anymore
@@ -69,5 +64,29 @@ def __init__(self, env):
super(ActionWrapper, self).__init__(env)

def action(self, action):
-action_ = [action[0] * 0.8, action[1]]
+action_ = [0.5 + action[0] / 4, action[1]] # speed from +0.25 to +0.75
return action_

class MetricsWrapper(gym.Wrapper):
    def __init__(self, env=None):
        super(MetricsWrapper, self).__init__(env)
        self.metrics = Metrics()
        self.count = 0
        self.total_reward = 0

    def step(self, action):
        observation, reward, done, info = self.env.step(action)

        self.count = self.count + 1
        xy = self.env.cur_pos  # xzy ??
        angle = self.env.cur_angle
        speed = action[0]
        steering = action[1]
        lane_pose = self.env.get_lane_pos2(xy, angle)
        center_dist = lane_pose.dist
        center_angle = lane_pose.angle_rad
        self.total_reward = self.total_reward + reward

        self.metrics.record(self.count, xy[0], xy[2], angle, speed, steering, center_dist, center_angle,
                            reward, self.total_reward)
        return observation, reward, done, info
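Taken together with the train/enjoy scripts above, the wrapper order matters: ActionWrapper is applied last and therefore sits outermost, so the policy's raw action (assumed here to lie in [-1, 1]) is rescaled to a speed in [0.25, 0.75] before MetricsWrapper.step() runs, and DtRewardWrapper sits inside MetricsWrapper, so the logged reward is the shaped 20 - 10*angle_rad^2 - 300*dist^2 value (for example, dist = 0.05 and angle_rad = 0.1 give roughly 20 - 0.1 - 0.75 = 19.15). A minimal sketch of that stack, mirroring the scripts in this PR:

# Hedged sketch of the wrapper composition used in the training/enjoy scripts.
from utils.env import launch_env
from utils.wrappers import NormalizeWrapper, ImgWrapper, \
    DtRewardWrapper, ActionWrapper, ResizeWrapper, MetricsWrapper

env = launch_env()
env = ResizeWrapper(env)
env = NormalizeWrapper(env)
env = ImgWrapper(env)          # HWC -> CHW for the conv nets
env = DtRewardWrapper(env)     # shapes the reward from the lane pose
env = MetricsWrapper(env)      # logs one CSV row per step (scaled action, shaped reward)
env = ActionWrapper(env)       # outermost: speed = 0.5 + action[0] / 4

obs = env.reset()
obs, reward, done, info = env.step([0.0, 0.0])  # recorded speed is 0.5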