In [1]:
'''
Terminology:
	Episode: one go of the car trying to "live" in the simulation and earn max rewards.
				Episode start from spawning a new car and ends with either car crashing or episode duration limit running out

	Timestep: one frame through simulation: the car gets a obs, reward from prior step and then it makes a decision on control input and sends it to simulation

	Reward logic: each timestep a logic is applied to calculate a reward from latest step. 
				This logic represents you describing the desired behaviour the car needs to learn.

	Policy/model: our objective, what we need learn as part of RL. This is the latest set of rules on what to do at a given camera image.

	Iterations: RL training sessions (multiple episodes and timesteps) when a policy/model is saved. So the policy is changed throughout one iteration
				but then saved in a new file at the end of iteration. This allows to test all models later at different stages of training  
'''	

from stable_baselines3 import PPO #PPO
from stable_baselines3.common.env_checker import check_env

import os
from environment import CarEnv
import time

In [2]:

print('This is the start of training script')

print('setting folders for logs and models')


print('connecting to env..')


env = CarEnv()  # Try check_env(env, warn=True) 
check_env(env, warn=True) 
env.reset()


This is the start of training script
setting folders for logs and models
connecting to env..


2024-10-03 16:12:07,447 - INFO - Spawning vehicle at: Location(x=174.330048, y=237.429993, z=0.500000)
2024-10-03 16:12:07,452 - INFO - Current waypoint: Location(x=173.521225, y=236.948929, z=0.000000)
2024-10-03 16:12:07,453 - INFO - Target waypoint: Location(x=170.521225, y=236.947098, z=0.000000)
2024-10-03 16:12:07,455 - INFO - lateral_distance: -0.16019377572214058, speed: 0.0, heading: 0.00019132826063368057
2024-10-03 16:12:07,462 - INFO - Spawning vehicle at: Location(x=189.929993, y=142.190002, z=0.500000)
2024-10-03 16:12:07,465 - INFO - Current waypoint: Location(x=189.707352, y=142.087311, z=0.000000)
2024-10-03 16:12:07,466 - INFO - Target waypoint: Location(x=189.703278, y=145.087296, z=0.000000)
2024-10-03 16:12:07,468 - INFO - lateral_distance: -0.07425969993610584, speed: 0.0, heading: 0.0004326714409722222
2024-10-03 16:12:07,498 - INFO - Spawning vehicle at: Location(x=88.710045, y=237.429993, z=0.500000)
2024-10-03 16:12:07,502 - INFO - Current waypoint: Location(x

(array([0.0361    , 0.        , 0.00032294], dtype=float32), {})

In [3]:

print('Env has been reset as part of launch')

model = PPO(
    'MlpPolicy', 
    env, 
    verbose=1, 
    learning_rate=0.0003, 
    n_steps=1024, 
    batch_size=64, 
    clip_range=0.1, 
    gamma=0.99, 
    normalize_advantage=True,
)

	

Env has been reset as part of launch
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [3]:


# Directory containing saved models
models_dir = "models/0210/1727878874"
logdir = "logs/0210/1727878874"

# Ensure the directories exist
assert os.path.exists(models_dir), f"Model directory {models_dir} does not exist!"
assert os.path.exists(logdir), f"Log directory {logdir} does not exist!"

# List all the saved models in the directory
model_files = [f for f in sorted(os.listdir(models_dir)) if f.endswith('.zip')]
assert len(model_files) > 0, "No models found in the models directory!"

# Load the environment
env = CarEnv()
check_env(env)
env.reset()


2024-10-03 16:12:14,440 - INFO - Spawning vehicle at: Location(x=173.870056, y=105.550011, z=0.500000)
2024-10-03 16:12:14,447 - INFO - Current waypoint: Location(x=172.498642, y=105.426109, z=0.000000)
2024-10-03 16:12:14,448 - INFO - Target waypoint: Location(x=169.498642, y=105.428146, z=0.000000)
2024-10-03 16:12:14,451 - INFO - lateral_distance: -0.041611417809444225, speed: 0.0, heading: -0.00021828545464409722
2024-10-03 16:12:14,460 - INFO - Spawning vehicle at: Location(x=135.879990, y=215.269989, z=0.500000)
2024-10-03 16:12:14,468 - INFO - Current waypoint: Location(x=136.134064, y=215.182022, z=0.000000)
2024-10-03 16:12:14,469 - INFO - Target waypoint: Location(x=136.137115, y=212.182037, z=0.000000)
2024-10-03 16:12:14,472 - INFO - lateral_distance: -0.08466514813394915, speed: 0.0, heading: 0.0003216637505425347
2024-10-03 16:12:14,490 - INFO - Spawning vehicle at: Location(x=165.090042, y=187.119949, z=0.500000)
2024-10-03 16:12:14,497 - INFO - Current waypoint: Locatio

(array([0.13113587, 0.        , 0.00019438], dtype=float32), {})

In [4]:

# Select the most recent model to test
latest_model_path = os.path.join(models_dir, model_files[-1])
print(f"Loading model from {latest_model_path}")
model = PPO.load(latest_model_path, env=env)


Loading model from models/0210/1727878874\750000.zip
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [5]:

# Number of episodes to test
num_test_episodes = 1

for episode in range(num_test_episodes):
    obs, info = env.reset()
    done = False
    total_reward = 0
    timestep = 0

    print(f"Starting Episode {episode + 1}")

    while not done:
        # Get the action from the model
        action, _states = model.predict(obs)
        obs, reward, done, truncated, info = env.step(action)
        print(f"Action: {action}")
        print(f"Obs: {obs}")
        print(f"Reward: {reward}")
        total_reward += reward
        timestep += 1
        if done or truncated:
            break

    print(f"Episode {episode + 1} finished. Total Reward: {total_reward} after {timestep} timesteps.\n")

print("Testing completed.")
env.close()


2024-10-03 16:12:17,528 - INFO - Spawning vehicle at: Location(x=136.110016, y=306.420044, z=0.500000)
2024-10-03 16:12:17,535 - INFO - Current waypoint: Location(x=136.659302, y=306.559845, z=0.000000)
2024-10-03 16:12:17,536 - INFO - Target waypoint: Location(x=139.662720, y=306.556549, z=0.000000)
2024-10-03 16:12:17,540 - INFO - lateral_distance: -0.04712866668060092, speed: 0.0, heading: -0.000436303640405337


Starting Episode 1
Action: [8 2]
Obs: [-0.04712867  0.         -0.0004363 ]
Reward: -5.0
Action: [5 1]
Obs: [-0.04712867  0.01666667 -0.0004363 ]
Reward: -5.0
Action: [8 3]
Obs: [-0.04712867  0.01666667 -0.0004363 ]
Reward: -5.0
Action: [1 3]
Obs: [-0.04712867  0.01666667 -0.0004363 ]
Reward: -5.0
Action: [1 2]
Obs: [-0.04712867  0.03333334 -0.0004363 ]
Reward: -5.0
Action: [5 1]
Obs: [-0.04712867  0.03333334 -0.0004363 ]
Reward: -5.0
Action: [6 1]
Obs: [-0.04712867  0.03333334 -0.0004363 ]
Reward: -5.0
Action: [8 1]
Obs: [-0.04712867  0.05       -0.0004363 ]
Reward: -5.0
Action: [1 1]
Obs: [-0.04712867  0.05       -0.0004363 ]
Reward: -5.0
Action: [8 2]
Obs: [-0.04712867  0.05       -0.0004363 ]
Reward: -5.0
Action: [8 2]
Obs: [-0.04712867  0.06666667 -0.0004363 ]
Reward: -5.0
Action: [3 2]
Obs: [-0.04712867  0.06666667 -0.0004363 ]
Reward: -5.0
Action: [2 0]
Obs: [-0.04712867  0.06666667 -0.0004363 ]
Reward: -5.0
Action: [2 2]
Obs: [-0.04712867  0.08333334 -0.0004363 ]
Reward: -5.0
A

2024-10-03 16:12:19,450 - INFO - lateral_distance: -0.0470385668497924, speed: 0.05, heading: -0.0007792796111769147


Action: [7 1]
Obs: [-0.04705936  0.01666667 -0.00048587]
Reward: -5.0
Action: [2 2]
Obs: [-0.04708747  0.01666667 -0.00051017]
Reward: -5.0
Action: [0 3]
Obs: [-0.04713534  0.03333334 -0.00053177]
Reward: -5.0
Action: [6 0]
Obs: [-0.04717257  0.03333334 -0.00054763]
Reward: -5.0
Action: [3 1]
Obs: [-0.04719935  0.03333334 -0.00053129]
Reward: -5.0
Action: [8 1]
Obs: [-0.0471952   0.05       -0.00053812]
Reward: -5.0
Action: [6 3]
Obs: [-0.04718055  0.05       -0.00062026]
Reward: -5.0
Action: [8 2]
Obs: [-0.04709433  0.05       -0.00070369]
Reward: -5.0
s: 3.0, loc: Location(x=136.195694, y=306.420197, z=0.220957)
dest: Location(x=139.662720, y=306.556549, z=0.000000)
vehicle_transform: Transform(Location(x=136.195694, y=306.420197, z=0.220957), Rotation(pitch=0.042907, yaw=0.061553, roll=-0.002655)), self.dest_waypoint: Waypoint(Transform(Location(x=139.662720, y=306.556549, z=0.000000), Rotation(pitch=0.000000, yaw=-0.078718, roll=0.000000))), velocity: Vector3D(x=1.067896, y=0.02370

2024-10-03 16:12:20,996 - INFO - lateral_distance: -0.023859102530424014, speed: 0.2, heading: -0.008711344955696


Action: [5 2]
Obs: [-0.02752082  0.18333334 -0.00852293]
Reward: 4.999784888888889
Action: [8 2]
Obs: [-0.0268958   0.18333334 -0.00873206]
Reward: 4.999784888888889
Action: [3 2]
Obs: [-0.02634188  0.18333334 -0.00896048]
Reward: 4.999784888888889
Action: [0 3]
Obs: [-0.02585904  0.18333334 -0.0090253 ]
Reward: 4.999784888888889
Action: [0 1]
Obs: [-0.02551849  0.18333334 -0.00905511]
Reward: 4.999784888888889
Action: [5 3]
Obs: [-0.02515748  0.18333334 -0.00902239]
Reward: 4.999784888888889
Action: [8 3]
Obs: [-0.024776   0.2       -0.0090869]
Reward: 4.999831
Action: [3 0]
Obs: [-0.02443506  0.2        -0.00904453]
Reward: 4.999831
Action: [3 0]
Obs: [-0.02418555  0.2        -0.00899899]
Reward: 4.999831
Action: [1 1]
Obs: [-0.0240173   0.2        -0.00881861]
Reward: 4.999831
s: 12.0, loc: Location(x=138.653214, y=306.486359, z=0.221520)
dest: Location(x=139.662720, y=306.556549, z=0.000000)
vehicle_transform: Transform(Location(x=138.653214, y=306.486359, z=0.221520), Rotation(pit

2024-10-03 16:12:21,285 - INFO - Passed waypoint, moving to next. Reward: 14.999871555555556


Action: [7 1]
Obs: [-0.01389808  0.21666667 -0.01091816]
Reward: 14.999871555555556
Action: [8 2]
Obs: [-0.01333136  0.21666667 -0.01104795]
Reward: 4.999871555555556
Action: [7 0]
Obs: [-0.0126629   0.21666667 -0.01120385]
Reward: 4.999871555555556
Action: [7 2]
Obs: [-0.01189269  0.21666667 -0.0114902 ]
Reward: 4.999871555555556
Action: [6 2]
Obs: [-0.01103091  0.21666667 -0.0117901 ]
Reward: 4.999871555555556
Action: [8 1]
Obs: [-0.01007756  0.21666667 -0.01220881]
Reward: 4.999871555555556
Action: [3 0]
Obs: [-0.00910382  0.21666667 -0.01256305]
Reward: 4.999871555555556
Action: [0 0]
Obs: [-0.00822161  0.21666667 -0.01288143]
Reward: 4.999871555555556
Action: [6 3]
Obs: [-0.0073292   0.21666667 -0.01313049]
Reward: 4.999871555555556
Action: [5 2]
Obs: [-0.00645713  0.21666667 -0.0134138 ]
Reward: 4.999871555555556
Action: [5 3]
Obs: [-0.00557485  0.21666667 -0.01363908]
Reward: 4.999871555555556
Action: [3 2]
Obs: [-0.00477392  0.21666667 -0.01383292]
Reward: 4.999871555555556
Act

2024-10-03 16:12:22,451 - INFO - Passed waypoint, moving to next. Reward: 14.999906555555555
2024-10-03 16:12:22,511 - INFO - lateral_distance: 0.04166736163899851, speed: 0.23333333333333334, heading: -0.01585972338087029


Action: [0 2]
Obs: [ 0.03611816  0.21666667 -0.01685996]
Reward: 4.999871555555556
Action: [8 0]
Obs: [ 0.03684803  0.23333333 -0.01683905]
Reward: 4.9999065555555555
Action: [1 3]
Obs: [ 0.03746603  0.23333333 -0.01678945]
Reward: 4.9999065555555555
Action: [1 3]
Obs: [ 0.03800268  0.23333333 -0.01661171]
Reward: 4.9999065555555555
Action: [0 0]
Obs: [ 0.03839693  0.23333333 -0.01638192]
Reward: 4.9999065555555555
Action: [8 1]
Obs: [ 0.03880139  0.23333333 -0.01611601]
Reward: 4.9999065555555555
Action: [5 0]
Obs: [ 0.03924596  0.23333333 -0.01597076]
Reward: 14.999906555555555
Action: [5 0]
Obs: [ 0.03980303  0.23333333 -0.01587122]
Reward: 4.9999065555555555
Action: [7 0]
Obs: [ 0.04045163  0.23333333 -0.01591137]
Reward: 4.9999065555555555
Action: [3 0]
Obs: [ 0.04111037  0.23333333 -0.01589931]
Reward: 4.9999065555555555
s: 14.0, loc: Location(x=142.510132, y=306.677643, z=0.221676)
dest: Location(x=145.662720, y=306.548309, z=0.000000)
vehicle_transform: Transform(Location(x=142

2024-10-03 16:12:23,611 - INFO - Passed waypoint, moving to next. Reward: 14.999936


Action: [3 2]
Obs: [ 0.06109073  0.25       -0.00920735]
Reward: 4.999936
Action: [0 2]
Obs: [ 0.06127292  0.25       -0.00899189]
Reward: 4.999936
Action: [8 1]
Obs: [ 0.06146527  0.25       -0.00882964]
Reward: 4.999936
Action: [5 2]
Obs: [ 0.06174916  0.25       -0.00873002]
Reward: 4.999936
Action: [2 1]
Obs: [ 0.06200254  0.25       -0.00864673]
Reward: 4.999936
Action: [0 2]
Obs: [ 0.06221523  0.25       -0.0084867 ]
Reward: 4.999936
Action: [1 3]
Obs: [ 0.06232619  0.25       -0.00828605]
Reward: 4.999936
Action: [7 0]
Obs: [ 0.06245751  0.25       -0.00808476]
Reward: 4.999936
Action: [2 1]
Obs: [ 0.06255833  0.25       -0.00789259]
Reward: 4.999936
Action: [1 2]
Obs: [ 0.06259813  0.25       -0.00762059]
Reward: 4.999936
Action: [1 3]
Obs: [ 0.06252605  0.25       -0.00730323]
Reward: 4.999936
Action: [8 0]
Obs: [ 0.06247374  0.25       -0.00698449]
Reward: 14.999936
Action: [8 1]
Obs: [ 0.06249326  0.25       -0.00678458]
Reward: 4.999936
Action: [5 0]
Obs: [ 0.06267554  0.25

2024-10-03 16:12:24,018 - INFO - lateral_distance: 0.07267582729143404, speed: 0.25, heading: -0.008172625593013233


Action: [5 0]
Obs: [ 0.06878664  0.25       -0.00764735]
Reward: 4.999936
Action: [0 0]
Obs: [ 0.06922329  0.25       -0.00773774]
Reward: 4.999936
Action: [5 2]
Obs: [ 0.06969045  0.25       -0.00781619]
Reward: 4.999936
Action: [0 2]
Obs: [ 0.07007624  0.25       -0.007855  ]
Reward: 4.999936
Action: [8 3]
Obs: [ 0.0705129   0.25       -0.00789821]
Reward: 4.999936
Action: [7 0]
Obs: [ 0.0710004  0.25      -0.0080287]
Reward: 4.999936
Action: [3 2]
Obs: [ 0.07146757  0.25       -0.00809481]
Reward: 4.999936
Action: [6 2]
Obs: [ 0.07194491  0.25       -0.00820416]
Reward: 4.999936
Action: [2 1]
Obs: [ 0.07235106  0.25       -0.00821742]
Reward: 4.999936
s: 15.0, loc: Location(x=146.582092, y=306.765076, z=0.221663)
dest: Location(x=148.662720, y=306.544189, z=0.000000)
vehicle_transform: Transform(Location(x=146.582092, y=306.765076, z=0.221663), Rotation(pitch=0.002630, yaw=1.392355, roll=-0.003784)), self.dest_waypoint: Waypoint(Transform(Location(x=148.662720, y=306.544189, z=0.000

2024-10-03 16:12:24,662 - INFO - Passed waypoint, moving to next. Reward: 14.999959888888888


Action: [0 3]
Obs: [ 0.07052038  0.26666668 -0.00140409]
Reward: 4.999959888888889
Action: [7 2]
Obs: [ 0.07094803  0.26666668 -0.00171747]
Reward: 4.999959888888889
Action: [8 0]
Obs: [ 0.0714571   0.26666668 -0.00209128]
Reward: 4.999959888888889
Action: [7 2]
Obs: [ 0.07210866  0.26666668 -0.00254933]
Reward: 4.999959888888889
Action: [3 0]
Obs: [ 0.07276029  0.26666668 -0.00298846]
Reward: 4.999959888888889
Action: [0 2]
Obs: [ 0.0733408   0.26666668 -0.00333936]
Reward: 4.999959888888889
Action: [7 2]
Obs: [ 0.07392143  0.26666668 -0.00366576]
Reward: 4.999959888888889
Action: [1 1]
Obs: [ 0.07442016  0.26666668 -0.00389773]
Reward: 14.999959888888888
Action: [6 1]
Obs: [ 0.07488907  0.26666668 -0.00410147]
Reward: 4.999959888888889
Action: [1 0]
Obs: [ 0.07525634  0.26666668 -0.00420723]
Reward: 4.999959888888889
Action: [3 2]
Obs: [ 0.07551178  0.26666668 -0.00420592]
Reward: 4.999959888888889
Action: [2 3]
Obs: [ 0.07562488  0.26666668 -0.00409089]
Reward: 4.999959888888889
Act

2024-10-03 16:12:25,569 - INFO - lateral_distance: 0.056182502542124334, speed: 0.26666666666666666, heading: 0.008262393582198355


Action: [2 0]
Obs: [0.06221677 0.26666668 0.00625942]
Reward: 4.999959888888889
Action: [4 0]
Obs: [0.06188203 0.26666668 0.00627875]
Reward: 4.999959888888889
Action: [4 3]
Obs: [0.0615676  0.26666668 0.00629164]
Reward: 4.999959888888889
Action: [0 2]
Obs: [0.0612023  0.26666668 0.0063425 ]
Reward: 4.999959888888889
Action: [3 0]
Obs: [0.06076573 0.26666668 0.00645694]
Reward: 4.999959888888889
Action: [0 1]
Obs: [0.0602274  0.26666668 0.00665513]
Reward: 4.999959888888889
Action: [3 0]
Obs: [0.05964833 0.26666668 0.00687317]
Reward: 4.999959888888889
Action: [1 0]
Obs: [0.05899799 0.26666668 0.00714613]
Reward: 4.999959888888889
Action: [8 1]
Obs: [0.05836795 0.26666668 0.00737467]
Reward: 4.999959888888889
Action: [2 2]
Obs: [0.05771751 0.26666668 0.00761745]
Reward: 4.999959888888889
Action: [2 2]
Obs: [0.05699582 0.26666668 0.00790347]
Reward: 4.999959888888889
s: 16.0, loc: Location(x=151.195587, y=306.709259, z=0.221689)
dest: Location(x=151.662720, y=306.540070, z=0.000000)
ve

2024-10-03 16:12:25,631 - INFO - Passed waypoint, moving to next. Reward: 14.999959888888888


Action: [1 2]
Obs: [0.0539764  0.26666668 0.00902076]
Reward: 4.999959888888889
Action: [5 1]
Obs: [0.05338622 0.26666668 0.00912638]
Reward: 14.999959888888888
Action: [0 3]
Obs: [0.05278644 0.26666668 0.00923641]
Reward: 4.999959888888889
Action: [5 2]
Obs: [0.05226804 0.26666668 0.00927795]
Reward: 4.999959888888889
Action: [3 2]
Obs: [0.05171914 0.26666668 0.0093439 ]
Reward: 4.999959888888889
Action: [1 1]
Obs: [0.05109903 0.26666668 0.00947479]
Reward: 4.999959888888889
Action: [6 1]
Obs: [0.05051963 0.26666668 0.00956075]
Reward: 4.999959888888889
Action: [1 1]
Obs: [0.04991991 0.26666668 0.00966671]
Reward: 4.999959888888889
Action: [7 1]
Obs: [0.04939142 0.26666668 0.00969587]
Reward: 4.999959888888889
Action: [2 0]
Obs: [0.04884259 0.26666668 0.00976301]
Reward: 4.999959888888889
Action: [0 2]
Obs: [0.04821237 0.26666668 0.0098844 ]
Reward: 4.999959888888889
Action: [5 1]
Obs: [0.04763305 0.26666668 0.00997375]
Reward: 4.999959888888889
Action: [6 2]
Obs: [0.04714528 0.266666

2024-10-03 16:12:26,728 - INFO - Passed waypoint, moving to next. Reward: 14.999959888888888


Action: [7 3]
Obs: [0.01606024 0.26666668 0.01321251]
Reward: 4.999959888888889
Action: [0 3]
Obs: [0.01520667 0.26666668 0.01336849]
Reward: 4.999959888888889
Action: [2 2]
Obs: [0.01431183 0.26666668 0.01355431]
Reward: 14.999959888888888
Action: [6 0]
Obs: [0.01347865 0.26666668 0.0136891 ]
Reward: 4.999959888888889
Action: [8 2]
Obs: [0.01275739 0.26666668 0.01370503]
Reward: 4.999959888888889
Action: [2 0]
Obs: [0.01204634 0.26666668 0.01371979]
Reward: 4.999959888888889
Action: [0 1]
Obs: [0.01129464 0.26666668 0.01376844]
Reward: 4.999959888888889
Action: [2 3]
Obs: [0.01047174 0.26666668 0.01388628]
Reward: 4.999959888888889
Action: [8 1]
Obs: [0.00967941 0.26666668 0.01396579]
Reward: 4.999959888888889
Action: [7 2]
Obs: [0.008999   0.26666668 0.0139468 ]
Reward: 4.999959888888889
Action: [3 0]
Obs: [0.00830845 0.26666668 0.01393239]
Reward: 4.999959888888889
Action: [1 2]
Obs: [0.00756707 0.26666668 0.01396494]
Reward: 4.999959888888889
Action: [5 2]
Obs: [0.00688675 0.266666

2024-10-03 16:12:27,258 - INFO - lateral_distance: -0.0071058544894048135, speed: 0.2833333333333333, heading: 0.013887969363066886


Action: [1 2]
Obs: [0.00036373 0.28333333 0.01275713]
Reward: 4.999978222222222
Action: [7 2]
Obs: [-0.00040761  0.28333333  0.01288089]
Reward: 4.999978222222222
Action: [2 2]
Obs: [-0.00125013  0.28333333  0.01305094]
Reward: 4.999978222222222
Action: [3 0]
Obs: [-0.00213329  0.28333333  0.01325388]
Reward: 4.999978222222222
Action: [5 2]
Obs: [-0.0029757   0.28333333  0.01339782]
Reward: 4.999978222222222
Action: [2 0]
Obs: [-0.0038384   0.28333333  0.01354194]
Reward: 4.999978222222222
Action: [0 0]
Obs: [-0.00475194  0.28333333  0.01372674]
Reward: 4.999978222222222
Action: [7 0]
Obs: [-0.00562476  0.28333333  0.01385779]
Reward: 4.999978222222222
Action: [8 2]
Obs: [-0.00635515  0.28333333  0.01387542]
Reward: 4.999978222222222
s: 17.0, loc: Location(x=155.851608, y=306.513000, z=0.221627)
dest: Location(x=157.662704, y=306.531830, z=0.000000)
vehicle_transform: Transform(Location(x=155.851608, y=306.513000, z=0.221627), Rotation(pitch=0.008811, yaw=-2.578552, roll=0.006385)), se

2024-10-03 16:12:27,789 - INFO - Passed waypoint, moving to next. Reward: 14.999978222222222


Action: [2 0]
Obs: [-0.02577454  0.28333333  0.01314673]
Reward: 4.999978222222222
Action: [5 2]
Obs: [-0.02637237  0.28333333  0.01307959]
Reward: 4.999978222222222
Action: [0 2]
Obs: [-0.02704142  0.28333333  0.01307315]
Reward: 4.999978222222222
Action: [2 2]
Obs: [-0.02780264  0.28333333  0.01314758]
Reward: 14.999978222222222
Action: [8 1]
Obs: [-0.02853276  0.28333333  0.01320014]
Reward: 4.999978222222222
Action: [4 0]
Obs: [-0.02927305  0.28333333  0.01323727]
Reward: 4.999978222222222
Action: [2 3]
Obs: [-0.03002355  0.28333333  0.01330559]
Reward: 4.999978222222222
Action: [3 0]
Obs: [-0.03086562  0.28333333  0.01343529]
Reward: 4.999978222222222
Action: [2 0]
Obs: [-0.03179931  0.28333333  0.01364603]
Reward: 4.999978222222222
Action: [0 2]
Obs: [-0.03285512  0.28333333  0.01395172]
Reward: 4.999978222222222
Action: [7 1]
Obs: [-0.03393132  0.28333333  0.01425062]
Reward: 4.999978222222222
Action: [7 2]
Obs: [-0.03491599  0.28333333  0.01446509]
Reward: 4.999978222222222
Act

2024-10-03 16:12:28,940 - INFO - Passed waypoint, moving to next. Reward: 14.999991


Action: [8 0]
Obs: [-0.07842362  0.28333333  0.01705841]
Reward: 4.999978222222222
Action: [8 2]
Obs: [-0.07927592  0.28333333  0.0170128 ]
Reward: 4.999978222222222
Action: [1 2]
Obs: [-0.08012816  0.28333333  0.01698076]
Reward: 4.999978222222222
Action: [0 2]
Obs: [-0.08104136  0.28333333  0.01699958]
Reward: 4.999978222222222
Action: [0 2]
Obs: [-0.08203586  0.28333333  0.01709418]
Reward: 4.999978222222222
Action: [8 3]
Obs: [-0.08300991  0.28333333  0.01716132]
Reward: 4.999978222222222
Action: [3 1]
Obs: [-0.08402455  0.3         0.01725203]
Reward: 4.999991
Action: [5 1]
Obs: [-0.08495829  0.3         0.01726508]
Reward: 14.999991
Action: [0 2]
Obs: [-0.08595236  0.3         0.01732646]
Reward: 4.999991
Action: [8 2]
Obs: [-0.08688527  0.3         0.01730051]
Reward: 4.999991
Action: [8 2]
Obs: [-0.087696    0.3         0.01727797]
Reward: 4.999991
Action: [0 3]
Obs: [-0.08861864  0.3         0.01702603]
Reward: 4.999991


2024-10-03 16:12:29,034 - INFO - lateral_distance: -0.08942948437054248, speed: 0.3, heading: 0.017222189613514478


s: 18.0, loc: Location(x=160.746536, y=306.259308, z=0.221884)
dest: Location(x=163.662704, y=306.523590, z=0.000000)
vehicle_transform: Transform(Location(x=160.746536, y=306.259308, z=0.221884), Rotation(pitch=0.010928, yaw=-3.178712, roll=-0.003876)), self.dest_waypoint: Waypoint(Transform(Location(x=163.662704, y=306.523590, z=0.000000), Rotation(pitch=0.000000, yaw=-0.078718, roll=0.000000))), velocity: Vector3D(x=5.088342, y=-0.270948, z=0.002407)
obs: [-0.08942948  0.3         0.01722219]
Action: [0 0]
Obs: [-0.08942948  0.3         0.01722219]
Reward: 4.999991
Action: [1 2]
Obs: [-0.09044385  0.3         0.01705688]
Reward: 4.999991
Action: [0 0]
Obs: [-0.09140738  0.3         0.017368  ]
Reward: 4.999991
Action: [3 0]
Obs: [-0.09255406  0.3         0.01730865]
Reward: 4.999991
Action: [0 1]
Obs: [-0.09360923  0.3         0.01771233]
Reward: 4.999991
Action: [5 2]
Obs: [-0.09481701  0.3         0.01769843]
Reward: 4.999991
Action: [3 1]
Obs: [-0.09590273  0.3         0.01811127

2024-10-03 16:12:30,992 - INFO - lateral_distance: -0.1327652407777102, speed: 0.3, heading: 0.003848364328344663


s: 18.0, loc: Location(x=165.875824, y=306.122253, z=0.221858)
dest: Location(x=163.662704, y=306.523590, z=0.000000)
vehicle_transform: Transform(Location(x=165.875824, y=306.122253, z=0.221858), Rotation(pitch=-0.002199, yaw=-0.771423, roll=-0.016479)), self.dest_waypoint: Waypoint(Transform(Location(x=163.662704, y=306.523590, z=0.000000), Rotation(pitch=0.000000, yaw=-0.078718, roll=0.000000))), velocity: Vector3D(x=5.064521, y=0.017549, z=-0.000008)
obs: [-0.13276523  0.3         0.00384836]
Action: [5 3]
Obs: [-0.13276523  0.3         0.00384836]
Reward: 4.999991
Action: [8 1]
Obs: [-0.13266057  0.3         0.0035715 ]
Reward: 4.999991
Action: [6 2]
Obs: [-0.13252549  0.3         0.00326751]
Reward: 4.999991
Action: [5 2]
Obs: [-0.13234963  0.3         0.0029486 ]
Reward: 4.999991
Action: [5 3]
Obs: [-0.13218403  0.3         0.00264987]
Reward: 4.999991
Action: [5 3]
Obs: [-0.13197763  0.3         0.00233452]
Reward: 4.999991
Action: [6 2]
Obs: [-0.13174084  0.3         0.0020181

2024-10-03 16:12:33,015 - INFO - lateral_distance: -0.1697982276285053, speed: 0.36666666666666664, heading: 0.007100181819664107


Action: [8 3]
Obs: [-0.16909902  0.35        0.00782514]
Reward: 10.0
Action: [4 0]
Obs: [-0.1693051   0.35        0.00754625]
Reward: 10.0
Action: [1 1]
Obs: [-0.16955166  0.36666667  0.00731652]
Reward: 10.0
s: 22.0, loc: Location(x=171.506851, y=306.003418, z=0.221757)
dest: Location(x=163.662704, y=306.523590, z=0.000000)
vehicle_transform: Transform(Location(x=171.506851, y=306.003418, z=0.221757), Rotation(pitch=0.014883, yaw=-1.356750, roll=-0.018494)), self.dest_waypoint: Waypoint(Transform(Location(x=163.662704, y=306.523590, z=0.000000), Rotation(pitch=0.000000, yaw=-0.078718, roll=0.000000))), velocity: Vector3D(x=6.134828, y=-0.061303, z=-0.000001)
obs: [-0.16979823  0.36666667  0.00710018]
Action: [8 0]
Obs: [-0.16979823  0.36666667  0.00710018]
Reward: 10.0
Action: [0 3]
Obs: [-0.17012599  0.36666667  0.00696302]
Reward: 10.0
Action: [0 2]
Obs: [-0.17052497  0.36666667  0.0069103 ]
Reward: 10.0
Action: [8 0]
Obs: [-0.1709441   0.36666667  0.00686503]
Reward: 10.0
Action: 

2024-10-03 16:12:35,119 - INFO - lateral_distance: -0.12935691272967142, speed: 0.4166666666666667, heading: -0.013674591936998897


Action: [8 0]
Obs: [-0.13795725  0.41666666 -0.01447161]
Reward: 10.0
Action: [1 3]
Obs: [-0.1368979   0.41666666 -0.01448307]
Reward: 10.0
Action: [6 2]
Obs: [-0.13579777  0.41666666 -0.01453315]
Reward: 10.0
Action: [1 0]
Obs: [-0.13473831  0.41666666 -0.01454475]
Reward: 10.0
Action: [0 0]
Obs: [-0.13374993  0.41666666 -0.01447375]
Reward: 10.0
Action: [7 0]
Obs: [-0.13275142  0.41666666 -0.01441624]
Reward: 10.0
Action: [0 2]
Obs: [-0.13183415  0.41666666 -0.01428775]
Reward: 10.0
Action: [2 0]
Obs: [-0.13098814  0.41666666 -0.01409607]
Reward: 10.0
Action: [7 1]
Obs: [-0.13016234  0.41666666 -0.0138965 ]
Reward: 10.0
s: 25.0, loc: Location(x=178.160568, y=306.115601, z=0.221789)
dest: Location(x=163.662704, y=306.523590, z=0.000000)
vehicle_transform: Transform(Location(x=178.160568, y=306.115601, z=0.221789), Rotation(pitch=0.009432, yaw=2.382709, roll=0.013375)), self.dest_waypoint: Waypoint(Transform(Location(x=163.662704, y=306.523590, z=0.000000), Rotation(pitch=0.000000, yaw

2024-10-03 16:12:37,236 - INFO - lateral_distance: 4.640452831152034e-05, speed: 0.4, heading: -0.017060658956567446


Action: [1 0]
Obs: [-0.0043589  0.4       -0.0174556]
Reward: 10.0
Action: [0 2]
Obs: [-0.00312782  0.4        -0.01747066]
Reward: 10.0
Action: [3 3]
Obs: [-0.00196802  0.4        -0.01743324]
Reward: 10.0
Action: [1 2]
Obs: [-0.00092007  0.4        -0.01728353]
Reward: 10.0
s: 24.0, loc: Location(x=185.132568, y=306.494232, z=0.220653)
dest: Location(x=163.662704, y=306.523590, z=0.000000)
vehicle_transform: Transform(Location(x=185.132568, y=306.494232, z=0.220653), Rotation(pitch=-0.002964, yaw=2.992201, roll=-0.030457)), self.dest_waypoint: Waypoint(Transform(Location(x=163.662704, y=306.523590, z=0.000000), Rotation(pitch=0.000000, yaw=-0.078718, roll=0.000000))), velocity: Vector3D(x=6.682970, y=0.260352, z=-0.001794)
obs: [ 4.640453e-05  4.000000e-01 -1.706066e-02]
Action: [1 0]
Obs: [ 4.640453e-05  4.000000e-01 -1.706066e-02]
Reward: 10.0
Action: [5 0]
Obs: [ 0.00096208  0.4        -0.01679592]
Reward: 10.0
Action: [0 3]
Obs: [ 0.00182678  0.4        -0.01650413]
Reward: 10.0


2024-10-03 16:12:39,456 - INFO - lateral_distance: 0.018249127759476097, speed: 0.36666666666666664, heading: -0.00648773606452677


Action: [3 2]
Obs: [ 0.01559309  0.36666667 -0.00528958]
Reward: 10.0
Action: [1 3]
Obs: [ 0.01609981  0.36666667 -0.00564092]
Reward: 10.0
Action: [8 2]
Obs: [ 0.01668818  0.36666667 -0.00576251]
Reward: 10.0
Action: [2 1]
Obs: [ 0.01714398  0.36666667 -0.00605852]
Reward: 10.0
Action: [8 2]
Obs: [ 0.0177018   0.36666667 -0.00613754]
Reward: 10.0
s: 22.0, loc: Location(x=191.583160, y=306.539978, z=0.301692)
dest: Location(x=163.662704, y=306.523590, z=0.000000)
vehicle_transform: Transform(Location(x=191.583160, y=306.539978, z=0.301692), Rotation(pitch=0.913203, yaw=1.089075, roll=-1.787292)), self.dest_waypoint: Waypoint(Transform(Location(x=163.662704, y=306.523590, z=0.000000), Rotation(pitch=0.000000, yaw=-0.078718, roll=0.000000))), velocity: Vector3D(x=6.228670, y=0.196798, z=0.005401)
obs: [ 0.01824913  0.36666667 -0.00648774]
Action: [5 2]
Obs: [ 0.01824913  0.36666667 -0.00648774]
Reward: 10.0
Action: [6 2]
Obs: [ 0.01893915  0.36666667 -0.00666958]
Reward: 10.0
Action: [3 

2024-10-03 16:12:41,718 - INFO - lateral_distance: -0.06036763039815088, speed: 0.3333333333333333, heading: 0.009542094709144699


Action: [0 2]
Obs: [-0.05925506  0.35        0.01585313]
Reward: 10.0
Action: [7 2]
Obs: [-0.05960461  0.35        0.01497524]
Reward: 10.0
Action: [7 0]
Obs: [-0.05973037  0.35        0.01442237]
Reward: 10.0
Action: [3 0]
Obs: [-0.05993756  0.35        0.01349751]
Reward: 10.0
Action: [7 0]
Obs: [-0.0599515   0.35        0.01294413]
Reward: 10.0
Action: [0 1]
Obs: [-0.06010789  0.35        0.01204098]
Reward: 10.0
Action: [7 0]
Obs: [-0.06009139  0.33333334  0.01153371]
Reward: 10.0
Action: [0 2]
Obs: [-0.06022753  0.33333334  0.01069787]
Reward: 10.0
Action: [8 2]
Obs: [-0.06020091  0.33333334  0.01027537]
Reward: 10.0
s: 20.0, loc: Location(x=197.639450, y=306.295807, z=0.322973)
dest: Location(x=163.662704, y=306.523590, z=0.000000)
vehicle_transform: Transform(Location(x=197.639450, y=306.295807, z=0.322973), Rotation(pitch=-0.119419, yaw=-1.796295, roll=-0.042358)), self.dest_waypoint: Waypoint(Transform(Location(x=163.662704, y=306.523590, z=0.000000), Rotation(pitch=0.000000, 

2024-10-03 16:12:42,751 - INFO - Collision detected, ending episode


Action: [8 0]
Obs: [-0.0766603   0.31666666  0.00884969]
Reward: 4.999998222222223
Action: [4 2]
Obs: [-0.07719499  0.31666666  0.00882917]
Reward: 4.999998222222223
Action: [1 3]
Obs: [-0.07766913  0.31666666  0.00904737]
Reward: 4.999998222222223
Action: [5 2]
Obs: [-0.07820398  0.31666666  0.00898651]
Reward: 4.999998222222223
Action: [0 2]
Obs: [-0.07868849  0.3         0.0091786 ]
Reward: 4.999991
Action: [0 0]
Obs: [-0.07931499  0.31666666  0.00923539]
Reward: 4.999998222222223
Action: [6 2]
Obs: [-0.07985043  0.3         0.00948445]
Reward: 4.999991
Action: [3 1]
Obs: [-0.07985043  0.3         0.00948445]
Reward: -1005.0
Episode 1 finished. Total Reward: 7039.942743000017 after 1345 timesteps.

Testing completed.


## Train

In [None]:


TIMESTEPS = 250000 # how long is each training iteration - individual steps
iters = 0
while iters<4:  # how many training iterations you want 
	iters += 1
	print('Iteration ', iters,' is to commence...')
	model.learn(total_timesteps=TIMESTEPS, reset_num_timesteps=False, tb_log_name=f"PPO" )
	print('Iteration ', iters,' has been trained')
	model.save(f"{models_dir}/{TIMESTEPS*iters}")
