In [12]:
'''
Terminology:
	Episode: one go of the car trying to "live" in the simulation and earn max rewards.
				Episode start from spawning a new car and ends with either car crashing or episode duration limit running out

	Timestep: one frame through simulation: the car gets a obs, reward from prior step and then it makes a decision on control input and sends it to simulation

	Reward logic: each timestep a logic is applied to calculate a reward from latest step. 
				This logic represents you describing the desired behaviour the car needs to learn.

	Policy/model: our objective, what we need learn as part of RL. This is the latest set of rules on what to do at a given camera image.

	Iterations: RL training sessions (multiple episodes and timesteps) when a policy/model is saved. So the policy is changed throughout one iteration
				but then saved in a new file at the end of iteration. This allows to test all models later at different stages of training  
'''	

from stable_baselines3 import PPO #PPO
from stable_baselines3.common.env_checker import check_env

import os
from environment import CarEnv
import time

In [17]:


env = CarEnv()  # Try check_env(env, warn=True) 
check_env(env, warn=True) 
env.reset()


2024-10-23 08:45:39,790 - INFO - Starting episode 1
2024-10-23 08:45:39,829 - INFO - Spawning vehicle at: Location(x=173.870056, y=105.550011, z=0.500000)
2024-10-23 08:45:39,834 - INFO - Current waypoint: Location(x=172.498642, y=105.426109, z=0.000000)
2024-10-23 08:45:39,835 - INFO - Target waypoint: Location(x=169.498642, y=105.428146, z=0.000000)
2024-10-23 08:45:39,842 - INFO - lateral_distance: -0.041611417809444225, speed: 0.0, heading: -0.9999979654947917
2024-10-23 08:45:39,844 - INFO - Starting episode 2
2024-10-23 08:45:39,849 - INFO - Spawning vehicle at: Location(x=162.920029, y=237.429962, z=0.500000)
2024-10-23 08:45:39,855 - INFO - Current waypoint: Location(x=161.521225, y=236.941635, z=0.000000)
2024-10-23 08:45:39,856 - INFO - Target waypoint: Location(x=158.521225, y=236.939819, z=0.000000)
2024-10-23 08:45:39,865 - INFO - lateral_distance: -0.1624902950196552, speed: 0.0, heading: -0.9999979654947917
2024-10-23 08:45:39,874 - INFO - Starting episode 3
2024-10-23 0

(array([ 0.15974912,  0.        , -0.09865252], dtype=float32), {})

In [18]:

print('Env has been reset as part of launch')

model = PPO(
    'MlpPolicy', 
    env, 
    verbose=1, 
    learning_rate=0.0003, 
    n_steps=1024, 
    batch_size=64, 
    clip_range=0.1, 
    gamma=0.99, 
    normalize_advantage=True,
)

	

Env has been reset as part of launch
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [23]:
state = env.reset()
action = [2, 5]

obs, reward, done, truncated, info = env.step(action)

print(f"Initial State: {state}")
print(f"Action: {action}")
print(f"Next State: {obs}")
print(f"Reward: {reward}")
print(f"Done: {done}")
print(f"Truncated: {truncated}")
print(f"Info: {info}")


2024-10-23 08:46:17,350 - INFO - Starting episode 9
2024-10-23 08:46:17,356 - INFO - Spawning vehicle at: Location(x=111.080048, y=302.570007, z=0.500000)
2024-10-23 08:46:17,362 - INFO - Current waypoint: Location(x=109.660667, y=302.550690, z=0.000000)
2024-10-23 08:46:17,363 - INFO - Target waypoint: Location(x=106.660667, y=302.549683, z=0.000000)
2024-10-23 08:46:17,369 - INFO - lateral_distance: -0.006275688430543787, speed: 0.0, heading: -0.9999979654947917


Initial State: (array([-0.00627569,  0.        , -0.999998  ], dtype=float32), {})
Action: [2, 5]
Next State: [-0.00627569  0.02222222 -0.999998  ]
Reward: -9.999383014569144
Done: False
Truncated: False
Info: {}


Loading model from models/0210/1727878874\750000.zip
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [24]:

# Number of episodes to test
num_test_episodes = 1

for episode in range(num_test_episodes):
    obs, info = env.reset()
    done = False
    total_reward = 0
    timestep = 0

    print(f"Starting Episode {episode + 1}")

    while not done:
        # Get the action from the model
        action, _states = model.predict(obs)
        obs, reward, done, truncated, info = env.step(action)
        print(f"Action: {action}")
        print(f"Obs: {obs}")
        print(f"Reward: {reward}")
        total_reward += reward
        timestep += 1
        if done or truncated:
            break

    print(f"Episode {episode + 1} finished. Total Reward: {total_reward} after {timestep} timesteps.\n")

print("Testing completed.")
env.close()


2024-10-23 08:46:21,701 - INFO - Starting episode 10
2024-10-23 08:46:21,707 - INFO - Spawning vehicle at: Location(x=55.410038, y=105.549988, z=0.500000)
2024-10-23 08:46:21,713 - INFO - Current waypoint: Location(x=55.501392, y=105.394341, z=0.000000)
2024-10-23 08:46:21,714 - INFO - Target waypoint: Location(x=52.501400, y=105.392303, z=0.000000)
2024-10-23 08:46:21,721 - INFO - lateral_distance: -0.051902021572299395, speed: 0.0, heading: 0.9986363728841146


Starting Episode 1
Action: [3 4]
Obs: [-0.05190202  0.02222222  0.99863636]
Reward: -9.999374557346155
Action: [6 0]
Obs: [-0.05190202  0.02222222  0.99863636]
Reward: -9.999374557346155
Action: [6 0]
Obs: [-0.05190202  0.02222222  0.99863636]
Reward: -9.999374557346155
Action: [4 0]
Obs: [-0.05190202  0.04444445  0.99863636]
Reward: -9.999374557346155
Action: [3 1]
Obs: [-0.05190202  0.04444445  0.99863636]
Reward: -9.999374557346155
Action: [1 5]
Obs: [-0.05190202  0.04444445  0.99863636]
Reward: -9.999374557346155
Action: [7 4]
Obs: [-0.05190202  0.06666667  0.99863636]
Reward: -9.999374557346155
Action: [3 1]
Obs: [-0.05190202  0.06666667  0.99863636]
Reward: -9.999374557346155
Action: [3 5]
Obs: [-0.05190202  0.06666667  0.99863636]
Reward: -9.999374557346155
Action: [3 0]
Obs: [-0.05190202  0.08888889  0.99863636]
Reward: -9.999374557346155
Action: [6 2]
Obs: [-0.05190202  0.08888889  0.99863636]
Reward: -9.999374557346155
Action: [5 0]
Obs: [-0.05190202  0.08888889  0.99863636]


2024-10-23 08:46:22,463 - INFO - lateral_distance: -0.051915356436758396, speed: 0.0, heading: 0.9986137390136719
2024-10-23 08:46:22,465 - INFO - vehicle_transform: Transform(Location(x=55.407307, y=105.550026, z=0.221222), Rotation(pitch=0.003948, yaw=179.996292, roll=0.000336)), self.dest_waypoint: Waypoint(Transform(Location(x=52.501400, y=105.392303, z=0.000000), Rotation(pitch=0.000000, yaw=-179.961029, roll=0.000000))), velocity: Vector3D(x=-0.267123, y=0.008016, z=0.003632)


Action: [0 2]
Obs: [-0.05190202  0.          0.99863636]
Reward: -9.999374557346155
Action: [8 2]
Obs: [-0.05190202  0.          0.99863636]
Reward: -9.999374557346155
Action: [1 3]
Obs: [-0.05190202  0.          0.99863636]
Reward: -9.999374557346155
Action: [4 5]
Obs: [-0.05190202  0.          0.99863636]
Reward: -9.999374557346155
Action: [3 3]
Obs: [-0.05190202  0.          0.99863636]
Reward: -9.999374557346155
Action: [7 2]
Obs: [-0.05190202  0.          0.99863636]
Reward: -9.999374557346155
Action: [4 5]
Obs: [-0.05190202  0.          0.99863636]
Reward: -9.999374557346155
Action: [7 3]
Obs: [-0.05190202  0.          0.99863636]
Reward: -9.999374557346155
Action: [7 1]
Obs: [-0.05190202  0.          0.99863636]
Reward: -9.999374557346155
Action: [7 3]
Obs: [-0.05190202  0.          0.99863636]
Reward: -9.999374557346155
Action: [8 3]
Obs: [-0.05190202  0.          0.99863636]
Reward: -9.999374557346155
Action: [1 4]
Obs: [-0.05190202  0.          0.99863636]
Reward: -9.99937455

2024-10-23 08:46:23,247 - INFO - lateral_distance: -0.052780414490778034, speed: 0.17777777777777778, heading: 0.9976919386121962
2024-10-23 08:46:23,248 - INFO - vehicle_transform: Transform(Location(x=53.913616, y=105.551605, z=0.221523), Rotation(pitch=0.026535, yaw=179.830368, roll=0.003350)), self.dest_waypoint: Waypoint(Transform(Location(x=52.501400, y=105.392303, z=0.000000), Rotation(pitch=0.000000, yaw=-179.961029, roll=0.000000))), velocity: Vector3D(x=-2.480397, y=0.051550, z=-0.000001)


Action: [6 0]
Obs: [-0.05249098  0.17777778  0.997928  ]
Reward: 0.0003981720760659613
Action: [4 5]
Obs: [-0.05243792  0.17777778  0.9979989 ]
Reward: 0.00039772589749631493
Action: [3 5]
Obs: [-0.05242298  0.17777778  0.997976  ]
Reward: 0.0003978699636704164
Action: [0 5]
Obs: [-0.05249717  0.17777778  0.99793285]
Reward: 0.00039814164475693303
Action: [0 2]
Obs: [-0.05263238  0.17777778  0.99778533]
Reward: 0.0003990712633328286
Action: [7 1]
Obs: [-0.05278042  0.17777778  0.9976919 ]
Reward: 0.00039966073119757084
Action: [4 3]
Obs: [-0.05288267  0.17777778  0.9975951 ]
Reward: 0.0004002727121736882
Action: [0 0]
Obs: [-0.05300792  0.2         0.99753326]
Reward: 0.0004633793898678462
Action: [0 1]
Obs: [-0.0531535  0.2        0.9974002]
Reward: 0.00046422094400888625
Action: [8 1]
Obs: [-0.05328393  0.2         0.997345  ]
Reward: 0.00046457098219132575
Action: [2 5]
Obs: [-0.05340162  0.2         0.9972442 ]
Reward: 0.0004652098168858032
Action: [8 3]
Obs: [-0.05348126  0.2     

2024-10-23 08:46:23,598 - INFO - Passed waypoint. Reward: 0.000573149435231457


Action: [0 5]
Obs: [-0.05453997  0.22222222  0.9968848 ]
Reward: 0.0005203321300424335
Action: [2 2]
Obs: [-0.05446503  0.22222222  0.9970044 ]
Reward: 0.0005195718995150145
Action: [6 2]
Obs: [-0.05434936  0.22222222  0.9971059 ]
Reward: 0.000518926686601695
Action: [2 4]
Obs: [-0.05429227  0.22222222  0.99719906]
Reward: 0.0005183353684614644
Action: [0 4]
Obs: [-0.05428856  0.22222222  0.997195  ]
Reward: 0.0005183611834134183
Action: [1 4]
Obs: [-0.05438412  0.22222222  0.9971551 ]
Reward: 0.0005186145483433435
Action: [1 3]
Obs: [-0.05455342  0.22222222  0.9970061 ]
Reward: 0.0005195605837968742
Action: [6 1]
Obs: [-0.05473806  0.22222222  0.996904  ]
Reward: 0.000520210216345518
Action: [5 5]
Obs: [-0.05483878  0.22222222  0.9968378 ]
Reward: 0.0005206316189738658
Action: [4 2]
Obs: [-0.0549396   0.22222222  0.9968218 ]
Reward: 0.0005207330994201342
Action: [2 3]
Obs: [-0.05505821  0.22222222  0.9967488 ]
Reward: 0.0005211986051509854
Action: [0 5]
Obs: [-0.05524559  0.22222222  

2024-10-23 08:46:24,141 - INFO - lateral_distance: -0.05930876440722049, speed: 0.3333333333333333, heading: 0.9991171095106337
2024-10-23 08:46:24,142 - INFO - vehicle_transform: Transform(Location(x=50.561481, y=105.567924, z=0.221511), Rotation(pitch=0.028584, yaw=-179.992447, roll=0.000462)), self.dest_waypoint: Waypoint(Transform(Location(x=49.498619, y=105.390747, z=0.000000), Rotation(pitch=0.000000, yaw=-180.040375, roll=0.000000))), velocity: Vector3D(x=-4.359118, y=-0.001527, z=-0.000001)


Action: [5 3]
Obs: [-0.05999885  0.31111112  0.998692  ]
Reward: 0.0006215270853058286
Action: [6 2]
Obs: [-0.05992294  0.31111112  0.9987386 ]
Reward: 0.0006212357448758254
Action: [7 0]
Obs: [-0.05978087  0.33333334  0.9988499 ]
Reward: 5.000624108685867
Action: [4 5]
Obs: [-0.05966926  0.33333334  0.99892503]
Reward: 5.0006236401700495
Action: [3 2]
Obs: [-0.05967204  0.33333334  0.9989006 ]
Reward: 5.0006237924255394
Action: [4 5]
Obs: [-0.05967223  0.33333334  0.99888414]
Reward: 5.00062389500748
Action: [6 5]
Obs: [-0.05962405  0.33333334  0.9989134 ]
Reward: 5.000623712592505
Action: [6 4]
Obs: [-0.05949954  0.33333334  0.9990128 ]
Reward: 5.000623092780393
Action: [0 3]
Obs: [-0.05940294  0.33333334  0.9990801 ]
Reward: 5.000622673581094
Action: [4 3]
Obs: [-0.05932663  0.33333334  0.9991265 ]
Reward: 5.0006223849516855
Action: [0 0]
Obs: [-0.05930876  0.33333334  0.99911714]
Reward: 5.000622443511
Action: [4 5]
Obs: [-0.05929339  0.33333334  0.9991118 ]
Reward: 5.0006224767498

2024-10-23 08:46:24,312 - INFO - Passed waypoint. Reward: 5.000623062149231


Action: [0 4]
Obs: [-0.05672734  0.35555556 -0.9993312 ]
Reward: 5.000621112780155
Action: [5 2]
Obs: [-0.0564595   0.35555556 -0.9991949 ]
Reward: 5.000621959375209
Action: [5 0]
Obs: [-0.05708646  0.35555556 -0.9980929 ]
Reward: 5.000623062149231
Action: [2 5]
Obs: [-0.05677066  0.35555556 -0.99796873]
Reward: 5.00062963190772
Action: [8 0]
Obs: [-0.05644463  0.35555556 -0.9978399 ]
Reward: 5.000630443087186
Action: [1 2]
Obs: [-0.05619997  0.35555556 -0.99778926]
Reward: 5.000630762718947
Action: [6 5]
Obs: [-0.05592476  0.35555556 -0.9977112 ]
Reward: 5.000631255311593
Action: [8 3]
Obs: [-0.05558087  0.35555556 -0.9975761 ]
Reward: 5.0006321082285465
Action: [4 3]
Obs: [-0.05527764  0.35555556 -0.9974812 ]
Reward: 5.000632708583717
Action: [5 4]
Obs: [-0.05495147  0.35555556 -0.99737334]
Reward: 5.000633391107126
Action: [6 5]
Obs: [-0.05456424  0.35555556 -0.9972155 ]
Reward: 5.0006343915375595
Action: [2 3]
Obs: [-0.05422778  0.35555556 -0.9971055 ]
Reward: 5.000635089871943
Act

2024-10-23 08:46:24,740 - INFO - Passed waypoint. Reward: 5.00061772586383
2024-10-23 08:46:24,894 - INFO - lateral_distance: -0.037935730323818355, speed: 0.4, heading: -0.9970052083333333
2024-10-23 08:46:24,895 - INFO - vehicle_transform: Transform(Location(x=45.601917, y=105.516495, z=0.221847), Rotation(pitch=0.002138, yaw=-179.460938, roll=-0.016174)), self.dest_waypoint: Waypoint(Transform(Location(x=43.492844, y=105.410301, z=0.000000), Rotation(pitch=0.000000, yaw=179.793152, roll=0.000000))), velocity: Vector3D(x=-5.183213, y=-0.134098, z=-0.000110)


Action: [8 2]
Obs: [-0.04440439  0.4        -0.99985665]
Reward: 5.000617857822099
Action: [6 3]
Obs: [-0.0445452  0.4        0.9999534]
Reward: 5.0006172605693955
Action: [7 0]
Obs: [-0.04457676  0.4         0.9998711 ]
Reward: 5.000617768799538
Action: [7 2]
Obs: [-0.04444532  0.4         0.99994755]
Reward: 5.000617296670813
Action: [8 1]
Obs: [-0.04420879  0.4        -0.999878  ]
Reward: 5.00061772586383
Action: [5 3]
Obs: [-0.04388136  0.4        -0.9996271 ]
Reward: 5.0006192776299025
Action: [0 5]
Obs: [-0.04357956  0.4        -0.9994021 ]
Reward: 5.000620672290706
Action: [0 1]
Obs: [-0.04334608  0.4        -0.9992579 ]
Reward: 5.000621567806804
Action: [5 1]
Obs: [-0.0431102   0.4        -0.99911714]
Reward: 5.000622443511
Action: [7 0]
Obs: [-0.04281296  0.4        -0.9989275 ]
Reward: 5.000623624840825
Action: [6 4]
Obs: [-0.04247021  0.4        -0.9987116 ]
Reward: 5.00062497260702
Action: [1 0]
Obs: [-0.04211696  0.4        -0.9985002 ]
Reward: 5.000626294619057
Action: [2

2024-10-23 08:46:25,164 - INFO - Passed waypoint. Reward: 5.000669864510009


Action: [1 4]
Obs: [-0.02108266  0.4        -0.9918616 ]
Reward: 5.000669277687704
Action: [5 3]
Obs: [-0.02050206  0.4        -0.99179214]
Reward: 5.000669742449482
Action: [0 5]
Obs: [-0.01997103  0.4        -0.9917739 ]
Reward: 5.000669864510009
Action: [8 0]
Obs: [-0.01941575  0.4        -0.99173754]
Reward: 5.000670108129812
Action: [6 0]
Obs: [-0.01878388  0.4        -0.99163276]
Reward: 5.00067081052284
Action: [8 0]
Obs: [-0.01810395  0.4        -0.99148506]
Reward: 5.000671801715162
Action: [2 4]
Obs: [-0.01740082  0.4        -0.99132895]
Reward: 5.000672851400104
Action: [8 2]
Obs: [-0.01668015  0.4        -0.9911601 ]
Reward: 5.00067398841001
Action: [8 4]
Obs: [-0.0158676   0.4        -0.99091196]
Reward: 5.000675662593024
Action: [4 5]
Obs: [-0.01507302  0.4        -0.9906916 ]
Reward: 5.000677152651819
Action: [2 3]
Obs: [-0.01434932  0.4        -0.9905526 ]
Reward: 5.000678094586996
Action: [1 0]
Obs: [-0.01369951  0.4        -0.9904864 ]
Reward: 5.00067854361529
Action:

2024-10-23 08:46:25,522 - INFO - Passed waypoint. Reward: 5.000681512588056


Action: [8 1]
Obs: [-0.00255254  0.42222223 -0.99123967]
Reward: 5.000673452200826
Action: [1 1]
Obs: [-0.00195856  0.42222223 -0.99120235]
Reward: 5.000673703406528
Action: [5 4]
Obs: [-0.00133935  0.42222223 -0.99114776]
Reward: 5.000674071249255
Action: [4 4]
Obs: [-7.2745036e-04  4.2222223e-01 -9.9110359e-01]
Reward: 5.000674368983247
Action: [5 2]
Obs: [-9.5436932e-05  4.2222223e-01 -9.9104631e-01]
Reward: 5.000674755490525
Action: [7 5]
Obs: [ 6.1062980e-04  4.2222223e-01 -9.9092346e-01]
Reward: 5.000675584711825
Action: [5 0]
Obs: [ 0.00134446  0.42222223 -0.99077827]
Reward: 5.00067656632716
Action: [4 5]
Obs: [ 0.00205826  0.42222223 -0.9906616 ]
Reward: 5.000677355861192
Action: [6 2]
Obs: [ 0.00277692  0.42222223 -0.9905443 ]
Reward: 5.00067815091483
Action: [1 1]
Obs: [ 0.00343995  0.42222223 -0.99048734]
Reward: 5.000678537288883
Action: [1 5]
Obs: [ 0.00404176  0.42222223 -0.9904871 ]
Reward: 5.000678539014261
Action: [4 4]
Obs: [ 0.00465151  0.42222223 -0.9904863 ]
Rewar

2024-10-23 08:46:25,592 - INFO - lateral_distance: 0.0200652995115286, speed: 0.4222222222222222, heading: -0.9897510104709202
2024-10-23 08:46:25,593 - INFO - vehicle_transform: Transform(Location(x=40.303375, y=105.361618, z=0.221818), Rotation(pitch=0.004501, yaw=-178.155182, roll=0.002272)), self.dest_waypoint: Waypoint(Transform(Location(x=37.492882, y=105.431961, z=0.000000), Rotation(pitch=0.000000, yaw=179.793152, roll=0.000000))), velocity: Vector3D(x=-5.483558, y=-0.155798, z=0.000000)


Action: [1 1]
Obs: [ 0.01946272  0.42222223 -0.98969704]
Reward: 5.000683920418643
Action: [8 3]
Obs: [ 0.0200653   0.42222223 -0.989751  ]
Reward: 5.000683551257537
Action: [3 0]
Obs: [ 0.02063251  0.42222223 -0.9898336 ]
Reward: 5.000682987179746
Action: [1 3]
Obs: [ 0.02117141  0.42222223 -0.98994225]
Reward: 5.000682245437678
Action: [5 0]
Obs: [ 0.02172578  0.42222223 -0.99003005]
Reward: 5.0006816466152495
Action: [7 2]
Obs: [ 0.0223103   0.42222223 -0.99008155]
Reward: 5.000681296004985
Action: [5 5]
Obs: [ 0.02298654  0.42222223 -0.9900473 ]
Reward: 5.000681529340014
Action: [3 5]
Obs: [ 0.02365481  0.42222223 -0.99002075]
Reward: 5.000681710171837
Action: [1 3]
Obs: [ 0.02427241  0.42222223 -0.99003845]
Reward: 5.000681589419388
Action: [2 5]
Obs: [ 0.02484387  0.42222223 -0.9900922 ]
Reward: 5.000681223248521
Action: [6 5]
Obs: [ 0.02542827  0.42222223 -0.9901352 ]
Reward: 5.0006809305688815
Action: [1 1]
Obs: [ 0.02599196  0.42222223 -0.9901912 ]
Reward: 5.0006805491773285
A

2024-10-23 08:46:25,941 - INFO - Passed waypoint. Reward: 5.00069870518441


Action: [5 4]
Obs: [ 0.03816722  0.42222223 -0.98933345]
Reward: 5.0006864112212135
Action: [0 5]
Obs: [ 0.03904301  0.42222223 -0.98913085]
Reward: 5.000687803127878
Action: [8 4]
Obs: [ 0.03991085  0.42222223 -0.9889438 ]
Reward: 5.000689090379657
Action: [3 2]
Obs: [ 0.04070772  0.42222223 -0.9888302 ]
Reward: 5.000689873476208
Action: [8 0]
Obs: [ 0.04150938  0.42222223 -0.9887163 ]
Reward: 5.000690659804155
Action: [4 0]
Obs: [ 0.04230365  0.42222223 -0.9886239 ]
Reward: 5.000691298183541
Action: [5 5]
Obs: [ 0.04310264  0.42222223 -0.9885286 ]
Reward: 5.000691957093548
Action: [6 1]
Obs: [ 0.0439756   0.42222223 -0.98837525]
Reward: 5.000693018882343
Action: [0 2]
Obs: [ 0.04482028  0.42222223 -0.9882514 ]
Reward: 5.000693877602268
Action: [3 2]
Obs: [ 0.04560669  0.42222223 -0.9881905 ]
Reward: 5.000694300593508
Action: [4 3]
Obs: [ 0.04638006  0.42222223 -0.98813975]
Reward: 5.000694652596233
Action: [5 0]
Obs: [ 0.04719436  0.42222223 -0.98805964]
Reward: 5.000695209218547
Act

2024-10-23 08:46:26,303 - INFO - lateral_distance: 0.09176301426976317, speed: 0.4222222222222222, heading: -0.9879079182942708
2024-10-23 08:46:26,304 - INFO - vehicle_transform: Transform(Location(x=34.993904, y=105.165688, z=0.221812), Rotation(pitch=0.005416, yaw=-177.823425, roll=0.002033)), self.dest_waypoint: Waypoint(Transform(Location(x=34.492901, y=105.442787, z=0.000000), Rotation(pitch=0.000000, yaw=179.793152, roll=0.000000))), velocity: Vector3D(x=-5.370464, y=-0.197105, z=0.000001)
2024-10-23 08:46:26,352 - INFO - Passed waypoint. Reward: 5.000699175562913


Action: [4 0]
Obs: [ 0.08165672  0.42222223 -0.98838407]
Reward: 5.000692957795647
Action: [8 5]
Obs: [ 0.08241745  0.42222223 -0.9883393 ]
Reward: 5.00069326798383
Action: [8 0]
Obs: [ 0.08325478  0.42222223 -0.98822784]
Reward: 5.000694041120474
Action: [4 2]
Obs: [ 0.08406647  0.42222223 -0.98813915]
Reward: 5.000694656717723
Action: [6 3]
Obs: [ 0.08491156  0.42222223 -0.9880299 ]
Reward: 5.000695416077541
Action: [7 4]
Obs: [ 0.08578691  0.42222223 -0.98789513]
Reward: 5.000696353902192
Action: [0 2]
Obs: [ 0.08664225  0.42222223 -0.98778695]
Reward: 5.000697107435758
Action: [0 2]
Obs: [ 0.08745414  0.42222223 -0.98772323]
Reward: 5.000697551906277
Action: [1 5]
Obs: [ 0.08817987  0.42222223 -0.98774654]
Reward: 5.000697389334337
Action: [6 5]
Obs: [ 0.08890794  0.42222223 -0.9877633 ]
Reward: 5.000697272305999
Action: [1 4]
Obs: [ 0.08960584  0.42222223 -0.987815  ]
Reward: 5.000696911887564
Action: [7 2]
Obs: [ 0.09033658  0.42222223 -0.98783475]
Reward: 5.000696774268708
Actio

2024-10-23 08:46:26,968 - INFO - lateral_distance: 0.1927843708498129, speed: 0.4444444444444444, heading: -0.9838066948784723
2024-10-23 08:46:26,969 - INFO - vehicle_transform: Transform(Location(x=29.465506, y=104.882584, z=0.221816), Rotation(pitch=0.004815, yaw=-177.085205, roll=0.003676)), self.dest_waypoint: Waypoint(Transform(Location(x=31.492924, y=105.453621, z=0.000000), Rotation(pitch=0.000000, yaw=179.793152, roll=0.000000))), velocity: Vector3D(x=-5.678137, y=-0.292641, z=0.000001)


Action: [4 0]
Obs: [ 0.17148851  0.44444445 -0.98273164]
Reward: 5.0007332491331375
Action: [1 5]
Obs: [ 0.17263952  0.44444445 -0.9826664 ]
Reward: 5.000733727838023
Action: [0 0]
Obs: [ 0.17370696  0.44444445 -0.98268926]
Reward: 5.000733559944901
Action: [6 1]
Obs: [ 0.17478187  0.44444445 -0.9827062 ]
Reward: 5.000733435604392
Action: [4 1]
Obs: [ 0.17585967  0.44444445 -0.98271924]
Reward: 5.00073333987656
Action: [3 0]
Obs: [ 0.1769144   0.44444445 -0.98275554]
Reward: 5.000733073893381
Action: [1 1]
Obs: [ 0.17789565  0.44444445 -0.9828621 ]
Reward: 5.000732293280669
Action: [4 2]
Obs: [ 0.17889194  0.44444445 -0.98294675]
Reward: 5.000731673481965
Action: [3 3]
Obs: [ 0.179858    0.44444445 -0.9830541 ]
Reward: 5.000730888784817
Action: [2 5]
Obs: [ 0.18079583  0.44444445 -0.9831887 ]
Reward: 5.000729905693922
Action: [6 0]
Obs: [ 0.18175429  0.44444445 -0.9832978 ]
Reward: 5.000729109914615
Action: [6 3]
Obs: [ 0.18274553  0.44444445 -0.9833637 ]
Reward: 5.000728629899101
Acti

2024-10-23 08:46:27,660 - INFO - lateral_distance: 0.285023463124892, speed: 0.4, heading: -0.9865655687120226


Action: [8 4]
Obs: [ 0.26361045  0.42222223 -0.98171943]
Reward: -3.268620910130735
Action: [0 0]
Obs: [ 0.2647055   0.42222223 -0.9817234 ]
Reward: -3.280992110505977
Action: [0 2]
Obs: [ 0.26575693  0.42222223 -0.98176295]
Reward: -3.292830276359566
Action: [8 5]
Obs: [ 0.26681864  0.42222223 -0.9817941 ]
Reward: -3.304744516397669
Action: [0 3]
Obs: [ 0.26783922  0.42222223 -0.98185664]
Reward: -3.316159121937748
Action: [3 1]
Obs: [ 0.2687862   0.42222223 -0.9819846 ]
Reward: -3.326717031873786
Action: [2 2]
Obs: [ 0.269687    0.42222223 -0.9821448 ]
Reward: -3.336729553548236
Action: [2 0]
Obs: [ 0.27045056  0.42222223 -0.9824239 ]
Reward: -3.345194171469818
Action: [6 3]
Obs: [ 0.27121115  0.42222223 -0.9826899 ]
Reward: -3.3536041880709977
Action: [8 3]
Obs: [ 0.27201766  0.42222223 -0.9829042 ]
Reward: -3.3624976318804074
Action: [3 3]
Obs: [ 0.2728263   0.42222223 -0.98310053]
Reward: -3.3713895974852766
Action: [3 1]
Obs: [ 0.27358428  0.42222223 -0.98333657]
Reward: -3.37970

2024-10-23 08:46:27,661 - INFO - vehicle_transform: Transform(Location(x=23.863073, y=104.626091, z=0.221904), Rotation(pitch=-0.010314, yaw=-177.581802, roll=0.007123)), self.dest_waypoint: Waypoint(Transform(Location(x=31.492924, y=105.453621, z=0.000000), Rotation(pitch=0.000000, yaw=179.793152, roll=0.000000))), velocity: Vector3D(x=-5.186290, y=-0.175870, z=0.000001)


Action: [7 2]
Obs: [ 0.28502345  0.4        -0.9865656 ]
Reward: -3.50237612430833
Action: [0 1]
Obs: [ 0.2856838  0.4       -0.9866829]
Reward: -3.5092931924066044
Action: [2 4]
Obs: [ 0.28631592  0.4        -0.98682207]
Reward: -3.515897561020635
Action: [5 5]
Obs: [ 0.2869763  0.4       -0.9869337]
Reward: -3.522779047598864
Action: [2 2]
Obs: [ 0.28762376  0.4        -0.98704475]
Reward: -3.529507524048221
Action: [1 2]
Obs: [ 0.28821048  0.4        -0.9872103 ]
Reward: -3.5355899222743625
Action: [4 1]
Obs: [ 0.28882498  0.4        -0.9873431 ]
Reward: -3.5419439394530783
Action: [1 1]
Obs: [ 0.2894194   0.4        -0.98748744]
Reward: -3.5480755623301703
Action: [7 3]
Obs: [ 0.2900594  0.4       -0.9875819]
Reward: -3.5546592488142186
Action: [5 2]
Obs: [ 0.2907988  0.4       -0.9875834]
Reward: -3.5622429516693974
Action: [0 0]
Obs: [ 0.2915329  0.4       -0.9875858]
Reward: -3.569748521789396
Action: [7 4]
Obs: [ 0.29231304  0.4        -0.98754984]
Reward: -3.577698686351692
Ac

2024-10-23 08:46:28,370 - INFO - lateral_distance: 0.35966015806966256, speed: 0.4, heading: -0.986905754937066
2024-10-23 08:46:28,372 - INFO - vehicle_transform: Transform(Location(x=18.726797, y=104.420723, z=0.221857), Rotation(pitch=-0.001619, yaw=-177.643036, roll=-0.014648)), self.dest_waypoint: Waypoint(Transform(Location(x=31.492924, y=105.453621, z=0.000000), Rotation(pitch=0.000000, yaw=179.793152, roll=0.000000))), velocity: Vector3D(x=-5.035600, y=-0.260432, z=-0.000019)


Action: [4 5]
Obs: [ 0.35085383  0.4        -0.9886445 ]
Reward: -4.093531274695944
Action: [5 3]
Obs: [ 0.3515302  0.4       -0.9886289]
Reward: -4.098536764381622
Action: [8 1]
Obs: [ 0.3522495   0.4        -0.98857415]
Reward: -4.103836264057221
Action: [6 0]
Obs: [ 0.35308096  0.4        -0.98841953]
Reward: -4.109930852928519
Action: [7 2]
Obs: [ 0.3539706  0.4       -0.9882144]
Reward: -4.116415764510046
Action: [6 2]
Obs: [ 0.35499015  0.4        -0.98790306]
Reward: -4.123801290311721
Action: [5 0]
Obs: [ 0.35599676  0.4        -0.9876109 ]
Reward: -4.131045203110596
Action: [0 5]
Obs: [ 0.35696042  0.4        -0.9873765 ]
Reward: -4.137935864239188
Action: [7 0]
Obs: [ 0.357906  0.4      -0.987166]
Reward: -4.144655374879497
Action: [3 3]
Obs: [ 0.35878068  0.4        -0.9870336 ]
Reward: -4.15083409830531
Action: [8 0]
Obs: [ 0.35966015  0.4        -0.98690575]
Reward: -4.15701108883566
Action: [4 5]
Obs: [ 0.36052212  0.4        -0.9868031 ]
Reward: -4.1630302384049696
Actio

2024-10-23 08:46:29,071 - INFO - lateral_distance: 0.40164820525731737, speed: 0.4, heading: -0.9967601352267795
2024-10-23 08:46:29,073 - INFO - vehicle_transform: Transform(Location(x=13.615994, y=104.313210, z=0.211756), Rotation(pitch=-0.414306, yaw=-179.416824, roll=0.890776)), self.dest_waypoint: Waypoint(Transform(Location(x=31.492924, y=105.453621, z=0.000000), Rotation(pitch=0.000000, yaw=179.793152, roll=0.000000))), velocity: Vector3D(x=-5.165397, y=0.012236, z=-0.036701)


Action: [5 0]
Obs: [ 0.39817482  0.4        -0.99465615]
Reward: -4.393686243393631
Action: [6 1]
Obs: [ 0.39865142  0.4        -0.9948636 ]
Reward: -4.396219833841912
Action: [0 0]
Obs: [ 0.3989068  0.4       -0.9946883]
Reward: -4.397571876916964
Action: [1 0]
Obs: [ 0.3994599   0.4        -0.99493843]
Reward: -4.400495201302836
Action: [3 0]
Obs: [ 0.39973858  0.4        -0.9948778 ]
Reward: -4.4019622272415235
Action: [3 2]
Obs: [ 0.40013137  0.4        -0.9952625 ]
Reward: -4.404027857302877
Action: [6 1]
Obs: [ 0.40021917  0.4        -0.99528307]
Reward: -4.404488191510527
Action: [7 0]
Obs: [ 0.40045926  0.4        -0.9956621 ]
Reward: -4.405747882757694
Action: [7 3]
Obs: [ 0.40054432  0.4        -0.9956294 ]
Reward: -4.406192486938244
Action: [1 2]
Obs: [ 0.4007615  0.4       -0.9959695]
Reward: -4.407329045647723
Action: [5 3]
Obs: [ 0.4008134   0.4        -0.99591947]
Reward: -4.407599609703537
Action: [6 1]
Obs: [ 0.40109155  0.4        -0.9962025 ]
Reward: -4.4090510666475

2024-10-23 08:46:29,806 - INFO - lateral_distance: 0.40883276028467236, speed: 0.4222222222222222, heading: -0.9995240953233507
2024-10-23 08:46:29,808 - INFO - vehicle_transform: Transform(Location(x=8.258745, y=104.310997, z=0.210758), Rotation(pitch=-0.439181, yaw=-179.914337, roll=0.889301)), self.dest_waypoint: Waypoint(Transform(Location(x=31.492924, y=105.453621, z=0.000000), Rotation(pitch=0.000000, yaw=179.793152, roll=0.000000))), velocity: Vector3D(x=-5.538078, y=0.107022, z=0.039881)


Action: [2 4]
Obs: [ 0.40902027  0.42222223 -0.9995009 ]
Reward: -4.449116357179777
Action: [0 0]
Obs: [ 0.40883276  0.42222223 -0.9995241 ]
Reward: -4.448197566442798
Action: [6 0]
Obs: [ 0.4087902   0.42222223 -0.9999377 ]
Reward: -4.4479913479980135
Action: [3 3]
Obs: [ 0.40855682  0.42222223 -0.9999661 ]
Reward: -4.446845633458706
Action: [4 5]
Obs: [0.40850395 0.42222223 0.99964327]
Reward: -4.446583692718043
Action: [6 3]
Obs: [0.40839    0.42222223 0.9997406 ]
Reward: -4.446023748254829
Action: [2 3]
Obs: [0.4084311  0.42222223 0.99946177]
Reward: -4.44622433194389
Action: [8 5]
Obs: [0.4083908  0.42222223 0.999649  ]
Reward: -4.446027138372972
Action: [5 5]
Obs: [0.40855387 0.42222223 0.99950314]
Reward: -4.446828152856651
Action: [3 3]
Obs: [0.4085618  0.42222223 0.99974036]
Reward: -4.446868615818131
Action: [7 3]
Obs: [0.4087375  0.42222223 0.9996053 ]
Reward: -4.447730794709488
Action: [4 3]
Obs: [0.4087505  0.42222223 0.9998491 ]
Reward: -4.447796058127337
Action: [0 4]
Ob

2024-10-23 08:46:30,619 - INFO - lateral_distance: 0.4140084104901583, speed: 0.4666666666666667, heading: 0.9964202880859375
2024-10-23 08:46:30,620 - INFO - vehicle_transform: Transform(Location(x=2.411097, y=104.316582, z=0.240298), Rotation(pitch=0.865767, yaw=179.355652, roll=-1.634033)), self.dest_waypoint: Waypoint(Transform(Location(x=31.492924, y=105.453621, z=0.000000), Rotation(pitch=0.000000, yaw=179.793152, roll=0.000000))), velocity: Vector3D(x=-6.107675, y=0.116589, z=0.037476)


Action: [1 1]
Obs: [0.41974077 0.46666667 0.99887973]
Reward: -4.499458454661819
Action: [6 2]
Obs: [0.41980106 0.46666667 0.9988174 ]
Reward: -4.499729296668939
Action: [5 3]
Obs: [0.41986275 0.46666667 0.9990047 ]
Reward: -4.500007790312489
Action: [0 2]
Obs: [0.41998392 0.46666667 0.9989738 ]
Reward: -4.500551985239766
Action: [2 0]
Obs: [0.4200097  0.46666667 0.9991172 ]
Reward: -4.500668715786868
Action: [2 1]
Obs: [0.41999105 0.46666667 0.9990245 ]
Reward: -4.500584302305287
Action: [0 2]
Obs: [0.41984665 0.46666667 0.9990445 ]
Reward: -4.499935731785934
Action: [1 2]
Obs: [0.41961667 0.46666667 0.9987359 ]
Reward: -4.498898938710551
Action: [1 3]
Obs: [0.419243   0.46666667 0.99856347]
Reward: -4.497212469024575
Action: [1 2]
Obs: [0.41872302 0.46666667 0.9980828 ]
Reward: -4.494855647071051
Action: [3 5]
Obs: [0.418113   0.46666667 0.99775434]
Reward: -4.492079718584379
Action: [6 4]
Obs: [0.41754973 0.46666667 0.997235  ]
Reward: -4.489503074047129
Action: [3 5]
Obs: [0.416944

2024-10-23 08:46:31,382 - INFO - lateral_distance: 0.43332639475053036, speed: 0.5333333333333333, heading: -0.994012451171875
2024-10-23 08:46:31,383 - INFO - vehicle_transform: Transform(Location(x=-4.047334, y=104.281944, z=0.299722), Rotation(pitch=1.033449, yaw=-178.922241, roll=-1.487976)), self.dest_waypoint: Waypoint(Transform(Location(x=31.492924, y=105.453621, z=0.000000), Rotation(pitch=0.000000, yaw=179.793152, roll=0.000000))), velocity: Vector3D(x=-6.928198, y=-0.097530, z=-0.015622)


Action: [8 3]
Obs: [ 0.42868614  0.53333336 -0.9928097 ]
Reward: -4.5382522566647845
Action: [1 5]
Obs: [ 0.42917585  0.53333336 -0.99291146]
Reward: -4.540298568811235
Action: [2 0]
Obs: [ 0.4296621   0.53333336 -0.9932096 ]
Reward: -4.542323796555466
Action: [4 0]
Obs: [ 0.43012664  0.53333336 -0.9933154 ]
Reward: -4.544249665291501
Action: [3 0]
Obs: [ 0.43061817  0.53333336 -0.99359155]
Reward: -4.546280702875294
Action: [5 3]
Obs: [ 0.43112856  0.53333336 -0.99363494]
Reward: -4.54837945720324
Action: [7 1]
Obs: [ 0.43174732  0.53333336 -0.99377817]
Reward: -4.550912720251637
Action: [2 1]
Obs: [ 0.43231636  0.53333336 -0.9937595 ]
Reward: -4.553230084118968
Action: [0 0]
Obs: [ 0.43289208  0.53333336 -0.9939318 ]
Reward: -4.555564899563918
Action: [0 3]
Obs: [ 0.4333264   0.53333336 -0.9940125 ]
Reward: -4.557318618021924
Action: [7 2]
Obs: [ 0.43382326  0.53333336 -0.9942434 ]
Reward: -4.559318117792445
Action: [4 1]
Obs: [ 0.43428048  0.53333336 -0.99429286]
Reward: -4.56114989

2024-10-23 08:46:32,144 - INFO - lateral_distance: 0.46878090644771514, speed: 0.5333333333333333, heading: -0.9946311950683594


Action: [5 2]
Obs: [ 0.456532    0.53333336 -0.9952722 ]
Reward: -4.6423332238360375
Action: [6 1]
Obs: [ 0.457139    0.53333336 -0.99507236]
Reward: -4.64433902607498
Action: [2 4]
Obs: [ 0.45776165  0.53333336 -0.9948636 ]
Reward: -4.646385750001797
Action: [2 1]
Obs: [ 0.4583104   0.53333336 -0.99470735]
Reward: -4.6481806577110625
Action: [2 0]
Obs: [ 0.45875522  0.53333336 -0.9946624 ]
Reward: -4.649629939635697
Action: [1 1]
Obs: [ 0.45911592  0.53333336 -0.9947129 ]
Reward: -4.650801727381086
Action: [5 2]
Obs: [ 0.4595182   0.53333336 -0.99474275]
Reward: -4.652103954351264
Action: [6 1]
Obs: [ 0.45994034  0.53333336 -0.9947861 ]
Reward: -4.653465873719056
Action: [5 4]
Obs: [ 0.4604416   0.53333336 -0.9947484 ]
Reward: -4.65507599210536
Action: [8 4]
Obs: [ 0.46091717  0.53333336 -0.99462813]
Reward: -4.656596621206858
Action: [5 1]
Obs: [ 0.46150482  0.53333336 -0.9944535 ]
Reward: -4.658466854751703
Action: [2 2]
Obs: [ 0.46207702  0.53333336 -0.99430084]
Reward: -4.66027894

2024-10-23 08:46:32,146 - INFO - vehicle_transform: Transform(Location(x=-10.793550, y=104.199936, z=0.314866), Rotation(pitch=-0.125334, yaw=-179.033615, roll=0.216103)), self.dest_waypoint: Waypoint(Transform(Location(x=31.492924, y=105.453621, z=0.000000), Rotation(pitch=0.000000, yaw=179.793152, roll=0.000000))), velocity: Vector3D(x=-6.708284, y=-0.084481, z=-0.003537)
2024-10-23 08:46:32,275 - INFO - Collision detected, resetting vehicle.


Action: [3 4]
Obs: [ 0.4687809   0.53333336 -0.9946312 ]
Reward: -4.680866205294212
Action: [7 4]
Obs: [ 0.46919084  0.53333336 -0.9947126 ]
Reward: -4.682086931189857
Action: [5 5]
Obs: [ 0.4695767   0.53333336 -0.9945952 ]
Reward: -4.683230736299747
Action: [8 2]
Obs: [ 0.470119    0.53333336 -0.9945247 ]
Reward: -4.684832460959617
Action: [3 0]
Obs: [ 0.47059426  0.53333336 -0.99432933]
Reward: -4.686228979835479
Action: [8 3]
Obs: [ 0.4711698   0.53333336 -0.9942645 ]
Reward: -4.687913571674349
Action: [8 0]
Obs: [ 0.47172892  0.53333336 -0.994     ]
Reward: -4.689540723545562
Action: [7 3]
Obs: [ 0.47246993  0.53333336 -0.993766  ]
Reward: -4.691685647076683
Action: [5 0]
Obs: [ 0.47318968  0.53333336 -0.99337703]
Reward: -4.693754613319678
Action: [2 3]
Obs: [ 0.47391143  0.53333336 -0.99323136]
Reward: -4.695817933549538
Action: [1 3]
Obs: [ 0.47455335  0.53333336 -0.99294555]
Reward: -4.697640973437631
Action: [6 5]
Obs: [ 0.47542036  0.53333336 -0.992791  ]
Reward: -4.70008844

2024-10-23 08:46:33,117 - INFO - lateral_distance: 0.009083859818539831, speed: 0.044444444444444446, heading: 0.9997745937771267


Action: [3 3]
Obs: [0.00904984 0.02222222 0.9999955 ]
Reward: -9.999382999403105
Action: [2 0]
Obs: [0.00906201 0.         0.999957  ]
Reward: -9.999382761927523
Action: [5 4]
Obs: [0.00906644 0.02222222 0.9999325 ]
Reward: -9.999382610711544
Action: [0 4]
Obs: [0.00904283 0.02222222 0.99995404]
Reward: -9.99938274361614
Action: [2 2]
Obs: [0.00901155 0.02222222 0.99992645]
Reward: -9.999382573032596
Action: [7 5]
Obs: [0.00901835 0.02222222 0.9998347 ]
Reward: -9.999382006524755
Action: [5 1]
Obs: [0.00899965 0.02222222 0.9998649 ]
Reward: -9.9993821929747
Action: [7 3]
Obs: [0.00902148 0.02222222 0.9998858 ]
Reward: -9.99938232230439
Action: [4 5]
Obs: [0.0090408  0.02222222 0.9998743 ]
Reward: -9.999382251097868
Action: [3 3]
Obs: [0.00901941 0.02222222 0.99993616]
Reward: -9.999382633213154
Action: [4 4]
Obs: [0.00901565 0.02222222 0.99995315]
Reward: -9.999382737861023
Action: [1 5]
Obs: [0.00901439 0.04444445 0.999938  ]
Reward: -9.999382644725287
Action: [2 2]
Obs: [0.00899023 0

2024-10-23 08:46:33,118 - INFO - vehicle_transform: Transform(Location(x=55.361015, y=105.366997, z=0.221757), Rotation(pitch=0.041063, yaw=-179.794754, roll=-0.027191)), self.dest_waypoint: Waypoint(Transform(Location(x=52.501400, y=105.392303, z=0.000000), Rotation(pitch=0.000000, yaw=-179.961029, roll=0.000000))), velocity: Vector3D(x=-0.730028, y=-0.018008, z=-0.002652)


Action: [5 0]
Obs: [0.00908386 0.04444445 0.9997746 ]
Reward: -9.999381635028453
Action: [1 1]
Obs: [0.00912281 0.04444445 0.99976647]
Reward: -9.999381584709962
Action: [3 3]
Obs: [0.00911845 0.04444445 0.99973613]
Reward: -9.999381397027808
Action: [6 0]
Obs: [0.00911703 0.04444445 0.99971664]
Reward: -9.999381276419841
Action: [7 1]
Obs: [0.00913585 0.04444445 0.9997207 ]
Reward: -9.999381301592141
Action: [3 4]
Obs: [0.00913423 0.04444445 0.99971074]
Reward: -9.999381240232866
Action: [7 3]
Obs: [0.0091478  0.04444445 0.99971867]
Reward: -9.999381289006118
Action: [8 5]
Obs: [0.00918168 0.04444445 0.9997767 ]
Reward: -9.999381648131557
Action: [3 1]
Obs: [0.0092104  0.04444445 0.99979264]
Reward: -9.999381746657996
Action: [0 0]
Obs: [0.00922136 0.04444445 0.9997376 ]
Reward: -9.9993814064657
Action: [2 1]
Obs: [0.00918396 0.04444445 0.999671  ]
Reward: -9.999380994210245
Action: [1 0]
Obs: [0.00910085 0.04444445 0.9996239 ]
Reward: -9.999380702423458
Action: [2 0]
Obs: [0.00902321

2024-10-23 08:46:33,858 - INFO - lateral_distance: 0.009078046042978894, speed: 0.022222222222222223, heading: 0.9995452033148872
2024-10-23 08:46:33,859 - INFO - vehicle_transform: Transform(Location(x=55.155888, y=105.366875, z=0.221630), Rotation(pitch=0.008237, yaw=-179.836044, roll=0.000906)), self.dest_waypoint: Waypoint(Transform(Location(x=52.501400, y=105.392303, z=0.000000), Rotation(pitch=0.000000, yaw=-179.961029, roll=0.000000))), velocity: Vector3D(x=-0.336746, y=0.010512, z=-0.000002)


Action: [1 2]
Obs: [0.00901319 0.         0.99954927]
Reward: -9.999380240321528
Action: [5 3]
Obs: [0.00903579 0.         0.999509  ]
Reward: -9.999379990748256
Action: [3 2]
Obs: [0.00902525 0.         0.99953985]
Reward: -9.99938018200919
Action: [8 4]
Obs: [0.00902238 0.         0.9995498 ]
Reward: -9.999380243473393
Action: [7 3]
Obs: [0.00905501 0.         0.9995129 ]
Reward: -9.99938001492185
Action: [1 3]
Obs: [0.0090545  0.         0.99955004]
Reward: -9.999380245049316
Action: [4 5]
Obs: [0.00905658 0.         0.99956375]
Reward: -9.999380330143316
Action: [7 2]
Obs: [0.00908651 0.         0.9995282 ]
Reward: -9.999380110030543
Action: [7 0]
Obs: [0.00909372 0.         0.9995463 ]
Reward: -9.999380221935349
Action: [7 1]
Obs: [0.00910857 0.         0.99956226]
Reward: -9.999380320689003
Action: [0 3]
Obs: [0.00911578 0.         0.99958473]
Reward: -9.999380459862918
Action: [1 2]
Obs: [0.00912279 0.         0.9995866 ]
Reward: -9.999380471415574
Action: [2 1]
Obs: [0.00911187

2024-10-23 08:46:34,565 - INFO - lateral_distance: 0.009741542567412676, speed: 0.022222222222222223, heading: 0.9990277608235677
2024-10-23 08:46:34,566 - INFO - vehicle_transform: Transform(Location(x=54.818207, y=105.364655, z=0.221629), Rotation(pitch=0.008428, yaw=-179.929184, roll=0.001070)), self.dest_waypoint: Waypoint(Transform(Location(x=52.501400, y=105.392303, z=0.000000), Rotation(pitch=0.000000, yaw=-179.961029, roll=0.000000))), velocity: Vector3D(x=-0.538304, y=-0.000267, z=0.000001)


Action: [6 3]
Obs: [0.0091125  0.02222222 0.9988168 ]
Reward: -9.999375684441855
Action: [4 5]
Obs: [0.00911929 0.02222222 0.998811  ]
Reward: -9.999375648457072
Action: [6 2]
Obs: [0.00910067 0.02222222 0.99889433]
Reward: -9.999376168447256
Action: [8 2]
Obs: [0.00913281 0.02222222 0.99889916]
Reward: -9.999376198585995
Action: [2 4]
Obs: [0.00917256 0.02222222 0.9988494 ]
Reward: -9.99937588814014
Action: [6 2]
Obs: [0.0091842  0.02222222 0.9988676 ]
Reward: -9.999376001335905
Action: [8 3]
Obs: [0.00919839 0.02222222 0.99891174]
Reward: -9.99937627683417
Action: [6 5]
Obs: [0.00926336 0.02222222 0.9989123 ]
Reward: -9.999376280534854
Action: [0 3]
Obs: [0.00928766 0.02222222 0.99898124]
Reward: -9.999376710193562
Action: [3 5]
Obs: [0.00928888 0.04444445 0.999008  ]
Reward: -9.9993768771151
Action: [8 5]
Obs: [0.00930524 0.04444445 0.99902904]
Reward: -9.999377008085508
Action: [6 0]
Obs: [0.00931186 0.02222222 0.9990359 ]
Reward: -9.99937705085617
Action: [8 1]
Obs: [0.00935395 0.

2024-10-23 08:46:35,241 - INFO - lateral_distance: 0.012308977794676596, speed: 0.15555555555555556, heading: 0.9998773362901475


Action: [6 0]
Obs: [0.01002417 0.13333334 0.99851996]
Reward: 0.0002393931654340875
Action: [1 2]
Obs: [0.01009911 0.13333334 0.9985917 ]
Reward: 0.0002389443159156457
Action: [5 2]
Obs: [0.01020694 0.13333334 0.99868274]
Reward: 0.00023837496326439833
Action: [2 1]
Obs: [0.01026908 0.13333334 0.9987352 ]
Reward: 0.00023804705214924837
Action: [8 0]
Obs: [0.0103692  0.13333334 0.9988149 ]
Reward: 0.00023754942227505182
Action: [6 0]
Obs: [0.0105152  0.13333334 0.9989265 ]
Reward: 0.00023685287753671957
Action: [1 1]
Obs: [0.01063815 0.13333334 0.9990181 ]
Reward: 0.0002362822590153968
Action: [7 3]
Obs: [0.01077392 0.13333334 0.999116  ]
Reward: 0.00023567259188084222
Action: [8 0]
Obs: [0.01095277 0.13333334 0.99924815]
Reward: 0.00023485061904526816
Action: [0 1]
Obs: [0.01109866 0.13333334 0.99934834]
Reward: 0.00023422814050899632
Action: [7 5]
Obs: [0.0112622  0.13333334 0.99945796]
Reward: 0.00023354791963914323
Action: [2 2]
Obs: [0.01137499 0.13333334 0.99952316]
Reward: 0.0002

2024-10-23 08:46:35,242 - INFO - vehicle_transform: Transform(Location(x=53.511982, y=105.356064, z=0.221589), Rotation(pitch=0.015190, yaw=-179.776260, roll=0.003246)), self.dest_waypoint: Waypoint(Transform(Location(x=52.501400, y=105.392303, z=0.000000), Rotation(pitch=0.000000, yaw=-179.961029, roll=0.000000))), velocity: Vector3D(x=-2.051816, y=-0.007015, z=0.000001)


Action: [6 2]
Obs: [0.01230898 0.15555556 0.99987733]
Reward: 0.0003134214105724453
Action: [6 5]
Obs: [0.01238327 0.15555556 0.99992514]
Reward: 0.00031312617549961175
Action: [0 3]
Obs: [0.0124396  0.15555556 0.9999542 ]
Reward: 0.0003129466955051896
Action: [8 3]
Obs: [0.01251382 0.15555556 0.99999624]
Reward: 0.0003126872481793441
Action: [7 4]
Obs: [ 0.0126362   0.15555556 -0.99992436]
Reward: 0.0003131308855897075
Action: [5 4]
Obs: [ 0.01278154  0.15555556 -0.9998269 ]
Reward: 0.0003137330261120397
Action: [3 2]
Obs: [ 0.01289365  0.15555556 -0.99976784]
Reward: 0.00031409826136474805
Action: [6 4]
Obs: [ 0.01300835  0.15555556 -0.9997066 ]
Reward: 0.0003144768244398577
Action: [8 5]
Obs: [ 0.01316358  0.15555556 -0.99961346]
Reward: 0.000315053503096685
Action: [2 5]
Obs: [ 0.01327815  0.15555556 -0.9995581 ]
Reward: 0.00031539640648947653
Action: [1 5]
Obs: [ 0.01330609  0.17777778 -0.99958235]
Reward: 0.0003878387913909265
Action: [6 4]
Obs: [ 0.01333912  0.17777778 -0.999596

2024-10-23 08:46:35,438 - INFO - Passed waypoint. Reward: 0.0005011304215472023


Action: [2 0]
Obs: [ 0.01210985  0.22222222 -0.9996107 ]
Reward: 0.0005011304215472023
Action: [7 5]
Obs: [ 0.01209104  0.22222222 -0.9996601 ]
Reward: 0.000502912982411452
Action: [6 3]
Obs: [ 0.01215619  0.22222222 -0.99963313]
Reward: 0.0005030798690732752
Action: [4 1]
Obs: [ 0.01221633  0.22222222 -0.99961513]
Reward: 0.0005031911518393883
Action: [3 5]
Obs: [ 0.01223582  0.22222222 -0.99963397]
Reward: 0.0005030746203811276
Action: [2 0]
Obs: [ 0.01218926  0.22222222 -0.9997097 ]
Reward: 0.0005026055669095086
Action: [6 5]
Obs: [ 0.01217071  0.22222222 -0.999759  ]
Reward: 0.0005023009250946231
Action: [7 3]
Obs: [ 0.01220818  0.22222222 -0.99975926]
Reward: 0.0005022993524619324
Action: [6 1]
Obs: [ 0.01231182  0.22222222 -0.9996981 ]
Reward: 0.0005026774234124787
Action: [4 5]
Obs: [ 0.01240027  0.22222222 -0.99965787]
Reward: 0.0005029266255442977
Action: [6 3]
Obs: [ 0.0125142   0.22222222 -0.9996    ]
Reward: 0.0005032851278556905
Action: [1 2]
Obs: [ 0.01257479  0.24444444 

2024-10-23 08:46:35,981 - INFO - lateral_distance: 0.019833834817413232, speed: 0.3111111111111111, heading: -0.9978635999891493
2024-10-23 08:46:35,983 - INFO - vehicle_transform: Transform(Location(x=50.354195, y=105.330643, z=0.221592), Rotation(pitch=0.014794, yaw=-179.448975, roll=0.005888)), self.dest_waypoint: Waypoint(Transform(Location(x=49.498619, y=105.390747, z=0.000000), Rotation(pitch=0.000000, yaw=-180.040375, roll=0.000000))), velocity: Vector3D(x=-3.915017, y=-0.031510, z=0.000000)


Action: [7 0]
Obs: [ 0.01785169  0.2888889  -0.99777925]
Reward: 0.0006139245746670241
Action: [2 3]
Obs: [ 0.01808685  0.2888889  -0.99770755]
Reward: 0.000614377084774631
Action: [8 5]
Obs: [ 0.01833984  0.2888889  -0.9976213 ]
Reward: 0.0006149214893040877
Action: [3 0]
Obs: [ 0.01852418  0.2888889  -0.9975982 ]
Reward: 0.0006150677072938038
Action: [7 5]
Obs: [ 0.0187416   0.2888889  -0.99754494]
Reward: 0.0006154041906789587
Action: [3 4]
Obs: [ 0.01889545  0.2888889  -0.9975446 ]
Reward: 0.0006154063344592231
Action: [8 5]
Obs: [ 0.01909765  0.2888889  -0.99750644]
Reward: 0.0006156475561462926
Action: [2 5]
Obs: [ 0.01924392  0.2888889  -0.99751705]
Reward: 0.0006155805408933901
Action: [0 1]
Obs: [ 0.01930629  0.2888889  -0.9975982 ]
Reward: 0.0006150677072938038
Action: [6 4]
Obs: [ 0.01939158  0.2888889  -0.99765277]
Reward: 0.0006147228370219082
Action: [8 5]
Obs: [ 0.01953285  0.2888889  -0.9976521 ]
Reward: 0.0006147271199692383
Action: [0 5]
Obs: [ 0.01963346  0.2888889  

2024-10-23 08:46:36,101 - INFO - Passed waypoint. Reward: 0.0006265359549306737


Action: [7 5]
Obs: [ 0.02138003  0.31111112 -0.9977927 ]
Reward: 0.0006271728979871938
Action: [0 0]
Obs: [ 0.02154231  0.31111112 -0.9977813 ]
Reward: 0.0006272450755666625
Action: [0 5]
Obs: [ 0.0216156   0.31111112 -0.9978481 ]
Reward: 0.0006268238889166611
Action: [7 2]
Obs: [ 0.02071746  0.31111112 -0.9969689 ]
Reward: 0.0006265359549306737
Action: [0 3]
Obs: [ 0.0208183   0.31111112 -0.9970392 ]
Reward: 0.0006319430639374346
Action: [2 2]
Obs: [ 0.02085315  0.31111112 -0.9971653 ]
Reward: 0.0006311420417777214
Action: [7 0]
Obs: [ 0.02092116  0.33333334 -0.99725145]
Reward: 5.00063416358857
Action: [6 0]
Obs: [ 0.02106044  0.33333334 -0.9972715 ]
Reward: 5.000634036746784
Action: [5 0]
Obs: [ 0.0212862   0.33333334 -0.9972108 ]
Reward: 5.000634421650153
Action: [1 3]
Obs: [ 0.02150181  0.33333334 -0.99716944]
Reward: 5.000634684120402
Action: [3 4]
Obs: [ 0.02166407  0.33333334 -0.9971793 ]
Reward: 5.000634621182191
Action: [4 2]
Obs: [ 0.02182894  0.33333334 -0.997186  ]
Reward:

2024-10-23 08:46:36,533 - INFO - Passed waypoint. Reward: 5.0006693372545215
2024-10-23 08:46:36,661 - INFO - Reward: 105.00066247789866, Episode time limit reached


Action: [7 1]
Obs: [ 0.04502368  0.37777779 -0.9923718 ]
Reward: 5.000665871936496
Action: [2 5]
Obs: [ 0.04564089  0.37777779 -0.9922291 ]
Reward: 5.000666822485058
Action: [4 0]
Obs: [ 0.04622265  0.37777779 -0.99212813]
Reward: 5.000667495974567
Action: [5 1]
Obs: [ 0.04682486  0.37777779 -0.992015  ]
Reward: 5.000668251702022
Action: [6 3]
Obs: [ 0.04748918  0.37777779 -0.99185264]
Reward: 5.0006693372545215
Action: [8 4]
Obs: [ 0.04824669  0.37777779 -0.99161726]
Reward: 5.00067091458047
Action: [4 0]
Obs: [ 0.04898649  0.37777779 -0.9914101 ]
Reward: 5.000672305838106
Action: [0 4]
Obs: [ 0.04962973  0.37777779 -0.9913003 ]
Reward: 5.000673044191293
Action: [1 4]
Obs: [ 0.0501586   0.37777779 -0.9912989 ]
Reward: 5.000673053318872
Action: [0 1]
Obs: [ 0.05055274  0.37777779 -0.99141216]
Reward: 5.000672291592154
Action: [0 3]
Obs: [ 0.05080194  0.37777779 -0.99164444]
Reward: 5.000670732063826
Action: [1 4]
Obs: [ 0.05089347  0.37777779 -0.9919988 ]
Reward: 5.00066835989476
Actio

## Train

In [None]:


TIMESTEPS = 250000 # how long is each training iteration - individual steps
iters = 0
while iters<4:  # how many training iterations you want 
	iters += 1
	print('Iteration ', iters,' is to commence...')
	model.learn(total_timesteps=TIMESTEPS, reset_num_timesteps=False, tb_log_name=f"PPO" )
	print('Iteration ', iters,' has been trained')
	model.save(f"{models_dir}/{TIMESTEPS*iters}")
