In [1]:
from Environment import Car, CarEnv
from ModelsTorch import Actor, Critic
from AgentTorch import PPOAgent
import pygame
from utils import compute_borders, scale_image
import numpy as np
import random
import torch

import os
import re
import wandb

pygame 2.1.3 (SDL 2.0.22, Python 3.11.5)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [31]:
def get_run(sweep_id='62savqn0', name=None, project_path='antoniosg00/TFM_project'):
    """Fetch one run of a W&B sweep and print a short summary of it.

    Args:
        sweep_id: Short id of the sweep inside ``project_path``.
        name: Display name of the run to fetch. When ``None``, the result of
            the sweep's ``best_run()`` is used instead.
        project_path: ``"<entity>/<project>"`` prefix that owns the sweep.

    Returns:
        The selected run object, as handed back by the W&B public API.

    Raises:
        IndexError: If ``name`` is given but no run in the sweep has that name.
    """
    # Initialize the W&B API
    api = wandb.Api()
    # Retrieve the sweep object (kept in its own variable so the `sweep_id`
    # argument is not silently overwritten).
    sweep_path = f"{project_path}/{sweep_id}"
    sweep = api.sweep(sweep_path)

    if name is None:
        run = sweep.best_run()
    else:
        matches = [r for r in sweep.runs if r.name == name]
        if not matches:
            # Same exception type as the bare `[-1]` lookup used to raise,
            # but with a message that says what was actually missing.
            raise IndexError(f"No run named {name!r} found in sweep {sweep_path!r}")
        # If several runs share the name, keep the last one listed.
        run = matches[-1]

    print()
    print(run.name, '\n')
    print(f"Run ID: {run.id}")
    print(f"Best Validation Reward: {run.history()['Reward/Mean_val_reward'].max()}")
    print("Run Config Hyperparameters:")
    for key, value in run.config.items():
        print(f"{key}: {value}")

    return run

# Called with the defaults, so `name` is None and get_run() returns the
# sweep's best_run(); its config is reused by the cells below.
best_run = get_run()
# Bare last expression so the notebook displays the selected run's name.
best_run.name

[34m[1mwandb[0m: Sorting runs by -summary_metrics.Reward/Mean_val_reward



glowing-sweep-2 

Run ID: k9ou4bcn
Best Validation Reward: 548.787109375
Run Config Hyperparameters:
T: 1024
bn: True
gamma: 0.95
lrelu: 0.1
epochs: 10
adv_std: False
entropy: 0.018354509685286766
updates: 200
actor_lr: 0.0006178173389342254
momentum: 0.99
critic_lr: 0.003042924238226867
l1_factor: 2.985720538166668e-06
l2_factor: 3.2565996268418414e-05
target_kl: 0.03
GAE_lambda: 0.95
activation: tanh
input_size: 10
output_size: 6
decay_method: exponential
dropout_prob: 0
hidden_sizes: [350, 350, 150]
val_episodes: 10
initialization: uniform
minibatch_size: 64
updates_per_val: 1
clipping_epsilon: 0.2
value_loss_factor: 1
exponential_factor: 0.9132888346855556
early_stopping_delta: 0
early_stopping_patience: 30


'glowing-sweep-2'

In [4]:
# Training cell
# Trains a PPO agent on circuit level 3 with the hyperparameters of `best_run`
# and logs the run to W&B / TensorBoard.

# Seed every RNG used here (Python, NumPy, PyTorch CPU + CUDA) and request
# deterministic PyTorch algorithms.
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True
torch.use_deterministic_algorithms(True)

# NOTE: interactive prompt; the experiment number names the log dir, the W&B
# job and the checkpoint folder, and later cells read `experiment`.
experiment = int(input('Número del experimento'))

# Single definition of the log directory, shared by the W&B TensorBoard patch
# and the agent. os.path.join gives the same string as before on Windows and
# a valid path on other platforms.
log_dir = os.path.join('runs', 'ppo_experiment_tfm', 'training', str(experiment))
wandb.tensorboard.patch(root_logdir=log_dir, tensorboard_x=False, save=False)

actor = Actor(**best_run.config)
critic = Critic(**best_run.config)

circuit_path = os.path.join('images', 'circuits', 'level3.png')
circuit_edges, finish_edges, finish_position = compute_borders(
    circuit_path, os.path.join('images', 'finish_template.png')
)
track_img = pygame.image.load(circuit_path)
finish_img = pygame.image.load(os.path.join('images', 'finish_image.png'))
car_img = scale_image(pygame.image.load(os.path.join('images', 'red-car.png')), 0.35)
images = [(track_img, (0, 0)), (finish_img, finish_position)]

car = Car(car_img, acceleration=0.2, num_radars=9)
env = CarEnv(car, circuit_edges, finish_edges, num_actions=6)

wandb.init(
    project='TFM_project',
    entity='antoniosg00',
    name='Job_' + str(experiment),
    config=best_run.config,
)

# From here on a W&B run is open: make sure it (and the agent's writer) is
# closed even if training raises or the cell is interrupted.
try:
    wandb.watch(actor, log_freq=10)
    wandb.watch(critic, log_freq=10)

    saves = os.path.join('saves_tfm', str(experiment))
    agent = PPOAgent(actor, critic, log_dir=log_dir, **best_run.config)

    try:
        # The same environment is passed for both environment arguments.
        update_rewards, val_rewards = agent.train(
            env, env, images,
            save_path=saves,
            updates_per_flush=20,
            val_fps=None,
            val_plot=False,
            val_verbose=False,
        )
    finally:
        agent.close_writer()
finally:
    wandb.finish()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mantoniosg00[0m. Use [1m`wandb login --relogin`[0m to force relogin


cpu
Update [1/200]
Actor Loss: -0.034408312290906906
Critic Loss: 23.194528579711914

New best validation reward reached in update [1/200]

Update [2/200]
Actor Loss: -0.037445612251758575
Critic Loss: 15.564847946166992

New best validation reward reached in update [2/200]

Update [3/200]
Actor Loss: 0.0075699929147958755
Critic Loss: 8.681387901306152

New best validation reward reached in update [3/200]

Update [4/200]
Actor Loss: -0.025292839854955673
Critic Loss: 10.776342391967773

New best validation reward reached in update [4/200]

Update [5/200]
Actor Loss: 0.01025007851421833
Critic Loss: 8.873035430908203

Update [6/200]
Actor Loss: 0.0011148592457175255
Critic Loss: 10.080915451049805

New best validation reward reached in update [6/200]

Update [7/200]
Actor Loss: -0.0012286603450775146
Critic Loss: 6.988119602203369

New best validation reward reached in update [7/200]

Update [8/200]
Actor Loss: -0.015909193083643913
Critic Loss: 5.781500339508057

Update [9/200]
Actor 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Duration/Mean_train_ep_duration,▁▂▂▂▂▂▂▂▂▂▃▃▄▄▂▂▃▂▃█▄▄▅▄▄▃▄▃▇▅▄▇▃██▃██▅▅
Duration/Mean_val_ep_duration,▁▂▃▂▂▂▂▂▃▃▃▄▅▄▃▄▆▆▆▆▇▄▆▇█▇▆█▇▇▆▇▇█▇█▆▆▇▅
Learning_rate/Actor,█▇▆▅▄▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Learning_rate/Critic,█▇▆▅▄▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Loss/Actor_loss,▁▄▃▆▆▃▅▅▅▅▅▆▅▆▆▆▇▇▇▅▆▆▆▆▇▆▇▇▇▇▇▇▆▇▇▇▇█▆█
Loss/Critic_loss,█▅▄▂▂▂▂▂▃▁▁▂▃▁▅▃▁▂▂▁▃▂▃▂▄▂▃▂▂▄▃▃▁▁▂▂▂▃▃▁
Loss/Entropy_bonus,█▇▅▄▁▂▂▂▂▂▂▂▃▂▂▂▂▂▂▂▂▁▁▁▂▂▁▁▂▂▁▁▁▂▂▁▁▁▂▂
Loss/KL_divergence,▅▅▆█▆▁▄▄▄▄▃▄▄▃▃▃▂▃▄▄▃▄▅▃▃▃▃▃▄▃▄▄▃▃▃▄▅▃▃▄
Loss/Policy_loss,▁▅▂▆▅▁▄▄▄▄▄▅▄▅▅▆▆▇▇▄▆▆▅▆▆▅▆▆▆▇▆▇▆▇▇▆▆█▆█
Loss/Regularized_Actor_loss,▁▄▂▇▆▃▅▅▅▅▅▅▄▆▆▆▆▇▆▄▆▆▆▆▆▅▆▆▆▇▆▇▆▇▇▆▆█▆█

0,1
Duration/Mean_train_ep_duration,240.5
Duration/Mean_val_ep_duration,191.10001
Learning_rate/Actor,0.0
Learning_rate/Critic,0.0
Loss/Actor_loss,0.00794
Loss/Critic_loss,3.49813
Loss/Entropy_bonus,0.48926
Loss/KL_divergence,-0.00122
Loss/Policy_loss,0.02102
Loss/Regularized_Actor_loss,0.04165


In [5]:
def obtener_numero_archivo(nombre_archivo):
    """Return, as an int, the digits that follow ``checkpoint_`` in a file name.

    The pattern is searched anywhere in ``nombre_archivo``. If it is absent,
    the ``.group`` call on the failed match raises ``AttributeError``.
    """
    coincidencia = re.search(r'checkpoint_(\d+)', nombre_archivo)
    digitos = coincidencia.group(1)
    return int(digitos)

def obtener_archivo_mayor_numero(directorio):
    """Return the name of the checkpoint file with the highest number.

    Only entries of ``directorio`` whose name starts with ``checkpoint_<digits>``
    are considered; they are ranked with ``obtener_numero_archivo``.

    Args:
        directorio: Directory to scan.

    Returns:
        The bare file name (not the full path) of the highest-numbered checkpoint.

    Raises:
        ValueError: If ``directorio`` contains no ``checkpoint_<digits>`` entry.
    """
    archivos_checkpoint = [
        archivo for archivo in os.listdir(directorio)
        if re.match(r'checkpoint_\d+', archivo)
    ]
    if not archivos_checkpoint:
        # max() on an empty list raises ValueError too, but with a message
        # that says nothing about which directory was empty.
        raise ValueError(f"No checkpoint_<N> files found in directory: {directorio}")
    return max(archivos_checkpoint, key=obtener_numero_archivo)

# Locating checkpoint
# Scan the checkpoint folder of the chosen experiment (the same location the
# training cell passes as `save_path`) and pick the highest-numbered file.
# os.path.join yields the same string as the old concatenation on Windows
# and a valid path on other platforms.
directorio = os.path.join('saves_tfm', str(experiment))
archivo_mayor = obtener_archivo_mayor_numero(directorio)
print(archivo_mayor)

checkpoint_171_518.59.pth


In [None]:
# Re-seed every RNG (Python, NumPy, PyTorch CPU + CUDA) before rebuilding the
# networks, and request deterministic PyTorch algorithms.
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True
torch.use_deterministic_algorithms(True)

# Loading models
# Fresh networks built from the selected run's config; the checkpoint is
# loaded through the agent below.
actor = Actor(**best_run.config)
critic = Critic(**best_run.config)

# Loading Agent and saved checkpoint
# Paths are assembled with os.path.join instead of hand-written '\\'
# separators: identical on Windows, valid elsewhere.
log_dir = os.path.join('runs', 'ppo_experiment_tfm', 'training', str(experiment))
agent = PPOAgent(actor, critic, log_dir=log_dir, **best_run.config)
agent.load_checkpoint(os.path.join(directorio, archivo_mayor))

In [14]:
# Agent validation cell
# 5 levels of circuits can be chosen (training with level 3)

val_circuit_path = os.path.join('images', 'circuits', 'level3.png')  # Modify level here
val_circuit_edges, val_finish_edges, val_finish_position = compute_borders(
    val_circuit_path, os.path.join('images', 'finish_template.png')
)
val_track_img = pygame.image.load(val_circuit_path)
val_finish_img = pygame.image.load(os.path.join('images', 'finish_image.png'))
val_car_img = scale_image(pygame.image.load(os.path.join('images', 'red-car.png')), 0.35)
val_images = [(val_track_img, (0, 0)), (val_finish_img, val_finish_position)]

# Use the sprite loaded in THIS cell. The previous `car_img` only existed if
# the training cell had been run in the same kernel session, so validating
# from a loaded checkpoint on a fresh kernel raised NameError.
val_car = Car(val_car_img, acceleration=0.2, num_radars=9)
val_env = CarEnv(val_car, val_circuit_edges, val_finish_edges, num_actions=6)

# Re-seed right before the rollout (Python, NumPy, PyTorch CPU + CUDA).
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True
torch.use_deterministic_algorithms(True)

try:
    rew = agent.validation(val_env, val_images, fps=15, episodes=1, plot=True)
finally:
    # Always shut pygame down, even if validation raises or is interrupted.
    pygame.quit()