In [1]:
import itertools
import os
import pprint
import random

import numpy as np
import pygame
import torch
import wandb

from AgentTorch import PPOAgent
from Environment import Car, CarEnv
from ModelsTorch import Actor, Critic
from utils import compute_borders, scale_image

pygame 2.1.3 (SDL 2.0.22, Python 3.11.5)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
# --- Reproducibility ---------------------------------------------------------
# Seed every random number generator used here (Python, NumPy, PyTorch CPU and
# CUDA) and ask PyTorch/cuDNN for deterministic algorithms.
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True
torch.use_deterministic_algorithms(True)

# --- Assets ------------------------------------------------------------------
# Asset paths are assembled with os.path.join instead of hard-coded backslashes
# so the files are also found on platforms whose separator is "/". On Windows
# the resulting strings are the usual backslash-separated ones.
circuit_path = os.path.join('images', 'circuits', 'level3.png')
circuit_edges, finish_edges, finish_position = compute_borders(
    circuit_path, os.path.join("images", "finish_template.png")
)
track_img = pygame.image.load(circuit_path)
finish_img = pygame.image.load(os.path.join("images", "finish_image.png"))
car_img = scale_image(pygame.image.load(os.path.join("images", "red-car.png")), 0.35)
# (surface, position) pairs; the finish image is placed at the position
# returned by compute_borders.
images = [(track_img, (0, 0)), (finish_img, finish_position)]

# --- Environment -------------------------------------------------------------
car = Car(car_img, acceleration=0.2, num_radars=9)
env = CarEnv(car, circuit_edges, finish_edges, num_actions=6)

# --- Experiment bookkeeping --------------------------------------------------
# NOTE(review): input() blocks "Run All" and non-interactive execution; consider
# turning the experiment number into a configuration constant.
experiment = int(input('Número del experimento'))
saves = os.path.join('saves_tfm', str(experiment))
# root_logdir is intentionally kept as the literal backslash string: hp_opt
# builds the very same string for the agent's log_dir, and the two must agree.
wandb.tensorboard.patch(root_logdir='runs\\ppo_experiment_tfm\\'+str(experiment), tensorboard_x=False, save=False)

def hp_opt(config=None):
    """Run a single sweep trial: reseed the RNGs, build the networks and train a PPO agent.

    Args:
        config: Optional configuration forwarded to ``wandb.init``. After the run
            is opened, the hyperparameters are read back from ``wandb.config``
            (set by the Sweep Controller when this function is launched through
            ``wandb.agent``).
    """
    # Reseed inside the function so that every trial starts from the same
    # pseudo-random state and all trials are reproducible.
    trial_seed = 42
    seeders = (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all)
    for apply_seed in seeders:
        apply_seed(trial_seed)
    torch.backends.cudnn.deterministic = True
    torch.use_deterministic_algorithms(True)

    # Open a new wandb run for this trial.
    with wandb.init(config=config):
        # Hyperparameters of the current run (unpacked as keyword arguments below).
        hparams = wandb.config

        actor = Actor(**hparams)
        critic = Critic(**hparams)

        # Track both networks, actor first, with the same logging frequency.
        for network in (actor, critic):
            wandb.watch(network, log_freq=10)

        # Same directory string that is passed to wandb.tensorboard.patch as root_logdir.
        tensorboard_dir = 'runs\\ppo_experiment_tfm\\'+str(experiment)
        agent = PPOAgent(actor, critic, log_dir=tensorboard_dir, **hparams)

        # The same environment object is passed twice; the two return values are not used.
        _, _ = agent.train(
            env,
            env,
            images,
            save_path=None,
            updates_per_flush=20,
            val_fps=None,
            val_plot=False,
            val_verbose=False,
        )

In [3]:
def get_sweep_config(sweep_decay=None):
    """Assemble the random-search sweep configuration dictionary.

    Args:
        sweep_decay: Decay method to include in the sweep. ``'plateau'``,
            ``'exponential'`` and ``'linear'`` each add their own parameters;
            any other value (including ``None``) sets ``decay_method`` to ``None``.

    Returns:
        dict: Configuration with the keys ``name``, ``method``, ``metric`` and
        ``parameters``. A new dictionary is built on every call.
    """

    def fixed(constant):
        # Parameter pinned to a single value.
        return {"value": constant}

    def choice(options):
        # Parameter drawn from an explicit list of candidates.
        return {"values": options}

    def sampled(distribution, low, high):
        # Parameter drawn from a named distribution over [low, high].
        return {"distribution": distribution, "min": low, "max": high}

    layer_counts = [2, 3, 4]  # Possible number of hidden layers
    layer_widths = [150, 250, 350]  # Possible neurons of hidden layers
    # Every possible combination of widths for every allowed depth, as lists.
    hidden_layouts = [
        list(layout)
        for depth in layer_counts
        for layout in itertools.product(layer_widths, repeat=depth)
    ]

    parameters = {
        # Models
        "input_size": fixed(10),  # Must be even (odd number of radars + current velocity)
        "hidden_sizes": choice(hidden_layouts),
        "output_size": fixed(6),  # Number of actions
        "dropout_prob": choice([0, 0.1, 0.2, 0.3]),
        "activation": choice(["tanh", "lrelu"]),  # Activation layers
        "lrelu": choice([0.001, 0.01, 0.1]),  # If activation=LeakyReLU, alpha parameter
        "momentum": choice([0.8, 0.9, 0.95, 0.99]),  # Momentum of Batch Normalization layers (if there are)
        "bn": choice([True, False]),  # If there are Batch Normalization layers
        "initialization": choice(["orthogonal", "normal", "uniform"]),  # Initialization method (layer weights)

        # Agent + training
        "actor_lr": sampled("log_uniform_values", 1e-4, 1e-2),
        "critic_lr": sampled("log_uniform_values", 1e-4, 1e-2),
        "value_loss_factor": fixed(1),
        "entropy": sampled("uniform", 1e-4, 5e-2),
        "gamma": choice([0.9, 0.95, 0.99]),
        "GAE_lambda": fixed(0.95),
        "clipping_epsilon": fixed(0.2),
        "l1_factor": sampled("log_uniform_values", 1e-6, 1e-3),
        "l2_factor": sampled("log_uniform_values", 1e-6, 1e-3),
        "T": choice([256, 512, 768, 1024]),
        "minibatch_size": choice([32, 64, 128, 256]),
        "epochs": fixed(10),  # [5, 10, 15, 20]
        "updates": fixed(200),
        "val_episodes": fixed(10),
        "updates_per_val": fixed(1),
        "target_kl": choice([0.01, 0.02, 0.03]),
        "adv_std": choice([True, False]),
        "early_stopping_patience": fixed(30),  # If it is too low, it may interfere with plateau reduction.
        "early_stopping_delta": fixed(0.00),
    }

    # Pick the decay-specific entries, then merge them after the common ones.
    if sweep_decay == 'plateau':
        decay_parameters = {
            "decay_method": fixed("plateau"),
            "plateau_factor": sampled("uniform", 0.01, 0.9),
            "plateau_patience": choice([5, 10]),
        }
    elif sweep_decay == 'exponential':
        decay_parameters = {
            "decay_method": fixed("exponential"),
            "exponential_factor": sampled("log_uniform_values", 0.85, 0.999),
        }
    elif sweep_decay == 'linear':
        decay_parameters = {
            "decay_method": fixed("linear"),
            "linear_end_factor": sampled("log_uniform_values", 0.85, 0.999),
        }
    else:
        decay_parameters = {
            "decay_method": fixed(None),
        }
    parameters.update(decay_parameters)

    return {
        "name": "Sweep_TFM",
        "method": "random",
        "metric": {
            "goal": "maximize",
            "name": "Reward/Mean_val_reward",
        },
        "parameters": parameters,
    }

# Build the sweep definition with the exponential-decay parameters included.
sweep_config = get_sweep_config(sweep_decay='exponential')
# Register the sweep in the "TFM_project" project; the returned ID is later
# handed to wandb.agent.
sweep_id = wandb.sweep(sweep=sweep_config, project="TFM_project")
# Print the configuration that was submitted (pprint sorts dictionary keys).
pprint.pprint(sweep_config)
# Bare expression so the notebook shows the sweep ID as the cell's output.
sweep_id

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Create sweep with ID: 62savqn0
Sweep URL: https://wandb.ai/antoniosg00/TFM_project/sweeps/62savqn0
{'method': 'random',
 'metric': {'goal': 'maximize', 'name': 'Reward/Mean_val_reward'},
 'name': 'Sweep_TFM',
 'parameters': {'GAE_lambda': {'value': 0.95},
                'T': {'values': [256, 512, 768, 1024]},
                'activation': {'values': ['tanh', 'lrelu']},
                'actor_lr': {'distribution': 'log_uniform_values',
                             'max': 0.01,
                             'min': 0.0001},
                'adv_std': {'values': [True, False]},
                'bn': {'values': [True, False]},
                'clipping_epsilon': {'value': 0.2},
                'critic_lr': {'distribution': 'log_uniform_values',
                              'max': 0.01,
                              'min': 0.0001},
                'decay_method': {'value': 'exponential'},
                'dropout_prob': {'values': [0, 0.1, 0.2, 0.3]},
                'early_stopping_delta':

'62savqn0'

In [None]:
# Start a sweep agent for the sweep created above: hp_opt is the function run
# for each trial, and count=100 caps the number of trials this agent executes.
wandb.agent(sweep_id, hp_opt, project="TFM_project", count=100)
# Mark any run that is still open as finished once the agent returns.
wandb.finish()