In [None]:
import numpy as np
import pandas as pd
import sympy as sym
import pybullet as p
import gym
import os
import random
import time
import pickle as plk
from robot_env_base_movil import robot_env
import matplotlib.pyplot as plt
import tensorflow as tf

from collections import deque
import pickle as plk

from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Activation, Flatten, Input, Concatenate, Lambda
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import Callback, History
from MemorySaverCallback import MemorySaverCallback
from HistorySaverCallback import HistorySaverCallback

from rl.agents import DDPGAgent_mod
from rl.memory_mod import SequentialMemory
from rl.random import OrnsteinUhlenbeckProcess
from rl.callbacks import FileLogger, ModelIntervalCheckpoint

Algoritmo DDPG

Entorno

In [2]:
# Replay memory; the same object is later handed to the agent and to the
# memory-saver callback.
memory = SequentialMemory(
    limit=100000,
    initial_dict=None,
    window_length=1,
)

In [None]:
# Register the custom environment with Gym under a unique id.
gym.envs.registration.register(
    max_episode_steps=250,  # maximum number of steps per episode
    entry_point='robot_env_base_movil:robot_env',  # "module:class" of the environment
    id='RobotEnv-v0',  # unique identifier of the environment
)

# Instantiate the environment from the id registered above.
env = gym.make('RobotEnv-v0')


Agente

In [None]:
# Number of actions: used as the width of the actor's output layer, the size of
# the critic's action input and the size of the random process.
nb_actions=6

In [None]:
# Actor network: takes the state vector, of shape (3,), and produces
# nb_actions outputs.
actor_model = Sequential([
    Input(shape=(3,)),  # state vector of shape (3,)
    Dense(512, activation='relu'),
    Dense(256, activation='relu'),
    Dense(64, activation='relu'),
    Dense(nb_actions, activation='tanh'),  # output layer; tanh bounds each output to [-1, 1]
])

# summary() prints the table itself and returns None, so it is called directly:
# wrapping it in print() would emit an extra "None" line after the table.
actor_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 512)               2048      
                                                                 
 dense_1 (Dense)             (None, 256)               131328    
                                                                 
 dense_2 (Dense)             (None, 64)                16448     
                                                                 
 dense_3 (Dense)             (None, 6)                 390       
                                                                 
Total params: 150,214
Trainable params: 150,214
Non-trainable params: 0
_________________________________________________________________
None


Critic

Implementación de la solución DDPG:

In [8]:
# Critic network: receives the action vector and the state vector as inputs
# and produces a single scalar output.

action_input = Input(shape=(nb_actions,), name='action_input')
obs_input = Input(shape=(3,), name='obs_input')

# Join both inputs into one feature vector (actions first, then the state).
x = Concatenate()([action_input, obs_input])
print(x.shape)

# First hidden layer. No input_shape argument is needed here: in the functional
# API the layer takes its input shape from the tensor it is called on.
x = Dense(9, activation='relu')(x)
x = Dense(512, activation='relu')(x)
x = Dense(256, activation='relu')(x)
x = Dense(64, activation='relu')(x)
output = Dense(1, activation='linear')(x)  # output layer

critic_model = Model(inputs=[action_input, obs_input], outputs=output)

# summary() prints the table itself and returns None, so it is called directly:
# wrapping it in print() would emit an extra "None" line after the table.
critic_model.summary()

(None, 9)
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 action_input (InputLayer)      [(None, 6)]          0           []                               
                                                                                                  
 obs_input (InputLayer)         [(None, 3)]          0           []                               
                                                                                                  
 concatenate (Concatenate)      (None, 9)            0           ['action_input[0][0]',           
                                                                  'obs_input[0][0]']              
                                                                                                  
 dense_4 (Dense)                (None, 9)            90          ['concatenate[0][0]

In [None]:
# Random process handed to the agent; sigma is read from the environment.
# Earlier settings were theta=0.15, sigma=0.2.
random_process = OrnsteinUhlenbeckProcess(
    size=nb_actions,
    theta=0.3,
    mu=0.,
    sigma=env.sigma,
)

In [None]:
# Agent instance.
# DDPGAgent_mod is the agent class this notebook imports from rl.agents; the
# name DDPGAgent_her is not imported anywhere in the notebook, so using it
# raises NameError on a fresh kernel.
# NOTE(review): confirm DDPGAgent_mod is the intended agent variant.
ddpg = DDPGAgent_mod(
    nb_actions=nb_actions,
    actor=actor_model,
    critic=critic_model,
    critic_action_input=action_input,
    memory=memory,
    gamma=0.99,
    batch_size=128,
    nb_steps_warmup_critic=130,
    nb_steps_warmup_actor=130,
    train_interval=1,
    memory_interval=1,
    delta_range=None,
    random_process=random_process,
    target_model_update=0.001,
)

# Compile the agent with an Adam optimizer and track mean absolute error.
ddpg.compile(Adam(learning_rate=.001), metrics=['mae'])

In [None]:
# Callbacks and training run.

weights_filename = 'ddpg_{}_weights.h5f'.format('RobotEnv-v0')

# Paths are built with os.path.join instead of backslash literals: sequences
# such as "\c", "\d", "\M" and "\h" are invalid string escapes and only form a
# valid path on Windows. os.path.join yields the same string on Windows and a
# working path on other platforms.
checkpoint_dir = os.path.join('.', 'checkpoint_weights_1')
# Make sure the checkpoint folder exists before training starts.
os.makedirs(checkpoint_dir, exist_ok=True)
# "{step}" is left as a placeholder inside the file name template.
checkpoint_weights_filename = os.path.join(checkpoint_dir, 'ddpg_RobotEnv-v0_weights_{step}.h5f')
log_filename = 'ddpg_{}_log_primera_parte.json'.format('RobotEnv-v0')

history = History()
memory_saver = MemorySaverCallback(
    memory=memory,
    save_dir=os.path.join('.', 'MemorySavings'),
    save_interval=5000,
    initial_steps=0,
)
history_saver = HistorySaverCallback(
    history=history,
    save_dir=os.path.join('.', 'historial_1'),
    save_interval=10000,
    initial_steps=0,
)

# Same callbacks, in the same order, as appending them one by one.
callbacks = [
    ModelIntervalCheckpoint(checkpoint_weights_filename, interval=5000),
    FileLogger(log_filename, interval=100),
    history,
    memory_saver,
    history_saver,
]

# Train the agent; the per-episode step cap matches the 250 steps used when the
# environment was registered.
ddpg.fit(env=env, callbacks=callbacks, nb_steps=1000000, visualize=False, verbose=1, nb_max_episode_steps=250)

## Testing

In [None]:
# Load previously saved weights into the agent from the step-70000 checkpoint
# file under ./checkpoint_weights_1.
weights_filename = "./checkpoint_weights_1/ddpg_RobotEnv-v0_weights_70000.h5f"
ddpg.load_weights(weights_filename)

In [None]:
# Put the agent in test mode and run 5 evaluation episodes without rendering.
ddpg.training = False  # turn training off
ddpg.test(env, nb_episodes=5, visualize=False)

In [None]:
# Close the environment once testing is finished.
env.close()