# Test de l'environnement Tracking avec CACLA

Reproduction de l'environnement de test de l'article https://dspace.library.uu.nl/bitstream/handle/1874/25514/wiering_07_reinforcementlearning.pdf

In [None]:
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import torch
import sys

sys.path.insert(0, '../')
from utils.Tracking import Tracking
from utils.Critic import CriticNetwork
from utils.Actor import ActorNetwork
from utils.CACLA import CACLA

## Hyperparamètres

In [None]:
# Environment on which the agent is trained and later evaluated.
env = Tracking()

# Actor and critic are both built with nb_neurons=12 and sized from the
# environment's spaces; only the actor receives the action space.
actor_network = ActorNetwork(
    nb_neurons=12,
    action_space=env.action_space,
    observation_space=env.observation_space,
)
critic_network = CriticNetwork(
    nb_neurons=12,
    observation_space=env.observation_space,
)

# CACLA learner wired to the environment and the two networks.
# NOTE(review): sigma is presumably the scale of the exploration noise and
# test_frequency the number of episodes between evaluations -- confirm
# against utils.CACLA.
cacla = CACLA(
    env=env,
    actor_network=actor_network,
    critic_network=critic_network,
    learning_rate_actor=0.01,
    learning_rate_critic=0.01,
    discount_factor=0.95,
    sigma=0.1,
    nb_episode=500,
    test_frequency=10,
)

## Apprentissage

In [None]:
# Run the CACLA learning routine on the configured learner. The following
# cells read cacla.list_rewards and cacla.best_model -- presumably both are
# filled in by this call; verify against utils.CACLA.
cacla.learning()

## Affichage des rewards

In [None]:
# Draw the values stored in cacla.list_rewards as a single red curve.
fig, ax = plt.subplots(figsize=(10, 7))
ax.plot(cacla.list_rewards, c='r', label='rewards')
ax.legend()
ax.set_xlabel('episode')
ax.set_ylabel('reward')
ax.set_title('Continuous Actor Critic (CACLA) - Tracking - Rewards')
plt.show()

## Démonstration de la simulation de l'agent dans l'environnement

In [None]:
# Roll out one episode with cacla.best_model, recording the agent and target
# coordinates right after the reset and after every environment step, then
# plot both trajectories on a single figure.
state = env.reset()

# Each trajectory starts with the position observed right after the reset.
list_x_agent = [env.agent[0]]
list_y_agent = [env.agent[1]]
list_x_target = [env.target[0]]
list_y_target = [env.target[1]]

nb_iter = 0
r = 0.0  # sum of the rewards collected over the episode
done = False
while True:
    # The model is fed a float32 tensor; its output is detached from the
    # autograd graph and converted to NumPy before being passed to env.step.
    state_t = torch.as_tensor(state, dtype=torch.float32)
    action = cacla.best_model(state_t).detach().numpy()
    new_state, reward, done = env.step(action)
    r += reward

    agent_pos, target_pos = env.agent, env.target
    list_x_agent.append(agent_pos[0])
    list_y_agent.append(agent_pos[1])
    list_x_target.append(target_pos[0])
    list_y_target.append(target_pos[1])

    state = new_state
    nb_iter += 1
    if done:
        break

# NOTE(review): the divisor 300 is hard-coded; presumably the episode length,
# which would make this the mean reward per step -- confirm against
# utils.Tracking.
print(f"iteration : {nb_iter}, reward : ", r / 300)

fig, ax = plt.subplots(figsize=(5, 5))
ax.scatter(list_x_agent, list_y_agent, label="agent")
ax.scatter(list_x_target, list_y_target, label='target')

# Unfilled purple rectangle anchored at (4, 5), 5 wide and 1 high.
# NOTE(review): presumably the obstacle of the Tracking environment -- confirm.
rect = mpatches.Rectangle(
    (4, 5), 5, 1,
    fill=False,
    color="purple",
    linewidth=2,
)
ax.add_patch(rect)

ax.set_xticks([0, 2, 4, 6, 8, 10])
ax.set_yticks([0, 2, 4, 6, 8, 10])
ax.set_xlabel("x")
ax.set_ylabel("y")
ax.set_title("Tracking simulation with the agent's policy")
ax.legend()
plt.show()

# Squared Euclidean distance between agent and target at the end of the episode.
dx = env.agent[0] - env.target[0]
dy = env.agent[1] - env.target[1]
print(dx**2 + dy**2)

## Étape par étape

In [None]:
# Replay one episode with cacla.best_model and draw a separate small figure
# after every environment step, showing the current agent and target positions.
state = env.reset()
done = False

# NOTE(review): these four lists are re-initialised here but never filled or
# read in this cell; they are kept because other cells may rely on them.
list_x_agent, list_y_agent = [], []
list_x_target, list_y_target = [], []

iteration = 0  # number of steps taken so far (not displayed in this cell)
while not done:
    # Same inference path as the full-episode demonstration: float32 tensor
    # in, detached NumPy action out.
    state_t = torch.as_tensor(state, dtype=torch.float32)
    action = cacla.best_model(state_t).detach().numpy()
    new_state, reward, done = env.step(action)
    state = new_state
    iteration += 1

    # One new figure per step, drawn from the environment's current state.
    fig, ax = plt.subplots(figsize=(4, 4))
    ax.scatter(env.agent[0], env.agent[1], label="agent")
    ax.scatter(env.target[0], env.target[1], label='target')

    # Unfilled purple rectangle anchored at (4, 5), 5 wide and 1 high.
    rect = mpatches.Rectangle(
        (4, 5), 5, 1,
        fill=False,
        color="purple",
        linewidth=2,
    )
    ax.add_patch(rect)

    ax.set_xticks([0, 2, 4, 6, 8, 10])
    ax.set_yticks([0, 2, 4, 6, 8, 10])
    ax.legend()
    plt.show()