In [None]:
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import torch
import sys
import numpy as np

sys.path.insert(0, '../')
from utils.Tracking import Tracking
from utils.Critic import CriticNetwork
from utils.Actor import ActorNetwork
from utils.CAC import CAC
from utils.CACLA import CACLA
from utils.CACLAVAR import CACLAVAR

from tqdm import tqdm

In [None]:
# CAC benchmark on the Tracking environment: repeat the learn-then-evaluate
# cycle `nb_tests` times and tally how the evaluation episodes end.
nb_tests = 50
success_cac, fails_cac = 0, 0

# One entry per run: the agent's `list_rewards` attribute read after learning().
matrice_simulation_rewards_cac = []

for run_idx in tqdm(range(nb_tests)):
    # Every run starts from a newly constructed environment, actor and critic.
    env = Tracking()
    actor_network = ActorNetwork(
        nb_neurons=12,
        action_space=env.action_space,
        observation_space=env.observation_space,
    )
    critic_network = CriticNetwork(
        nb_neurons=12,
        observation_space=env.observation_space,
    )
    cac = CAC(
        learning_rate_critic=0.01,
        learning_rate_actor=0.01,
        discount_factor=0.9,
        sigma=0.1,
        nb_episode=500,
        test_frequency=1,
        env=env,
        actor_network=actor_network,
        critic_network=critic_network,
        verbose_mode=False,
    )
    cac.learning()
    matrice_simulation_rewards_cac.append(cac.list_rewards)

    # Roll out one evaluation episode with `cac.best_model`, counting the steps
    # taken until the environment reports `done`.
    state = env.reset()
    nb_iter = 0
    while True:
        obs_tensor = torch.as_tensor(state, dtype=torch.float32)
        action = cac.best_model(obs_tensor).detach().numpy()
        state, _reward, done = env.step(action)
        nb_iter += 1
        if done:
            break

    # An episode that lasted exactly `env.max_iteration` steps is counted as a
    # failure; any other episode length is counted as a success.
    if nb_iter != env.max_iteration:
        success_cac += 1
    else:
        fails_cac += 1

In [None]:
# CACLA benchmark on the Tracking environment: repeat the learn-then-evaluate
# cycle `nb_tests` times and tally how the evaluation episodes end.
nb_tests = 50
success_cacla, fails_cacla = 0, 0

# One entry per run: the agent's `list_rewards` attribute read after learning().
matrice_simulation_rewards_cacla = []

for run_idx in tqdm(range(nb_tests)):
    # Every run starts from a newly constructed environment, actor and critic.
    env = Tracking()
    actor_network = ActorNetwork(
        nb_neurons=12,
        action_space=env.action_space,
        observation_space=env.observation_space,
    )
    critic_network = CriticNetwork(
        nb_neurons=12,
        observation_space=env.observation_space,
    )
    cacla = CACLA(
        learning_rate_critic=0.01,
        learning_rate_actor=0.01,
        discount_factor=0.95,
        sigma=0.1,
        nb_episode=500,
        test_frequency=1,
        env=env,
        actor_network=actor_network,
        critic_network=critic_network,
        verbose_mode=False,
    )
    cacla.learning()
    matrice_simulation_rewards_cacla.append(cacla.list_rewards)

    # Roll out one evaluation episode with `cacla.best_model`, counting the
    # steps taken until the environment reports `done`.
    state = env.reset()
    nb_iter = 0
    while True:
        obs_tensor = torch.as_tensor(state, dtype=torch.float32)
        action = cacla.best_model(obs_tensor).detach().numpy()
        state, _reward, done = env.step(action)
        nb_iter += 1
        if done:
            break

    # An episode that lasted exactly `env.max_iteration` steps is counted as a
    # failure; any other episode length is counted as a success.
    if nb_iter != env.max_iteration:
        success_cacla += 1
    else:
        fails_cacla += 1

In [None]:
# CACLAVAR benchmark on the Tracking environment: repeat the learn-then-evaluate
# cycle `nb_tests` times and tally how the evaluation episodes end.
nb_tests = 50
success_caclavar, fails_caclavar = 0, 0

# One entry per run: the agent's `list_rewards` attribute read after learning().
matrice_simulation_rewards_caclavar = []

for run_idx in tqdm(range(nb_tests)):
    # Every run starts from a newly constructed environment, actor and critic.
    env = Tracking()
    actor_network = ActorNetwork(
        nb_neurons=12,
        action_space=env.action_space,
        observation_space=env.observation_space,
    )
    critic_network = CriticNetwork(
        nb_neurons=12,
        observation_space=env.observation_space,
    )
    caclavar = CACLAVAR(
        learning_rate_critic=0.01,
        learning_rate_actor=0.01,
        discount_factor=0.8,
        sigma=0.1,
        nb_episode=500,
        test_frequency=1,
        env=env,
        actor_network=actor_network,
        critic_network=critic_network,
        verbose_mode=False,
    )
    caclavar.learning()
    matrice_simulation_rewards_caclavar.append(caclavar.list_rewards)

    # Roll out one evaluation episode with `caclavar.best_model`, counting the
    # steps taken until the environment reports `done`.
    state = env.reset()
    nb_iter = 0
    while True:
        obs_tensor = torch.as_tensor(state, dtype=torch.float32)
        action = caclavar.best_model(obs_tensor).detach().numpy()
        state, _reward, done = env.step(action)
        nb_iter += 1
        if done:
            break

    # An episode that lasted exactly `env.max_iteration` steps is counted as a
    # failure; any other episode length is counted as a success.
    if nb_iter != env.max_iteration:
        success_caclavar += 1
    else:
        fails_caclavar += 1

In [None]:
def _success_percentage(success, fails):
    """Return the share of successful runs among ``success + fails``, in percent.

    The multiplication happens before the division so that counts such as
    28 successes out of 50 runs print as ``56.0`` instead of
    ``56.00000000000001``. When no run was recorded at all, NaN is returned
    rather than raising ZeroDivisionError.
    """
    total = success + fails
    if total == 0:
        return float("nan")
    return 100 * success / total


# Summary of the evaluation outcomes counted in the three benchmark cells.
print(f"Nombre de tests : {nb_tests}")
print("------------------------------------")
print(f"Nombre de succes CAC: {success_cac}")
print(f"Nombre d'echecs CAC: {fails_cac}")
print(f"Ratio de succes pour CAC: {_success_percentage(success_cac, fails_cac)}%")
print("------------------------------------")
print(f"Nombre de succes CACLA: {success_cacla}")
print(f"Nombre d'echecs CACLA: {fails_cacla}")
print(f"Ratio de succes pour CACLA: {_success_percentage(success_cacla, fails_cacla)}%")
print("------------------------------------")
print(f"Nombre de succes CACLAVAR: {success_caclavar}")
print(f"Nombre d'echecs CACLAVAR: {fails_caclavar}")
print(f"Ratio de succes pour CACLAVAR: {_success_percentage(success_caclavar, fails_caclavar)}%")

In [None]:
def rewards_normalization(matrice_simulation_rewards, reward_min=-200, reward_max=0):
    """Min-max scale the rewards and average them over the first axis.

    Each reward ``r`` is mapped to ``(r - reward_min) / (reward_max - reward_min)``,
    so ``reward_min`` becomes 0 and ``reward_max`` becomes 1. With the default
    bounds (-200 and 0) this is the same mapping as the previous hard-coded
    ``1 - r / -200``. Values outside the bounds are not clipped.

    Parameters
    ----------
    matrice_simulation_rewards : array-like
        Rewards to normalize. The callers in this notebook pass one sequence
        of rewards per simulation run (presumably one value per episode --
        confirm against ``list_rewards``); all rows must have the same length.
    reward_min : float, optional
        Reward value mapped to 0. Defaults to -200.
    reward_max : float, optional
        Reward value mapped to 1. Defaults to 0.

    Returns
    -------
    numpy.ndarray or numpy scalar
        Mean of the normalized rewards along axis 0 (for a 2-D input: one
        value per column, averaged over the rows).

    Raises
    ------
    ValueError
        If ``reward_min`` equals ``reward_max`` (the scale would be undefined).
    """
    if reward_max == reward_min:
        raise ValueError("reward_min and reward_max must differ")
    rewards = np.asarray(matrice_simulation_rewards)
    normalized = (rewards - reward_min) / (reward_max - reward_min)
    return normalized.mean(axis=0)

In [None]:
# Normalized reward curve of each algorithm, computed from that algorithm's own
# reward matrix. The previous code passed the undefined name
# `matrice_simulation_rewards` to all three calls.
l_cac = rewards_normalization(matrice_simulation_rewards_cac)
l_cacla = rewards_normalization(matrice_simulation_rewards_cacla)
l_caclavar = rewards_normalization(matrice_simulation_rewards_caclavar)

In [None]:
# Overlay the three normalized reward curves on a single set of axes.
fig, ax = plt.subplots()
ax.set_title(f"evolution of the rewards with {nb_tests} simulations on Tracking")
ax.set_xlabel("episode")
ax.set_ylabel("rewards normalized between 0 and 1")
for curve, legend_label in (
    (l_cac, "rewards CAC"),
    (l_cacla, "rewards CACLA"),
    (l_caclavar, "rewards CACLAVAR"),
):
    ax.plot(curve, label=legend_label)
ax.legend()
plt.show()

In [None]:
# Mean and standard deviation of each normalized reward curve, printed in the
# order CAC, CACLA, CACLAVAR.
print("Resultats : ")
for curve, mean_caption, std_caption in (
    (l_cac, "CAC : mean rewards -> ", "CAC : std rewards -> "),
    (l_cacla, "CACLA : mean rewards -> ", "CACLA : std rewards -> "),
    (l_caclavar, "CACLAVAR : mean rewards -> ", "CACLAVAR : std rewards -> "),
):
    print(mean_caption, curve.mean())
    print(std_caption, curve.std())