In [1]:
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import torch
import sys
import numpy as np

sys.path.insert(0, '../')
from utils.Tracking import Tracking
from utils.Critic import CriticNetwork
from utils.Actor import ActorNetwork
from utils.CACLAVAR import CACLAVAR

from tqdm import tqdm

In [None]:
# Experiment: train `nb_tests` independent CACLAVAR agents, each on a fresh
# Tracking environment with freshly built actor/critic networks, store every
# run's reward history, then roll out `caclavar.best_model` once and count the
# rollout as a success or a failure.
success = 0
fails = 0
nb_tests = 20


# One entry per run: the `list_rewards` attribute of that run's CACLAVAR object.
matrice_simulation_rewards = list()

for run_index in tqdm(range(nb_tests)) :
    # Everything is rebuilt inside the loop so that no state is shared
    # between two runs.
    env = Tracking()
    actor_network = ActorNetwork(
        nb_neurons = 12,
        action_space = env.action_space,
        observation_space = env.observation_space
    )
    critic_network = CriticNetwork(
        nb_neurons = 12,
        observation_space = env.observation_space
    )
    caclavar = CACLAVAR(
        learning_rate_critic = 0.01,
        learning_rate_actor = 0.01,
        discount_factor = 0.8,
        sigma = 0.1,
        nb_episode = 500,
        test_frequency = 1,
        env = env,
        actor_network = actor_network,
        critic_network = critic_network,
        verbose_mode = False
    )
    caclavar.learning()
    matrice_simulation_rewards.append(caclavar.list_rewards)

    # Evaluation rollout driven by `caclavar.best_model`
    # (presumably the best actor kept during training -- TODO confirm).
    state = env.reset()
    done = False
    nb_iter = 0
    # Inference only: no_grad() avoids recording an autograd graph at each
    # step; the action values themselves are unchanged.
    with torch.no_grad() :
        while not done :
            state_t = torch.as_tensor(state, dtype=torch.float32)
            action = caclavar.best_model(state_t).detach().numpy()
            state, reward, done = env.step(action)
            nb_iter += 1

    # A rollout lasting exactly `env.max_iteration` steps is counted as a
    # failure (presumably the episode hit the step limit instead of reaching
    # the goal -- verify against Tracking.step); anything else is a success.
    if nb_iter == env.max_iteration :
        fails += 1
    else :
        success += 1

  0%|                                                    | 0/20 [00:00<?, ?it/s]

In [None]:
# Report how many evaluation rollouts succeeded / failed and the success
# percentage. The percentage is computed after the two count lines so the
# statements run in the same order as before.
print(f"nombre de succes : {success}")
print(f"nombre d'echecs : {fails}")
success_percentage = success / (success + fails) * 100
print(f"ratio : {success_percentage}%")

In [None]:
def rewards_normalization(matrice_simulation_rewards, dist_max=-200, dist_min=0) :
    """Average reward histories over runs and rescale them with fixed bounds.

    Computes ``1 - mean(rewards / (dist_max - dist_min), axis=0)``; the mean is
    taken over the first axis. With the default bounds this is
    ``1 + mean(rewards, axis=0) / 200``: a reward of 0 maps to 1 and a reward
    of -200 maps to 0.

    Parameters
    ----------
    matrice_simulation_rewards : array-like
        Reward histories stacked along the first axis (one row per run).
        Rows are expected to have the same length so that NumPy can build a
        regular array.
    dist_max : float, optional
        First bound of the scaling span. Defaults to -200, the value that was
        previously hard-coded (presumably the worst reachable reward --
        TODO confirm against the Tracking environment).
    dist_min : float, optional
        Second bound of the scaling span. Defaults to 0.

    Returns
    -------
    numpy.ndarray or numpy scalar
        The rescaled mean along axis 0 (a scalar for 1-D input).

    Raises
    ------
    ValueError
        If ``dist_max == dist_min``, which would divide by zero and silently
        yield inf/nan values.
    """
    span = dist_max - dist_min
    if span == 0 :
        raise ValueError("dist_max and dist_min must differ (zero normalization span)")
    arr = np.asarray(matrice_simulation_rewards)
    return 1 - (arr / span).mean(axis=0)
# Normalised reward curve averaged over all runs; reused by the cells below.
l = rewards_normalization(matrice_simulation_rewards=matrice_simulation_rewards)

In [None]:
# Draw the normalised reward curve `l` on a new figure, using the explicit
# figure/axes interface.
fig, ax = plt.subplots()
ax.plot(l)
plt.show()

In [None]:
# Print the mean and the standard deviation of the normalised curve `l`.
# Each statistic is evaluated right before it is printed, mean first.
for label, statistic in (("mean rewards : ", l.mean), ("std rewards : ", l.std)) :
    print(label, statistic())