In [40]:
import os
import typing
import numpy as np

In [41]:
from diploma.noise_learning.utils import NoiseLearningAgents, ExchangeTypes
from diploma.noise_learning.visualizer import Visualizer
from diploma.noise_learning.metrics_manager import AgentMetrics, Metrics

In [42]:
# --- Experiment selection (all values are forwarded to Visualizer below) ---
agents_number = 10
env_name = 'CartPole-v1'
agent = NoiseLearningAgents['DQN']
# NOTE(review): presumably the noise increment applied per agent environment — confirm
noise_env_step = 0.1

# --- Experience-exchange settings ---
exchange_type = ExchangeTypes['SMART']
exchange_delta = 0.1
exchange_items_reward_count = 30

# --- Plotting / metrics aggregation options ---
detailed_agents_plots = False
metrics_number_of_elements = 100
metrics_number_of_iterations = 50

# --- Which stored executions to load ---
# NOTE(review): None presumably lets Visualizer fall back to its own defaults — confirm
executions_count = None
executions_from = None
execution_date = None

In [43]:
# Build the Visualizer from the configuration cell; one keyword per line so
# each setting is easy to scan and diff.
visualizer = Visualizer(
    exchange_type=exchange_type,
    exchange_delta=exchange_delta,
    exchange_items_reward_count=exchange_items_reward_count,
    agents_number=agents_number,
    env_name=env_name,
    noise_learning_agent=agent,
    metrics_number_of_elements=metrics_number_of_elements,
    metrics_number_of_iterations=metrics_number_of_iterations,
    detailed_agents_plots=detailed_agents_plots,
    noise_env_step=noise_env_step,
    executions_count=executions_count,
    executions_from=executions_from,
    execution_date=execution_date,
)

In [44]:
# Populate the visualizer's train and play metrics; the cell output logs each
# per-agent result file as it is loaded.
visualizer.set_train_metrics()
visualizer.set_play_metrics()

Loaded diploma/temp_results/2020-01-08_00-07-14_DQN_SMART_1/agent0.txt results
Loaded diploma/temp_results/2020-01-08_00-07-14_DQN_SMART_1/agent1.txt results
Loaded diploma/temp_results/2020-01-08_00-07-14_DQN_SMART_1/agent2.txt results
Loaded diploma/temp_results/2020-01-08_00-07-14_DQN_SMART_1/agent3.txt results
Loaded diploma/temp_results/2020-01-08_00-07-14_DQN_SMART_1/agent4.txt results
Loaded diploma/temp_results/2020-01-08_00-07-14_DQN_SMART_1/agent5.txt results
Loaded diploma/temp_results/2020-01-08_00-07-14_DQN_SMART_1/agent6.txt results
Loaded diploma/temp_results/2020-01-08_00-07-14_DQN_SMART_1/agent7.txt results
Loaded diploma/temp_results/2020-01-08_00-07-14_DQN_SMART_1/agent8.txt results
Loaded diploma/temp_results/2020-01-08_00-07-14_DQN_SMART_1/agent9.txt results
Loaded diploma/temp_results/2020-01-08_00-07-14_DQN_SMART_2/agent0.txt results
Loaded diploma/temp_results/2020-01-08_00-07-14_DQN_SMART_2/agent1.txt results
Loaded diploma/temp_results/2020-01-08_00-07-14_DQN_

In [45]:
# Typed handles to the per-agent metrics exposed by the visualizer
# (training vs. play), used by the aggregation cells below.
train_metrics: typing.List[AgentMetrics] = visualizer.agent_metrics
play_metrics: typing.List[AgentMetrics] = visualizer.agent_play_metrics

In [46]:
def get_all_metrics(metrics: typing.List[AgentMetrics], metric_name: str) -> Metrics:
    """Gather one named metric from every agent into a single ``Metrics`` object.

    Args:
        metrics: Per-agent metric holders, processed in list order.
        metric_name: Name of the attribute to read from each holder
            (e.g. ``'scores'``).

    Returns:
        A fresh ``Metrics`` instance that has been ``extend``-ed with the
        selected attribute of each agent.
    """
    combined: Metrics = Metrics()
    for agent_metrics in metrics:
        per_agent = getattr(agent_metrics, metric_name)
        combined.extend(per_agent)
    return combined

In [47]:
# Collect every agent's `scores` into one Metrics object each for the
# training runs and the play runs.
all_train_scores = get_all_metrics(train_metrics, 'scores')
all_play_scores = get_all_metrics(play_metrics, 'scores')

In [48]:
def print_iteration_mean(all_scores: Metrics, start: int, end: int, step: int) -> None:
    """Print the mean score value at selected iterations.

    The mean is reported for ``start`` first and then for every value of
    ``range(step, end, step)`` (``step``, ``2 * step``, ... below ``end``).
    Each mean is taken over all entries of ``all_scores.metrics`` whose
    ``iteration`` equals the reported iteration.

    Args:
        all_scores: Object whose ``metrics`` attribute is an iterable of items
            exposing ``iteration`` and ``value``.
        start: First iteration to report.
        end: Exclusive upper bound for the stepped iterations.
        step: Distance between the reported iterations after ``start``.

    Raises:
        ValueError: If ``step`` is 0 (raised by ``range``).
    """
    # Report `start` once, even when it coincides with one of the stepped
    # iterations (e.g. start == step), instead of printing it twice.
    iterations = [start] + [i for i in range(step, end, step) if i != start]

    for iteration in iterations:
        scores = [metric.value for metric in all_scores.metrics if metric.iteration == iteration]
        # ndarray.mean() on an empty array emits a "Mean of empty slice"
        # RuntimeWarning; print the same `nan` result without the warning.
        mean = np.array(scores).mean() if scores else float("nan")
        print(f"Mean at {iteration} iteration: {mean}")

In [49]:
# Mean training score at iteration 1 and then at every 500th iteration
# below 5500 (500, 1000, ..., 5000).
print("Train scores:")
print_iteration_mean(all_train_scores, 1, 5500, 500)

Train scores:
Mean at 1 iteration: 11.2
Mean at 500 iteration: 119.8
Mean at 1000 iteration: 195.4
Mean at 1500 iteration: 187.65
Mean at 2000 iteration: 172.7
Mean at 2500 iteration: 168.35
Mean at 3000 iteration: 157.15
Mean at 3500 iteration: 199.35
Mean at 4000 iteration: 254.8
Mean at 4500 iteration: 216.7
Mean at 5000 iteration: 113.75


In [50]:
# Mean play score at iteration 1 and then at every 100th iteration
# below 600 (100, 200, ..., 500).
print("Play scores:")
print_iteration_mean(all_play_scores, 1, 600, 100)

Play scores:
Mean at 1 iteration: 158.2
Mean at 100 iteration: 143.9
Mean at 200 iteration: 155.35
Mean at 300 iteration: 161.25
Mean at 400 iteration: 185.4
Mean at 500 iteration: 166.65


In [51]:
# Mean over every value in the combined training scores; the bare last
# expression is what the cell displays.
print("Train average:")
np.array([metric.value for metric in all_train_scores.metrics]).mean()

Train average:


167.79787

In [52]:
# Mean over every value in the combined play scores; the bare last
# expression is what the cell displays.
print("Play average:")
np.array([metric.value for metric in all_play_scores.metrics]).mean()

Play average:


160.8771