In [1]:
from environments import ZeroDCleanupEnv
from agents import QAgent
import numpy as np
from collections import defaultdict
import torch
import sys
from tqdm import tqdm



In [2]:
# Run on the GPU when PyTorch reports CUDA as available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

# Raise NumPy's summarization threshold so printed arrays are shown in full.
np.set_printoptions(threshold=sys.maxsize)

cpu


In [3]:
# Population size; also passed to the environment and used to average the episode reward.
num_agents = 10
# String ids "0" .. str(num_agents - 1); they key the agents dict and the env's per-agent dicts.
agent_ids = list(map(str, range(num_agents)))
# NOTE(review): not referenced by any cell visible in this notebook — confirm it is still needed.
reward_multiplier = 10

In [4]:
# Two debug switches, both off.
# NOTE(review): neither flag is read by any cell visible here (QAgent is constructed with a
# literal verbose=False) — confirm they are still used.
pp = verbose = False

In [5]:
# Training length.
num_episodes = 200
max_steps_per_episode = 1_000  # upper bound on env steps per episode

# Compared against the episode index in the training loop to decide whether to log every step;
# defaults to the index of the final episode.
verbose_episode = num_episodes - 1

# Sizes used for the first/last Q-network layers and for QAgent's state_dim / action_size.
state_dim, action_dim = 4, 2

In [6]:
# Environment settings, forwarded unchanged as keyword arguments to ZeroDCleanupEnv.
# NOTE(review): their exact semantics live in the environments module, which is not visible here.
appleRespawnProbability = 0.05
wasteSpawnProbability = 0.5
thresholdDepletion = 0.4
thresholdRestoration = 0.0
dirt_multiplier = 10
area = 150

In [7]:
# Hyperparameters forwarded by keyword to every QAgent.
# NOTE(review): presumably learning rate, discount factor and an epsilon-greedy schedule —
# confirm against agents.QAgent.
lr, gamma = 1e-2, 0.99
epsilon, epsilon_decay, epsilon_min = 0.5, 0.9999, 0.05
batch_size = 128

In [8]:
# The single environment shared by all agents, configured from the settings cells.
env = ZeroDCleanupEnv(
    agent_ids,
    num_agents=num_agents,
    area=area,
    thresholdDepletion=thresholdDepletion,
    thresholdRestoration=thresholdRestoration,
    wasteSpawnProbability=wasteSpawnProbability,
    appleRespawnProbability=appleRespawnProbability,
    dirt_multiplier=dirt_multiplier,
)

# One separate QAgent per agent id, all built with the same hyperparameters. Each agent gets
# its own freshly constructed layer list: state_dim -> 16 -> 8 -> action_dim
# (presumably (input size, output size) pairs — confirm against agents.QAgent).
agents = dict(
    (
        agent_id,
        QAgent(
            device=device,
            num_action_outputs=1,
            action_size=action_dim,
            state_dim=state_dim,
            lr=lr,
            gamma=gamma,
            epsilon=epsilon,
            epsilon_decay=epsilon_decay,
            epsilon_min=epsilon_min,
            batch_size=batch_size,
            q_layers=[(state_dim, 16), (16, 8), (8, action_dim)],
            verbose=False,
        ),
    )
    for agent_id in agent_ids
)

In [9]:
# Per-episode metric histories; the training loop appends one value to each per episode.
ending_ep_rewards, avg_agent_rewards, agent_reward_variances = [], [], []
# NOTE(review): not written to by any cell visible here — confirm it is still needed.
weight_graph = defaultdict(list)

In [10]:
import os
import uuid

# Each run logs into its own directory: results/q_simple_<random UUID>/.
test_id = uuid.uuid4()
run_dir = os.path.join("results", "q_simple_" + str(test_id))

# makedirs also creates the parent "results" directory when it is missing, which replaces the
# racy exists()-then-mkdir() check. exist_ok keeps its default (False), so an already-existing
# run directory still raises FileExistsError.
os.makedirs(run_dir)

# "x" mode refuses to overwrite an existing log. The handle is left open because the training
# loop writes to it; buffering=1 (line buffering) pushes every completed line to disk, so the
# log survives if the kernel is interrupted before the file is ever closed.
f = open(os.path.join(run_dir, "logs.txt"), "x", buffering=1, encoding="utf-8")

In [11]:
def _log(message):
    """Print `message` to the notebook output and append it, newline-terminated, to log file `f`."""
    print(message)
    f.write(message + "\n")


# Main training loop: run `num_episodes` episodes of at most `max_steps_per_episode` steps,
# let every agent act and learn at each step, and record per-episode metrics.
for episode in range(num_episodes):
    _log(f"========= Episode {episode} =========")

    states, info = env.reset()
    _log(f"info: {info}")

    for step in tqdm(range(max_steps_per_episode)):
        # Every agent picks an action from its own entry in `states`.
        actions = {agent_id: agents[agent_id].act(states[agent_id]) for agent_id in agent_ids}
        next_states, rewards, dones, _, info = env.step(actions)

        # Hand each agent its own (state, action, reward, next_state) transition.
        for agent_id in agent_ids:
            agents[agent_id].step(states[agent_id], actions[agent_id], rewards[agent_id], next_states[agent_id])

        # Per-step logging from episode `verbose_episode` onwards. The comparison is inclusive:
        # `episode` never exceeds num_episodes - 1, so with verbose_episode = num_episodes - 1
        # a strict `>` could never be true and this branch was unreachable.
        if episode >= verbose_episode:
            _log(f"========= Step {step} =========")
            _log(f"info: {info}")

        states = next_states

        if dones["__all__"]:
            break

    # `info` is the dict from the last env.step() call (or from env.reset() if no step ran).
    ending_reward = info["total_apple_consumed"]
    total_reward_by_agent = info["total_reward_by_agent"]

    _log(f"ending reward: {ending_reward}")
    _log(f"========= End of Episode {episode} =========")
    # `f` is not closed in this cell; flush so a finished episode's lines reach disk.
    f.flush()

    ending_ep_rewards.append(ending_reward)
    avg_agent_rewards.append(ending_reward / num_agents)
    agent_reward_variances.append(np.var(list(total_reward_by_agent.values())))
        

info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


 13%|█▎        | 129/1000 [00:00<00:00, 939.51it/s]

100%|█████████▉| 999/1000 [00:08<00:00, 112.88it/s]


ending reward: 2614
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 101.49it/s]


ending reward: 2508
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:10<00:00, 99.07it/s] 


ending reward: 2426
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 102.87it/s]


ending reward: 2069
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:10<00:00, 97.31it/s] 


ending reward: 1896
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 104.09it/s]


ending reward: 1513
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:10<00:00, 99.09it/s] 


ending reward: 1463
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 103.25it/s]


ending reward: 1109
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 103.96it/s]


ending reward: 901
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 101.98it/s]


ending reward: 684
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 104.43it/s]


ending reward: 515
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 107.00it/s]


ending reward: 597
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 102.52it/s]


ending reward: 398
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:11<00:00, 84.26it/s] 


ending reward: 520
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:12<00:00, 81.04it/s] 


ending reward: 259
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:11<00:00, 85.67it/s] 


ending reward: 203
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:11<00:00, 85.70it/s] 


ending reward: 243
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:11<00:00, 83.61it/s] 


ending reward: 134
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 107.14it/s]


ending reward: 131
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:12<00:00, 80.11it/s] 


ending reward: 90
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:11<00:00, 86.18it/s] 


ending reward: 60
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 104.27it/s]


ending reward: 44
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 103.05it/s]


ending reward: 18
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:10<00:00, 99.57it/s] 


ending reward: 27
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 100.76it/s]


ending reward: 63
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 103.39it/s]


ending reward: 28
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 103.36it/s]


ending reward: 56
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:10<00:00, 97.46it/s] 


ending reward: 33
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:10<00:00, 96.59it/s] 


ending reward: 41
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 108.10it/s]


ending reward: 85
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:12<00:00, 79.92it/s] 


ending reward: 27
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:11<00:00, 86.67it/s] 


ending reward: 244
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:10<00:00, 92.15it/s] 


ending reward: 45
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:12<00:00, 78.00it/s]


ending reward: 31
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:11<00:00, 88.33it/s] 


ending reward: 35
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:12<00:00, 81.25it/s] 


ending reward: 63
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:12<00:00, 82.94it/s] 


ending reward: 32
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:10<00:00, 99.74it/s] 


ending reward: 69
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:10<00:00, 98.05it/s] 


ending reward: 29
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 102.37it/s]


ending reward: 54
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:10<00:00, 97.65it/s] 


ending reward: 37
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:10<00:00, 93.75it/s] 


ending reward: 48
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:10<00:00, 93.15it/s] 


ending reward: 59
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:10<00:00, 98.15it/s] 


ending reward: 396
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:10<00:00, 98.19it/s] 


ending reward: 32
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:10<00:00, 96.29it/s] 


ending reward: 22
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 102.84it/s]


ending reward: 97
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:10<00:00, 97.82it/s] 


ending reward: 42
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:10<00:00, 99.28it/s] 


ending reward: 32
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:10<00:00, 96.90it/s] 


ending reward: 77
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:10<00:00, 94.12it/s] 


ending reward: 15
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 105.37it/s]


ending reward: 69
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 101.57it/s]


ending reward: 42
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:10<00:00, 98.82it/s] 


ending reward: 40
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 106.17it/s]


ending reward: 62
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 102.66it/s]


ending reward: 38
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 101.41it/s]


ending reward: 26
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:10<00:00, 97.88it/s] 


ending reward: 38
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 104.97it/s]


ending reward: 38
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:08<00:00, 112.29it/s]


ending reward: 35
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 105.66it/s]


ending reward: 42
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 102.29it/s]


ending reward: 30
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:12<00:00, 80.48it/s] 


ending reward: 18
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:12<00:00, 80.28it/s] 


ending reward: 57
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:11<00:00, 84.93it/s] 


ending reward: 49
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:11<00:00, 87.66it/s] 


ending reward: 50
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:10<00:00, 90.89it/s] 


ending reward: 32
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 105.54it/s]


ending reward: 32
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 103.10it/s]


ending reward: 37
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 100.42it/s]


ending reward: 64
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 100.24it/s]


ending reward: 56
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:10<00:00, 99.59it/s] 


ending reward: 49
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:11<00:00, 89.02it/s] 


ending reward: 32
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:11<00:00, 88.47it/s] 


ending reward: 39
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:11<00:00, 83.68it/s] 


ending reward: 54
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 103.84it/s]


ending reward: 32
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:11<00:00, 86.72it/s] 


ending reward: 24
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:12<00:00, 83.12it/s] 


ending reward: 33
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:13<00:00, 75.89it/s]


ending reward: 48
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:13<00:00, 74.41it/s] 


ending reward: 71
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:13<00:00, 74.69it/s]


ending reward: 53
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:11<00:00, 88.15it/s]


ending reward: 22
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:14<00:00, 69.99it/s]


ending reward: 59
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:14<00:00, 70.08it/s]


ending reward: 34
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:13<00:00, 74.64it/s] 


ending reward: 42
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:15<00:00, 64.61it/s]


ending reward: 49
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:16<00:00, 59.14it/s]


ending reward: 28
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:17<00:00, 58.20it/s]


ending reward: 55
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:14<00:00, 67.66it/s]


ending reward: 55
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:14<00:00, 67.81it/s]


ending reward: 38
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:15<00:00, 65.19it/s]


ending reward: 41
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:17<00:00, 56.18it/s]


ending reward: 27
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:17<00:00, 58.08it/s]


ending reward: 72
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:15<00:00, 65.22it/s]


ending reward: 23
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:16<00:00, 60.83it/s]


ending reward: 37
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:17<00:00, 57.34it/s]


ending reward: 72
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:14<00:00, 69.33it/s]


ending reward: 17
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:15<00:00, 64.09it/s]


ending reward: 31
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:15<00:00, 65.32it/s]


ending reward: 65
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:14<00:00, 71.30it/s]


ending reward: 49
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:13<00:00, 72.59it/s]


ending reward: 30
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:15<00:00, 64.90it/s]


ending reward: 82
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:15<00:00, 62.47it/s]


ending reward: 67
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:14<00:00, 70.40it/s]


ending reward: 94
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:13<00:00, 72.29it/s]


ending reward: 176
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:16<00:00, 61.69it/s]


ending reward: 25
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:14<00:00, 68.80it/s]


ending reward: 71
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:12<00:00, 82.09it/s]


ending reward: 40
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:14<00:00, 68.44it/s]


ending reward: 54
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:13<00:00, 73.21it/s]


ending reward: 34
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:12<00:00, 78.98it/s]


ending reward: 29
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:12<00:00, 78.97it/s]


ending reward: 81
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:13<00:00, 75.84it/s]


ending reward: 61
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:13<00:00, 76.41it/s] 


ending reward: 14
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 106.51it/s]


ending reward: 82
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:10<00:00, 97.22it/s] 


ending reward: 34
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 104.05it/s]


ending reward: 54
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 105.29it/s]


ending reward: 63
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:10<00:00, 92.66it/s] 


ending reward: 54
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:10<00:00, 95.42it/s] 


ending reward: 42
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 102.02it/s]


ending reward: 49
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 101.83it/s]


ending reward: 31
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 102.09it/s]


ending reward: 43
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 107.48it/s]


ending reward: 28
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 100.02it/s]


ending reward: 46
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:12<00:00, 81.31it/s] 


ending reward: 41
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:11<00:00, 88.05it/s] 


ending reward: 38
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 107.87it/s]


ending reward: 39
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 104.22it/s]


ending reward: 22
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 104.50it/s]


ending reward: 49
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 106.97it/s]


ending reward: 52
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 101.48it/s]


ending reward: 48
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 103.70it/s]


ending reward: 33
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 105.58it/s]


ending reward: 50
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 104.43it/s]


ending reward: 22
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:11<00:00, 89.47it/s] 


ending reward: 72
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 100.54it/s]


ending reward: 22
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 101.28it/s]


ending reward: 65
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 102.33it/s]


ending reward: 31
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:10<00:00, 96.48it/s] 


ending reward: 39
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:10<00:00, 94.46it/s] 


ending reward: 59
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:10<00:00, 93.72it/s] 


ending reward: 35
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 103.60it/s]


ending reward: 48
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:10<00:00, 98.58it/s] 


ending reward: 36
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:12<00:00, 82.84it/s] 


ending reward: 31
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 105.14it/s]


ending reward: 76
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 103.65it/s]


ending reward: 70
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:08<00:00, 112.30it/s]


ending reward: 32
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 104.05it/s]


ending reward: 37
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:08<00:00, 117.96it/s]


ending reward: 38
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 104.00it/s]


ending reward: 34
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:10<00:00, 93.58it/s] 


ending reward: 34
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:11<00:00, 90.54it/s] 


ending reward: 36
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 105.90it/s]


ending reward: 47
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 107.49it/s]


ending reward: 63
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 102.95it/s]


ending reward: 30
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 104.53it/s]


ending reward: 63
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 103.32it/s]


ending reward: 47
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 101.55it/s]


ending reward: 41
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 106.13it/s]


ending reward: 38
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:11<00:00, 90.57it/s] 


ending reward: 40
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 106.62it/s]


ending reward: 60
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:10<00:00, 93.86it/s] 


ending reward: 57
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:10<00:00, 99.00it/s] 


ending reward: 49
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 101.76it/s]


ending reward: 23
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 103.89it/s]


ending reward: 43
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:08<00:00, 116.25it/s]


ending reward: 63
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 109.18it/s]


ending reward: 59
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:08<00:00, 111.21it/s]


ending reward: 34
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:08<00:00, 119.17it/s]


ending reward: 140
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:08<00:00, 114.52it/s]


ending reward: 38
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 101.47it/s]


ending reward: 37
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 107.44it/s]


ending reward: 44
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 103.40it/s]


ending reward: 39
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 107.71it/s]


ending reward: 29
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 106.87it/s]


ending reward: 38
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 104.68it/s]


ending reward: 74
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


100%|█████████▉| 999/1000 [00:09<00:00, 107.10it/s]


ending reward: 36
info: {'total_apple_consumed': 0, 'step_apple_consumed': 0, 'apple': 0, 'dirt': 78, 'picker': 0, 'cleaner': 0, 'total_reward_by_agent': {'7': 0, '1': 0, '8': 0, '6': 0, '5': 0, '0': 0, '4': 0, '2': 0, '3': 0, '9': 0}}


 89%|████████▊ | 886/1000 [00:08<00:00, 115.98it/s]

In [None]:
# Evaluation pass: roll the agents out for a fixed number of test episodes and
# collect, per episode, the step-by-step environment counts plus an end-of-episode
# reward summary. Progress is echoed to stdout and to the open log handle `f`.
test_stats = []
num_test_episodes = 15
print("========= Testing =========")
f.write("========= Testing =========\n")

# (key in the per-episode record, key in the environment's `info` dict),
# listed in the order the values are recorded.
tracked_counts = (
    ("num_apples", "apple"),
    ("num_dirt", "dirt"),
    ("pickers", "picker"),
    ("cleaners", "cleaner"),
)

for episode in range(num_test_episodes):
    episode_stats = {
        "num_apples": [],
        "num_dirt": [],
        "pickers": [],
        "cleaners": [],
        "total_reward": 0,
        "avg_agent_rewards": 0,
        "agent_reward_variances": 0,
        "total_reward_by_agent": [],
    }
    # Registered before the rollout so an interrupted episode still appears in test_stats.
    test_stats.append(episode_stats)

    states, info = env.reset()
    # First entry of every series is the freshly reset environment (no action taken yet).
    for stat_key, info_key in tracked_counts:
        episode_stats[stat_key].append(info[info_key])

    print(f"info: {info}")
    f.write(f"info: {info}\n")

    for step in tqdm(range(max_steps_per_episode)):
        actions = {
            agent_id: agents[agent_id].act(states[agent_id])
            for agent_id in agent_ids
        }
        next_states, rewards, dones, _, info = env.step(actions)
        for stat_key, info_key in tracked_counts:
            episode_stats[stat_key].append(info[info_key])

        # Per-step logging only for episodes past `verbose_episode`. With the
        # configuration cell's value (num_episodes - 1) and 15 test episodes this
        # branch does not run.
        if episode > verbose_episode:
            print(f"========= Step {step} =========")
            print(f"info: {info}")
            f.write(f"========= Step {step} =========\n")
            f.write(f"info: {info}\n")

        states = next_states

        # The environment signals the end of the episode for all agents at once.
        if dones["__all__"]:
            break

    print(f"ending info: {info}")

    # Episode summary, derived from the last `info` returned by the environment.
    test_reward = info["total_apple_consumed"]
    avg_agent_reward = test_reward / num_agents
    agent_rewards = list(info["total_reward_by_agent"].values())
    agent_reward_variance = np.var(agent_rewards)

    episode_stats["total_reward"] = test_reward
    episode_stats["avg_agent_rewards"] = avg_agent_reward
    episode_stats["agent_reward_variances"] = agent_reward_variance
    episode_stats["total_reward_by_agent"] = agent_rewards

    print('total reward:', test_reward)
    print('avg agent reward:', avg_agent_reward)
    print('reward by agent:', info["total_reward_by_agent"])
    print('agent reward variances:', agent_reward_variance)
    print("========= End of Test =========")

    f.write(f"ending info: {info}\n")
    f.write("========= End of Test =========\n")



In [None]:
import matplotlib.pyplot as plt

In [None]:
# Learning curve: the values collected in `ending_ep_rewards`, plotted in order
# (x is the list index, labelled as the training episode).
ax = plt.gca()
ax.plot(ending_ep_rewards)
ax.set_title("Reward by Training Episode")
ax.set_xlabel("Episode")
ax.set_ylabel("Reward")
plt.show()

In [None]:
# Total reward of each test episode, in the order the episodes were run.
fig, ax = plt.subplots()
ax.set_title("Reward by Test Episode")
ax.set_xlabel("Episode")
ax.set_ylabel("Reward")
test_episode_rewards = [stat['total_reward'] for stat in test_stats]
ax.plot(test_episode_rewards)
plt.show()

In [None]:
# Mean apple and dirt counts at each test step, averaged across test episodes.
#
# The per-episode series in `test_stats` are only as long as the episode actually
# ran (the test loop stops as soon as dones["__all__"] is set), so indexing every
# episode at every step up to max_steps_per_episode can raise IndexError. Each
# step is therefore averaged over the episodes that reached it, and plotting stops
# at the first step no episode reached.
plt.figure()
plt.title("Average Number of Apples and Dirt by Test Step")
plt.xlabel("Step")
plt.ylabel("Number")
avg_num_apples = []
avg_num_dirt = []
for i in range(max_steps_per_episode):
    apples_at_step = [stat['num_apples'][i] for stat in test_stats if i < len(stat['num_apples'])]
    dirt_at_step = [stat['num_dirt'][i] for stat in test_stats if i < len(stat['num_dirt'])]
    if not apples_at_step or not dirt_at_step:
        # Steps are recorded contiguously, so no later step has data either.
        break
    avg_num_apples.append(np.mean(apples_at_step))
    avg_num_dirt.append(np.mean(dirt_at_step))
plt.plot(avg_num_apples, label="Apples")
plt.plot(avg_num_dirt, label="Dirt")
plt.legend()
plt.show()

In [None]:
# Mean number of pickers and cleaners at each test step, averaged across test episodes.
#
# The per-episode series in `test_stats` are only as long as the episode actually
# ran (the test loop stops as soon as dones["__all__"] is set), so indexing every
# episode at every step up to max_steps_per_episode can raise IndexError. Each
# step is therefore averaged over the episodes that reached it, and plotting stops
# at the first step no episode reached.
plt.figure()
plt.title("Average Number of Pickers and Cleaners by Test Step")
plt.xlabel("Step")
plt.ylabel("Number")
avg_num_pickers = []
avg_num_cleaners = []
for i in range(max_steps_per_episode):
    pickers_at_step = [stat['pickers'][i] for stat in test_stats if i < len(stat['pickers'])]
    cleaners_at_step = [stat['cleaners'][i] for stat in test_stats if i < len(stat['cleaners'])]
    if not pickers_at_step or not cleaners_at_step:
        # Steps are recorded contiguously, so no later step has data either.
        break
    avg_num_pickers.append(np.mean(pickers_at_step))
    avg_num_cleaners.append(np.mean(cleaners_at_step))
plt.plot(avg_num_pickers, label="Pickers")
plt.plot(avg_num_cleaners, label="Cleaners")
plt.legend()
plt.show()

In [None]:
# Apples/dirt and pickers/cleaners ratios at each test step, averaged across test
# episodes.
#
# A ratio is only computed for an episode when its denominator is positive, and a
# step is only plotted when at least one episode contributed a ratio; the matching
# `*_steps` lists keep the x positions of the points that were kept.
# Episodes that ended before step i (the test loop stops on dones["__all__"]) have
# no entry at that index and are skipped instead of raising IndexError.
plt.figure()
plt.title("Average Ratios by Test Step")
plt.xlabel("Step")
plt.ylabel("Ratio")
avg_apple_dirt_ratios = []
apple_dirt_steps = []
avg_picker_cleaner_ratios = []
picker_cleaner_steps = []
for i in range(max_steps_per_episode):
    apple_dirt = []
    picker_cleaner = []
    for stat in test_stats:
        if i >= len(stat['num_dirt']) or i >= len(stat['cleaners']):
            # This episode terminated before step i.
            continue
        if stat['num_dirt'][i] > 0:
            apple_dirt.append(stat['num_apples'][i] / stat['num_dirt'][i])
        if stat['cleaners'][i] > 0:
            picker_cleaner.append(stat['pickers'][i] / stat['cleaners'][i])
    if len(apple_dirt) > 0:
        avg_apple_dirt_ratios.append(np.mean(apple_dirt))
        apple_dirt_steps.append(i)
    if len(picker_cleaner) > 0:
        avg_picker_cleaner_ratios.append(np.mean(picker_cleaner))
        picker_cleaner_steps.append(i)

plt.plot(apple_dirt_steps, avg_apple_dirt_ratios, label="Apples/Dirt")
plt.plot(picker_cleaner_steps, avg_picker_cleaner_ratios, label="Pickers/Cleaners")
plt.legend()
plt.show()

In [None]:
# Per-agent reward variance across training episodes.
# NOTE(review): `agent_reward_variances` is not defined in the cells shown here;
# presumably the training loop fills it with one value per episode — confirm.
fig, ax = plt.subplots()
ax.plot(agent_reward_variances)
ax.set_title("Reward Variance by Training Episode")
ax.set_xlabel("Episode")
ax.set_ylabel("Variance")
plt.show()

In [None]:
# Spread of per-agent rewards in each test episode, shown as a standard deviation
# (square root of the variance stored in test_stats).
plt.figure()
plt.title("Reward Standard Deviation by Test Episode")
plt.xlabel("Episode")
# The plotted quantity is sqrt(variance), so the axis is labelled accordingly.
plt.ylabel("Standard Deviation")
plt.plot([np.sqrt(stat['agent_reward_variances']) for stat in test_stats])
plt.show()

In [None]:
# Histogram of the rank-averaged reward: within each test episode the per-agent
# rewards are sorted ascending, then the k-th smallest reward is summed over the
# episodes and divided by num_test_episodes, giving one value per rank.
fig, ax = plt.subplots()
ax.set_title("Average Reward Curve")
ax.set_xlabel("Reward")
ax.set_ylabel("Frequency")

ranked_rewards = [sorted(stat['total_reward_by_agent']) for stat in test_stats]
reward_curve = []
for rank in range(num_agents):
    rank_total = 0.
    # Accumulate in episode order so the floating-point sum matches a running total.
    for episode_ranks in ranked_rewards:
        rank_total += episode_ranks[rank]
    reward_curve.append(rank_total / num_test_episodes)

ax.hist(reward_curve, bins=num_agents)
plt.show()