In [9]:
from utils.CoopEnv import CoopEnv
from utils.IACagent import Agent

In [10]:
import random
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('ggplot')

In [11]:
# Set constant hyperparameters
n = 5                 # passed as `n` to CoopEnv(...) and env.reset(...)
num_of_tasks = 3      # passed as `num_of_tasks` to the environment; also sets move_space
num_of_sims = 1       # number of independent training simulations run by main()
step_limit = 20000    # environment steps taken in each simulation

# Seed the global RNGs so a Restart & Run All starts from the same random state.
# NOTE(review): this only covers Python's `random` and NumPy's legacy global RNG;
# if Agent/CoopEnv draw from another source (e.g. their own generator or a DL
# framework), seed that too — TODO confirm against utils.IACagent / utils.CoopEnv.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)

In [12]:
# Size of the move action space given to each Agent: one move action per task.
move_space = num_of_tasks
comm_space = 101 # number of discrete comm actions (grid points across the range, both ends included)

lower_bound = 0
upper_bound = 1
# Lookup table from a comm action index (0 .. comm_space-1) to its value in
# [lower_bound, upper_bound]. linspace ties the table length to comm_space and
# hits both end points exactly; np.arange with a float step and a padded stop
# has a length that depends on floating-point rounding.
comm_array = np.linspace(lower_bound, upper_bound, comm_space)

In [13]:
# Single shared environment instance, built from the hyperparameters above.
# main() calls env.reset(...) on it at the start of every simulation.
env = CoopEnv(n=n, num_of_tasks=num_of_tasks)

In [14]:
# Cumulative mean reward per simulation: one row per simulation, one column per
# timestep plus a leading column that stays at zero as the starting value.
steps_to_complete = np.zeros(shape=(num_of_sims, step_limit + 1))

# Per-step action logs filled in by main(): one row per timestep, one column per
# agent index in range(env.n).
move_rec_actions = np.zeros(shape=(step_limit, env.n))
comm_rec_actions = np.zeros_like(move_rec_actions)

In [15]:
def main():
    """Train the IAC agents on the shared environment and log the results.

    Runs ``num_of_sims`` simulations of exactly ``step_limit`` steps each.
    For every simulation a new set of ``env.n`` agents is created and the
    environment is reset. On each step every agent picks a
    ``(move, comm index)`` pair, the comm index is mapped to its value through
    ``comm_array``, the environment is stepped, and each agent is trained on
    its own transition.

    Results are written into module-level arrays rather than returned:

    * ``steps_to_complete[s, t + 1]`` holds the running sum, up to step ``t``,
      of the per-step reward averaged over agents for simulation ``s``.
    * ``move_rec_actions[t]`` / ``comm_rec_actions[t]`` hold the move action
      and the comm action *index* chosen by each agent at step ``t``. They are
      indexed by timestep only, so each simulation overwrites the log of the
      previous one.

    The termination flag and info returned by ``env.step`` are not used; the
    loop always runs for the full ``step_limit``.
    """
    for s in range(num_of_sims):

        print("----------------------------------")
        print(f"Starting training simulation {s}:")
        print("----------------------------------")

        # rebuild the agents from scratch for every simulation
        agentlist = {i: Agent(move_space, comm_space) for i in range(env.n)}
        # reset the environment and fetch the initial per-agent observations
        observations = env.reset(n=n, num_of_tasks=num_of_tasks)

        # training loop
        for t in range(step_limit):
            print(t, end='\r')  # in-place progress counter

            # one row per agent: column 0 is the move action, column 1 the comm action index
            sys_actions = np.array([agentlist[i].choose_action(observations[i]) for i in range(env.n)])

            action_move = sys_actions[:, 0]
            comm_index = sys_actions[:, 1]
            # translate the discrete comm index into its value on the comm grid
            action_comm = comm_array[comm_index]

            next_observations, rewards, _termination, _info = env.step(action_move, action_comm)

            # log the raw (index) actions for this step
            comm_rec_actions[t] = comm_index
            move_rec_actions[t] = action_move

            # train each agent on its own transition
            for i in range(env.n):
                agentlist[i].train(observations[i], sys_actions[i, :], rewards[i], next_observations[i])

            # advance state
            observations = next_observations
            # accumulate the average system reward on top of the previous total
            steps_to_complete[s, t + 1] = (sum(rewards) / env.n) + steps_to_complete[s, t]




In [16]:
# Entry point: run the full training loop defined in main().
if __name__ == "__main__":

    # Profiling alternative: cProfile.run('main()')
    # (cProfile is not imported in the visible cells; add `import cProfile` first.)
    main()


----------------------------------
Starting training simulation 0:
----------------------------------
[[ 2 19]
 [ 0 92]
 [ 1 83]
 [ 0 30]
 [ 1  1]]
[19 92 83 30  1]
[0.19 0.92 0.83 0.3  0.01]
[[ 1 61]
 [ 0 19]
 [ 0 15]
 [ 0 23]
 [ 1 25]]
[61 19 15 23 25]
[0.61 0.19 0.15 0.23 0.25]
[[ 0 59]
 [ 0 66]
 [ 2  6]
 [ 0 16]
 [ 1  2]]
[59 66  6 16  2]
[0.59 0.66 0.06 0.16 0.02]
[[ 1 65]
 [ 2 57]
 [ 1 96]
 [ 2 51]
 [ 1 49]]
[65 57 96 51 49]
[0.65 0.57 0.96 0.51 0.49]
[[ 1 90]
 [ 0 99]
 [ 0 41]
 [ 0 13]
 [ 0 98]]
[90 99 41 13 98]
[0.9  0.99 0.41 0.13 0.98]
[[ 0 10]
 [ 2 82]
 [ 1 83]
 [ 1 14]
 [ 2 29]]
[10 82 83 14 29]
[0.1  0.82 0.83 0.14 0.29]
[[ 1 87]
 [ 1 79]
 [ 2 46]
 [ 2 16]
 [ 2  8]]
[87 79 46 16  8]
[0.87 0.79 0.46 0.16 0.08]
[[ 1 10]
 [ 0 32]
 [ 1 80]
 [ 1 10]
 [ 1  4]]
[10 32 80 10  4]
[0.1  0.32 0.8  0.1  0.04]
[[ 2 29]
 [ 1 39]
 [ 2 18]
 [ 0  4]
 [ 1 99]]
[29 39 18  4 99]
[0.29 0.39 0.18 0.04 0.99]


KeyboardInterrupt: 

In [None]:
# Persist the cumulative-reward curves and the logged comm action indices.
# The file names encode the run configuration; np.save appends ".npy".
# move_rec_actions is not written out.
np.save(file=f'n{n}t{num_of_tasks}div{comm_space}_cum_rew', arr=steps_to_complete)
np.save(file=f'n{n}t{num_of_tasks}div{comm_space}_actions', arr=comm_rec_actions)

In [None]:
plt.style.use('ggplot')
# Mean and spread of the cumulative reward curve across simulations (axis 0).
b = np.mean(steps_to_complete, axis=0)
s_dev = np.std(steps_to_complete, axis=0)

# Explicit figure/axes so the cell never draws onto a leftover "current" figure.
fig, ax = plt.subplots(figsize=(10.5, 10.5))
ax.plot(b)
# Shade +/- one standard deviation around the mean; with a single simulation
# the spread is zero and the band collapses onto the line.
ax.fill_between(np.arange(b.size), b - s_dev, b + s_dev, alpha=0.3)
ax.set_xlabel('Number of Steps Played')
ax.set_ylabel('Cumulative Average System Reward')
ax.set_title(f'IAC on cooperative game; n={n}, tasks={num_of_tasks}');

In [None]:
# One line per agent: the comm action recorded at every timestep.
# Explicit figure/axes so the cell never draws onto a leftover "current" figure.
fig, ax = plt.subplots(figsize=(10.5, 10.5))

# NOTE(review): comm_rec_actions stores the comm action *index* written by
# main(), not the mapped value from comm_array, while the title quotes the
# [lower_bound, upper_bound] range — confirm which quantity is meant here.
ax.plot(comm_rec_actions)
ax.set_xlabel('Number of Steps Played')
ax.set_ylabel('Comm Bias')
ax.set_title(f'Communication action for cooperative game, action_space=[{lower_bound},{upper_bound}](div={comm_space})');

In [None]:
# Show the coalition structure held by the environment (`env.CS`) in its
# current state, i.e. after whatever steps main() has executed so far.
# NOTE(review): the format of `CS` is defined in utils.CoopEnv, not visible here.
print(env.CS)
