In [1]:
from utils.CoopEnv import CoopEnv
from utils.IACagent import Agent

In [2]:
import random
import numpy as np
import matplotlib.pyplot as plt

In [3]:
# Environment configuration shared by the cells below.
n = 20  # forwarded to CoopEnv as `n`; main() builds one Agent per `env.n` (presumably the player count — confirm in CoopEnv)
num_of_tasks = 10  # forwarded to CoopEnv as `num_of_tasks`
cnf = 0  # forwarded to CoopEnv as `cnf`; its meaning is not visible in this notebook — TODO confirm
env = CoopEnv(n=n, num_of_tasks=num_of_tasks, cnf=cnf)
action_size = env.num_of_tasks  # handed to every Agent constructor in main()

In [4]:
# Run size: number of independent simulations and environment steps per simulation.
num_of_sims = 1
step_limit = 10_000

# One row per simulation; one column per step plus a leading zero column.
# main() fills this with the running cumulative per-agent average reward
# (despite the variable's name).
steps_to_complete = np.zeros(shape=(num_of_sims, step_limit + 1))

In [5]:
def main():
    """Train independent agents on the shared cooperative environment.

    For each of ``num_of_sims`` simulations, a fresh ``Agent`` is created for
    every index in ``range(env.n)``, the environment is reset, and the agents
    act and train for ``step_limit`` steps. After each step the running
    cumulative per-agent average reward is written into the module-level
    ``steps_to_complete`` array (row = simulation, column = step + 1).

    Once all simulations have finished, the array is saved to
    ``n{n}t{num_of_tasks}_cum_rew_cnf{cnf}.npy`` in the working directory.

    Reads the notebook globals ``env``, ``n``, ``num_of_tasks``, ``cnf``,
    ``action_size``, ``num_of_sims`` and ``step_limit``; mutates
    ``steps_to_complete`` in place. Returns ``None``.
    """
    for s in range(num_of_sims):

        print("----------------------------------")
        print(f"Starting training simulation {s}:")
        print("----------------------------------")

        # Fresh learners for every simulation so nothing carries over between runs.
        agents = [Agent(action_size) for _ in range(env.n)]

        observations = env.reset(n=n, num_of_tasks=num_of_tasks, cnf=cnf)

        for t in range(step_limit):
            print(t, end='\r')  # step counter, overwritten in place

            actions = [agents[i].choose_action(observations[i]) for i in range(env.n)]
            next_observations, rewards, termination, _info = env.step(actions)

            # NOTE(review): `termination` is only forwarded to train(); the
            # environment is never reset mid-simulation — confirm that is intended.
            for i, agent in enumerate(agents):
                agent.train(observations[i], actions[i], rewards[i], next_observations[i], termination)

            observations = next_observations
            # Column 0 stays 0; column t+1 holds the cumulative mean reward after step t.
            steps_to_complete[s, t + 1] = steps_to_complete[s, t] + sum(rewards) / env.n

    # Encode the actual `cnf` value in the filename (it used to be hard-coded as
    # "cnf0"), so runs with a different cnf do not overwrite or mislabel results.
    np.save(f'n{n}t{num_of_tasks}_cum_rew_cnf{cnf}.npy', steps_to_complete)


In [6]:
if __name__ == '__main__':
    # Run the training loop. To profile it instead, import cProfile and
    # replace the call below with cProfile.run('main()').
    main()


----------------------------------
Starting training simulation 0:
----------------------------------
[{'6', '10', '5', '16', '20'}, {'13', '15', '9', '17'}, {'18'}, {'3', '14', '1', '4'}, {'8', '7'}, {'12'}, set(), set(), set(), {'11', '2', '19'}]
[0, 2, 1, 2, 0, 1, 1, 2, 2, 0, 2, 1, 1, 2, 2, 2, 1, 2, 2, 1]
[{'13', '11', '10', '5', '19', '17', '2'}, {'16'}, {'15', '3', '9'}, {'18', '1', '7'}, {'14', '12', '4'}, {'8'}, set(), set(), set(), {'20', '6'}]
[1, 1, 2, 2, 2, 0, 0, 0, 0, 0, 0, 1, 0, 2, 1, 0, 2, 0, 0, 0]
[{'13', '11', '10', '19'}, {'5', '16', '15', '17'}, {'1', '9'}, {'18', '3', '12', '7'}, set(), {'14', '8', '4'}, set(), set(), set(), {'6', '20', '2'}]
[1, 2, 2, 0, 2, 2, 0, 2, 1, 2, 1, 0, 2, 1, 0, 0, 0, 0, 0, 2]
[{'6', '19', '20', '2'}, {'13', '15', '10', '1', '16', '9', '17'}, {'5'}, {'18', '12', '7'}, {'14', '3'}, {'4'}, {'8'}, set(), set(), {'11'}]
[1, 2, 1, 2, 1, 0, 1, 0, 2, 0, 2, 1, 1, 0, 0, 1, 2, 2, 2, 1]
[{'13', '11', '6', '16', '1'}, {'15', '10', '5', '19', '2'}, {'7',

KeyboardInterrupt: 

In [None]:
# Collapse the simulation axis (axis 0) to get one value per step.
b = steps_to_complete.mean(axis=0)
s_dev = steps_to_complete.std(axis=0)  # spread across simulations; not drawn in this cell

# Size the current figure first, then label and draw on it.
fig = plt.gcf()
fig.set_size_inches(10.5, 10.5)
plt.title(f'Training graph for IAC on cooperative game; n={n}, tasks={num_of_tasks}, cnf={cnf}')
plt.xlabel('Number of Steps Played')
plt.ylabel('Cumulative Average System Reward')
plt.plot(b)

In [None]:
# Show env.CS as it stands after the run above; calling it "converged" assumes
# training ran to completion — nothing here verifies that.
print(f"Converged coalition structure is: {env.CS}")