In [13]:
from utils.CoopEnv import CoopEnv
from utils.IACagent import Agent

In [14]:
import random
import numpy as np
import matplotlib.pyplot as plt

In [15]:
# Game configuration. The three values are forwarded unchanged to CoopEnv and
# are also echoed in the title of the training graph further down.
# NOTE(review): `n` appears to be the number of players (main() builds
# `env.n` agents) and `tasks` the number of tasks; the meaning of `cnf` is
# defined inside CoopEnv -- confirm there.
n, tasks, cnf = 5, 3, 0

env = CoopEnv(cnf=cnf, tasks=tasks, n=n)

# Every agent picks among `env.num_of_tasks` discrete actions.
action_size = env.num_of_tasks

In [16]:
# How many independent training runs to perform, and how many environment
# steps each run lasts.
num_of_sims = 3
step_limit = 5000

# One row per simulation. Column t holds the reward accumulated after t steps,
# so column 0 stays at zero and step_limit + 1 columns are required.
steps_to_complete = np.zeros(shape=(num_of_sims, step_limit + 1))

In [17]:
def main():
    """Run `num_of_sims` training simulations of `step_limit` steps each.

    For every simulation a fresh set of `env.n` agents is created and the
    shared module-level `env` is reset. At each step every agent chooses an
    action from its own observation, the environment is stepped with the joint
    action list, and every agent is trained on its own transition.

    Side effects: fills row `sim` of the module-level `steps_to_complete`
    array with the running total of `sum(rewards)`, and prints a banner per
    simulation plus a carriage-return step counter.
    """
    banner = "----------------------------------"

    for sim in range(num_of_sims):
        print(banner)
        print(f"Starting training simulation {sim}:")
        print(banner)

        # Fresh learners for every simulation so nothing carries over.
        agentlist = {idx: Agent(action_size) for idx in range(env.n)}

        observations = env.reset(n=n, tasks=tasks, cnf=cnf)

        for step in range(step_limit):
            print(step, end='\r')

            actions = [
                agentlist[idx].choose_action(observations[idx])
                for idx in range(env.n)
            ]

            next_observations, rewards, termination, info = env.step(actions)

            # Each agent learns only from its own slice of the transition;
            # the termination flag is shared by all of them.
            for idx, learner in agentlist.items():
                learner.train(
                    observations[idx],
                    actions[idx],
                    rewards[idx],
                    next_observations[idx],
                    termination,
                )

            observations = next_observations

            # Cumulative team reward: previous total plus this step's sum.
            previous_total = steps_to_complete[sim, step]
            steps_to_complete[sim, step + 1] = sum(rewards) + previous_total




In [18]:
if __name__ == '__main__':
    # Kick off training. To profile instead, import cProfile and replace the
    # call below with cProfile.run('main()').
    main()


----------------------------------
Starting training simulation 0:
----------------------------------
[{'3'}, {'5', '4'}, {'2', '1'}]
[0.        0.        0.8343971 0.        0.       ]
[set(), {'2', '4'}, {'3', '5', '1'}]
[0. 0. 0. 0. 0.]
[{'2', '1', '4'}, {'5', '3'}, set()]
[0. 0. 0. 0. 0.]
[{'5', '1', '4'}, set(), {'2', '3'}]
[0. 0. 0. 0. 0.]
[set(), {'2', '4'}, {'5', '3', '1'}]
[0. 0. 0. 0. 0.]
[{'3', '2', '1'}, {'4'}, {'5'}]
[0.         0.         0.         0.03088947 0.95887477]
[{'4'}, {'3', '5', '1'}, {'2'}]
[1.26156397 0.68089458 1.15899599 0.03088947 1.33189822]
[{'3', '4'}, {'2', '1'}, {'5'}]
[0.         0.         0.         0.         0.95887477]
[{'5', '3'}, {'4', '1'}, {'2'}]
[0.         0.68089458 0.         0.         0.        ]
[{'2'}, set(), {'3', '5', '1', '4'}]
[3.50668208 0.68089458 3.22158099 0.11926325 3.70218535]
[{'2', '5'}, {'3', '1'}, {'4'}]
[0.         0.         0.         0.03088947 0.        ]
[set(), {'3', '4', '1'}, {'2', '5'}]
[1.26156397 0.        

KeyboardInterrupt: 

In [None]:
# Aggregate the cumulative-reward curves across simulations
# (axis 0 of steps_to_complete indexes the simulation).
b = np.mean(steps_to_complete, axis=0)
s_dev = np.std(steps_to_complete, axis=0)

# Explicit figure/axes instead of the implicit pyplot "current figure", so the
# cell does not depend on whatever figure happens to be active in the kernel.
fig, ax = plt.subplots(figsize=(10.5, 10.5))
ax.plot(b, label='Mean over simulations')

# Shade +/- one standard deviation so the spread between simulations is
# visible; previously s_dev was computed but never drawn.
ax.fill_between(np.arange(b.size), b - s_dev, b + s_dev, alpha=0.3,
                label='+/- 1 std. dev.')

ax.set_xlabel('Number of Steps Played')
ax.set_ylabel('Cumulative Reward')
ax.set_title(f'Training graph for IAC on cooperative game; n={n}, tasks={tasks}, cnf={cnf}')
ax.legend()
plt.show()

In [None]:
def get_biases(num_of_players, num_of_tasks):
    """Build the deterministic bias table of shape (num_of_tasks, num_of_players).

    Entry ``[task, length]`` (for ``length >= 1``) is a single uniform draw
    from ``[0, length + 1]`` produced by a generator seeded with
    ``task * num_of_players + length + 2``, so the table is identical on every
    call. Column 0 is set to 1 for every task.

    Each draw uses its own private ``random.Random`` instance. This yields the
    same numbers as seeding the module-level generator with the same value,
    but leaves the process-wide ``random`` state untouched, so calling this
    function no longer perturbs other stochastic code.

    Args:
        num_of_players: Number of columns in the table.
        num_of_tasks: Number of rows in the table.

    Returns:
        A float ``numpy.ndarray`` of shape ``(num_of_tasks, num_of_players)``.
    """
    biases = np.zeros((num_of_tasks, num_of_players))

    for task in range(num_of_tasks):
        # Column 0 is fixed to 1 below, so no draw is needed for it. Draws are
        # independent (one seed each), so skipping it changes nothing else.
        for length in range(1, num_of_players):
            seed = task * num_of_players + (length + 1) + 1
            biases[task, length] = random.Random(seed).uniform(0, length + 1)

    # Slice (rather than index) so a table with zero players does not raise.
    biases[:, :1] = 1
    return biases


# Show the locally recomputed bias table next to the environment's `CS`
# attribute for a visual comparison.
# NOTE(review): `env.CS` is presumably CoopEnv's own copy of this table --
# confirm against utils/CoopEnv.
recomputed_biases = get_biases(n, tasks)
print(recomputed_biases)
print(env.CS)
