In [2]:
from kaggle_environments import make, evaluate
import random

def agent_rock(obs, conf):
    """Play rock (action 0) on every step.

    Both arguments are accepted for the agent call signature but never read.
    """
    rock = 0
    return rock

def agent_paper(obs, conf):
    """Play paper (action 1) on every step.

    Both arguments are accepted for the agent call signature but never read.
    """
    paper = 1
    return paper

def agent_scissors(obs, conf):
    """Play scissors (action 2) on every step.

    Both arguments are accepted for the agent call signature but never read.
    """
    scissors = 2
    return scissors

def agent_random(obs, conf):
    """Return a random action drawn from 0, 1 or 2.

    Both arguments are accepted for the agent call signature but never read.
    """
    action = random.randrange(3)
    return action

def agent_not_paper(obs, conf):
    """Return a random action that is never paper (1).

    Both arguments are accepted for the agent call signature but never read.
    """
    # A draw of 0 selects scissors (2); a draw of 1 selects rock (0).
    scissors_or_rock = (2, 0)
    return scissors_or_rock[random.randrange(2)]

def agent_not_rock(obs, conf):
    """Return a random action that is never rock (0).

    Both arguments are accepted for the agent call signature but never read.
    """
    # A draw of 0 selects paper (1); a draw of 1 selects scissors (2).
    paper_or_scissors = (1, 2)
    return paper_or_scissors[random.randrange(2)]

def agent_not_scissors(obs, conf):
    """Return a random action that is never scissors (2).

    Both arguments are accepted for the agent call signature but never read.
    """
    # The draw is already the action: 0 is rock, 1 is paper.
    rock_or_paper = random.randrange(2)
    return rock_or_paper

def agent_monkey(obs, conf):
    """Repeat the opponent's last action; the first action is random.

    Reads ``obs.step`` and, on every step after the first,
    ``obs.lastOpponentAction``. ``conf`` is not used.
    """
    is_first_step = obs.step == 0
    return random.randrange(3) if is_first_step else obs.lastOpponentAction

def agent_megabrain(obs, conf):
    """Counter the opponent's last action; the first action is random.

    Reads ``obs.step`` and, on every step after the first,
    ``obs.lastOpponentAction``. ``conf`` is not used.
    """
    if obs.step != 0:
        # The counter to action x is the next action in the cycle: (x + 1) mod 3.
        return (obs.lastOpponentAction + 1) % 3
    return random.randrange(3)

def agent_cycle(obs, conf):
    """Cycle through rock, paper, scissors according to the step number.

    Reads only ``obs.step``; ``conf`` is not used.
    """
    position_in_cycle = obs.step % 3
    return position_in_cycle

def agent_mood(obs, conf):
    """Choose an action from the sign of ``obs.reward``; the first action is random.

    After the first step: rock (0) when the reward is negative (losing),
    paper (1) when it is exactly zero (tie), scissors (2) otherwise (winning).
    ``conf`` is not used.
    """
    if obs.step == 0:
        return random.randrange(3)

    score = obs.reward
    if score < 0:
        return 0
    return 1 if score == 0 else 2

def agent_random_megabrain(obs, conf):
    """Counter the opponent's last action, with a 1-in-4 chance of a random action.

    The first action is always random. Reads ``obs.step`` and, when
    countering, ``obs.lastOpponentAction``. ``conf`` is not used.
    """
    first_step = obs.step == 0
    # Short-circuit: the 1-in-4 draw is only made after the first step.
    if first_step or random.randrange(4) == 0:
        return random.randrange(3)
    return (obs.lastOpponentAction + 1) % 3


def agent_psycho_megabrain(obs, conf):
    """Counter the opponent's last action, switching to rock when far behind.

    Returns rock (0) on the first step and whenever ``obs.reward`` is below
    -3; otherwise returns the counter to ``obs.lastOpponentAction``.
    ``conf`` is not used.
    """
    if obs.step != 0 and not (obs.reward < -3):
        return (obs.lastOpponentAction + 1) % 3
    return 0


def sign(a):
    """Return 1 if ``a`` is positive, -1 if it is negative, and 0 otherwise."""
    if a > 0:
        return 1
    if a < 0:
        return -1
    return 0


def get_winner(evaluation):
    """Decide which side won more episodes of an evaluation.

    Each episode is indexed at position 0: a positive value counts as a win
    for the first agent, a negative value as a win for the second agent.

    Returns 1 if the first agent won more episodes, -1 if the second did,
    and 0 if both won the same number.
    """
    episodes_won_by_first = len([ep for ep in evaluation if ep[0] > 0])
    episodes_won_by_second = len([ep for ep in evaluation if ep[0] < 0])
    return sign(episodes_won_by_first - episodes_won_by_second)

# Environment configuration: run 20 rounds in each episode, no threshold for tie.
config = {'episodeSteps': 20, 'tieRewardThreshold': 1}

all_agents = [
    agent_rock,
    agent_paper,
    agent_scissors,
    agent_random,
    agent_not_paper,
    agent_not_rock,
    agent_not_scissors,
    agent_monkey,
    agent_megabrain,
    agent_cycle,
    agent_mood,
    agent_random_megabrain,
    agent_psycho_megabrain,
]
# Display names come from the functions themselves, so the name list can never
# fall out of step with the agent list.
all_agents_names = [agent.__name__ for agent in all_agents]

num_agents = len(all_agents)

# Matrix (num_agents x num_agents); the diagonal (an agent against itself) stays None.
tournament = [[None] * num_agents for _ in range(num_agents)]

# Fill tournament matrix: element (i, j) == 1 if agent i won against agent j,
# -1 if lost, 0 if tie. Each pair is played once and mirrored with the sign flipped.
for i in range(num_agents - 1):
    for j in range(i + 1, num_agents):
        cur_eval = evaluate('rps', [all_agents[i], all_agents[j]], config, num_episodes=5)
        tournament[i][j] = get_winner(cur_eval)
        tournament[j][i] = -tournament[i][j]

# Element i is the number of opponents agent i won against.
results = [row.count(1) for row in tournament]
# Tuples (agent number, number of wins), sorted by number of wins in descending
# order; the sort is stable, so tied agents keep their original relative order.
leaderboard = sorted(enumerate(results), key=lambda pair: pair[1], reverse=True)
# Number of agents sharing the maximum number of wins.
number_of_winners = results.count(leaderboard[0][1])

# Print tournament results.
print('Tournament table:')
for i, row in enumerate(tournament):
    print(f'{row} - {all_agents_names[i]}: {results[i]} wins')

print(
    '\nLeaderboard (agent name, number of wins):\n',
    [(all_agents_names[agent_index], wins) for agent_index, wins in leaderboard],
    sep='',
)

print(f'\nWinner{"s" if number_of_winners > 1 else ""} (won against {leaderboard[0][1]} agents):')
# The winners are the first number_of_winners leaderboard entries, printed on
# one comma-separated line.
print(', '.join(all_agents_names[agent_index] for agent_index, _ in leaderboard[:number_of_winners]))


Tournament table:
[None, -1, 1, -1, 1, 1, -1, -1, -1, 0, 1, -1, -1] - agent_rock: 4 wins
[1, None, -1, 0, 1, -1, 1, -1, -1, 1, 0, -1, -1] - agent_paper: 4 wins
[-1, 1, None, 0, -1, 1, -1, 1, -1, -1, -1, -1, -1] - agent_scissors: 3 wins
[1, 0, 0, None, 0, -1, 1, 1, -1, 0, 1, 1, 0] - agent_random: 5 wins
[-1, -1, 1, 0, None, 1, -1, -1, -1, 1, -1, -1, -1] - agent_not_paper: 3 wins
[-1, 1, -1, 1, -1, None, 1, 1, -1, -1, 1, -1, -1] - agent_not_rock: 5 wins
[1, -1, 1, -1, 1, -1, None, -1, -1, 1, -1, -1, -1] - agent_not_scissors: 4 wins
[1, 1, -1, -1, 1, -1, 1, None, 1, -1, -1, -1, -1] - agent_monkey: 5 wins
[1, 1, 1, 1, 1, 1, 1, -1, None, 1, 1, -1, 1] - agent_megabrain: 10 wins
[0, -1, 1, 0, -1, 1, -1, 1, -1, None, 1, -1, 0] - agent_cycle: 4 wins
[-1, 0, 1, -1, 1, -1, 1, 1, -1, -1, None, 0, -1] - agent_mood: 4 wins
[1, 1, 1, -1, 1, 1, 1, 1, 1, 1, 0, None, 0] - agent_random_megabrain: 9 wins
[1, 1, 1, 0, 1, 1, 1, 1, -1, 0, 1, 0, None] - agent_psycho_megabrain: 8 wins

Leaderboard (agent name,