In [1]:
!pip install -q -U kaggle_environments



In [2]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from kaggle_environments import make, evaluate

In [3]:
%%writefile rock_only.py
# Agent 1 rock_only.py -> only 0 - rock
def rock_only(observation, configuration):
    """Constant agent: throws rock (sign 0) on every step, ignoring both arguments."""
    ROCK = 0
    return ROCK

Overwriting rock_only.py


In [4]:
%%writefile paper_only.py
# Agent 2 paper_only.py -> only 1 - paper
def paper_only(observation, configuration):
    """Constant agent: throws paper (sign 1) on every step, ignoring both arguments."""
    PAPER = 1
    return PAPER

Overwriting paper_only.py


In [5]:
%%writefile scissors_only.py
# Agent 3 scissors_only.py -> only 2 - scissors
def scissors_only(observation, configuration):
    """Constant agent: throws scissors (sign 2) on every step, ignoring both arguments."""
    SCISSORS = 2
    return SCISSORS

Overwriting scissors_only.py


In [6]:
%%writefile copy_opponent.py
# Agent 4 copy_opponent.py -> it returns opponent's previous action
import random
def copy_opponent(observation, configuration):
    """Mirror agent: replay the sign the opponent threw on the previous step.

    observation.step is the current step within the episode and
    observation.lastOpponentAction is the opponent's previous sign (None on the
    first step). configuration.signs is the number of available signs
    (default 3 - rock, paper, scissors).
    """
    nothing_to_copy_yet = not observation.step > 0
    if nothing_to_copy_yet:
        # Opening step: there is no previous opponent move, so pick a random sign.
        return random.randrange(0, configuration.signs)
    return observation.lastOpponentAction

Overwriting copy_opponent.py


In [7]:
%%writefile submission.py
# Agent 5 submission.py -> choice is based on the opponent's last move, so tie/lose occurs when the opponent changes move,
# and win occurs when the opponent doesn't change move.
import random
def submission(observation, configuration):
    """Counter agent: answer the opponent's previous sign with the next sign in the cycle.

    With the default rock/paper/scissors ordering the next sign is the one that
    beats the opponent's previous move, so the agent wins whenever the opponent
    repeats itself. observation.lastOpponentAction is None on the first step,
    hence the random opening.
    """
    if not observation.step > 0:
        # Opening step: no opponent history yet, draw any of the configured signs.
        return random.randrange(0, configuration.signs)
    shifted = observation.lastOpponentAction + 1
    return shifted % configuration.signs

Overwriting submission.py


In [8]:
%%writefile submission_shift_2.py
# Agent 6 submission_shift_2.py -> choice is based on the opponent's last move, so lose occurs when the opponent doesn't
# change move, and win/tie occurs when the opponent changes move.
import random
def submission_shift(observation, configuration):
    """Shift-by-two agent: answer the opponent's previous sign with the sign two places ahead in the cycle.

    With the default three signs this is the sign that loses to the opponent's
    previous move, so the agent loses whenever the opponent repeats itself.
    observation.lastOpponentAction is None on the first step, hence the random
    opening.
    """
    if not observation.step > 0:
        # Opening step: no opponent history yet, draw any of the configured signs.
        return random.randrange(0, configuration.signs)
    shifted = observation.lastOpponentAction + 2
    return shifted % configuration.signs

Overwriting submission_shift_2.py


In [9]:
%%writefile based_win_lose.py
# Agent 7 based_win_lose.py -> choice is based on the outcome of the previous turn: if the bot won, its next move is
# the opponent's previous (losing) move; after a loss or a tie it plays the move that beats the opponent's previous move.
import random
from kaggle_environments.envs.rps.utils import get_score
# The bot's own previous move; remembered between calls and re-drawn on step 0.
last_bot_action = None
def based_win_lose(observation, configuration):
    """Outcome-driven agent: the next move depends on whether the previous round was won.

    - Step 0: draw a random sign (configuration.signs is the number of choices,
      default 3 - rock, paper, scissors).
    - Previous round won (get_score returns 1 for the left argument winning):
      play the opponent's previous sign.
    - Previous round lost or tied: play the sign following the opponent's
      previous sign in the cycle.

    The chosen move is stored in the module-level ``last_bot_action`` so the
    next call can score the round.
    """
    global last_bot_action
    if observation.step == 0:
        last_bot_action = random.randrange(0, configuration.signs)
        return last_bot_action
    bot_won = get_score(last_bot_action, observation.lastOpponentAction) == 1
    if bot_won:
        last_bot_action = observation.lastOpponentAction
    else:
        last_bot_action = (observation.lastOpponentAction + 1) % configuration.signs
    return last_bot_action

Overwriting based_win_lose.py


In [10]:
%%writefile vs_frequently_move.py
# Agent 8 vs_frequently_move.py -> counts the opponent's moves step by step and returns the move that beats
# the opponent's most frequent move so far.
import random
# Tally of the opponent's moves in the current episode: sign -> number of times seen.
count_rps_move = dict()
def vs_frequently_move(observation, configuration):
    """Frequency agent: counter the sign the opponent has played most often so far.

    observation.step is the current step within the episode,
    observation.lastOpponentAction is the opponent's previous sign (None on the
    first step) and configuration.signs is the number of choices (default 3 -
    rock, paper, scissors).

    Returns a random sign on step 0; afterwards returns the sign following the
    opponent's most frequent sign in the cycle. Ties between equally frequent
    signs go to the one that was seen first.
    """
    if observation.step == 0:
        # A new episode starts here: drop the tally of any earlier episode so old
        # statistics cannot leak into this one when the module is reused.
        count_rps_move.clear()
        return random.randrange(0, configuration.signs)
    last_action = observation.lastOpponentAction
    count_rps_move[last_action] = count_rps_move.get(last_action, 0) + 1
    max_rps_move = max(count_rps_move, key=count_rps_move.get)
    return (max_rps_move + 1) % configuration.signs

Overwriting vs_frequently_move.py


In [11]:
%%writefile random_shuffle.py
# Agent 9 random_shuffle.py -> it is based on generating a random list of moves and returns items of list
# (the index of the list is the step of the game).
#!pip install pydash
import pydash
import random
from itertools import combinations_with_replacement
from itertools import chain
# Pre-generate the move sequence once at import time: the 10 unordered triples over
# {0, 1, 2}, repeated 100 times, shuffled as triples and then flattened into 3000 moves.
actions = list(combinations_with_replacement([0, 1, 2], 3)) * 100
random.shuffle(actions)
actions = list(chain.from_iterable(actions))
def random_shuffle(observation, configuration):
    """Scripted agent: return the pre-generated move stored at the index of the current step.

    observation.step is the current step within the episode. The index wraps
    around the end of ``actions`` so episodes longer than the pre-generated
    sequence keep receiving valid moves instead of raising IndexError.
    """
    return int(actions[observation.step % len(actions)])

Overwriting random_shuffle.py


In [12]:
%%writefile prime_odd_even.py
# Agent 10 prime_odd_even.py -> from the first step, if the step number is a prime number, then scissors,
# if not prime and even, then a rock, otherwise paper.
import random
def is_prime(num):
    """Return True when ``num`` is a prime number, False otherwise.

    Intended for integer input (the agent passes the current step number).
    Values below 2 - zero, one and negatives - are rejected up front, which also
    guarantees that the divisor search below terminates for every integer.
    """
    if num < 2:
        return False
    d = 2
    # A composite number always has a divisor no larger than its square root,
    # so candidates beyond that bound never need to be tried.
    while d * d <= num:
        if num % d == 0:
            return False
        d += 1
    return True
def prime_odd_even(observation, configuration):
    """Step-number agent: the move depends only on the index of the current step.

    - Step 0: random sign (configuration.signs is the number of choices,
      default 3 - rock, paper, scissors).
    - Prime step number: 2 (scissors).
    - Non-prime even step number: 0 (rock).
    - Any other step number: 1 (paper).
    """
    step = observation.step
    if step == 0:
        return random.randrange(0, configuration.signs)
    if is_prime(step):
        return 2
    return 0 if step % 2 == 0 else 1

Overwriting prime_odd_even.py


In [13]:
%%writefile beat_not_beat.py
# Agent 11 beat_not_beat.py -> after a random first move, alternates between the move that beats the opponent's
# last action (odd steps) and a copy of the opponent's last action (even steps).
import random
def beat_not_beat(observation, configuration):
    """Alternating agent: switches between countering and copying the opponent's previous sign.

    - Step 0: random sign (configuration.signs is the number of choices,
      default 3 - rock, paper, scissors).
    - Odd steps: the sign following the opponent's previous sign in the cycle.
    - Even steps (after 0): the opponent's previous sign itself.

    observation.lastOpponentAction is the sign the opponent returned on the
    previous step (None on the first step).
    """
    step = observation.step
    if step == 0:
        return random.randrange(0, configuration.signs)
    opponent_move = observation.lastOpponentAction
    on_odd_step = step % 2 != 0
    if on_odd_step:
        return (opponent_move + 1) % configuration.signs
    return opponent_move

Overwriting beat_not_beat.py


In [14]:
%%writefile last_actions.py
# Agent 12 last_actions.py -> choice is based on the previous move of the bot and the opponent.
import random
# The bot's own previous move; remembered between calls and re-drawn on step 0.
last_bot_action = None
def last_actions(observation, configuration):
    """Sum agent: the next move is the bot's previous move plus the opponent's previous move, modulo the sign count.

    On step 0 a random sign is drawn (configuration.signs is the number of
    choices, default 3 - rock, paper, scissors). observation.lastOpponentAction
    is the sign the opponent returned on the previous step (None on the first
    step). The chosen move is kept in the module-level ``last_bot_action``.
    """
    global last_bot_action
    if observation.step != 0:
        combined = last_bot_action + observation.lastOpponentAction
        last_bot_action = combined % configuration.signs
    else:
        last_bot_action = random.randrange(0, configuration.signs)
    return last_bot_action

Overwriting last_actions.py


In [15]:
# Testing strategies: round-robin tournament in which every agent file meets every other one exactly once.
lst_agents = [
    'rock_only.py', 'paper_only.py', 'scissors_only.py', 'copy_opponent.py',
    'submission.py', 'submission_shift_2.py', 'based_win_lose.py',
    'vs_frequently_move.py', 'random_shuffle.py', 'prime_odd_even.py',
    'beat_not_beat.py', 'last_actions.py',
]
len_lst_agents = len(lst_agents)

# One result list per agent: 1 for a won match, 0 for a lost or tied one.
scores = {}
for agent_file in lst_agents:
    scores[agent_file] = list()

for idx, first_agent in enumerate(lst_agents[:-1]):
    for inner_idx in range(idx + 1, len_lst_agents):
        second_agent = lst_agents[inner_idx]
        reward = evaluate(
            "rps",
            [first_agent, second_agent],
            configuration={"episodeSteps": 100, "tieRewardThreshold": 1}
        )
        # Only the first agent's reward of the single episode is inspected;
        # its sign decides which side (if any) is credited with the win.
        first_reward = reward[0][0]
        if first_reward > 0:
            outcome = (1, 0)
        elif first_reward == 0:
            outcome = (0, 0)
        else:
            outcome = (0, 1)
        scores[first_agent].append(outcome[0])
        scores[second_agent].append(outcome[1])

# Rank the agents by their number of wins, best first.
sorted_scores = dict(sorted(scores.items(), key=lambda item: sum(item[1]), reverse=True))

print('Agent'.center(24), '|', '0-lose/tie, 1-win'.center(31), '|', 'Number of wins (descending)'.center(5))
for key, values in sorted_scores.items():
    print(f'{key:<25}|{values}|{sum(values):>15}')

         Agent           |        0-lose/tie, 1-win        | Number of wins (descending)
beat_not_beat.py         |[1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1]|              8
submission.py            |[1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0]|              7
vs_frequently_move.py    |[1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0]|              7
based_win_lose.py        |[1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1]|              6
prime_odd_even.py        |[0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0]|              6
paper_only.py            |[1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0]|              5
submission_shift_2.py    |[0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1]|              5
rock_only.py             |[0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0]|              4
copy_opponent.py         |[0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1]|              4
random_shuffle.py        |[0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1]|              4
scissors_only.py         |[0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0]|              2
last_actions.py          |[0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0]|              2

In [16]:
# evaluate() runs an episode (environment + agents) and returns only the rewards,
# one [first agent, second agent] pair per episode.
# Example: beat_not_beat.py vs. prime_odd_even.py
# The call is the last expression of the cell, so its return value is shown as the cell output.
evaluate(
    "rps", 
    ["beat_not_beat.py", "prime_odd_even.py"], 
    configuration={"episodeSteps": 100, "tieRewardThreshold": 1}
)
# episodeSteps - maximum number of steps the environment can run, default 1000
# tieRewardThreshold - minimum reward needed to achieve a win rather than a tie, default 20

[[-25.0, 25.0]]

In [17]:
# Create an environment instance for the "rps" game; the cells below reuse it as `env`.
# NOTE(review): debug=True presumably surfaces agent errors/prints - confirm against the kaggle_environments docs.
env = make("rps", configuration={"episodeSteps": 100, "tieRewardThreshold": 1}, debug=True)
env.render()
# episodeSteps - maximum number of steps the environment can run, default 1000
# tieRewardThreshold - minimum reward needed to achieve a win rather than a tie, default 20

In [18]:
# Example: last_actions.py vs. based_win_lose.py
# Reset the shared `env` (created in an earlier cell) before running this match.
env.reset()

# Play the two agent files against each other, then render the match in the notebook (700x550).
env.run(["last_actions.py", "based_win_lose.py"])
env.render(mode="ipython", width=700, height=550)

In [19]:
# Example: submission_shift_2.py vs. vs_frequently_move.py
# Reset the shared `env` (created in an earlier cell) before running this match.
env.reset()

# Play the two agent files against each other, then render the match in the notebook (700x550).
env.run(["submission_shift_2.py", "vs_frequently_move.py"])
env.render(mode="ipython", width=700, height=550)

In [20]:
#print(env.specification.configuration)

In [21]:
#print(env.specification.observation)