# Provided code

In [None]:
# Install open spiel
# NOTE: the version is not pinned; the run recorded in the output below
# resolved to open_spiel 1.4.
!pip install --upgrade open_spiel

Collecting open_spiel
  Downloading open_spiel-1.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.6/5.6 MB[0m [31m31.4 MB/s[0m eta [36m0:00:00[0m
Collecting ml-collections>=0.1.1 (from open_spiel)
  Downloading ml_collections-0.1.1.tar.gz (77 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.9/77.9 kB[0m [31m12.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: ml-collections
  Building wheel for ml-collections (setup.py) ... [?25l[?25hdone
  Created wheel for ml-collections: filename=ml_collections-0.1.1-py3-none-any.whl size=94505 sha256=939f331681956ce33d356a0d305528f47ca7db855428998b89643d5346069d37
  Stored in directory: /root/.cache/pip/wheels/7b/89/c9/a9b87790789e94aadcfc393c283e3ecd5ab916aed0a31be8fe
Successfully built ml-collections
Installing collected packages: ml-collections, open

In [None]:
# Imports
import numpy as np

from open_spiel.python import rl_agent
from open_spiel.python import rl_environment
import pyspiel

In [None]:
# Some helper classes and functions.
# DO NOT CHANGE.

class BotAgent(rl_agent.AbstractAgent):
  """Adapter that exposes an OpenSpiel bot through the RL agent API.

  The wrapped bot is stepped on a full OpenSpiel state, which is rebuilt on
  every call from `time_step.observations["serialized_state"]`. The
  environment therefore has to include the OpenSpiel state in its
  observations (this notebook creates it with include_full_state=True).

  This thin wrapper is what lets the RPS bots be used wherever an RL agent is
  expected.
  """

  def __init__(self, num_actions, bot, name="bot_agent"):
    """Stores the bot and the size of the action space.

    Args:
      num_actions: number of actions in the game; must be positive.
      bot: the bot to wrap; it must provide `restart()` and `step(state)`.
      name: accepted for API compatibility; it is not stored.
    """
    assert num_actions > 0
    self._num_actions = num_actions
    self._bot = bot

  def restart(self):
    """Restarts the wrapped bot."""
    self._bot.restart()

  def step(self, time_step, is_evaluation=False):
    """Asks the wrapped bot for its action at `time_step`.

    Args:
      time_step: the current environment time step; its observations must
        contain the "serialized_state" entry.
      is_evaluation: unused; present to match the agent interface.

    Returns:
      An `rl_agent.StepOutput` holding the chosen action and a one-hot
      probability vector over the actions, or None when `time_step` is the
      last step of the episode.
    """
    if not time_step.last():
      serialized_state = time_step.observations["serialized_state"]
      _, state = pyspiel.deserialize_game_and_state(serialized_state)
      chosen_action = self._bot.step(state)
      # The bot's choice is reported as a one-hot distribution.
      one_hot = np.zeros(self._num_actions)
      one_hot[chosen_action] = 1.0
      return rl_agent.StepOutput(action=chosen_action, probs=one_hot)
    # End of the episode: no action is selected.
    return None


#  We will use this function to evaluate the agents. Do not change.

def eval_agents(env, agents, num_players, num_episodes, verbose=False):
  """Evaluate the agents.

  Runs a number of episodes and returns the average returns for each agent as
  a numpy array.

  Arguments:
    env: the RL environment,
    agents: a list of agents (size 2),
    num_players: number of players in the game (for RRPS, this is 2),
    num_episodes: number of evaluation episodes to run.
    verbose: whether to print updates after each episode.

  Returns:
    A numpy array of length `num_players` holding each player's return
    averaged over `num_episodes` episodes.
  """
  sum_episode_rewards = np.zeros(num_players)
  for ep in range(num_episodes):
    for agent in agents:
      # Bots need to be restarted at the start of the episode.
      if hasattr(agent, "restart"):
        agent.restart()
    time_step = env.reset()
    episode_rewards = np.zeros(num_players)
    while not time_step.last():
      # Every agent acts on the same time step (simultaneous moves).
      agents_output = [
          agent.step(time_step, is_evaluation=True) for agent in agents
      ]
      action_list = [agent_output.action for agent_output in agents_output]
      time_step = env.step(action_list)
      episode_rewards += time_step.rewards
    sum_episode_rewards += episode_rewards
    if verbose:
      print(f"Finished episode {ep}, "
            + f"avg returns: {sum_episode_rewards / (ep+1)}")

  return sum_episode_rewards / num_episodes


def print_roshambo_bot_names_and_ids(roshambo_bot_names):
  """Prints a header followed by one `<index>: <name>` line per bot."""
  print("Roshambo bot population:")
  for index, bot_name in enumerate(roshambo_bot_names):
    print(f"{index}: {bot_name}")

def create_roshambo_bot_agent(player_id, num_actions, bot_names, pop_id):
  """Wraps the population bot at index `pop_id` in a `BotAgent`.

  Args:
    player_id: player id handed to `pyspiel.make_roshambo_bot`.
    num_actions: number of actions, forwarded to `BotAgent`.
    bot_names: sequence of bot names, indexed by `pop_id`.
    pop_id: index into `bot_names` of the bot to create.

  Returns:
    A `BotAgent` wrapping the newly created bot.
  """
  bot_name = bot_names[pop_id]
  # The bot is created with the default number of throws
  # (pyspiel.ROSHAMBO_NUM_THROWS). For a different number of throws per
  # episode, pass that number as a third argument to make_roshambo_bot.
  return BotAgent(
      num_actions,
      pyspiel.make_roshambo_bot(player_id, bot_name),
      name=bot_name)


In [None]:
# Load the names of the roshambo bot population and give every bot an id equal
# to its position in the alphabetically sorted name list.
print("Loading bot population...")
pop_size = pyspiel.ROSHAMBO_NUM_BOTS
print(f"Population size: {pop_size}")
roshambo_bot_names = sorted(pyspiel.roshambo_bot_names())
print_roshambo_bot_names_and_ids(roshambo_bot_names)

# Maps bot name -> population id (the same index that is printed above).
roshambo_bot_ids = {name: index for index, name in enumerate(roshambo_bot_names)}

Loading bot population...
Population size: 43
Roshambo bot population:
0: actr_lag2_decay
1: adddriftbot2
2: addshiftbot3
3: antiflatbot
4: antirotnbot
5: biopic
6: boom
7: copybot
8: debruijn81
9: driftbot
10: flatbot3
11: foxtrotbot
12: freqbot2
13: granite
14: greenberg
15: halbot
16: inocencio
17: iocainebot
18: marble
19: markov5
20: markovbails
21: mixed_strategy
22: mod1bot
23: multibot
24: peterbot
25: phasenbott
26: pibot
27: piedra
28: predbot
29: r226bot
30: randbot
31: robertot
32: rockbot
33: rotatebot
34: russrocker4
35: shofar
36: sunCrazybot
37: sunNervebot
38: sweetrock
39: switchalot
40: switchbot
41: textbot
42: zq_move


In [None]:
# Look up the population id assigned to the 'greenberg' bot.
roshambo_bot_ids['greenberg']

14

In [None]:
# # Example: create an RL environment, and two agents from the bot population and
# # evaluate these two agents head-to-head.

# # Note that the include_full_state variable has to be enabled because the
# # BotAgent needs access to the full state.
# env = rl_environment.Environment(
#     "repeated_game(stage_game=matrix_rps(),num_repetitions=" +
#     f"{pyspiel.ROSHAMBO_NUM_THROWS}," +
#     f"recall={RECALL})",
#     include_full_state=True)
# num_players = 2
# num_actions = env.action_spec()["num_actions"]
# # Learning agents might need this:
# # info_state_size = env.observation_spec()["info_state"][0]

# # Create two bot agents
# p0_pop_id = 0   # actr_lag2_decay
# p1_pop_id = 1   # adddriftbot2
# agents = [
#     create_roshambo_bot_agent(0, num_actions, roshambo_bot_names, p0_pop_id),
#     create_roshambo_bot_agent(1, num_actions, roshambo_bot_names, p1_pop_id)
# ]

# print("Starting eval run.")
# avg_eval_returns = eval_agents(env, agents, num_players, 10, verbose=True)

# print("Avg return ", avg_eval_returns)

NameError: ignored

# My own code

Altered or completely new code, used to generate training data for the LSTM.

In [None]:
# Value substituted into the `recall=` parameter of the game string below.
RECALL = 20
# Repeated rock-paper-scissors with pyspiel.ROSHAMBO_NUM_THROWS repetitions per
# episode. include_full_state=True is required because BotAgent reads the
# "serialized_state" entry of the observations.
env = rl_environment.Environment(
    "repeated_game(stage_game=matrix_rps(),num_repetitions=" +
    f"{pyspiel.ROSHAMBO_NUM_THROWS}," +
    f"recall={RECALL})",
    include_full_state=True)
# RRPS is a two-player game.
num_players = 2
num_actions = env.action_spec()["num_actions"]

In [None]:
def generate_training_data(env, bot_id = 14, num_episodes = 1000, verbose=False):
  """Plays episodes between the module-level `agents` and averages the returns.

  NOTE: this function reads the module-level names `agents` (the list of
  agents to play) and `num_players`; both must be defined before it is called.

  Arguments:
    env: the RL environment,
    bot_id: population id of a bot (14 by default); currently not used by the
      body,
    num_episodes: number of episodes to run,
    verbose: whether to print updates after each episode.

  Returns:
    A numpy array of length `num_players` holding each player's return
    averaged over `num_episodes` episodes.
  """
  # The accumulator has to exist before the first `+=` below; without this
  # line the first episode fails with UnboundLocalError.
  sum_episode_rewards = np.zeros(num_players)
  for ep in range(num_episodes):
    for agent in agents:
      # Bots need to be restarted at the start of the episode.
      if hasattr(agent, "restart"):
        agent.restart()
    time_step = env.reset()
    episode_rewards = np.zeros(num_players)
    while not time_step.last():
      agents_output = [
          agent.step(time_step, is_evaluation=True) for agent in agents
      ]
      action_list = [agent_output.action for agent_output in agents_output]
      time_step = env.step(action_list)
      episode_rewards += time_step.rewards
    sum_episode_rewards += episode_rewards
    if verbose:
      print(f"Finished episode {ep}, "
            + f"avg returns: {sum_episode_rewards / (ep+1)}")

  return sum_episode_rewards / num_episodes

In [None]:
# class RandomAgent(rl_agent.AbstractAgent):
#   """Agent class that learns to play RRPS.

#   You fill this in to create your RRPS agent.

#   See the superclass for more info: https://github.com/google-deepmind/open_spiel/blob/master/open_spiel/python/rl_agent.py
#   """

#   def __init__(self, num_actions, name="bot_agent"):
#     assert num_actions > 0
#     self._num_actions = num_actions  # 3

In [None]:
# Mount Google Drive under /content/drive; the data-generation cells below save
# their .npy files to paths inside '/content/drive/My Drive'.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


The cell below generates training data. It runs games with one specific bot (in this case Greenberg) against randbot. This data was used to train the first iteration of the model (the one focused purely on beating Greenberg).

In [None]:
from tqdm import tqdm

# Generate training data for specified bot
botName = 'greenberg'
botId = roshambo_bot_ids[botName]

env = rl_environment.Environment(
    "repeated_game(stage_game=matrix_rps(),num_repetitions=" +
    f"{pyspiel.ROSHAMBO_NUM_THROWS}," +
    f"recall={RECALL})",
    include_full_state=True)
num_players = 2
num_actions = env.action_spec()["num_actions"]

trainBot = BotAgent(num_actions, pyspiel.make_roshambo_bot(0, botName), name=botName)  # Bot for whom train data being generated
# Opponent is randbot in seat 1. (This used to be `randBot = myAgent`, a name
# that is not defined anywhere in this notebook, so the cell failed with a
# NameError on a fresh kernel.)
randBot = BotAgent(num_actions, pyspiel.make_roshambo_bot(1, 'randbot'), name='randbot')
agents = [trainBot, randBot]

# Note: data is generated/saved in sets to avoid losing data if Colab kicks me off
# With these numbers, each set takes ~3.5 mins (~2.1 sec/run)
numSets = 30   # Number of sets of data to generate
numRuns = 100  # Number of runs to generate training data for per set

for setNum in range(numSets):
  runData = []  # Contains data for each run

  for run in tqdm(range(numRuns), desc=f"Set {setNum+1} / {numSets}"):
    # Reset variables for new run
    randBot.restart()
    trainBot.restart()
    time_step = env.reset()

    result = []  # Contains move data for this game

    while not time_step.last():
      # One [trainBot action, randBot action] pair is recorded per throw
      actionList = [agent.step(time_step, is_evaluation=True).action for agent in agents]
      result.append(actionList)
      time_step = env.step(actionList)

    runData.append(np.array(result, dtype=np.uint8))

  if len(runData) != numRuns:
    print(f"Warning: length should be {numRuns} but is {len(runData)}")

  runData = np.array(runData, dtype=np.uint8)
  print('Data shape:', runData.shape)

  # Save np array containing results for this set to google drive
  np.save(f'/content/drive/My Drive/CS486A4/Greenberg_data_LSTMv1/{botName}_{setNum}.npy', runData)

The cell below generates data. It runs games with one specific bot (in this case Greenberg) against every other bot. The data from this cell was not used in either of the models discussed in the report.

In [None]:
# Unlike the above cell, this one focuses on generating data from different agents vs greenberg

from tqdm import tqdm

# Generate training data for specified bot
botName = 'greenberg'
botId = roshambo_bot_ids[botName]

# List of agents that always play same sequence of moves (so don't need to run them a lot)
deterministic_agents = [
    'rockbot', 'rotatebot', 'pibot', 'debruijn81', 'textbot'
]

env = rl_environment.Environment(
    "repeated_game(stage_game=matrix_rps(),num_repetitions=" +
    f"{pyspiel.ROSHAMBO_NUM_THROWS}," +
    f"recall={RECALL})",
    include_full_state=True)
num_players = 2
num_actions = env.action_spec()["num_actions"]

trainBot = BotAgent(num_actions, pyspiel.make_roshambo_bot(0, botName), name=botName)  # Bot for whom train data being generated
# randBot = BotAgent(num_actions, pyspiel.make_roshambo_bot(1, 'randbot'), name='randbot')
# agents = [trainBot, randBot]

numRuns = 100  # Number of runs to generate training data for per model

# `popId` (rather than `id`) so the builtin id() is not shadowed in the notebook
for name, popId in roshambo_bot_ids.items():
  runData = []  # Contains data for each run

  dataBot = BotAgent(num_actions, pyspiel.make_roshambo_bot(1, name), name=name)
  agents = [trainBot, dataBot]

  # Run less times if this is a deterministic agent
  runTimes = 5 if name in deterministic_agents else numRuns

  for run in tqdm(range(runTimes), desc=f"Agent {name} ({popId + 1} / {len(roshambo_bot_names)})"):
    # Reset variables for new run
    time_step = env.reset()
    trainBot.restart()
    if hasattr(dataBot, 'restart'):
      dataBot.restart()

    result = []  # Contains move data for this game

    while not time_step.last():
      # One [trainBot action, dataBot action] pair is recorded per throw
      actionList = [agent.step(time_step, is_evaluation=True).action for agent in agents]
      result.append(actionList)
      time_step = env.step(actionList)

    runData.append(np.array(result, dtype=np.uint8))

  # Compare against runTimes (not numRuns): deterministic agents intentionally
  # run fewer games, which previously triggered a spurious warning.
  if len(runData) != runTimes:
    print(f"Warning: length should be {runTimes} but is {len(runData)}")

  runData = np.array(runData, dtype=np.uint8)
  print('Data shape:', runData.shape)

  # Save np array containing results for this set to google drive
  np.save(f'/content/drive/My Drive/Greenberg_AllBots_Data/{botName}-VS-{name}.npy', runData)

The cell below generates data. It runs games with every bot against randbot. The data from this cell was used in the second model discussed in the report (the one that can be generalized to other models).

This cell generates 150 games for each bot. If generating data on Colab, I recommend making 3 copies of this notebook and running all 3 at once to speed up data creation.

In [None]:
# Unlike the above cell, this one focuses on generating data from different agents vs randbot

from tqdm import tqdm

# List of agents that always play same sequence of moves (unused)
deterministic_agents = [
    'rockbot', 'rotatebot', 'pibot', 'debruijn81', 'textbot'
]

env = rl_environment.Environment(
    "repeated_game(stage_game=matrix_rps(),num_repetitions=" +
    f"{pyspiel.ROSHAMBO_NUM_THROWS}," +
    f"recall={RECALL})",
    include_full_state=True)
num_players = 2
num_actions = env.action_spec()["num_actions"]

# trainBot = BotAgent(num_actions, pyspiel.make_roshambo_bot(0, botName), name=botName)  # Bot for whom train data being generated
# randBot is the second entry of `agents` below, i.e. it plays seat 1, so it is
# created with player id 1 (it was previously created with id 0, the same id as
# trainBot).
randBot = BotAgent(num_actions, pyspiel.make_roshambo_bot(1, 'randbot'), name='randbot')
# agents = [trainBot, randBot]

numRuns = 150  # Number of runs to generate training data for per model

# `popId` (rather than `id`) so the builtin id() is not shadowed in the notebook
for name, popId in roshambo_bot_ids.items():
  runData = []  # Contains data for each run

  trainBot = BotAgent(num_actions, pyspiel.make_roshambo_bot(0, name), name=name)
  agents = [trainBot, randBot]

  # Run less times if this is a deterministic agent
  # runTimes = 5 if name in deterministic_agents else numRuns
  runTimes = numRuns

  for run in tqdm(range(runTimes), desc=f"Agent {name} ({popId + 1} / {len(roshambo_bot_names)})"):
    # Reset variables for new run
    time_step = env.reset()
    randBot.restart()
    if hasattr(trainBot, 'restart'):
      trainBot.restart()

    result = []  # Contains move data for this game

    while not time_step.last():
      # One [trainBot action, randBot action] pair is recorded per throw
      actionList = [agent.step(time_step, is_evaluation=True).action for agent in agents]
      result.append(actionList)
      time_step = env.step(actionList)

    runData.append(np.array(result, dtype=np.uint8))

  # Checked against the number of games actually requested for this bot
  if len(runData) != runTimes:
    print(f"Warning: length should be {runTimes} but is {len(runData)}")

  runData = np.array(runData, dtype=np.uint8)
  print('Data shape:', runData.shape)

  # Save np array containing results for this set to google drive
  # NOTE(review): the "-7" suffix looks like a per-copy tag for running several
  # copies of this notebook in parallel - confirm and change it per copy.
  np.save(f'/content/drive/My Drive/CS486A4/AllBots_vs_random/{name}-VS-randbot-7.npy', runData)

Agent actr_lag2_decay (1 / 43): 100%|██████████| 150/150 [04:50<00:00,  1.94s/it]


Data shape: (150, 1000, 2)


Agent adddriftbot2 (2 / 43): 100%|██████████| 150/150 [04:50<00:00,  1.94s/it]


Data shape: (150, 1000, 2)


Agent addshiftbot3 (3 / 43): 100%|██████████| 150/150 [04:51<00:00,  1.94s/it]


Data shape: (150, 1000, 2)


Agent antiflatbot (4 / 43): 100%|██████████| 150/150 [04:50<00:00,  1.94s/it]


Data shape: (150, 1000, 2)


Agent antirotnbot (5 / 43): 100%|██████████| 150/150 [04:51<00:00,  1.94s/it]


Data shape: (150, 1000, 2)


Agent biopic (6 / 43): 100%|██████████| 150/150 [04:55<00:00,  1.97s/it]


Data shape: (150, 1000, 2)


Agent boom (7 / 43): 100%|██████████| 150/150 [04:57<00:00,  1.98s/it]


Data shape: (150, 1000, 2)


Agent copybot (8 / 43): 100%|██████████| 150/150 [04:58<00:00,  1.99s/it]


Data shape: (150, 1000, 2)


Agent debruijn81 (9 / 43): 100%|██████████| 150/150 [04:55<00:00,  1.97s/it]


Data shape: (150, 1000, 2)


Agent driftbot (10 / 43): 100%|██████████| 150/150 [04:55<00:00,  1.97s/it]


Data shape: (150, 1000, 2)


Agent flatbot3 (11 / 43): 100%|██████████| 150/150 [04:54<00:00,  1.96s/it]


Data shape: (150, 1000, 2)


Agent foxtrotbot (12 / 43): 100%|██████████| 150/150 [04:53<00:00,  1.96s/it]


Data shape: (150, 1000, 2)


Agent freqbot2 (13 / 43): 100%|██████████| 150/150 [04:53<00:00,  1.96s/it]


Data shape: (150, 1000, 2)


Agent granite (14 / 43): 100%|██████████| 150/150 [04:52<00:00,  1.95s/it]


Data shape: (150, 1000, 2)


Agent greenberg (15 / 43): 100%|██████████| 150/150 [05:17<00:00,  2.12s/it]


Data shape: (150, 1000, 2)


Agent halbot (16 / 43): 100%|██████████| 150/150 [05:02<00:00,  2.01s/it]


Data shape: (150, 1000, 2)


Agent inocencio (17 / 43): 100%|██████████| 150/150 [04:58<00:00,  1.99s/it]


Data shape: (150, 1000, 2)


Agent iocainebot (18 / 43): 100%|██████████| 150/150 [04:59<00:00,  2.00s/it]


Data shape: (150, 1000, 2)


Agent marble (19 / 43): 100%|██████████| 150/150 [04:54<00:00,  1.96s/it]


Data shape: (150, 1000, 2)


Agent markov5 (20 / 43): 100%|██████████| 150/150 [04:54<00:00,  1.96s/it]


Data shape: (150, 1000, 2)


Agent markovbails (21 / 43): 100%|██████████| 150/150 [04:54<00:00,  1.97s/it]


Data shape: (150, 1000, 2)


Agent mixed_strategy (22 / 43): 100%|██████████| 150/150 [04:54<00:00,  1.96s/it]


Data shape: (150, 1000, 2)


Agent mod1bot (23 / 43): 100%|██████████| 150/150 [04:54<00:00,  1.96s/it]


Data shape: (150, 1000, 2)


Agent multibot (24 / 43): 100%|██████████| 150/150 [04:55<00:00,  1.97s/it]


Data shape: (150, 1000, 2)


Agent peterbot (25 / 43): 100%|██████████| 150/150 [04:55<00:00,  1.97s/it]


Data shape: (150, 1000, 2)


Agent phasenbott (26 / 43): 100%|██████████| 150/150 [04:55<00:00,  1.97s/it]


Data shape: (150, 1000, 2)


Agent pibot (27 / 43): 100%|██████████| 150/150 [04:53<00:00,  1.96s/it]


Data shape: (150, 1000, 2)


Agent piedra (28 / 43): 100%|██████████| 150/150 [04:53<00:00,  1.96s/it]


Data shape: (150, 1000, 2)


Agent predbot (29 / 43): 100%|██████████| 150/150 [04:53<00:00,  1.96s/it]


Data shape: (150, 1000, 2)


Agent r226bot (30 / 43): 100%|██████████| 150/150 [04:54<00:00,  1.96s/it]


Data shape: (150, 1000, 2)


Agent randbot (31 / 43): 100%|██████████| 150/150 [04:53<00:00,  1.96s/it]


Data shape: (150, 1000, 2)


Agent robertot (32 / 43): 100%|██████████| 150/150 [04:55<00:00,  1.97s/it]


Data shape: (150, 1000, 2)


Agent rockbot (33 / 43): 100%|██████████| 150/150 [04:57<00:00,  1.98s/it]


Data shape: (150, 1000, 2)


Agent rotatebot (34 / 43): 100%|██████████| 150/150 [04:56<00:00,  1.97s/it]


Data shape: (150, 1000, 2)


Agent russrocker4 (35 / 43): 100%|██████████| 150/150 [04:54<00:00,  1.96s/it]


Data shape: (150, 1000, 2)


Agent shofar (36 / 43): 100%|██████████| 150/150 [04:56<00:00,  1.97s/it]


Data shape: (150, 1000, 2)


Agent sunCrazybot (37 / 43): 100%|██████████| 150/150 [04:54<00:00,  1.97s/it]


Data shape: (150, 1000, 2)


Agent sunNervebot (38 / 43): 100%|██████████| 150/150 [04:58<00:00,  1.99s/it]


Data shape: (150, 1000, 2)


Agent sweetrock (39 / 43): 100%|██████████| 150/150 [04:55<00:00,  1.97s/it]


Data shape: (150, 1000, 2)


Agent switchalot (40 / 43): 100%|██████████| 150/150 [04:54<00:00,  1.96s/it]


Data shape: (150, 1000, 2)


Agent switchbot (41 / 43): 100%|██████████| 150/150 [04:54<00:00,  1.96s/it]


Data shape: (150, 1000, 2)


Agent textbot (42 / 43): 100%|██████████| 150/150 [04:54<00:00,  1.96s/it]


Data shape: (150, 1000, 2)


Agent zq_move (43 / 43): 100%|██████████| 150/150 [04:57<00:00,  1.98s/it]

Data shape: (150, 1000, 2)



