<a href="https://colab.research.google.com/github/holomorphicsean/SandBox/blob/main/Rock_Paper_Scissors_Simulator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# VARIABLE AND FUNCTION DEFINITIONS




In [48]:
import random

# Rock-Paper-Scissors payoff table.
# payoff[a][b] is the pair [player 1 utility, player 2 utility] when
# player 1 plays action a and player 2 plays action b.
# Example: payoff[ROCK][SCISSORS] == [1, -1], i.e. Rock beats Scissors
# from player 1's perspective.

ROCK, PAPER, SCISSORS = 0, 1, 2

payoff = [
  # vs ROCK   vs PAPER   vs SCISSORS
  [[0, 0],    [-1, 1],   [1, -1]],   # player 1 plays ROCK
  [[1, -1],   [0, 0],    [-1, 1]],   # player 1 plays PAPER
  [[-1, 1],   [1, -1],   [0, 0]],    # player 1 plays SCISSORS
]

def regret_table(act1, act2):
  """Return player 1's regrets for one round as a length-3 list.

  act1 is the action player 1 actually played and act2 the opponent's
  action (e.g. act1 = 0, act2 = 2 means ROCK vs SCISSORS). Entry k of
  the result is the utility player 1 would have earned by playing
  action k against act2, minus the utility actually earned.
  """
  # Utility player 1 really received this round.
  realized = payoff[act1][act2][0]

  # Regret = hypothetical utility of each alternative - realized utility.
  return [payoff[alternative][act2][0] - realized for alternative in range(3)]


def rm_strategy(regret_sum):
  """Compute the current regret-matching strategy and record it.

  Negative entries of regret_sum are clipped to zero and the remainder
  is normalized into a probability list. When no entry is positive the
  uniform strategy [1/3, 1/3, 1/3] is used instead.

  Side effect: the chosen strategy is added, element by element, to the
  module-level strategy_sum list so an average strategy can be derived
  from it later.

  Returns the current strategy (not the average one).
  """
  # Only positive regrets contribute to the strategy.
  clipped = [r if r > 0 else 0 for r in regret_sum]
  total = sum(clipped)

  if total <= 0:
    # Nothing to regret yet: play every action with equal probability.
    current = [1/3, 1/3, 1/3]
  else:
    current = [r/total for r in clipped]

  # Accumulate in place; strategy_sum is the shared module-level list.
  for idx in range(3):
    strategy_sum[idx] += current[idx]

  return current


def action(strategy):
  """Sample an action index (0, 1 or 2) from a 3-element strategy.

  A single uniform draw is compared against the running cumulative
  probability of the first two actions; if it falls under neither, the
  last action (index 2) is returned.
  """
  draw = random.random()
  running_total = 0

  # Only the first two actions need testing; the third is the fallback.
  for choice in range(2):
    running_total += strategy[choice]
    if draw < running_total:
      return choice

  return 2

def rm_average_strategy(strategy_sum):
  """Normalize an accumulated strategy sum into an average strategy.

  Each entry of strategy_sum is divided by the total of all entries.
  If that total is zero or negative, the uniform strategy
  [1/3, 1/3, 1/3] is returned instead.
  """
  total = sum(strategy_sum)

  # A non-positive total cannot be normalized; fall back to even play.
  return [1/3, 1/3, 1/3] if total <= 0 else [weight/total for weight in strategy_sum]


# TRAINING ALGORITHM (PROCEDURAL)

In [49]:
# Module-level training state (one slot per action: ROCK, PAPER, SCISSORS)
regret_sum = [0] * 3              # running total of player 1's regrets
strategy = [0] * 3                # player 1's strategy slot (train() works on its own local copy)
strategy_sum = [0] * 3            # running total of player 1's strategies, averaged later
opp_strategy = [0.4, 0.3, 0.3]    # fixed dummy strategy played by the opponent

# Run n rounds of regret-matching training against the dummy opponent
def train(n):
  """Play n rounds and return [wins, draws, losses] for player 1.

  Each round asks rm_strategy for the current strategy (which also
  updates the module-level strategy_sum), samples one action for each
  player, tallies the outcome, and adds that round's regrets to the
  module-level regret_sum.
  """
  # Outcome counters keyed by player 1's utility: 1 win, 0 draw, -1 loss.
  tally = {1: 0, 0: 0, -1: 0}

  for _ in range(n):
    # Pick this round's actions for player 1 and for the opponent.
    my_move = action(rm_strategy(regret_sum))
    opp_move = action(opp_strategy)

    # Regrets player 1 has about the move it just made.
    round_regret = regret_table(my_move, opp_move)

    # Record whether the round was a win, draw, or loss.
    outcome = payoff[my_move][opp_move][0]
    if outcome in tally:
      tally[outcome] += 1

    # Fold this round's regrets into the running totals (in place).
    for idx, value in enumerate(round_regret):
      regret_sum[idx] += value

  return [tally[1], tally[0], tally[-1]]

# Train for 700,000 rounds; train() returns the [win, draw, loss] counts.
wins = train(700000)
# Show the raw counts next to the same counts as percentages of all rounds.
print(wins, [100*i/sum(wins) for i in wins])
# Normalize the strategy_sum accumulated during training into the average
# strategy (bare expression: presumably displayed as the notebook cell's output).
rm_average_strategy(strategy_sum)

[279529, 209931, 210540] [39.93271428571428, 29.990142857142857, 30.077142857142857]


[9.098783755926613e-05, 0.9998294637188209, 7.95484436198722e-05]

# TRAINING ALGORITHM (OOP)

In [82]:
class Player:
  """A named Rock-Paper-Scissors player holding a mixed strategy.

  The strategy is a 3-element list of probabilities indexed by action
  (0, 1, 2). It can be supplied at construction time or later through
  set_strategy(); until then the player uses uniform play.
  """

  def __init__(self, name, strategy=None):
    """Create a player.

    name: label for the player.
    strategy: optional 3-element probability list; defaults to the
      uniform strategy so get_strategy() and roshambo() are usable
      before set_strategy() has been called.
    """
    self.name = name
    self.strategy = [1/3, 1/3, 1/3] if strategy is None else strategy

  def set_strategy(self, strategy):
    """Replace this player's strategy."""
    self.strategy = strategy

  def get_strategy(self):
    """Return this player's current strategy."""
    return self.strategy

  def roshambo(self):
    """Sample and return one action index from this player's strategy."""
    # Use the instance's own strategy, not a module-level variable that
    # happens to share the name.
    return action(self.strategy)

  
# Quick manual check: create a player, hand it a uniform strategy, and
# ask for a single action index via roshambo().
p1 = Player("Me")
p1.set_strategy([1/3, 1/3, 1/3])
p1.roshambo()


2