In [11]:
from dataclasses import dataclass
from typing import Dict, List, Optional

import numpy as np
import pandas as pd
from torch import distributions as D

SEED = None
rng = np.random.RandomState(SEED)

In [51]:
Player = str

class Game:
    def __init__(self, 
                 player_hit_probs: Dict[Player, float],  # name and likelihood of hit for each player
                 seed: Optional[int] = None,  # seed for the rng for this game
                ):        
        
        self.player_hit_probs = player_hit_probs
        self.n_players = len(player_hit_probs)
        self.players = set(player_hit_probs.keys())
        
        # book keeping across multiple games
        self.archive = dict()
        self.archive["games_played"] = 0
        self.archive["games_won"] = {player: 0 for player in self.players}
        self.archive["game_stats"] = list()  # stats to store higher-res stuff from indiviual games
        
        # init rng for the game
        self._seed = seed or np.random.randint(int(1e8))
        self.rng = np.random.RandomState(self._seed)
        
        # will all be init'd in self.reset, which we won't call in init to mirror patterns in gym.Env
        self.step_number = None
        self.order: List[Player] = None  # order players go in
        self.player_shots: Dict[Player, Dict[str, int]] = None  # how many hits and misses shot by each player
        self.alive: Dict[Player, bool] = None  # is the player still alive?
        self.num_shot_at: Dict[player, int] = None
        self._curr_player_idx = None  # for keeping track of turn
        
    def record_game(self):
        self.reset()
        done = False
        while not done:
            done = self.step()
        winner = self.current_player  # game will terminate with winner as current player
        self.archive["games_played"] += 1
        self.archive["games_won"][winner] += 1
        stats = dict()
        stats["player_shots"] = self.player_shots
        stats["game_length"] = self.step_number
        stats["order"] = self.order
        self.archive["game_stats"].append(stats)
                
    @property
    def current_player(self) -> Player:
        return self.order[self._curr_player_idx]
        
    def inc_player(self) -> None:
        """move the game to the next player in order that's still alive"""
        prev_idx = self._curr_player_idx
        done = False
        while not done:
            self._curr_player_idx = (self._curr_player_idx + 1) % self.n_players  # inc
            player = self.order[self._curr_player_idx]
            if self.alive[player]:  # stop inc'ing if next player in order is still alive
                done = True
        return
        
    def reset(self):
        """reset a new game with the same players and true probs of a hit"""
        print(f"player probs for this game:")
        for player, prob in self.player_hit_probs.items():
            print(f"\t{player}: {prob:0.2f}")
        
        # set number of player hits and misses to 0
        # set alive to True for all players
        # set num times shot at to 0 for all players
        self.player_shots = dict()
        self.alive = dict()
        self.num_shot_at = dict()
        for player in self.players:
            self.player_shots[player] = dict(hits=0, misses=0)
            self.alive[player] = True
            self.num_shot_at[player] = 0
            
        # set the order
        self.order = self.rng.permutation(list(self.players))
        print(f"player order for this game: {self.order}")
        self._curr_player_idx = -1
        self.step_number = 0
        
    def step(self):
        """have the next player take their turn, update game state, and return if game over"""
        self.step_number += 1
        # move to next player
        self.inc_player()
        shooter = self.current_player
        
        # determine who to shoot at
        target = self.choose_target(shooter=shooter)
        
        # shoot and see if hit
        hit = self.rng.uniform() < self.player_hit_probs[shooter]
        
        self.num_shot_at[target] += 1
        
        if hit:
            self.player_shots[shooter]["hits"] += 1.
            self.alive[target] = False
        else:
            self.player_shots[shooter]["misses"] += 1.
            
        outcome = "hit" if hit else "missed"
        print(f"{shooter} shot at {target} with prob {self.player_hit_probs[shooter]:0.2f} and {outcome} on step {self.step_number}")
        # determine if the game is done
        done = sum(self.alive.values()) == 1
        if done:
            print(f"player {shooter} wins on turn {self.step_number}!")
        return done
        
        
    def choose_target(self, shooter: Player) -> Player:
        """determine which player a shooter will shoot at"""

        # this is one of many strategies you can put here, including
        # going all the way to trained neural networks implementing learned
        # RL policies.
        #
        # for now, we'll determine who one shoots simply as whoever's inferred
        # to have the best aim.  AKA people greedily always try to take the assumed
        # "best" player out
        # 
        # best aim here will be whoever has the highest probabiliy of hit drawn from a beta
        # distribution after 1000 samples from each beta
        
        # get beta disctibution of p(hit) for each player from their observed shots
        player_betas = {player: D.Beta(
                            concentration0=self.player_shots[player]["hits"] + 1.,
                            concentration1=self.player_shots[player]["misses"] + 1.,)
                        for player in self.players 
                        if (player != shooter and self.alive[player])}
        # sample probs from the beta distrs
        inferred_probs = {player: beta.sample((1000,)) 
                           for player, beta in player_betas.items()}
        # determine who had the highest prob most often in the 1000 samples from the betas
        df = pd.DataFrame(inferred_probs)
        maxes = df.values.argmax(axis=1)
        idxs, idx_counts = np.unique(maxes, return_counts=True)
        max_idx = idxs[idx_counts.argmax()]
        # shoot at the person who had the highest prob in the most samples
        to_shoot = df.columns[max_idx]
        
        return to_shoot

In [63]:
N = 9
SEED = None
probs = {"player_" + str(k): v for k, v in enumerate(np.linspace(0, 1, N+2)[1:-1])}

#### manually roll out a single game

In [70]:
game = Game(player_hit_probs=probs, seed=SEED)
game.reset()
done = False
while not done:
    done = game.step()

player probs for this game:
	player_0: 0.10
	player_1: 0.20
	player_2: 0.30
	player_3: 0.40
	player_4: 0.50
	player_5: 0.60
	player_6: 0.70
	player_7: 0.80
	player_8: 0.90
player order for this game: ['player_6' 'player_5' 'player_4' 'player_0' 'player_8' 'player_2'
 'player_7' 'player_3' 'player_1']
player_6 shot at player_8 with prob 0.70 and hit on step 1
player_5 shot at player_2 with prob 0.60 and hit on step 2
player_4 shot at player_3 with prob 0.50 and missed on step 3
player_0 shot at player_4 with prob 0.10 and missed on step 4
player_7 shot at player_4 with prob 0.80 and hit on step 5
player_3 shot at player_0 with prob 0.40 and missed on step 6
player_1 shot at player_0 with prob 0.20 and missed on step 7
player_6 shot at player_3 with prob 0.70 and missed on step 8
player_5 shot at player_3 with prob 0.60 and hit on step 9
player_0 shot at player_1 with prob 0.10 and missed on step 10
player_7 shot at player_0 with prob 0.80 and missed on step 11
player_1 shot at player_0 

#### run a buncha games and assess their results

In [71]:
%%capture

N_GAMES = 1000

game = Game(player_hit_probs=probs, seed=SEED)
[game.record_game() for _ in range(N_GAMES)]

In [72]:
game.archive["games_won"]

{'player_1': 6,
 'player_7': 268,
 'player_4': 73,
 'player_3': 29,
 'player_0': 2,
 'player_5': 113,
 'player_8': 337,
 'player_2': 12,
 'player_6': 160}