In [14]:
import concurrent
import threading
import time

import numba
from hand_evaluator import rank
from typing import List
from numba import jit, njit
import numba as nb
import numpy as np
from prl.environment.Wrappers.prl_wrappers import AugmentObservationWrapper
from prl.baselines.supervised_learning.data_acquisition.environment_utils import build_cards_state_dict, \
    init_wrapped_env, make_player_cards, make_board_cards
import random


In [15]:
hand_size = 2
class MonteCarlo_HandEvaluator:

    # def mc(self, id_caller_thread, deck, hero_cards_1d, board_cards_1d, n_opponents, n_iter):
    def mc(self, deck, hero_cards_1d, board_cards_1d, n_opponents, n_iter):
        n_missing_board_cards = len(deck) - 45
        cards_to_sample = 2 * n_opponents + n_missing_board_cards

        won = 0
        lost = 0
        tied = 0

        for i in range(n_iter):
            # draw board, if not complete already
            drawn_cards_1d = random.sample(deck, cards_to_sample)
            if n_missing_board_cards == 0:
                board = board_cards_1d
            else:
                board = board_cards_1d[:-n_missing_board_cards] + drawn_cards_1d[-n_missing_board_cards:]

            # rank hero hand
            hero_hand = hero_cards_1d + board
            hero_rank = rank(*hero_hand)

            # compare hero hand to opponent hands
            player_still_winning = True
            ties = 0
            for opp in range(n_opponents):
                opp_hand = [drawn_cards_1d[hand_size * opp], drawn_cards_1d[hand_size * opp + 1]] + board
                opp_rank = rank(*opp_hand)
                if opp_rank > hero_rank:
                    player_still_winning = False
                    break
                elif opp_rank == hero_rank:
                    ties += 1

            # update won/lost/tied stats
            if not player_still_winning:
                lost += 1
            elif player_still_winning and ties < n_opponents:
                won += 1
            elif player_still_winning and ties == n_opponents:
                tied += 1
            else:
                raise ValueError("Hero can tie against at most n_opponents, not more. Aborting MC Simulation...")
        return {'won': won, 'lost': lost, 'tied': tied}

    def run_mc(self, hero_cards_1d:List[int], board_cards_1d: List[int], n_opponents, n_iter=1000000) -> dict:
        """
        Returns estimated Effective Hand Strength after running n_iter Monte Carlo rollouts.
        :param hero_cards_1d: Integer representation of heros two hand cards, where 0<= hand_card <= 51
        :param board_cards_1d: Integer representation of board cards, where 0<= board_card <= 51
        :param n_iter: Number of rollouts to run before returning the estimated EHS. Default is 1 Million.
        :param n_opponents: Number of opponents simulated in the MC rollouts.
        :return: Wins, Loses, Ties. Will be used to compute the Effective Hand Strength Pr(win), i.e. Pr(win) = HS x (1 - NPot) + (1 - HS) x PPot
        where HS is computed as in "An Experimental Approach to Online Opponent
        Modeling in Texas Hold'em Poker" http://nozdr.ru/data/media/biblio/kolxoz/Cs/CsLn/A/Advances%20in%20Artificial%20Intelligence%20-%20SBIA%202008,%2019%20conf.(LNCS5249,%20Springer,%202008)(ISBN%209783540881896)(303s)_CsLn_.pdf#page=97
        """

        # https: // github.com / kennethshackleton / SKPokerEval / blob / develop / tests / FiveEval.h
        deck = []
        for i in range(52):
            if i not in hero_cards_1d and i not in board_cards_1d:
                deck.append(i)

        return self.mc(deck, hero_cards_1d, board_cards_1d, n_opponents, n_iter)


In [19]:
n_iterations = [1000, 2000, 5000, 10000, 20000, 50000]
mc = MonteCarlo_HandEvaluator()
for it in n_iterations:
    percents_won = []
    print(f'--------- VARIANCE IN % WON FOR {it} MC ITERATIONS-------------')
    for i in range(10):
        s0 = time.time()
        result_dict = mc.run_mc([0, 1], [-1, -1, -1, -1, -1], 2, it)
        percent_won = round(result_dict['won']/it, 2)
        percents_won.append(percent_won)
        # mc.run_mc([1, 41], [-1, -1, -1, -1, -1], 2, 100000)
        print(f'Time Taken this run: {time.time() - s0} seconds')
    print(f'max: {max(percents_won)}, min: {min(percents_won)}, DIFF = {round((max(percents_won) - min(percents_won))*100, 2)}%')
    print(' ---------------------------------------------------------------')

Time Taken this run: 1.1245548725128174 seconds
max: 0.31, min: 0.3, DIFF = 1.0%
 ---------------------------------------------------------------
--------- VARIANCE IN % WON FOR 1000 MC ITERATIONS-------------
Time Taken this run: 0.02615642547607422 seconds
Time Taken this run: 0.03323817253112793 seconds
Time Taken this run: 0.02999258041381836 seconds
Time Taken this run: 0.01732182502746582 seconds
Time Taken this run: 0.016915321350097656 seconds
Time Taken this run: 0.02197122573852539 seconds
Time Taken this run: 0.01775670051574707 seconds
Time Taken this run: 0.017235279083251953 seconds
Time Taken this run: 0.018349885940551758 seconds
Time Taken this run: 0.016160011291503906 seconds
max: 0.76, min: 0.72, DIFF = 4.0%
 ---------------------------------------------------------------
--------- VARIANCE IN % WON FOR 2000 MC ITERATIONS-------------
Time Taken this run: 0.050925254821777344 seconds
Time Taken this run: 0.047036170959472656 seconds
Time Taken this run: 0.0384354591