In [41]:
import concurrent
import concurrent.futures
import random
import time
from typing import List

import numba as nb
import numpy as np
from numba import jit, njit

from hand_evaluator import rank
from prl.baselines.supervised_learning.data_acquisition.environment_utils import build_cards_state_dict, \
    init_wrapped_env, make_player_cards, make_board_cards
from prl.environment.Wrappers.prl_wrappers import AugmentObservationWrapper
# cards per opponent hand; the rollout loops use it as the stride when slicing
# opponent hands out of the sampled cards
hand_size = 2

In [42]:
# (!!!!) DONT READ THIS CELL, EXPERIMENTS START ONE CELL BELOW
# monte-carlo stub with random sampling and ranking inside the loop for timing purposes only
def mc_basic(hero_cards_1d, board_cards_1d, n_opponents, n_iter=1000000):
    """
    Timing stub for the Monte Carlo rollout loop: samples cards and ranks the hero hand in every iteration.

    Opponent hands are assembled but not ranked and no won/lost/tied statistics are kept,
    so nothing is returned -- the function only exists to measure the cost of the loop body.

    :param hero_cards_1d: list with the hero's hole cards as integers in range(52); the missing-board-card
     computation below assumes exactly two of them
    :param board_cards_1d: list with five board cards as integers in range(52); undealt cards must be trailing
     entries whose value lies outside range(52) (e.g. -1) so that they do not remove a card from the deck
    :param n_opponents: Number of opponents simulated in the MC rollouts.
    :param n_iter: Number of rollouts to run. Default is 1 Million.
    :return: None
    """

    # https: // github.com / kennethshackleton / SKPokerEval / blob / develop / tests / FiveEval.h
    # every card that is neither held by the hero nor on the board can still be dealt
    deck = [card for card in range(52) if card not in hero_cards_1d and card not in board_cards_1d]

    # 52 cards - 2 hero cards - 5 board cards: size of the deck once the board is complete
    deck_size_full_board = 45
    n_missing_board_cards = len(deck) - deck_size_full_board
    # opponent cards are read with a stride of hand_size, the board is filled from the tail of the sample
    cards_to_sample = hand_size * n_opponents + n_missing_board_cards

    for _ in range(n_iter):
        drawn_cards_1d = random.sample(deck, cards_to_sample)
        if n_missing_board_cards == 0:
            # explicit branch because a [-0:] slice would select the whole list instead of nothing
            board = board_cards_1d
        else:
            board = board_cards_1d[:-n_missing_board_cards] + drawn_cards_1d[-n_missing_board_cards:]
        hero_hand = hero_cards_1d + board
        # the rank itself is not used, only the cost of computing it matters here
        hero_rank = rank(*hero_hand)
        for opp in range(n_opponents):
            # opponent hands are built to include their construction cost, ranking them is left out on purpose
            opp_hand = [drawn_cards_1d[hand_size * opp], drawn_cards_1d[hand_size * opp + 1]] + board
    # we left out updating won, lost and tied games for timing

# monte-carlo timing stub with random sampling moved out of the loop and hero ranking inside the loop
def mc_no_sampling(hero_cards_1d, board_cards_1d, n_opponents, n_iter=1000000):
    """
    Timing stub for the Monte Carlo rollout loop: cards are sampled ONCE before the loop, the hero hand is
    ranked in every iteration.

    Because every iteration reuses the same draw this is not a valid simulation; it only measures how much
    time is saved when random.sample is taken out of the loop. Opponent hands are assembled but not ranked
    and no won/lost/tied statistics are kept, so nothing is returned.

    :param hero_cards_1d: list with the hero's hole cards as integers in range(52); the missing-board-card
     computation below assumes exactly two of them
    :param board_cards_1d: list with five board cards as integers in range(52); undealt cards must be trailing
     entries whose value lies outside range(52) (e.g. -1) so that they do not remove a card from the deck
    :param n_opponents: Number of opponents simulated in the MC rollouts.
    :param n_iter: Number of loop iterations to run. Default is 1 Million.
    :return: None
    """

    # https: // github.com / kennethshackleton / SKPokerEval / blob / develop / tests / FiveEval.h
    # every card that is neither held by the hero nor on the board can still be dealt
    deck = [card for card in range(52) if card not in hero_cards_1d and card not in board_cards_1d]

    # 52 cards - 2 hero cards - 5 board cards: size of the deck once the board is complete
    deck_size_full_board = 45
    n_missing_board_cards = len(deck) - deck_size_full_board
    # opponent cards are read with a stride of hand_size, the board is filled from the tail of the sample
    cards_to_sample = hand_size * n_opponents + n_missing_board_cards
    # sampling must go inside loop below, moved out only to measure time savings
    drawn_cards_1d = random.sample(deck, cards_to_sample)
    for _ in range(n_iter):
        if n_missing_board_cards == 0:
            # explicit branch because a [-0:] slice would select the whole list instead of nothing
            board = board_cards_1d
        else:
            board = board_cards_1d[:-n_missing_board_cards] + drawn_cards_1d[-n_missing_board_cards:]
        hero_hand = hero_cards_1d + board
        # the rank itself is not used, only the cost of computing it matters here
        hero_rank = rank(*hero_hand)
        for opp in range(n_opponents):
            # opponent hands are built to include their construction cost, ranking them is left out on purpose
            opp_hand = [drawn_cards_1d[hand_size * opp], drawn_cards_1d[hand_size * opp + 1]] + board
    # we left out updating won, lost and tied games for timing

# monte-carlo timing stub with random sampling inside the loop but WITHOUT any hand ranking
def mc_no_ranking(hero_cards_1d, board_cards_1d, n_opponents, n_iter=100000):
    """
    Timing stub for the Monte Carlo rollout loop: cards are sampled in every iteration, no hand is ranked.

    Hero and opponent hands are assembled but never passed to the hand evaluator and no won/lost/tied
    statistics are kept, so nothing is returned -- the function only measures the cost of sampling and
    list handling.

    :param hero_cards_1d: list with the hero's hole cards as integers in range(52); the missing-board-card
     computation below assumes exactly two of them
    :param board_cards_1d: list with five board cards as integers in range(52); undealt cards must be trailing
     entries whose value lies outside range(52) (e.g. -1) so that they do not remove a card from the deck
    :param n_opponents: Number of opponents simulated in the MC rollouts.
    :param n_iter: Number of rollouts to run. Default is 100000.
    :return: None
    """

    # https: // github.com / kennethshackleton / SKPokerEval / blob / develop / tests / FiveEval.h
    # every card that is neither held by the hero nor on the board can still be dealt
    deck = [card for card in range(52) if card not in hero_cards_1d and card not in board_cards_1d]

    # 52 cards - 2 hero cards - 5 board cards: size of the deck once the board is complete
    deck_size_full_board = 45
    n_missing_board_cards = len(deck) - deck_size_full_board
    # opponent cards are read with a stride of hand_size, the board is filled from the tail of the sample
    cards_to_sample = hand_size * n_opponents + n_missing_board_cards

    for _ in range(n_iter):
        drawn_cards_1d = random.sample(deck, cards_to_sample)
        if n_missing_board_cards == 0:
            # explicit branch because a [-0:] slice would select the whole list instead of nothing
            board = board_cards_1d
        else:
            board = board_cards_1d[:-n_missing_board_cards] + drawn_cards_1d[-n_missing_board_cards:]
        # hands are built to include their construction cost, ranking them is left out on purpose
        hero_hand = hero_cards_1d + board
        for opp in range(n_opponents):
            opp_hand = [drawn_cards_1d[hand_size * opp], drawn_cards_1d[hand_size * opp + 1]] + board
    # we left out updating won, lost and tied games for timing

In [43]:
# benchmark configuration: rollouts per run and number of simulated opponents
iterations = int(1e5)
opponents = 2
print('------------------')
print('Running mc with sampling and ranking inside loop')
print('------------------')
# ten repetitions to make run-to-run variance visible
for i in range(10):
    # perf_counter is a monotonic high-resolution clock, unlike the adjustable wall clock behind time.time
    s0 = time.perf_counter()
    mc_basic([1, 41], [18, 19, 16, 20, 24], opponents, iterations)
    print(time.perf_counter() - s0)

------------------
Running mc with sampling and ranking inside loop
------------------
1.2741777896881104
1.157618761062622
1.0550930500030518
0.8599765300750732
0.854851484298706
0.8657231330871582
0.862964391708374
0.8841896057128906
0.8415713310241699
0.8669259548187256


In [44]:
# benchmark configuration: rollouts per run and number of simulated opponents
iterations = int(1e5)
opponents = 2
print('------------------')
print('Running mc with sampling, but NO RANKING inside loop')
print('------------------')
# ten repetitions to make run-to-run variance visible
for i in range(10):
    # perf_counter is a monotonic high-resolution clock, unlike the adjustable wall clock behind time.time
    s0 = time.perf_counter()
    mc_no_ranking([1, 41], [18, 19, 16, 20, 24], opponents, iterations)
    print(time.perf_counter() - s0)


------------------
Running mc with sampling, but NO RANKING inside loop
------------------
0.5497863292694092
0.8011584281921387
0.7010242938995361
0.5081372261047363
0.5740370750427246
0.5640580654144287
0.39632630348205566
0.6205101013183594
0.42229604721069336
0.4658069610595703


In [45]:
# benchmark configuration: rollouts per run and number of simulated opponents
iterations = int(1e5)
opponents = 2
print('------------------')
print('Running mc WITHOUT sampling in loop and ranking inside loop')
print('------------------')
# ten repetitions to make run-to-run variance visible
for i in range(10):
    # perf_counter is a monotonic high-resolution clock, unlike the adjustable wall clock behind time.time
    s0 = time.perf_counter()
    mc_no_sampling([1, 41], [18, 19, 16, 20, 24], opponents, iterations)
    print(time.perf_counter() - s0)

------------------
Running mc WITHOUT sampling in loop and ranking inside loop
------------------
0.4269142150878906
0.4056665897369385
0.3944544792175293
0.39946627616882324
0.3929262161254883
0.4023599624633789
0.4066944122314453
0.40704965591430664
0.3846781253814697
0.4032175540924072


In [46]:
# via ThreadPoolExecutor
class HandEvaluator_MonteCarlo:
    """
    Monte Carlo showdown simulator that spreads its rollouts over a thread pool.

    Cards are integers in range(52). A larger rank() value is treated as the stronger hand.
    """

    def mc(self, deck, hero_cards_1d, board_cards_1d, n_opponents, n_iter):
        """
        Runs n_iter rollouts and counts how often the hero wins, loses or ties at showdown.
        :param deck: list with all cards that can still be dealt, i.e. neither hero nor board cards
        :param hero_cards_1d: list with the hero's hole cards; the missing-board-card computation below
         assumes exactly two of them
        :param board_cards_1d: list with five board cards; undealt cards must be trailing placeholder entries
        :param n_opponents: Number of opponents simulated in each rollout.
        :param n_iter: Number of rollouts to run.
        :return: dict with the keys 'won', 'lost' and 'tied' holding the number of rollouts per outcome
        :raises ValueError: if the number of ties in a rollout is inconsistent with n_opponents
        """
        # 52 cards - 2 hero cards - 5 board cards: size of the deck once the board is complete
        deck_size_full_board = 45
        n_missing_board_cards = len(deck) - deck_size_full_board
        # opponent cards are read with a stride of hand_size, the board is filled from the tail of the sample
        cards_to_sample = hand_size * n_opponents + n_missing_board_cards

        won = 0
        lost = 0
        tied = 0

        for _ in range(n_iter):
            # draw board, if not complete already
            drawn_cards_1d = random.sample(deck, cards_to_sample)
            if n_missing_board_cards == 0:
                # explicit branch because a [-0:] slice would select the whole list instead of nothing
                board = board_cards_1d
            else:
                board = board_cards_1d[:-n_missing_board_cards] + drawn_cards_1d[-n_missing_board_cards:]

            # rank hero hand
            hero_hand = hero_cards_1d + board
            hero_rank = rank(*hero_hand)

            # compare hero hand to opponent hands
            player_still_winning = True
            ties = 0
            for opp in range(n_opponents):
                opp_hand = [drawn_cards_1d[hand_size * opp], drawn_cards_1d[hand_size * opp + 1]] + board
                opp_rank = rank(*opp_hand)
                if opp_rank > hero_rank:
                    # a single stronger opponent decides the rollout, no need to rank the others
                    player_still_winning = False
                    break
                elif opp_rank == hero_rank:
                    ties += 1

            # update won/lost/tied stats
            # NOTE(review): a rollout only counts as 'tied' when the hero ties with EVERY opponent; tying with
            # some and beating the rest counts as 'won' -- confirm that split pots are meant to be scored so
            if not player_still_winning:
                lost += 1
            elif ties < n_opponents:
                won += 1
            elif ties == n_opponents:
                tied += 1
            else:
                raise ValueError("Hero can tie against at most n_opponents, not more. Aborting MC Simulation...")
        return {'won': won, 'lost': lost, 'tied': tied}

    def run_mc(self, hero_cards_1d, board_cards_1d, n_opponents, n_iter=1000000):
        """
        Runs n_iter Monte Carlo rollouts, split over five worker threads, and reports the outcome counts.

        The result of every worker is printed; the sum over all workers is returned.
        :param hero_cards_1d: list with the hero's hole cards as integers in range(52)
        :param board_cards_1d: list with five board cards as integers in range(52); undealt cards must be
         trailing entries whose value lies outside range(52) (e.g. -1)
        :param n_opponents: Number of opponents simulated in the MC rollouts.
        :param n_iter: Total number of rollouts over all workers. Default is 1 Million.
        :return: dict with the keys 'won', 'lost' and 'tied' holding the counts summed over all workers
        """

        # https: // github.com / kennethshackleton / SKPokerEval / blob / develop / tests / FiveEval.h
        # every card that is neither held by the hero nor on the board can still be dealt
        deck = [card for card in range(52) if card not in hero_cards_1d and card not in board_cards_1d]

        n_threads = 5
        # hand the remainder out one rollout at a time so that the workers run exactly n_iter rollouts in total
        base_iters, leftover = divmod(int(n_iter), n_threads)
        iters_per_worker = [base_iters + (1 if worker < leftover else 0) for worker in range(n_threads)]

        with concurrent.futures.ThreadPoolExecutor(max_workers=n_threads) as executor:
            # every worker gets its own copy of the deck
            futures = [executor.submit(self.mc, deck.copy(), hero_cards_1d, board_cards_1d, n_opponents, worker_iters)
                       for worker_iters in iters_per_worker]
            # result() blocks until the worker is done and re-raises any exception it hit
            results = [future.result() for future in futures]

        totals = {'won': 0, 'lost': 0, 'tied': 0}
        for result in results:
            print(result)
            for outcome, count in result.items():
                totals[outcome] += count
        return totals


In [47]:
mc = HandEvaluator_MonteCarlo()
# ten repetitions to make run-to-run variance visible
for i in range(10):
    # perf_counter is a monotonic high-resolution clock, unlike the adjustable wall clock behind time.time
    s0 = time.perf_counter()
    # todo check with equilab how many iterations we need for 3% accuracy delta
    mc.run_mc([1, 41], [18, 19, 16, 20, 24], 2, 100000)
    # alternative scenario with no board card dealt yet:
    # mc.run_mc([1, 41], [-1, -1, -1, -1, -1], 2, 100000)
    print(time.perf_counter() - s0)

{'won': 0, 'lost': 0, 'tied': 20000}
{'won': 0, 'lost': 0, 'tied': 20000}
{'won': 0, 'lost': 0, 'tied': 20000}
{'won': 0, 'lost': 0, 'tied': 20000}
{'won': 0, 'lost': 0, 'tied': 20000}
2.9530019760131836
{'won': 0, 'lost': 0, 'tied': 20000}
{'won': 0, 'lost': 0, 'tied': 20000}
{'won': 0, 'lost': 0, 'tied': 20000}
{'won': 0, 'lost': 0, 'tied': 20000}
{'won': 0, 'lost': 0, 'tied': 20000}
2.959632158279419
{'won': 0, 'lost': 0, 'tied': 20000}
{'won': 0, 'lost': 0, 'tied': 20000}
{'won': 0, 'lost': 0, 'tied': 20000}
{'won': 0, 'lost': 0, 'tied': 20000}
{'won': 0, 'lost': 0, 'tied': 20000}
2.821007490158081
{'won': 0, 'lost': 0, 'tied': 20000}
{'won': 0, 'lost': 0, 'tied': 20000}
{'won': 0, 'lost': 0, 'tied': 20000}
{'won': 0, 'lost': 0, 'tied': 20000}
{'won': 0, 'lost': 0, 'tied': 20000}
2.9678897857666016
{'won': 0, 'lost': 0, 'tied': 20000}
{'won': 0, 'lost': 0, 'tied': 20000}
{'won': 0, 'lost': 0, 'tied': 20000}
{'won': 0, 'lost': 0, 'tied': 20000}
{'won': 0, 'lost': 0, 'tied': 20000}
1

In [51]:
import threading
# BASIC THREADS NO POOL
class HandEvaluator_MonteCarlo:
    """
    Monte Carlo showdown simulator that spreads its rollouts over manually managed threads.

    Cards are integers in range(52). A larger rank() value is treated as the stronger hand.
    """

    def __init__(self, n_threads):
        """
        :param n_threads: Number of worker threads started by every run_mc call.
        """
        self._n_threads = n_threads
        # one result slot per worker thread, filled by mc()
        self._result_dicts = [dict() for _ in range(self._n_threads)]
        # worker threads of the most recent run_mc call
        self._threads = []

    def mc(self, id_caller_thread, deck, hero_cards_1d, board_cards_1d, n_opponents, n_iter):
        """
        Runs n_iter rollouts and counts how often the hero wins, loses or ties at showdown.

        The counts are returned and, if id_caller_thread is a valid slot index, also stored in the
        result slot of that worker, because a thread target cannot hand back its return value.
        :param id_caller_thread: index of the result slot that belongs to the calling worker thread
        :param deck: list with all cards that can still be dealt, i.e. neither hero nor board cards
        :param hero_cards_1d: list with the hero's hole cards; the missing-board-card computation below
         assumes exactly two of them
        :param board_cards_1d: list with five board cards; undealt cards must be trailing placeholder entries
        :param n_opponents: Number of opponents simulated in each rollout.
        :param n_iter: Number of rollouts to run.
        :return: dict with the keys 'won', 'lost' and 'tied' holding the number of rollouts per outcome
        :raises ValueError: if the number of ties in a rollout is inconsistent with n_opponents
        """
        # 52 cards - 2 hero cards - 5 board cards: size of the deck once the board is complete
        deck_size_full_board = 45
        n_missing_board_cards = len(deck) - deck_size_full_board
        # opponent cards are read with a stride of hand_size, the board is filled from the tail of the sample
        cards_to_sample = hand_size * n_opponents + n_missing_board_cards

        won = 0
        lost = 0
        tied = 0

        for _ in range(n_iter):
            # draw board, if not complete already
            drawn_cards_1d = random.sample(deck, cards_to_sample)
            if n_missing_board_cards == 0:
                # explicit branch because a [-0:] slice would select the whole list instead of nothing
                board = board_cards_1d
            else:
                board = board_cards_1d[:-n_missing_board_cards] + drawn_cards_1d[-n_missing_board_cards:]

            # rank hero hand
            hero_hand = hero_cards_1d + board
            hero_rank = rank(*hero_hand)

            # compare hero hand to opponent hands
            player_still_winning = True
            ties = 0
            for opp in range(n_opponents):
                opp_hand = [drawn_cards_1d[hand_size * opp], drawn_cards_1d[hand_size * opp + 1]] + board
                opp_rank = rank(*opp_hand)
                if opp_rank > hero_rank:
                    # a single stronger opponent decides the rollout, no need to rank the others
                    player_still_winning = False
                    break
                elif opp_rank == hero_rank:
                    ties += 1

            # update won/lost/tied stats
            # NOTE(review): a rollout only counts as 'tied' when the hero ties with EVERY opponent; tying with
            # some and beating the rest counts as 'won' -- confirm that split pots are meant to be scored so
            if not player_still_winning:
                lost += 1
            elif ties < n_opponents:
                won += 1
            elif ties == n_opponents:
                tied += 1
            else:
                raise ValueError("Hero can tie against at most n_opponents, not more. Aborting MC Simulation...")

        result = {'won': won, 'lost': lost, 'tied': tied}
        # publish the counts for run_mc; ids without a matching slot are ignored so direct calls keep working
        if isinstance(id_caller_thread, int) and 0 <= id_caller_thread < len(self._result_dicts):
            self._result_dicts[id_caller_thread] = result
        return result

    def run_mc(self, hero_cards_1d, board_cards_1d, n_opponents, n_iter=1000000):
        """
        Runs n_iter Monte Carlo rollouts, split over the configured number of threads, and returns the
        outcome counts.

        A worker that dies with an exception leaves its slot empty and contributes nothing to the totals;
        its traceback is reported by the threading module.
        :param hero_cards_1d: list with the hero's hole cards as integers in range(52)
        :param board_cards_1d: list with five board cards as integers in range(52); undealt cards must be
         trailing entries whose value lies outside range(52) (e.g. -1)
        :param n_opponents: Number of opponents simulated in the MC rollouts.
        :param n_iter: Total number of rollouts over all threads. Default is 1 Million.
        :return: dict with the keys 'won', 'lost' and 'tied' holding the counts summed over all threads
        """

        # https: // github.com / kennethshackleton / SKPokerEval / blob / develop / tests / FiveEval.h
        # every card that is neither held by the hero nor on the board can still be dealt
        deck = [card for card in range(52) if card not in hero_cards_1d and card not in board_cards_1d]

        # hand the remainder out one rollout at a time so that the threads run exactly n_iter rollouts in total
        base_iters, leftover = divmod(int(n_iter), self._n_threads)

        # start every call from a clean slate: no stale results and no finished threads from earlier calls
        self._result_dicts = [dict() for _ in range(self._n_threads)]
        self._threads = []

        for i in range(self._n_threads):
            worker_iters = base_iters + (1 if i < leftover else 0)
            # every worker gets its own copy of the deck
            args = (i, deck.copy(), hero_cards_1d, board_cards_1d, n_opponents, worker_iters)
            thread = threading.Thread(target=self.mc, args=args)
            self._threads.append(thread)
            thread.start()
        for thread in self._threads:
            thread.join()

        # all workers are done, so their slots can be read without further synchronisation
        totals = {'won': 0, 'lost': 0, 'tied': 0}
        for result in self._result_dicts:
            for outcome in totals:
                totals[outcome] += result.get(outcome, 0)
        return totals


In [52]:
mc = HandEvaluator_MonteCarlo(5)
# ten repetitions to make run-to-run variance visible
for i in range(10):
    # perf_counter is a monotonic high-resolution clock, unlike the adjustable wall clock behind time.time
    s0 = time.perf_counter()
    # todo check with equilab how many iterations we need for 3% accuracy delta
    mc.run_mc([1, 41], [18, 19, 16, 20, 24], 2, 100000)
    # alternative scenario with no board card dealt yet:
    # mc.run_mc([1, 41], [-1, -1, -1, -1, -1], 2, 100000)
    print(time.perf_counter() - s0)

3.3409080505371094
2.085841655731201
1.7874231338500977
2.777965784072876
1.6215879917144775
1.664315938949585
1.429774522781372
1.714292049407959
2.5805842876434326
3.465304136276245
