In [4]:
import pokerconst as pc
import pokerstrat
from operator import attrgetter
import random
import pokerhands
from sklearn.utils import shuffle
import pdb
from poker_main import *

In [2]:
from utils import *
from strategies import strategy_limper
from time import time

# --- Simulation setup --------------------------------------------------------
# Toggle for per-episode logging.
verbose = False

# Game objects: one deck and two players that both use the limper strategy.
# The third Player argument (1000) is presumably the starting stack -- TODO confirm.
deck = Deck()
players = [
    Player(seat, strategy_limper, 1000, verbose=verbose, name=label)
    for seat, label in enumerate(('SB', 'DH'))
]
board = []
dealer = set_dealer(players)

# Replay memory: a list of transition dicts keyed by 's', 'a', 'r' and "s'".
MEMORY = []

# Game / episode bookkeeping and the timing reference.
new_game = True
episodes = 0
games = {'n': 0, '#episodes': []}
t0 = time()

# Main simulation loop.
# Each iteration plays one episode (a single hand) between the two players and
# stores the transitions of player 0 (the RL agent) in MEMORY. A "game" is a
# sequence of episodes that ends when one player has no chips left.
# The loop never terminates by itself: interrupt the kernel to stop it.
while True:
    # At the beginning of a whole new game (one of the players lost, or it is the
    # first game), both players start with the same amount of money again.
    if new_game:
        games['n'] += 1
        # NOTE(review): on the very first game this appends the initial 0, i.e. an
        # entry for a game that was never played.
        games['#episodes'].append(episodes)
        episodes = 0
        if verbose:
            print('####################'
                  'New game (%s) starts.\n'
                  'Players get cash\n'
                  'Last game lasted %.1f\n'
                  'Memory contains %s transitions\n'
                  '####################' % (str(games), time() - t0, str(len(MEMORY))))
            t0 = time()
        # t0 is only reset in verbose mode, so otherwise this is the time elapsed
        # since the setup cell ran.
        print(len(MEMORY), time()-t0)
        players[0].cash(1000)
        players[1].cash(1000)
        # The next stored transition is the first one of this game: it must not
        # become the s' of the last transition of the previous game.
        first_transition_of_game = True

    # put blinds
    pot = blinds(players, verbose=verbose)

    # shuffle the deck and clear the board
    deck.populate()
    deck.shuffle()
    board = []

    # keep track of the actions of each player for this episode,
    # indexed by betting round and then by player id
    actions = {b_round: {player: [] for player in range(2)} for b_round in range(4)}

    # dramatic events monitoring
    fold_occured = False
    # 0, 1 or 2. If 2, one of the players is all-in and the other is either all-in
    # or has called. In that case, things should be treated differently.
    all_in = 0
    # Reset every episode so that a split pot can never reuse the winner of a
    # previous episode (or hit an undefined name on the very first one).
    winner = None

    # betting rounds
    for b_round in range(4):
        # differentiate the case where players are all-in from the one where none of them is
        if all_in != 2:
            # deal cards
            deal(deck, players, board, b_round, verbose=verbose)
            agreed = False

            # the dealer acts first in round 0, the other player in later rounds
            to_play = dealer if b_round == 0 else 1 - dealer

            while not agreed:
                player = players[to_play]
                action = player.play(board, pot, actions, b_round, players[1 - to_play].stack)

                ##### RL #####
                # Store transitions in memory, only for the agent (player 0).
                if player.id == 0:
                    state_ = [cards_to_array(player.cards), cards_to_array(board), pot, player.stack, players[1].stack,
                              np.array(BLINDS), dealer, actions_to_array(actions)]
                    action_ = action_to_array(action)
                    # chips put in the pot count as an immediate cost
                    reward_ = -action.value
                    transition = {'s': state_, 'a': action_, 'r': reward_}
                    # The current state is the successor of the previously stored
                    # transition, except across two different games. Only the first
                    # transition of a game is skipped; later transitions of the same
                    # episode are still linked.
                    if MEMORY and not first_transition_of_game:
                        MEMORY[-1]["s'"] = state_
                    MEMORY.append(transition)
                    first_transition_of_game = False
                ##############

                pot += action.value
                actions[b_round][player.id].append(action)
                if action.type == 'all in':
                    all_in += 1
                # a call or a bet answering an all-in closes the action
                if (action.type == 'call' or action.type == 'bet') and (all_in == 1):
                    all_in += 1

                # break if fold
                if action.type == 'fold':
                    fold_occured = True
                    winner = 1 - to_play
                    if verbose:
                        print(players[winner].name + ' wins because its opponent folded')
                    break

                # decide if it is the end of the betting round
                agreed = agreement(actions, b_round)
                to_play = 1 - to_play

            # potentially stop the episode
            if fold_occured:
                break
        else:
            # deal all remaining cards
            for j in range(b_round, 4):
                deal(deck, players, board, j, verbose=verbose)
            agreed = True
            state_ = [cards_to_array(players[0].cards), cards_to_array(board), pot, players[0].stack, players[1].stack,
                      np.array(BLINDS), dealer, actions_to_array(actions)]
            # keep track of the new state (guarded: nothing to update if the agent
            # has not stored any transition yet)
            if MEMORY:
                MEMORY[-1]["s'"] = state_

            # end the episode
            break

    # winner gets money and variables are updated
    split = False
    if not fold_occured:
        hand_1 = evaluate_hand(players[1].cards+board)
        hand_0 = evaluate_hand(players[0].cards+board)
        # element [1] of the evaluation is compared to rank the hands,
        # element [0] is what gets printed as the score
        if hand_1[1] == hand_0[1]:
            split = True
        else:
            winner = int(hand_1[1] > hand_0[1])
        if verbose:
            print(players[0].name + ' cards : ' + str(players[0].cards) + ' and score: ' + str(hand_0[0]))
            print(players[1].name + ' cards : ' + str(players[1].cards) + ' and score: ' + str(hand_1[0]))
            if split:
                print('Pot split')
            else:
                print(players[winner].name + ' wins')
    if not split:
        players[winner].stack += pot
        ##### RL #####
        # If the agent won, give it the chips as reward on its last stored transition.
        # NOTE(review): if the agent did not act during this episode, MEMORY[-1]
        # belongs to an earlier episode -- confirm this attribution is intended.
        if winner == 0 and MEMORY:
            MEMORY[-1]['r'] += pot
        ##############
    else:
        pot_0, pot_1 = split_pot(actions, dealer)
        players[0].stack += pot_0
        players[1].stack += pot_1
        ##### RL #####
        if MEMORY:
            MEMORY[-1]['r'] += pot_0
        ##############

    # reset per-episode state and move the dealer button
    pot = 0
    dealer = 1 - dealer
    players[dealer].is_dealer = True
    players[1 - dealer].is_dealer = False
    players[0].cards = []
    players[1].cards = []

    # Is the game finished? It is as soon as one player has no chips left.
    # Checking both stacks avoids depending on `winner`, which is not set
    # when the pot was split.
    new_game = any(p.stack == 0 for p in players)

    episodes += 1

    # @todo: train Q network here


0 0.0
417 0.035126686096191406
611 0.05514883995056152
1607 0.13335657119750977
2370 0.19351482391357422
2547 0.20755219459533691
2667 0.22058868408203125
2841 0.24168133735656738
3807 0.3158402442932129
6693 0.5454528331756592
6783 0.5544750690460205
7145 0.5845551490783691
8242 0.7088868618011475
8571 0.7449829578399658
8885 0.7941150665283203
9233 0.8522679805755615
10256 1.016704797744751
10984 1.0848877429962158
11130 1.0979585647583008
12359 1.2132282257080078
13056 1.2894320487976074
13168 1.29945707321167
13464 1.3265292644500732
14227 1.3876934051513672
15019 1.4538679122924805
15618 1.510019302368164
16463 1.5922374725341797
16609 1.6083154678344727
17149 1.6544032096862793
17615 1.6935069561004639
17892 1.7205793857574463
19032 1.8178365230560303
19517 1.8579444885253906
20153 1.908076524734497
20359 1.9281301498413086
20838 1.974290370941162
21469 2.024385929107666
21809 2.053464412689209
22156 2.082540512084961
22317 2.096618175506592
22499 2.1126224994659424
24458 2.30216

177241 15.788053750991821
177910 15.845157861709595
179212 15.946425437927246
179771 15.994554281234741
179908 16.009607076644897
180281 16.043684244155884
180672 16.074766635894775
180900 16.094820261001587
181361 16.130916118621826
181647 16.154980182647705
182408 16.220155954360962
182882 16.263269662857056
183418 16.30638289451599
183561 16.319417715072632
184101 16.36152982711792
184216 16.371556520462036
184398 16.387599229812622
184497 16.396623134613037
184835 16.423730850219727
185509 16.488906145095825
185684 16.504911184310913
186334 16.556047201156616
186660 16.584158182144165
188107 16.700432777404785
189042 16.782650232315063
189981 16.856883764266968
190299 16.883919715881348
190689 16.922027826309204
191171 16.964170694351196
192113 17.039334535598755
192693 17.086458444595337
193493 17.161664724349976
194013 17.204811096191406
194315 17.229870557785034
194553 17.249893188476562
194845 17.27295470237732
195042 17.29003620147705
195126 17.29902410507202
195531 17.3321120

362292 32.10049104690552
362598 32.131572008132935
363851 32.23187041282654
364134 32.254900217056274
364232 32.263922929763794
364812 32.31104803085327
366580 32.457438945770264
366903 32.48555016517639
367311 32.52160835266113
368043 32.58280658721924
369047 32.67004132270813
369409 32.70810580253601
369522 32.719149589538574
369951 32.75523114204407
370227 32.77829074859619
370638 32.811378717422485
371008 32.84146070480347
371568 32.90563178062439
371694 32.92468214035034
371774 32.935710191726685
372293 32.9888870716095
372524 33.012916803359985
372679 33.0299973487854
373122 33.07508206367493
373228 33.08711338043213
373869 33.1693320274353
374552 33.239558935165405
374964 33.28564190864563
375647 33.35181713104248
376110 33.4109742641449
376389 33.444101095199585
376711 33.48319697380066
377351 33.54533243179321
377536 33.56338143348694
377707 33.583433866500854
378205 33.6395845413208
378443 33.6716685295105
378862 33.71980404853821
379525 33.788981676101685
379686 33.807028055

KeyboardInterrupt: 

In [7]:
# Display player 0 (the agent whose transitions are stored in MEMORY) as it was
# left when the simulation loop was interrupted.
players[0]

SB
1890
9c 3s

In [8]:
# Display player 1 (the opponent) as it was left when the simulation loop was
# interrupted.
players[1]

DH
D
0
10d 7c