In [16]:
import numpy as np

Setting up game state

In [1]:
#cards
max_players = 10
num_players = 2
community_pool = 1 #might consider community cards as "dealer" along with pot
suits = 4
card_ranks = 13

#rounds
max_betting_rounds = 4 #pre-flop, flop, turn, river
max_raises = 4

#betting - won't see this in ACPC server text. Supposed to be known by program
small_blind = 50
big_blind = 100
min_raise = 100 #big blind or minimum of previous raise/bet in same round

stack_sizes = 20000 #could be a list for each player

### Construction of game state

Possibilities: one funnel for cards: [suit x rank x (players + community + dealer) x round] 
and one for actions / positions [players x rounds x max raises]
...or throw it all in one? (players + community + dealer) x (suit x rank) x rounds x max raises. a 5D tensor.

another option: Players (and dealer) x cards x rounds x action

In [2]:
size = 3 * 4 * 13 * 4 * 4 #players x suits x rank x betting rounds x max number of raises 
print(size)

2496


What is a "next" state in poker for Q learning? Is it the next time an action opportunity occurs for the agent? I think yes
but it could also mean next state of the game, not necessarily agent to act. Need to confirm, but I think next-action opportunity makes sense, because otherwise how would you know the maximum best next move from the target model?

In [3]:
#you need to know the "agent" i.e. the hero
hero = 0 #this is for one of the states, can also get the states for players up to num_players
#could assign hero via position, or assign position via hero (agent)... I think I prefer assign position to agent.


#### Parsing Logs

In [4]:
import os
os.listdir()

TypeError: listdir() takes exactly 1 argument (0 given)

In [5]:
logsPath = '2017 ACPC logs example/'
fn = 'Intermission.PokerCNN.1.0.log'

f = open(logsPath + fn, "r") #
lines = f.readlines()
f.close()
lines = [line.strip() for line in lines]

print(lines[0:10])

['# name/game/hands/seed Intermission.PokerCNN.1.0 project_acpc_server/holdem.nolimit.2p.reverse_blinds.game 3000 3937880454', '#--t_response 600000', '#--t_hand 600000', '#--t_per_hand 7200', 'STATE:0:r200c/cr438f:6d2d|3s4d/KhJhTh:-200|200:Intermission_2pn_2017|PokerCNN_2pn_2017', 'STATE:1:r223c/cr383c/r1516f:Jd5c|Js8h/Qd6hQh/3c:383|-383:PokerCNN_2pn_2017|Intermission_2pn_2017', 'STATE:2:cr400f:Jc7c|9c5d:100|-100:Intermission_2pn_2017|PokerCNN_2pn_2017', 'STATE:3:r223f:Kc3d|Ah3h:-100|100:PokerCNN_2pn_2017|Intermission_2pn_2017', 'STATE:4:r261r908c/cr1689r4568f:KdJd|7sAs/JcTs7c:1689|-1689:Intermission_2pn_2017|PokerCNN_2pn_2017', 'STATE:5:r223c/cc/cc/cc:6h6s|As9d/Ks7h4c/6d/8s:223|-223:PokerCNN_2pn_2017|Intermission_2pn_2017']


In [6]:
print(lines[5])

STATE:1:r223c/cr383c/r1516f:Jd5c|Js8h/Qd6hQh/3c:383|-383:PokerCNN_2pn_2017|Intermission_2pn_2017


In [42]:
test_list = ['a',1, 5]
test_list.append(1)
test_list

['a', 1, 5, 1]

In [48]:
test = []
test != [] and test[-1]

False

In [64]:
def parse_betting_round(action_string):
    # 3 examples in a list: ['r223c', 'cr383c', 'r1516f']
    round_actions = []
    bet_size = ''
    for char in action_string:
        if char.isdigit():
            bet_size += char
        else:
            if round_actions != [] and round_actions[-1] == 'r':
                round_actions.append(bet_size)
                bet_size = ''
            if char == 'c':
                round_actions.append('c')
            elif char == 'r':
                round_actions.append('r')
            elif char == 'f':
                round_actions.append('f')
            else:
                raise ValueError("invalid action input")

    return round_actions

In [67]:
parse_betting_round('r20cr1516f')

['r', '20', 'c', 'r', '1516', 'f']

In [76]:
def bet_lists(br_list):
    act = []
    size_to = 0
    round_actions = []
    for a in br_list:
        if a == 'c':
            act = ['c', size_to]
            round_actions.append(act)
            act = []
        elif a == 'f':
            round_actions.append(['f', 0])
        elif a == 'r':
            act = ['r']
        elif a.isdigit():
            size_to = int(a)
            act.append(size_to)
            round_actions.append(act)
            act = []
    return round_actions

In [77]:
bet_lists(parse_betting_round('r20cr1516f')) 

[['r', 20], ['c', 20], ['r', 1516], ['f', 0]]

In [121]:
suits = {'s':0 , 'h': 1, 'd': 2, 'c': 3}
cardRanks =\
{"2": 0,
"3": 1,
"4": 2,
"5":3,
"6":4,
"A":5,
"7":6,
"8":7,
"9":8,
"10":9,
"J":10,
"Q":11,
"K":12,
"A": 13}

def split_cards(card_str):
    #example is 'Jd5c|Js8h/Qd6hQh/3c'
    s = card_str.split('|')
    s = [s[0]]+  s[1].split('/')
    return s

def card_to_nums(card):
    # returns rank, suit
    return [cardRanks[card[0]], suits[card[1]]]

def split_by_card(card_glob):
    result = []
    for i in range(0, len(card_glob), 2):
        result.append(card_glob[i:i+2])
    return result

def parse_cards(card_str):
    #splits cards and returns cards by [round, player, suit, rank] where for now community player = -1
    cards = []
    card_list = split_cards(card_str)
    temp = [card_to_nums(x) for x in split_by_card(card_list[0])]
    cards.append([0,0] + temp[0])
    cards.append([0,0] + temp[1])
    temp = [card_to_nums(x) for x in split_by_card(card_list[1])]
    cards.append([0,1] + temp[0])
    cards.append([0,1] + temp[1])
    
    if len(card_list) > 2:
        for i in range(2,len(card_list)):
            temp = [card_to_nums(x) for x in split_by_card(card_list[i])]
            [cards.append([i-1,-1] + x) for x in temp]
    return cards

print(split_cards('Jd5c|Js8h/Qd6hQh/3c'))
print(card_to_nums('Jd'))
print(split_by_card("Qd6hQh"))
print(parse_cards('Jd5c|Js8h/Qd6hQh/3c'))

['Jd5c', 'Js8h', 'Qd6hQh', '3c']
[10, 2]
['Qd', '6h', 'Qh']
[[0, 0, 10, 2], [0, 0, 3, 3], [0, 1, 10, 0], [0, 1, 7, 1], [1, -1, 11, 2], [1, -1, 4, 1], [1, -1, 11, 1], [2, -1, 1, 3]]


In [128]:
def parse_handLog_line(line):
    '''
    example "STATE:1:r223c/cr383c/r1516f:Jd5c|Js8h/Qd6hQh/3c:383|-383:PokerCNN_2pn_2017|Intermission_2pn_2017"
    see: http://www.computerpokercompetition.org/downloads/documents/protocols/protocol.pdf
    '''
    #line = line.strip() #should already be done in pre-processing
    
    player_pos = [] #player name and player position
    actions = [] #action = [round, player_position, betting round, ante-flag]
    cards = [] # cards = [round, player_position, rank, suit]
    result = [] # result, to be used to check against function that calcs rewards - TODO:DELETE ONCE DEBUGGED
    
    parts = line.split(':')
    #print(parts) 
    #['STATE', '1', 'r223c/cr383c/r1516f', 'Jd5c|Js8h/Qd6hQh/3c', '383|-383', 'PokerCNN_2pn_2017|Intermission_2pn_2017\n']
    
    #player position, earlier in list means first to act
    player_pos = parts[-1].split('|') #['PokerCNN_2pn_2017', 'Intermission_2pn_2017']
    result = parts[-2].split('|') # ['383','-383']
    result = [int(i) for i in result]
    
    cards = parse_cards(parts[3])
    
    actions_by_round = parts[2].split('/') #['r223c', 'cr383c', 'r1516f']
    #action = [bet round, player_position, raising round, action, size_to, ante-flag]

    #note that in first round for heads up, the positions reverse. 
    #essentially, antes are like a forced betting round: cr50r100, then go.
    #Add a few betting rounds for antes?
    ante_action1 = [0, 0, 0,'c',0, 1] # start hand
    ante_action2 = [0, 1, 0,'r',50, 1] #small blind
    ante_action3 = [0, 1, 1,'r', 100, 1] #big blind
    
    actions = [ante_action1, ante_action2, ante_action3]
    no_players = len(player_pos)

    #for each action
    rd = 0
    acting_player = 1
    size_to = 100
    bet_rd = 0 
    
    for betting_round in actions_by_round:
        if bet_rd == 0: raise_rd = 1 
        else: raise_rd = 0  # which turn of betting it is (4 max)
        
        round_actions = [] #to store actions each round
        act = []
        
        betL = bet_lists(parse_betting_round(betting_round))
        
        for bet in betL:
            act = [bet_rd, acting_player, raise_rd, bet[0], bet[1], 0]
            if acting_player == no_players - 1:
                raise_rd += 1
                acting_player = 0
            else: acting_player += 1
            round_actions.append(act)
        
        [actions.append(i) for i in round_actions]
        bet_rd +=1
        acting_player = 0
        
    return player_pos, actions, cards, result 

In [129]:
lines[5]

'STATE:1:r223c/cr383c/r1516f:Jd5c|Js8h/Qd6hQh/3c:383|-383:PokerCNN_2pn_2017|Intermission_2pn_2017'

In [130]:
parse_handLog_line(lines[5])
# first_action_other_than_antes = [0, 1, 1,'r', 223, 1] #how do I get this?

(['PokerCNN_2pn_2017', 'Intermission_2pn_2017'],
 [[0, 0, 0, 'c', 0, 1],
  [0, 1, 0, 'r', 50, 1],
  [0, 1, 1, 'r', 100, 1],
  [0, 1, 1, 'r', 223, 0],
  [0, 0, 2, 'c', 223, 0],
  [1, 0, 0, 'c', 0, 0],
  [1, 1, 0, 'r', 383, 0],
  [1, 0, 1, 'c', 383, 0],
  [2, 0, 0, 'r', 1516, 0],
  [2, 1, 0, 'f', 0, 0]],
 [[0, 0, 10, 2],
  [0, 0, 3, 3],
  [0, 1, 10, 0],
  [0, 1, 7, 1],
  [1, -1, 11, 2],
  [1, -1, 4, 1],
  [1, -1, 11, 1],
  [2, -1, 1, 3]],
 [383, -383])