In [2]:
# See [https://www.kaggle.com/code/camillahorne/poker-data-extraction]

# Generate input (x) and output (Y) data from the Poker Hold'Em Games dataset
# for training and validating a machine learning model.

The game state will be defined by the following parameters:

Game Data
- Players
- Pot
- Button

Player Data
- Money
- Bet
- Hand
    - Card 1
    - Card 2

Opponent Data x8
- Money
- Action (Fold | Call | Raise | All-In)
- Bet
- Hand
    - Card 1
    - Card 2

In [3]:
import pandas as pd
import os
import re
from datetime import datetime
import json
import glob

In [11]:
def GetGamesData(dataPattern='*/Data/'):
    """Collect the raw text fields of every Hold'em game found in the data files.

    Parameters
    ----------
    dataPattern : str, optional
        Glob pattern naming the root directories to search. ``os.walk`` does
        not expand wildcards on its own, so the pattern is first expanded with
        ``glob.glob`` and each matching directory is then walked recursively.

    Returns
    -------
    list of tuple of str
        One 9-tuple per matched game, in regex group order: start time,
        game ID, button seat, the block of ``Seat ...`` lines, the text
        between the seat lines and the summary marker, the ``Pot: ...`` line,
        the ``Board: ...`` line ('' when absent), the text between that line
        and ``Game ended at:``, and the end time.

    Side effects
    ------------
    Prints the total number of games found.
    """
    # Compiled once instead of being re-resolved for every file.
    gameRe = re.compile(r'Game started at: (.+)\nGame ID: (\d+).+\(Hold\'em\)\nSeat (\d+) is the button\n((?:Seat .+\n)+)([^$]+?)------ Summary ------\n(Pot: .+?)\n(?:(Board: .+?)\n)?([^$]+?)Game ended at: (.+)')
    result = []

    # Expand the wildcard explicitly; sorting fixes the order of the roots.
    for root in sorted(glob.glob(dataPattern)):
        for dirname, _, filenames in os.walk(root):
            for filename in filenames:
                # The context manager closes the file even if reading fails.
                with open(os.path.join(dirname, filename), 'r') as file:
                    content = file.read()

                result += gameRe.findall(content)

    print ("Total of", len(result), "games")

    return (result)

games = GetGamesData()

Total of 52156 games


In [14]:
# strptime/strftime layout used for timestamps: year/month/day hour:minute:second.
datetimeFormat = '%Y/%m/%d %H:%M:%S'
# "Seat <n>: <text> (<number>)" -> groups: seat number, the text after the colon,
# and the parenthesised integer or decimal number.
seatRe = re.compile(r"Seat (\d+): (.+) \((\d+(?:\.\d+)?)\)")
# "Pot: <number> ... Rake <number>[ ... JP fee <number>]" -> groups: pot, rake,
# and the JP fee (None when that part is absent).
betRe = re.compile(r"Pot: (\d+(?:\.\d+)?).+Rake (\d+(?:\.\d+)?)(?:.+JP fee (\d+(?:\.\d+)?))?")
# Captures whatever sits between a pair of square brackets (may be empty).
boardRe = re.compile(r"\[([^]]*)\]")
# Captures the run of non-space characters that follows "Player ".
playerNameRe = re.compile(r"Player ([^ ]+)")
# "Player <...> <word> (<number>)" -> groups: the last space-delimited word before
# the parenthesised number, and the number itself.
# NOTE(review): `[^ ]+` is greedy, so the trailing `s?` never strips a final "s"
# (e.g. the captured word is "raises", not "raise") - confirm this is intended.
actionRe = re.compile(r"Player .+ ([^ ]+)s? \((\d+(?:\.\d+)?)\)")
# Captures a parenthesised number preceded by at least one character; the greedy
# `.+` makes this the last such number on the line.
endingNumberRe = re.compile(r".+\((\d+(?:\.\d+)?)\)")
# Captures the non-empty content of a square-bracketed group; the greedy `.*`
# makes this the last such group on the line.
cardsRe = re.compile(r".*\[([^\]]+)\]")
# Per-player summary line -> groups: optional bracketed text, Bets, Collects,
# optional Wins, optional Loses (optional groups are None when absent).
winningRe = re.compile(r"Player .+?(?: \[(.+)\])?\)?\. ?Bets: (\d+(?:\.\d+)?)\. Collects: (\d+(?:\.\d+)?)\.(?: Wins: (\d+(?:\.\d+)?)\.)?(?: Loses: (\d+(?:\.\d+)?)\.)?")

In [15]:
def GetPlayerName(line):
    """Return the token that follows "Player " in ``line``.

    The lookup uses the module-level ``playerNameRe`` pattern; an empty
    string is returned when the pattern is not found anywhere in the line.
    """
    found = playerNameRe.search(line)
    return found.group(1) if found is not None else ""

def GetDuration(startTime, endTime):
    """Return the number of whole seconds from ``startTime`` to ``endTime``.

    Both arguments are strings parsed with the module-level ``datetimeFormat``.
    The elapsed time is truncated to an int, and is negative when ``endTime``
    precedes ``startTime``.
    """
    began, finished = (
        datetime.strptime(stamp, datetimeFormat) for stamp in (startTime, endTime)
    )
    elapsed = finished - began
    return int(elapsed.total_seconds())

def GetAsFloat(value):
    """Convert ``value`` to a float, treating any falsy value as zero.

    Falsy inputs such as ``None``, ``""`` or ``0`` give ``0.0``; anything
    else is passed to ``float()``.
    """
    if not value:
        return float(0)

    return float(value)

In [22]:
# Card rank label -> numeric value: the labels '1'..'10' map to themselves,
# followed by the face cards and the ace.
cards = {str(rank): rank for rank in range(1, 11)}
cards.update({'J': 11, 'Q': 12, 'K': 13, 'A': 14})

# Suit letter -> numeric code.
suits = {
    'h': 1,
    'd': 2,
    'c': 3,
    's': 4,
}

# Action label -> numeric code.
actions = {
    'Fold': 1,
    'Call': 2,
    'Raise': 3,
    'All-In': 4,
}

def ClearDataframe():
    """Rebind the globals ``gameState`` and ``action`` to empty DataFrames.

    ``gameState`` gets the game, player and first-opponent (``P1_*``) columns;
    ``action`` gets the ``action`` and ``bet`` columns. Any rows previously
    held by either global are discarded. Returns None.
    """
    global gameState, action

    stateColumns = (
        'numPlayers', 'button', 'pot',
        'position', 'money', 'stake', 'card1', 'card2',
        'P1_money', 'P1_action', 'P1_bet',
    )
    actionColumns = ('action', 'bet')

    # Each column starts from its own empty list, so both frames have zero rows.
    gameState = pd.DataFrame({column: [] for column in stateColumns})
    action = pd.DataFrame({column: [] for column in actionColumns})

Game Data
- Players
- Button
- Pot

Player Data
- Position
- Money
- Bet
- Hand
    - Card 1
    - Card 2

Opponent Data x9?
- Money
- Action (Fold | Call | Raise | All-In)
- Bet
- (Hand not shown)