In [78]:
import re
import pandas as pd
import numpy as np
from IPython.display import display, HTML

test = """PokerStars Hand #239343378447:  Fusion Pot Limit (€0.01/€0.02 EUR) - 2022/10/20 17:37:41 CET [2022/10/20 11:37:41 ET]
Table 'Tucana II' 6-max Seat #5 is the button
Seat 2: kaenah033016 (€1.42 in chips) 
Seat 4: elodie2002 (€1.19 in chips) 
Seat 5: jeje_sat (€1.87 in chips) 
kaenah033016: posts small blind €0.01
elodie2002: posts big blind €0.02
*** HOLE CARDS ***
Dealt to jeje_sat [3c Tc]
jeje_sat: folds 
kaenah033016: calls €0.01
elodie2002: checks 
*** FLOP *** [Jc 6d 7c]
kaenah033016: bets €0.02
elodie2002: calls €0.02
*** TURN *** [Jc 6d 7c] [5d]
kaenah033016: bets €0.02
elodie2002: calls €0.02
*** RIVER *** [Jc 6d 7c 5d] [5h]
kaenah033016: bets €0.02
elodie2002: calls €0.02
*** SHOW DOWN ***
kaenah033016: shows [7s 5c Kd Kh] (a full house, Fives full of Sevens)
elodie2002: mucks hand 
kaenah033016 collected €0.15 from pot
*** SUMMARY ***
Total pot €0.16 | Rake €0.01 
Board [Jc 6d 7c 5d 5h]
Seat 2: kaenah033016 (small blind) showed [7s 5c Kd Kh] and won (€0.15) with a full house, Fives full of Sevens
Seat 4: elodie2002 (big blind) mucked [Js 9d 6s Qc]
Seat 5: jeje_sat (button) folded before Flop (didn't bet)"""
    

substitutions = {
                     'LEGAL_ISO' : "USD|EUR|GBP|CAD|FPP|SC|INR|CNY",      # legal ISO currency codes
                            'LS' : u"\$|\xe2\x82\xac|\u20ac|\£|\u20b9|\¥|", # legal currency symbols - Euro(cp1252, utf-8)
                           'PLYR': r'\s?(?P<PNAME>.+?)',
                            'CUR': u"(\$|\xe2\x82\xac|\u20ac|€|\£|\u20b9|\¥|)",
                          'BRKTS': r'(\(button\) |\(small blind\) |\(big blind\) |\(button blind\) |\(button\) \(small blind\) |\(small blind\) \(button\) |\(big blind\) \(button\) |\(small blind/button\) |\(button\) \(big blind\) )?',
                    }

re_GameInfo = re.compile(u"""(?P<SITE>PokerStars|POKERSTARS|Hive\sPoker|Full\sTilt|PokerMaster|Run\sIt\sOnce\sPoker|BetOnline|PokerBros)(?P<TITLE>\sGame|\sHand|\sHome\sGame|\sHome\sGame\sHand|Game|\s(Zoom|Rush)\sHand|\sGAME)\s\#(?P<HID>[0-9]+):\s+
          (\{.*\}\s+)?((?P<TOUR>((Zoom|Rush)\s)?(Tournament|TOURNAMENT))\s\#                # open paren of tournament info
          (?P<TOURNO>\d+),\s(Table\s\#(?P<HIVETABLE>\d+),\s)?
          # here's how I plan to use LS
          (?P<BUYIN>(?P<BIAMT>[%(LS)s\d\.]+)?\+?(?P<BIRAKE>[%(LS)s\d\.]+)?\+?(?P<BOUNTY>[%(LS)s\d\.]+)?\s?(?P<TOUR_ISO>%(LEGAL_ISO)s)?|Freeroll|)(\s+)?(-\s)?
          (\s.+?,)?
          )?
          # close paren of tournament info
          (?P<MIXED>HORSE|8\-Game|8\-GAME|HOSE|Mixed\sOmaha\sH/L|Mixed\sHold\'em|Mixed\sPLH/PLO|Mixed\sNLH/PLO|Mixed\sOmaha|Triple\sStud)?\s?\(?
          (?P<SPLIT>Split)?\s?
          (?P<GAME>Hold\'em|HOLD\'EM|Hold\'em|6\+\sHold\'em|Razz|RAZZ|Fusion|7\sCard\sStud|7\sCARD\sSTUD|7\sCard\sStud\sHi/Lo|7\sCARD\sSTUD\sHI/LO|Omaha|OMAHA|Omaha\sHi/Lo|OMAHA\sHI/LO|Badugi|Triple\sDraw\s2\-7\sLowball|Single\sDraw\s2\-7\sLowball|5\sCard\sDraw|(5|6)\sCard\sOmaha(\sHi/Lo)?|Courchevel(\sHi/Lo)?)\s
          (?P<LIMIT>No\sLimit|NO\sLIMIT|Fixed\sLimit|Limit|LIMIT|Pot\sLimit|POT\sLIMIT|Pot\sLimit\sPre\-Flop,\sNo\sLimit\sPost\-Flop)\)?,?\s
          (-\s)?
          (?P<SHOOTOUT>Match.*,\s)?
          ((Level|LEVEL)\s(?P<LEVEL>[IVXLC\d]+)\s)?
          \(?                            # open paren of the stakes
          (?P<CURRENCY>%(LS)s|)?
          (ante\s\d+,\s)?
          ((?P<SB>[.0-9]+)/(%(LS)s)?(?P<BB>[.0-9]+)|Button\sBlind\s(?P<CURRENCY1>%(LS)s|)(?P<BUB>[.0-9]+)\s\-\sAnte\s(%(LS)s)?[.0-9]+\s)
          (?P<CAP>\s-\s[%(LS)s]?(?P<CAPAMT>[.0-9]+)\sCap\s-\s)?        # Optional Cap part
          \s?(?P<ISO>%(LEGAL_ISO)s)?
          \)                        # close paren of the stakes
          (?P<BLAH2>\s\[AAMS\sID:\s[A-Z0-9]+\])?         # AAMS ID: in .it HH's
          \s-\s
          (?P<DATETIME>.*$)  """ %  substitutions, re.MULTILINE|re.VERBOSE)

m = re_GameInfo.search(test)
if m != None:

    mg = m.groupdict()

    df = pd.DataFrame.from_dict(mg, orient='index')
    df = df.transpose()
    display(HTML(df.to_html()))

Unnamed: 0,SITE,TITLE,HID,TOUR,TOURNO,HIVETABLE,BUYIN,BIAMT,BIRAKE,BOUNTY,TOUR_ISO,MIXED,SPLIT,GAME,LIMIT,SHOOTOUT,LEVEL,CURRENCY,SB,BB,CURRENCY1,BUB,CAP,CAPAMT,ISO,BLAH2,DATETIME
0,PokerStars,Hand,239343378447,,,,,,,,,,,Fusion,Pot Limit,,,€,0.01,0.02,,,,,EUR,,2022/10/20 17:37:41 CET [2022/10/20 11:37:41 ET]


In [79]:
re_PlayerInfo = re.compile(u"""\s?Seat\s(?P<SEAT>[0-9]+):\s
          (?P<PNAME>.*)\s
          \((%(LS)s)?(?P<CASH>[,.0-9]+)\sin\schips
          (,\s(%(LS)s)?(?P<BOUNTY>[,.0-9]+)\sbounty)?
          \)
          (?P<SITOUT>\sis\ssitting\sout)?  """ %  substitutions, re.MULTILINE|re.VERBOSE)

m = re_PlayerInfo.search(test)
if m != None:

    mg = m.groupdict()

    df = pd.DataFrame.from_dict(mg, orient='index')
    df = df.transpose()
    display(HTML(df.to_html()))

Unnamed: 0,SEAT,PNAME,CASH,BOUNTY,SITOUT
0,2,kaenah033016,1.42,,


In [80]:
re_HandInfo = re.compile(u"""\s?Table\s(ID\s)?\'(?P<TABLE>.+?)\'\s
          ((?P<MAX>\d+)-[Mm]ax\s)?
          (?P<PLAY>\(Play\sMoney\)\s)?
          (Seat\s\#(?P<BUTTON>\d+)\sis\sthe\sbutton)?  """ %  substitutions, re.MULTILINE|re.VERBOSE)

m = re_HandInfo.search(test)
if m != None:

    mg = m.groupdict()

    df = pd.DataFrame.from_dict(mg, orient='index')
    df = df.transpose()
    display(HTML(df.to_html()))

Unnamed: 0,TABLE,MAX,PLAY,BUTTON
0,Tucana II,6,,5


In [81]:
re_Identify = re.compile(u"""(PokerStars|POKERSTARS|Hive\sPoker|Full\sTilt|PokerMaster|Run\sIt\sOnce\sPoker|BetOnline|PokerBros)(\sGame|\sHand|\sHome\sGame|\sHome\sGame\sHand|Game|\s(Zoom|Rush)\sHand|\sGAME)\s\#\d+: """ %  substitutions, re.MULTILINE|re.VERBOSE)

m = re_GameInfo.search(test)
if m != None:

    mg = m.groupdict()

    df = pd.DataFrame.from_dict(mg, orient='index')
    df = df.transpose()
    display(HTML(df.to_html()))

Unnamed: 0,SITE,TITLE,HID,TOUR,TOURNO,HIVETABLE,BUYIN,BIAMT,BIRAKE,BOUNTY,TOUR_ISO,MIXED,SPLIT,GAME,LIMIT,SHOOTOUT,LEVEL,CURRENCY,SB,BB,CURRENCY1,BUB,CAP,CAPAMT,ISO,BLAH2,DATETIME
0,PokerStars,Hand,239343378447,,,,,,,,,,,Fusion,Pot Limit,,,€,0.01,0.02,,,,,EUR,,2022/10/20 17:37:41 CET [2022/10/20 11:37:41 ET]


In [82]:
re_Button = re.compile(u"""Seat\s\#(?P<BUTTON>\d+)\sis\sthe\sbutton
                        """ %  substitutions, re.MULTILINE|re.VERBOSE)

m = re_Button.search(test)
if m != None:

    mg = m.groupdict()

    df = pd.DataFrame.from_dict(mg, orient='index')
    df = df.transpose()
    display(HTML(df.to_html()))

Unnamed: 0,BUTTON
0,5


In [83]:
re_Board = re.compile(u"""\[(?P<CARDS>.+)\]
                        """ %  substitutions, re.MULTILINE|re.VERBOSE)

m = re_Board.search(test)
if m != None:

    mg = m.groupdict()

    df = pd.DataFrame.from_dict(mg, orient='index')
    df = df.transpose()
    display(HTML(df.to_html()))

Unnamed: 0,CARDS
0,2022/10/20 11:37:41 ET


In [85]:
re_Board2       = re.compile(r"\[(?P<C1>\S\S)\] \[(\S\S)?(?P<C2>\S\S) (?P<C3>\S\S)\]")

m = re_Board2.search(test)
if m != None:

    mg = m.groupdict()

    df = pd.DataFrame.from_dict(mg, orient='index')
    df = df.transpose()
    display(HTML(df.to_html()))

In [86]:
re_DateTime1 = re.compile(u"""(?P<Y>[0-9]{4})\/(?P<M>[0-9]{2})\/(?P<D>[0-9]{2})[\- ]+(?P<H>[0-9]+):(?P<MIN>[0-9]+):(?P<S>[0-9]+)
                        """ %  substitutions, re.MULTILINE|re.VERBOSE)

m = re_DateTime1.search(test)
if m != None:

    mg = m.groupdict()

    df = pd.DataFrame.from_dict(mg, orient='index')
    df = df.transpose()
    display(HTML(df.to_html()))

Unnamed: 0,Y,M,D,H,MIN,S
0,2022,10,20,17,37,41


In [87]:
re_DateTime2 = re.compile(u"""(?P<Y>[0-9]{4})\/(?P<M>[0-9]{2})\/(?P<D>[0-9]{2})[\- ]+(?P<H>[0-9]+):(?P<MIN>[0-9]+)
                        """ %  substitutions, re.MULTILINE|re.VERBOSE)

m = re_DateTime2.search(test)
if m != None:

    mg = m.groupdict()

    df = pd.DataFrame.from_dict(mg, orient='index')
    df = df.transpose()
    display(HTML(df.to_html()))

Unnamed: 0,Y,M,D,H,MIN
0,2022,10,20,17,37


In [88]:
re_Action = re.compile(u"""%(PLYR)s:(?P<ATYPE>\sbets|\schecks|\sraises|\scalls|\sfolds|\sdiscards|\sstands\spat)
                        (\s%(CUR)s(?P<BET>[,.\d]+))?(\sto\s%(CUR)s(?P<BETTO>[,.\d]+))?  # the number discarded goes in <BET>
                        \s*(and\sis\sall.in)?
                        (and\shas\sreached\sthe\s[%(CUR)s\d\.,]+\scap)?
                        (\son|\scards?)?
                        (\s\(disconnect\))?
                        (\s\[(?P<CARDS>.+?)\])?\s*$
                        """ %  substitutions, re.MULTILINE|re.VERBOSE)

m = re_Action.search(test)
if m != None:

    mg = m.groupdict()

    df = pd.DataFrame.from_dict(mg, orient='index')
    df = df.transpose()
    display(HTML(df.to_html()))

Unnamed: 0,PNAME,ATYPE,BET,BETTO,CARDS
0,jeje_sat,folds,,,


In [89]:
re_ShowdownAction = re.compile(u"""%s: shows \[(?P<CARDS>.*)\]""" % substitutions['PLYR'], re.MULTILINE)
m = re_ShowdownAction.search(test)
if m != None:

    mg = m.groupdict()

    df = pd.DataFrame.from_dict(mg, orient='index')
    df = df.transpose()
    display(HTML(df.to_html()))

Unnamed: 0,PNAME,CARDS
0,kaenah033016,7s 5c Kd Kh


In [90]:
re_CollectPot = re.compile(u"Seat (?P<SEAT>[0-9]+): %(PLYR)s %(BRKTS)s(collected|showed \[.*\] and (won|collected)) \(?%(CUR)s(?P<POT>[,.\d]+)\)?(, mucked| with.*|)" %  substitutions, re.MULTILINE)
m = re_CollectPot.search(test)
if m != None:

    mg = m.groupdict()

    df = pd.DataFrame.from_dict(mg, orient='index')
    df = df.transpose()
    display(HTML(df.to_html()))

Unnamed: 0,SEAT,PNAME,POT
0,2,kaenah033016,0.15


In [91]:
re_CollectPot2      = re.compile(r"%(PLYR)s (collected|cashed out the hand for) %(CUR)s(?P<POT>[,.\d]+)" %  substitutions, re.MULTILINE)
if m != None:

    mg = m.groupdict()

    df = pd.DataFrame.from_dict(mg, orient='index')
    df = df.transpose()
    display(HTML(df.to_html()))

Unnamed: 0,SEAT,PNAME,POT
0,2,kaenah033016,0.15


In [92]:
re_CashedOut = re.compile(r"cashed\sout\sthe\shand")

m = re_CashedOut.search(test)
if m != None:

    mg = m.groupdict()

    df = pd.DataFrame.from_dict(mg, orient='index')
    df = df.transpose()
    display(HTML(df.to_html()))

In [93]:
re_WinningRankOne = re.compile(u"%(PLYR)s wins the tournament and receives %(CUR)s(?P<AMT>[,\.0-9]+) - congratulations!$" %  substitutions, re.MULTILINE)

m = re_WinningRankOne.search(test)
if m != None:

    mg = m.groupdict()

    df = pd.DataFrame.from_dict(mg, orient='index')
    df = df.transpose()
    display(HTML(df.to_html()))

In [94]:
re_WinningRankOther = re.compile(u"%(PLYR)s finished the tournament in (?P<RANK>[0-9]+)(st|nd|rd|th) place and received %(CUR)s(?P<AMT>[,.0-9]+)\.$" %  substitutions, re.MULTILINE)

m = re_WinningRankOther.search(test)
if m != None:

    mg = m.groupdict()

    df = pd.DataFrame.from_dict(mg, orient='index')
    df = df.transpose()
    display(HTML(df.to_html()))

In [95]:
re_Uncalled   = re.compile('Uncalled bet \(%(CUR)s(?P<BET>[,.\d]+)\) returned to' %  substitutions, re.MULTILINE)

m = re_Uncalled.search(test)
if m != None:

    mg = m.groupdict()

    df = pd.DataFrame.from_dict(mg, orient='index')
    df = df.transpose()
    display(HTML(df.to_html()))

In [96]:
re_Rake             = re.compile(u"""
                        Total\spot\s%(CUR)s(?P<POT>[,\.0-9]+)(.+?)?\s\|\sRake\s%(CUR)s(?P<RAKE>[,\.0-9]+)"""
                         %  substitutions, re.MULTILINE|re.VERBOSE)

m = re_Rake.search(test)
if m != None:

    mg = m.groupdict()

    df = pd.DataFrame.from_dict(mg, orient='index')
    df = df.transpose()
    display(HTML(df.to_html()))

Unnamed: 0,POT,RAKE
0,0.16,0.01


In [97]:
re_STP             = re.compile(u"""
                        STP\sadded:\s%(CUR)s(?P<AMOUNT>[,\.0-9]+)"""
                         %  substitutions, re.MULTILINE|re.VERBOSE)

m = re_STP.search(test)
if m != None:

    mg = m.groupdict()

    df = pd.DataFrame.from_dict(mg, orient='index')
    df = df.transpose()
    display(HTML(df.to_html()))

In [120]:
player_re = "jeje_sat"
subst = {
                'PLYR': player_re,
                'BRKTS': r'(\(button\) |\(small blind\) |\(big blind\) |\(button\) \(small blind\) |\(button\) \(big blind\) )?',
                'CUR': u"(\$|\xe2\x82\xac|\u20ac||\£|)"
            }
re_HeroCards = re.compile(r"Dealt to %(PLYR)s(?: \[(?P<OLDCARDS>.+?)\])?( \[(?P<NEWCARDS>.+?)\])" % subst, re.MULTILINE)



m = re_HeroCards.search(test)
if m != None:

    mg = m.groupdict()

    df = pd.DataFrame.from_dict(mg, orient='index')
    df = df.transpose()
    display(HTML(df.to_html()))

Unnamed: 0,OLDCARDS,NEWCARDS
0,,3c Tc
