In [1]:
# Imports
import numpy as np
import pandas as pd
from deuces import Card

In [2]:
# Read the long-format hands table from the pickled dataframe on disk.
hands_long = pd.read_pickle('../data/hands_long.pkl')

# Only the hand/player ids, each player's hole cards and their final evaluation
# (`river_eval_`) are needed from here on, so work on an independent copy of
# just those four columns.
preflop_hands = hands_long.loc[
    :, ['hand_id', 'player_id', 'hole_', 'river_eval_']
].copy()
# show the first few rows
preflop_hands.head()

Unnamed: 0,hand_id,player_id,hole_,river_eval_
0,0,0,"[533255, 67144223]",2734
1,1,0,"[4212241, 164099]",2578
2,2,0,"[268454953, 8394515]",5977
3,3,0,"[8406803, 8398611]",2516
4,4,0,"[16787479, 279045]",3346


In [3]:
# The exact hole cards carry more detail than we need: reduce each hand to the
# unordered set of its card ranks plus a flag for whether the first two cards
# share a suit. Because the ranks go into a frozenset, a pocket pair collapses
# to a one-element set. The raw `hole_` column is dropped once both features
# have been derived from it.
preflop_hands = (
    preflop_hands
    .assign(
        rank_set=lambda df: df['hole_'].apply(
            lambda hole: frozenset(Card.get_rank_int(c) for c in hole)
        ),
        suited=lambda df: df['hole_'].apply(
            lambda hole: Card.get_suit_int(hole[0]) == Card.get_suit_int(hole[1])
        ),
    )
    .drop(columns=['hole_'])
)
# show the first few rows
preflop_hands.head()

Unnamed: 0,hand_id,player_id,river_eval_,rank_set,suited
0,0,0,2734,"(10, 3)",False
1,1,0,2578,"(1, 6)",False
2,2,0,5977,"(12, 7)",False
3,3,0,2516,(7),False
4,4,0,3346,"(8, 2)",False


In [4]:
# Enumerate every preflop situation we want to account for: the table size
# (2 to 9 players, ourselves included) crossed with all 13 x 13 ordered pairs of
# rank ints (0 to 12, i.e. 2 to A).
#
# The ordering of the pair is used as a trick to encode suitedness: each
# unordered rank combination {a, b} with a != b is visited twice, and we label
# the visit where first < second as suited and the other one as offsuit. Equal
# ranks (pocket pairs) are visited once and are always offsuit. Storing the
# ranks as a frozenset makes the combination order-independent and hashable.
preflop_conditions = pd.DataFrame([
    {
        'players': n_players,
        'rank_set': frozenset((first, second)),
        'suited': first < second,
    }
    for n_players in range(2, 10)
    for first in range(13)
    for second in range(13)
])
# show the first few rows
preflop_conditions.head()

Unnamed: 0,players,rank_set,suited
0,2,(0),False
1,2,"(0, 1)",True
2,2,"(0, 2)",True
3,2,"(0, 3)",True
4,2,"(0, 4)",True


In [5]:
# Attach every table-size condition to every hand by joining on the hand's rank
# combination and suitedness. `preflop_conditions` is expected to hold each
# (rank_set, suited) key once per player count, so every hand row should fan
# out into one row per player count.

# This cell rebinds `preflop_hands`, so running it a second time would merge the
# already-merged frame again (multiplying the rows once more and producing
# `players_x` / `players_y`). Fail loudly instead of silently corrupting it.
if 'players' in preflop_hands.columns:
    raise RuntimeError(
        "preflop_hands already has a 'players' column; re-run the earlier cells "
        "before merging again"
    )

hands_before_merge = len(preflop_hands)
preflop_hands = preflop_hands.merge(
    preflop_conditions,
    how='inner',  # explicit: hands whose key has no matching condition are dropped
    on=['rank_set', 'suited'],
)

# we should have 900k * 8 = 7.2 million rows now; verify the fan-out instead of
# trusting it, since an inner merge would drop unmatched hands without warning
expected_rows = hands_before_merge * preflop_conditions['players'].nunique()
assert len(preflop_hands) == expected_rows, (
    f"expected {expected_rows} rows after the merge, got {len(preflop_hands)}"
)
preflop_hands.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7200000 entries, 0 to 7199999
Data columns (total 6 columns):
 #   Column       Dtype 
---  ------       ----- 
 0   hand_id      int64 
 1   player_id    int64 
 2   river_eval_  int64 
 3   rank_set     object
 4   suited       bool  
 5   players      int64 
dtypes: bool(1), int64(4), object(1)
memory usage: 281.5+ MB
