# Poker Bot

Poker Bot built using TensorFlow. Trained with reinforcement learning using the PyPokerEngine library.

### Building the Poker AI

In [9]:
# Packages to install
# pip install PyPokerEngine
# pip install pyyaml h5py  # Required to save models in HDF5 format

In [14]:
import os
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import collections
import collections.abc
import h5py
from tensorflow import keras


from tensorflow.keras import layers, losses
from tensorflow.keras.layers import Dense, Flatten, Reshape, LeakyReLU
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam, RMSprop
from collections import Counter
from datetime import datetime
import keras
import keras.callbacks
from keras.callbacks import TensorBoard

In [3]:
from pypokerengine.players import BasePokerPlayer
from pypokerengine.api.emulator import Emulator
from pypokerengine.utils.game_state_utils import restore_game_state

In [4]:
# Notes:

# use tf.keras.callbacks.ModelCheckpoint to continually 
# save the model both during and at the end of training.
# https://www.tensorflow.org/tutorials/keras/save_and_load

In this implementation, the feature vector's length is constant for every game state. The community cards are represented as 5 pairs of suit and rank features, with placeholders for missing cards. The action histories are aggregated into a fixed number of features.

In [8]:
# Load saved model or create new
load_saved_model = False
model_pathname = 'pathname'

In [9]:
# Define neural network architecture
def create_model(input_shape):
    if load_saved_model and os.path.isfile(model_pathname):
        model = model.load_model(model_pathname)
    else:
        model = tf.keras.Sequential([
            tf.keras.layers.Dense(128, activation='relu', input_shape=input_shape),
            tf.keras.layers.Dropout(0.5),
            tf.keras.layers.Dense(64, activation='relu'),
            tf.keras.layers.Dense(32, activation='relu'),
            tf.keras.layers.Dense(3, activation='softmax')  # Assuming simple output (fold, call, or raise)
        ])
        model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [10]:
# Helper methods
def flatten(x):
        if isinstance(x, collections.Iterable):
            return [a for i in x for a in flatten(i)]
        else:
            return [x]

In [44]:
class PokerBot(BasePokerPlayer):
    
    # TODO: Read some saved state of the model to allow reinforcement learning over time
    def __init__(self):
        # Initialize model
        self.model = create_model((34,))  # Input shape, adjust based on features
    
    # TODO: Look at Emulator implementation to make the model work with reinforcement learning
    # Make an action based on the model output
    def declare_action(self, valid_actions, hole_card, round_state):
        # Prepare feature vector based on the game state
        feature_vector = self._extract_features(hole_card, round_state)
        print("input size: " + str(len(feature_vector)))
        print("input shape: " + str(feature_vector.shape))
        
        
        # Use the model to predict the action
        action_probs = self.model.predict(feature_vector).flatten()
        outcome = np.argmax(action_probs)
        print("action_probs length: " + str(len(action_probs)))
        print(action_probs)
        print("action argmax: " + str(outcome))
        action_info = valid_actions[outcome]
        action = action_info["action"]
        if outcome == 2:
            # Scale raise to the confidence of the model
            amount = action_info["amount"]["min"] + math.floor((action_info["amount"]["max"] - action_info["amount"]["min"]) * action_probs[outcome])
        else:
            amount = action_info["amount"]
        
        return action, amount
    
    # Setup Emulator object by registering game information
    def receive_game_start_message(self, game_info):
        return
        
        # Emulator skeleton code
        player_num = game_info["player_num"]
        max_round = game_info["rule"]["max_round"]
        small_blind_amount = game_info["rule"]["small_blind_amount"]
        ante_amount = game_info["rule"]["ante"]
        blind_structure = game_info["rule"]["blind_structure"]
        
        self.emulator = Emulator()
        self.emulator.set_game_rule(player_num, max_round, small_blind_amount, ante_amount)
        self.emulator.set_blind_structure(blind_structure)
        
        # Register algorithm of each player which used in the simulation.
        for player_info in game_info["seats"]["players"]:
            self.emulator.register_player(player_info["uuid"], PokerBot())

    def receive_round_start_message(self, round_count, hole_card, seats):
        pass

    def receive_street_start_message(self, street, round_state):
        pass

    def receive_game_update_message(self, new_action, round_state):
        pass
    
    def receive_round_result_message(self, winners, hand_info, round_state):
        pass
    
    # Additional methods
    
    # Produce a feature vector of length 34
    def _extract_features(self, hole_card, round_state):
        # 4 Features from hole cards
        hole_card_features = [self._card_to_feature(card) for card in hole_card]

        # 10 Features from community cards (always represent 5 cards)
        community_cards = round_state['community_card'] + [None] * (5 - len(round_state['community_card']))
        community_card_features = [self._card_to_feature(card) if card else [0, 0] for card in community_cards]

        # 8 Standard features
        standard_features = [
            round_state['round_count'],
            round_state['pot']['main']['amount'],
            sum([side_pot['amount'] for side_pot in round_state['pot']['side']]),
            round_state['dealer_btn'],
            round_state['small_blind_pos'],
            round_state['big_blind_pos'],
            round_state['small_blind_amount'],
            self._street_to_feature(round_state['street'])
        ]

        # 8 Action history features (2 {# raises, # calls} for each betting stage: preflop, flop, turn, river)
        action_history_features = self._aggregate_action_histories(round_state['action_histories'])

        # Combine all features into a single fixed-size feature vector of length 34
        # Flatten the list of lists
        features = flatten(hole_card_features + community_card_features + standard_features + action_history_features)
        features = np.array(features)
        features = features.reshape(1, -1)
        return features
    
    def _card_to_feature(self, card):
        # Convert card to a numerical feature
        suits = {'C': 1, 'D': 2, 'H': 3, 'S': 4, 'None': 0}
        ranks = {'2': 2, '3': 3, '4': 4, '5': 5, '6': 6, '7': 7, '8': 8, '9': 9, 'T': 10, 'J': 11, 'Q': 12, 'K': 13, 'A': 14, 'None': 0}
        suit = suits.get(card[0], 0) if card else 0
        rank = ranks.get(card[1], 0) if card else 0
        return [suit, rank]
    
    def _street_to_feature(self, street):
        # Convert street to a numerical feature
        streets = {'preflop': 1, 'flop': 2, 'turn': 3, 'river': 4, 'showdown': 5}
        return streets.get(street, 0)

    def _aggregate_action_histories(self, action_histories):
        '''
        # Aggregate action histories into a fixed-length vector
        # Example: Count the number of raises, calls, etc.
        raise_count = sum(1 for action in action_histories.get('preflop', []) if action['action'] == 'raise')
        call_count = sum(1 for action in action_histories.get('preflop', []) if action['action'] == 'call')
        # Add more aggregated features as needed
        # Ensure the length of this vector is fixed
        return [raise_count, call_count]
        '''
        
        # Initialize counts
        raise_count = [0, 0, 0, 0]  # Preflop, Flop, Turn, River
        call_count = [0, 0, 0, 0]
        fold_count = [0, 0, 0, 0]

        # Define rounds
        rounds = ['preflop', 'flop', 'turn', 'river']

        # Count actions in each round
        for i, round in enumerate(rounds):
            for action in action_histories.get(round, []):
                if action['action'] == 'raise':
                    raise_count[i] += 1
                elif action['action'] == 'call':
                    call_count[i] += 1
                elif action['action'] == 'fold':
                    fold_count[i] += 1

        # Flatten and return
        return raise_count + call_count + fold_count

### Simulating Games

In [45]:
from pypokerengine.api.game import setup_config, start_poker

# Declare game setup paramers
config = setup_config(max_round=10, initial_stack=100, small_blind_amount=5)
config.register_player(name="p1", algorithm=PokerBot())
config.register_player(name="p2", algorithm=PokerBot())
config.register_player(name="p3", algorithm=PokerBot())
game_result = start_poker(config, verbose=1)

Started the round 1
Street "preflop" started. (community card = [])
input size: 1
input shape: (1, 34)
action_probs length: 3
[0.8358018  0.08501973 0.07917854]
action argmax: 0
"p1" declared "fold:0"
input size: 1
input shape: (1, 34)
action_probs length: 3
[0.12063771 0.7927347  0.08662754]
action argmax: 1
"p2" declared "call:10"
input size: 1
input shape: (1, 34)
action_probs length: 3
[0.01404726 0.97568536 0.0102674 ]
action argmax: 1
"p3" declared "call:10"
Street "flop" started. (community card = ['DT', 'C8', 'H2'])
input size: 1
input shape: (1, 34)
action_probs length: 3
[0.29833773 0.5655386  0.13612375]
action argmax: 1
"p2" declared "call:0"
input size: 1
input shape: (1, 34)
action_probs length: 3
[0.00253156 0.9901956  0.00727279]
action argmax: 1
"p3" declared "call:0"
Street "turn" started. (community card = ['DT', 'C8', 'H2', 'D8'])
input size: 1
input shape: (1, 34)
action_probs length: 3
[0.37794915 0.57985306 0.04219781]
action argmax: 1
"p2" declared "call:0"
inpu

action_probs length: 3
[0.7799418  0.04404062 0.17601752]
action argmax: 0
"p1" declared "fold:0"
Street "flop" started. (community card = ['D4', 'CJ', 'C4'])
input size: 1
input shape: (1, 34)
action_probs length: 3
[0.00170078 0.9953833  0.0029159 ]
action argmax: 1
"p3" declared "call:0"
input size: 1
input shape: (1, 34)
action_probs length: 3
[0.45082757 0.51215285 0.03701959]
action argmax: 1
"p2" declared "call:0"
Street "turn" started. (community card = ['D4', 'CJ', 'C4', 'C5'])
input size: 1
input shape: (1, 34)
action_probs length: 3
[5.810188e-04 9.978661e-01 1.552916e-03]
action argmax: 1
"p3" declared "call:0"
input size: 1
input shape: (1, 34)
action_probs length: 3
[0.45026422 0.5284787  0.02125712]
action argmax: 1
"p2" declared "call:0"
Street "river" started. (community card = ['D4', 'CJ', 'C4', 'C5', 'ST'])
input size: 1
input shape: (1, 34)
action_probs length: 3
[4.3599933e-04 9.9811769e-01 1.4464383e-03]
action argmax: 1
"p3" declared "call:0"
input size: 1
input 

action_probs length: 3
[0.25001156 0.7191556  0.03083285]
action argmax: 1
"p2" declared "call:0"
input size: 1
input shape: (1, 34)
action_probs length: 3
[0.00348817 0.99210936 0.00440247]
action argmax: 1
"p3" declared "call:0"
Street "turn" started. (community card = ['C3', 'D8', 'D7', 'DQ'])
input size: 1
input shape: (1, 34)
action_probs length: 3
[0.26403716 0.7325968  0.00336602]
action argmax: 1
"p2" declared "call:0"
input size: 1
input shape: (1, 34)
action_probs length: 3
[3.0041111e-04 9.9937123e-01 3.2831304e-04]
action argmax: 1
"p3" declared "call:0"
Street "river" started. (community card = ['C3', 'D8', 'D7', 'DQ', 'CJ'])
input size: 1
input shape: (1, 34)
action_probs length: 3
[0.44724238 0.5499346  0.00282295]
action argmax: 1
"p2" declared "call:0"
input size: 1
input shape: (1, 34)
action_probs length: 3
[1.1363227e-04 9.9960655e-01 2.7974191e-04]
action argmax: 1
"p3" declared "call:0"
"['p2']" won the round 9 (stack = {'p1': 55, 'p2': 35, 'p3': 210})
Started the