In [7]:
import random
import numpy as np
from collections import defaultdict
import itertools
import pickle
from typing import List, Dict, Tuple, Set
from tqdm import tqdm
import os

class Card:
    def __init__(self, rank: str, suit: str):
        self.rank = rank
        self.suit = suit
        
    def __repr__(self):
        return f"{self.rank}{self.suit}"
    
    def __eq__(self, other):
        return self.rank == other.rank and self.suit == other.suit
    
    def __hash__(self):
        return hash((self.rank, self.suit))
    
    def get_value(self):
        rank_values = {'2': 2, '3': 3, '4': 4, '5': 5, '6': 6, '7': 7,
                      '8': 8, '9': 9, 'T': 10, 'J': 11, 'Q': 12, 'K': 13, 'A': 14}
        return rank_values[self.rank]

class Deck:
    def __init__(self):
        ranks = ['2', '3', '4', '5', '6', '7', '8', '9', 'T', 'J', 'Q', 'K', 'A']
        suits = ['h', 'd', 'c', 's']
        self.cards = [Card(rank, suit) for rank in ranks for suit in suits]
        self.shuffle()
        
    def shuffle(self):
        random.shuffle(self.cards)
        
    def deal(self) -> Card:
        return self.cards.pop()

def evaluate_hand(cards: List[Card]) -> Tuple[int, List[int]]:
    if len(cards) < 5:
        raise ValueError("Need at least 5 cards to evaluate a hand")
    
    best_rank = -1
    best_tiebreakers = []
    
    for combo in itertools.combinations(cards, 5):
        values = sorted([card.get_value() for card in combo], reverse=True)
        suits = [card.suit for card in combo]
        
        flush = len(set(suits)) == 1
        
        unique_values = sorted(list(set(values)))
        straight = False
        if len(unique_values) >= 5:
            for i in range(len(unique_values) - 4):
                if unique_values[i] - unique_values[i+4] == 4:
                    straight = True
                    high_card = unique_values[i]
                    break
            if set(unique_values[-5:]) == {14, 5, 4, 3, 2}:
                straight = True
                high_card = 5
        
        value_counts = defaultdict(int)
        for v in values:
            value_counts[v] += 1
        counts = sorted(value_counts.values(), reverse=True)
        sorted_groups = sorted(value_counts.items(), key=lambda x: (-x[1], -x[0]))
        
        if straight and flush:
            rank = 8
            tiebreakers = [high_card]
        elif counts[0] == 4:
            rank = 7
            quad_val = sorted_groups[0][0]
            kicker = sorted_groups[1][0]
            tiebreakers = [quad_val, kicker]
        elif counts[0] == 3 and counts[1] == 2:
            rank = 6
            trip_val = sorted_groups[0][0]
            pair_val = sorted_groups[1][0]
            tiebreakers = [trip_val, pair_val]
        elif flush:
            rank = 5
            tiebreakers = values[:5]
        elif straight:
            rank = 4
            tiebreakers = [high_card]
        elif counts[0] == 3:
            rank = 3
            trip_val = sorted_groups[0][0]
            kickers = [x[0] for x in sorted_groups[1:]]
            tiebreakers = [trip_val] + kickers[:2]
        elif counts[0] == 2 and counts[1] == 2:
            rank = 2
            pair1 = sorted_groups[0][0]
            pair2 = sorted_groups[1][0]
            kicker = sorted_groups[2][0]
            tiebreakers = [max(pair1, pair2), min(pair1, pair2), kicker]
        elif counts[0] == 2:
            rank = 1
            pair_val = sorted_groups[0][0]
            kickers = [x[0] for x in sorted_groups[1:]]
            tiebreakers = [pair_val] + kickers[:3]
        else:
            rank = 0
            tiebreakers = values[:5]
        
        if rank > best_rank or (rank == best_rank and tiebreakers > best_tiebreakers):
            best_rank = rank
            best_tiebreakers = tiebreakers
    
    return (best_rank, best_tiebreakers)

class PokerGameState:
    def __init__(self, num_players: int = 4, initial_chips: int = 1000):
        self.num_players = num_players
        self.players_chips = [initial_chips for _ in range(num_players)]
        self.current_bets = [0] * num_players
        self.pot = 0
        self.community_cards = []
        self.deck = Deck()
        self.player_hands = [[] for _ in range(num_players)]
        self.current_player = 0
        self.round = 0
        self.last_raise = 0
        self.small_blind = 10
        self.big_blind = 20
        self.folded = [False] * num_players
        self.actions = []
        self.dealer_position = 0
        self.seen_cards = set()  # Track all cards seen in showdowns
        
    def start_new_hand(self):
        self.community_cards = []
        self.player_hands = [[] for _ in range(self.num_players)]
        self.current_bets = [0] * self.num_players
        self.pot = 0
        self.round = 0
        self.folded = [False] * self.num_players
        self.actions = []
        self.deck = Deck()
        
        # Remove seen cards from the new deck
        self.deck.cards = [card for card in self.deck.cards if card not in self.seen_cards]
        self.deck.shuffle()
        
        self.dealer_position = (self.dealer_position + 1) % self.num_players
        sb_pos = (self.dealer_position + 1) % self.num_players
        bb_pos = (sb_pos + 1) % self.num_players
        
        self.players_chips[sb_pos] -= self.small_blind
        self.players_chips[bb_pos] -= self.big_blind
        self.current_bets[sb_pos] = self.small_blind
        self.current_bets[bb_pos] = self.big_blind
        self.pot = self.small_blind + self.big_blind
        self.last_raise = self.big_blind
        self.current_player = (bb_pos + 1) % self.num_players
        
        for _ in range(2):
            for i in range(self.num_players):
                card = self.deck.deal()
                self.player_hands[i].append(card)
    
    def get_legal_actions(self, player: int) -> List[Tuple[str, int]]:
        if self.folded[player]:
            return []
            
        actions = []
        current_bet = self.current_bets[player]
        max_bet = max(self.current_bets)
        chips = self.players_chips[player]
        
        actions.append(("fold", 0))
        
        if current_bet == max_bet:
            actions.append(("check", 0))
        
        if current_bet < max_bet:
            call_amount = min(max_bet - current_bet, chips)
            actions.append(("call", call_amount))
        
        if chips > 0:
            min_raise = max(self.last_raise, self.big_blind)
            min_raise_amount = max_bet - current_bet + min_raise
            max_raise_amount = chips
            
            if min_raise_amount <= max_raise_amount:
                if min_raise_amount == max_raise_amount:
                    actions.append(("all-in", chips))
                else:
                    actions.append(("raise", (min_raise_amount, max_raise_amount)))
                    actions.append(("all-in", chips))
            elif chips > 0:
                actions.append(("all-in", chips))
        
        return actions
    
    def apply_action(self, player: int, action: str, amount: int = 0):
        if self.folded[player]:
            return
            
        self.actions.append((player, action, amount))
        
        if action == "fold":
            self.folded[player] = True
        elif action == "check":
            pass
        elif action == "call":
            call_amount = min(amount, self.players_chips[player])
            self.players_chips[player] -= call_amount
            self.current_bets[player] += call_amount
            self.pot += call_amount
        elif action == "raise":
            raise_amount = min(amount, self.players_chips[player])
            self.players_chips[player] -= raise_amount
            self.current_bets[player] += raise_amount
            self.pot += raise_amount
            self.last_raise = raise_amount - (max(self.current_bets) - self.current_bets[player])
        elif action == "all-in":
            all_in_amount = min(amount, self.players_chips[player])
            self.players_chips[player] -= all_in_amount
            self.current_bets[player] += all_in_amount
            self.pot += all_in_amount
            if (self.current_bets[player] - all_in_amount) < max(self.current_bets):
                self.last_raise = all_in_amount - (max(self.current_bets) - self.current_bets[player])
        
        self.current_player = (self.current_player + 1) % self.num_players
        while self.folded[self.current_player] and not self.is_round_over():
            self.current_player = (self.current_player + 1) % self.num_players
        
        if self.is_round_over():
            self.advance_round()
    
    def is_round_over(self) -> bool:
        if sum(1 for f in self.folded if not f) <= 1:
            return True
            
        active_players = [i for i in range(self.num_players) if not self.folded[i]]
        if len(self.actions) == 0:
            return False
            
        last_raise_index = -1
        for i, (p, a, _) in enumerate(self.actions):
            if a == "raise":
                last_raise_index = i
        
        if last_raise_index == -1:
            acted_players = set(p for p, _, _ in self.actions)
            return all(p in acted_players for p in active_players)
        else:
            required_players = active_players.copy()
            for p, _, _ in self.actions[last_raise_index+1:]:
                if p in required_players:
                    required_players.remove(p)
            return len(required_players) == 0 and all(
                self.current_bets[p] == self.current_bets[active_players[0]] 
                for p in active_players
            )
    
    def advance_round(self):
        self.round += 1
        self.actions = []
        self.last_raise = 0
        self.current_bets = [0] * self.num_players
        
        if self.round == 1:
            self.community_cards.extend([self.deck.deal() for _ in range(3)])
        elif self.round in [2, 3]:
            self.community_cards.append(self.deck.deal())
        
        active_players = [i for i in range(self.num_players) if not self.folded[i]]
        if active_players:
            self.current_player = (self.dealer_position + 1) % self.num_players
            while self.folded[self.current_player]:
                self.current_player = (self.current_player + 1) % self.num_players
    
    def is_hand_over(self) -> bool:
        return self.round >= 4 or sum(1 for f in self.folded if not f) <= 1
    
    def get_winner(self) -> List[int]:
        if sum(self.folded) == self.num_players - 1:
            return [i for i, folded in enumerate(self.folded) if not folded]
        
        active_players = [i for i in range(self.num_players) if not self.folded[i]]
        best_hand = None
        winners = []
        
        for player in active_players:
            all_cards = self.player_hands[player] + self.community_cards
            hand_rank = evaluate_hand(all_cards)
            
            if best_hand is None or hand_rank > best_hand:
                best_hand = hand_rank
                winners = [player]
            elif hand_rank == best_hand:
                winners.append(player)
        
        return winners
    
    def distribute_pot(self, winners: List[int]):
        win_amount = self.pot // len(winners)
        for winner in winners:
            self.players_chips[winner] += win_amount
        self.pot = 0
        
        # Remember all shown cards
        for player in range(self.num_players):
            if not self.folded[player]:
                for card in self.player_hands[player]:
                    self.seen_cards.add(card)
        for card in self.community_cards:
            self.seen_cards.add(card)

class PokerQLearningAgent:
    def __init__(self, player_index: int, alpha: float = 0.1, gamma: float = 0.9, 
                 epsilon: float = 0.2, initial_q_value: float = 0.0):
        self.player_index = player_index
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.initial_q_value = initial_q_value
        self.q_table = defaultdict(lambda: defaultdict(lambda: initial_q_value))
        self.last_state = None
        self.last_action = None
        self.hand_strength_memory = {}  # Stores hand strength estimates
        
    def estimate_hand_strength(self, game_state: PokerGameState) -> float:
        """Calculate hand strength without bluffing (0-1 scale)"""
        hand = game_state.player_hands[self.player_index]
        community = game_state.community_cards
        
        if not community:  # Pre-flop
            # Simplified pre-flop hand strength based on Sklansky groups
            h1, h2 = hand[0].get_value(), hand[1].get_value()
            suited = hand[0].suit == hand[1].suit
            
            if h1 == h2:  # Pair
                strength = min(0.7 + h1/30, 0.95)
            elif h1 >= 12 and h2 >= 10 and (h1-h2 <= 2 or suited):
                strength = min(0.5 + max(h1,h2)/40, 0.85)
            elif h1 >= 10 and h2 >= 8:
                strength = min(0.3 + max(h1,h2)/50, 0.7)
            else:
                strength = min(0.1 + max(h1,h2)/80, 0.4)
            return strength
        
        # Post-flop - use actual evaluation
        all_cards = hand + community
        rank, _ = evaluate_hand(all_cards)
        return min(0.1 + rank * 0.1, 0.9)  # Convert rank to 0.1-0.9 scale
    
    def get_state_key(self, game_state: PokerGameState) -> str:
        """State representation with hand strength and seen cards info"""
        hand = game_state.player_hands[self.player_index]
        h1, h2 = hand[0].get_value(), hand[1].get_value()
        suited = "s" if hand[0].suit == hand[1].suit else "o"
        
        # Hand strength category (discretized)
        hand_strength = self.estimate_hand_strength(game_state)
        strength_category = min(int(hand_strength * 10), 9)  # 0-9
        
        # Community cards info
        cc = game_state.community_cards
        cc_count = len(cc)
        
        # Pot and bet info
        to_call = max(game_state.current_bets) - game_state.current_bets[self.player_index]
        pot_odds = to_call / (game_state.pot + to_call) if (game_state.pot + to_call) > 0 else 0
        pot_odds_category = min(int(pot_odds * 10), 9)  # 0-9
        
        # Position (0-3)
        position = (self.player_index - game_state.dealer_position) % game_state.num_players
        
        # Seen cards impact (how many of our cards have been seen before)
        seen_count = sum(1 for card in hand if card in game_state.seen_cards)
        
        state_parts = [
            f"HS{strength_category}",
            f"H{max(h1,h2)}{min(h1,h2)}{suited}",
            f"CC{cc_count}",
            f"PO{pot_odds_category}",
            f"POS{position}",
            f"SC{seen_count}",
            f"RD{game_state.round}"
        ]
        return "|".join(state_parts)
    
    def get_action(self, game_state: PokerGameState, training: bool = True) -> Tuple[str, int]:
        legal_actions = game_state.get_legal_actions(self.player_index)
        if not legal_actions:
            return ("fold", 0)
        
        state_key = self.get_state_key(game_state)
        hand_strength = self.estimate_hand_strength(game_state)
        
        # Exploration with decaying epsilon
        if training and random.random() < self.epsilon:
            chosen_action = random.choice(legal_actions)
            action, amount = chosen_action
            if action in ["raise", "all-in"]:
                amount = random.randint(amount[0], amount[1]) if isinstance(amount, tuple) else amount
            self.last_state = state_key
            self.last_action = (action, amount)
            return (action, amount)
        
        # Action selection based on Q-values and hand strength
        action_values = []
        for action, amount in legal_actions:
            if action == "raise":
                action_key = ("raise", amount[0])
            elif action == "all-in":
                action_key = ("raise", amount)  # Treat all-in as max raise
            else:
                action_key = (action, amount)
            
            # Base Q-value
            q_value = self.q_table[state_key][action_key]
            
            # Hand strength modifier - no bluffing means strong correlation between hand strength and action
            if action == "fold":
                q_value *= (1 - hand_strength)  # Less likely to fold with stronger hands
            elif action in ["call", "check"]:
                q_value *= (0.5 + hand_strength * 0.5)  # Moderate correlation
            elif action in ["raise", "all-in"]:
                q_value *= (0.3 + hand_strength * 0.7)  # Strong correlation
            
            action_values.append((action, amount, q_value))
        
        # Select action with highest modified Q-value
        best_value = max(v for _, _, v in action_values)
        best_actions = [(a, amt) for a, amt, v in action_values if v == best_value]
        chosen_action = random.choice(best_actions)
        
        action, amount = chosen_action
        if action in ["raise", "all-in"]:
            amount = random.randint(amount[0], amount[1]) if isinstance(amount, tuple) else amount
        
        self.last_state = state_key
        self.last_action = (action, amount)
        return (action, amount)
    
    def learn(self, next_state: PokerGameState, reward: float):
        if self.last_state is None or self.last_action is None:
            return
            
        action, amount = self.last_action
        next_state_key = self.get_state_key(next_state)
        
        # Enhanced reward based on hand strength
        hand_strength = self.estimate_hand_strength(next_state)
        reward = reward * (0.5 + hand_strength * 0.5)  # Scale reward by hand strength
        
        # For Q-update, normalize all-in as raise
        update_action = "raise" if action == "all-in" else action
        update_amount = amount if action == "all-in" else (amount[0] if action == "raise" and isinstance(amount, tuple) else amount)
        
        current_q = self.q_table[self.last_state][(update_action, update_amount)]
        
        next_legal_actions = next_state.get_legal_actions(self.player_index)
        if next_legal_actions and not next_state.is_hand_over():
            max_next_q = max(
                self.q_table[next_state_key][
                    ("raise", amt if a == "all-in" else (amt[0] if a == "raise" else amt))
                ] for a, amt in next_legal_actions
            )
        else:
            max_next_q = 0
        
        new_q = current_q + self.alpha * (reward + self.gamma * max_next_q - current_q)
        self.q_table[self.last_state][(update_action, update_amount)] = new_q
        
        # Decay epsilon to reduce exploration over time
        self.epsilon = max(0.05, self.epsilon * 0.999)
    
    def save_q_table(self, filename: str):
        with open(filename, 'wb') as f:
            q_table_regular = {k: dict(v) for k, v in self.q_table.items()}
            pickle.dump(q_table_regular, f)
    
    def load_q_table(self, filename: str):
        with open(filename, 'rb') as f:
            q_table_regular = pickle.load(f)
            self.q_table = defaultdict(lambda: defaultdict(lambda: self.initial_q_value))
            for k, v in q_table_regular.items():
                self.q_table[k].update(v)

def train_agents(episodes=10000):
    agents = [PokerQLearningAgent(i) for i in range(4)]
    
    # Create progress bar
    with tqdm(total=episodes, desc="Training agents") as pbar:
        for episode in range(episodes):
            game = PokerGameState(num_players=4)
            game.start_new_hand()
            
            while not game.is_hand_over():
                current_player = game.current_player
                agent = agents[current_player]
                
                action, amount = agent.get_action(game)
                game.apply_action(current_player, action, amount)
            
            # Calculate rewards
            winners = game.get_winner()
            for i in range(4):
                if i in winners:
                    # Reward based on both winning and hand strength
                    all_cards = game.player_hands[i] + game.community_cards
                    rank, _ = evaluate_hand(all_cards)
                    reward = (game.pot / len(winners)) * (0.5 + rank * 0.1)
                else:
                    # Penalize based on investment and hand strength
                    hand_strength = agents[i].estimate_hand_strength(game)
                    reward = -game.current_bets[i] * (1.5 - hand_strength * 0.5)
                
                agents[i].learn(game, reward)
            
            # Update progress bar every episode
            pbar.update(1)
            
            # Optional: Update description with current epsilon (exploration rate)
            if (episode + 1) % 100 == 0:
                pbar.set_description(f"Training agents (ε={agents[0].epsilon:.2f})")
    
    # Save trained agents
    for i, agent in enumerate(agents):
        agent.save_q_table(f"no_bluff_agent_{i}.pkl")
    
    return agents

def visible_poker_game(agents: List[PokerQLearningAgent], num_hands: int = 3, initial_chips: int = 1000):
    game = PokerGameState(num_players=4, initial_chips=initial_chips)
    hand_count = 0
    
    while hand_count < num_hands:
        # Reset if any player is broke
        if any(chips <= 0 for chips in game.players_chips):
            print("\n=== Resetting chip counts (player went broke) ===")
            game = PokerGameState(num_players=4, initial_chips=initial_chips)
        
        hand_count += 1
        print(f"\n=== Starting Hand #{hand_count} ===")
        print(f"Chip counts: {[f'P{i}: ${chips}' for i, chips in enumerate(game.players_chips)]}")
        game.start_new_hand()
        
        print(f"\nDealer: Player {game.dealer_position}")
        print(f"Small Blind: Player {(game.dealer_position + 1) % 4} (${game.small_blind})")
        print(f"Big Blind: Player {(game.dealer_position + 2) % 4} (${game.big_blind})")
        
        while not game.is_hand_over():
            print("\n" + "="*50)
            print(f"Round {'Pre-flop' if game.round == 0 else 'Flop' if game.round == 1 else 'Turn' if game.round == 2 else 'River'}")
            
            if game.community_cards:
                print(f"\nCommunity Cards: {', '.join(str(card) for card in game.community_cards)}")
            else:
                print("\nCommunity Cards: None yet")
            
            for i in range(4):
                status = [
                    f"Player {i}:",
                    f"Chips: ${game.players_chips[i]}",
                    f"Current Bet: ${game.current_bets[i]}",
                    "[FOLDED]" if game.folded[i] else "",
                ]
                if game.is_hand_over() or i == game.current_player:
                    status.append(f"Hand: {', '.join(str(card) for card in game.player_hands[i])}")
                else:
                    status.append("Hand: [Hidden]")
                print(" ".join(status))
            
            current_player = game.current_player
            print(f"\nPlayer {current_player}'s turn")
            legal_actions = game.get_legal_actions(current_player)
            
            print("Available actions:")
            for i, (action, amount) in enumerate(legal_actions):
                if action == "raise":
                    print(f"  {i+1}. Raise (${amount[0]} to ${amount[1]})")
                elif action == "all-in":
                    print(f"  {i+1}. ALL-IN (${amount})")
                else:
                    print(f"  {i+1}. {action.capitalize()} {f'${amount}' if amount > 0 else ''}")
            
            action, amount = agents[current_player].get_action(game, training=False)
            if action in ["raise", "all-in"]:
                amount = random.randint(amount[0], amount[1]) if isinstance(amount, tuple) else amount
            
            print(f"Player {current_player} chooses to {action.upper()} {f'${amount}' if amount > 0 else ''}")
            game.apply_action(current_player, action, amount)
        
        # Showdown
        print("\n" + "="*50)
        print("=== Hand Results ===")
        
        print("\nFinal Board:")
        print(f"Community Cards: {', '.join(str(card) for card in game.community_cards)}")
        
        print("\nPlayer Hands:")
        for i in range(4):
            hand_desc = f"Player {i}: {', '.join(str(card) for card in game.player_hands[i])}"
            if game.folded[i]:
                hand_desc += " [FOLDED]"
            print(hand_desc)
        
        winners = game.get_winner()
        if winners:
            win_amount = game.pot // len(winners)
            print(f"\nWinner(s): {', '.join(f'Player {w}' for w in winners)}")
            print(f"Each wins: ${win_amount}")
            
            hand_names = [
                "High Card", "One Pair", "Two Pair", "Three of a Kind",
                "Straight", "Flush", "Full House", "Four of a Kind", 
                "Straight Flush"
            ]
            for winner in winners:
                all_cards = game.player_hands[winner] + game.community_cards
                rank, _ = evaluate_hand(all_cards)
                print(f"Player {winner} has: {hand_names[rank]}")
        else:
            print("\nNo winners - all players folded")
        
        game.distribute_pot(winners if winners else [])
        
        print("\nUpdated Chip Counts:")
        for i in range(4):
            print(f"Player {i}: ${game.players_chips[i]}")
        
        # Pause between hands
        if hand_count < num_hands:
            input("\nPress Enter to continue to next hand...")

if __name__ == "__main__":
    agents = []
    try:
        # Try to load all agents
        for i in range(4):
            try:
                agent = PokerQLearningAgent(i)
                agent.load_q_table(f"no_bluff_agent_{i}.pkl")
                agents.append(agent)
                print(f"Loaded agent {i} from file")
            except FileNotFoundError:
                print(f"No saved agent found for player {i}, creating new one")
                agents.append(PokerQLearningAgent(i))
        
        # If we loaded at least one agent, ask if we want to continue training
        if any(os.path.exists(f"no_bluff_agent_{i}.pkl") for i in range(4)):
            response = input("Some agents were loaded. Continue training? (y/n): ")
            if response.lower() == 'y':
                agents = train_agents(episodes=10000)
        else:
            agents = train_agents(episodes=10000)
    except Exception as e:
        print(f"Error loading agents: {e}")
        print("Creating new agents...")
        agents = train_agents(episodes=10000)
    
    print("\nStarting visible poker game...")
    visible_poker_game(agents, num_hands=5, initial_chips=1000)

No saved agent found for player 0, creating new one
No saved agent found for player 1, creating new one
No saved agent found for player 2, creating new one
No saved agent found for player 3, creating new one


Training agents (ε=0.05): 100%|██████████| 10000/10000 [00:22<00:00, 453.60it/s]



Starting visible poker game...

=== Starting Hand #1 ===
Chip counts: ['P0: $1000', 'P1: $1000', 'P2: $1000', 'P3: $1000']

Dealer: Player 1
Small Blind: Player 2 ($10)
Big Blind: Player 3 ($20)

Round Pre-flop

Community Cards: None yet
Player 0: Chips: $1000 Current Bet: $0  Hand: 6s, 8s
Player 1: Chips: $1000 Current Bet: $0  Hand: [Hidden]
Player 2: Chips: $990 Current Bet: $10  Hand: [Hidden]
Player 3: Chips: $980 Current Bet: $20  Hand: [Hidden]

Player 0's turn
Available actions:
  1. Fold 
  2. Call $20
  3. Raise ($40 to $1000)
  4. ALL-IN ($1000)
Player 0 chooses to RAISE $622

Round Pre-flop

Community Cards: None yet
Player 0: Chips: $378 Current Bet: $622  Hand: [Hidden]
Player 1: Chips: $1000 Current Bet: $0  Hand: 3s, Tc
Player 2: Chips: $990 Current Bet: $10  Hand: [Hidden]
Player 3: Chips: $980 Current Bet: $20  Hand: [Hidden]

Player 1's turn
Available actions:
  1. Fold 
  2. Call $622
  3. ALL-IN ($1000)
Player 1 chooses to ALL-IN $1000

Round Pre-flop

Community C


Press Enter to continue to next hand... 



=== Resetting chip counts (player went broke) ===

=== Starting Hand #2 ===
Chip counts: ['P0: $1000', 'P1: $1000', 'P2: $1000', 'P3: $1000']

Dealer: Player 1
Small Blind: Player 2 ($10)
Big Blind: Player 3 ($20)

Round Pre-flop

Community Cards: None yet
Player 0: Chips: $1000 Current Bet: $0  Hand: 7h, Qd
Player 1: Chips: $1000 Current Bet: $0  Hand: [Hidden]
Player 2: Chips: $990 Current Bet: $10  Hand: [Hidden]
Player 3: Chips: $980 Current Bet: $20  Hand: [Hidden]

Player 0's turn
Available actions:
  1. Fold 
  2. Call $20
  3. Raise ($40 to $1000)
  4. ALL-IN ($1000)
Player 0 chooses to ALL-IN $1000

Round Pre-flop

Community Cards: None yet
Player 0: Chips: $0 Current Bet: $1000  Hand: [Hidden]
Player 1: Chips: $1000 Current Bet: $0  Hand: Ac, 7c
Player 2: Chips: $990 Current Bet: $10  Hand: [Hidden]
Player 3: Chips: $980 Current Bet: $20  Hand: [Hidden]

Player 1's turn
Available actions:
  1. Fold 
  2. Call $1000
  3. ALL-IN ($1000)
Player 1 chooses to ALL-IN $1000

Round 


Press Enter to continue to next hand... 



=== Resetting chip counts (player went broke) ===

=== Starting Hand #3 ===
Chip counts: ['P0: $1000', 'P1: $1000', 'P2: $1000', 'P3: $1000']

Dealer: Player 1
Small Blind: Player 2 ($10)
Big Blind: Player 3 ($20)

Round Pre-flop

Community Cards: None yet
Player 0: Chips: $1000 Current Bet: $0  Hand: Qd, Jc
Player 1: Chips: $1000 Current Bet: $0  Hand: [Hidden]
Player 2: Chips: $990 Current Bet: $10  Hand: [Hidden]
Player 3: Chips: $980 Current Bet: $20  Hand: [Hidden]

Player 0's turn
Available actions:
  1. Fold 
  2. Call $20
  3. Raise ($40 to $1000)
  4. ALL-IN ($1000)
Player 0 chooses to ALL-IN $1000

Round Pre-flop

Community Cards: None yet
Player 0: Chips: $0 Current Bet: $1000  Hand: [Hidden]
Player 1: Chips: $1000 Current Bet: $0  Hand: Qc, Kd
Player 2: Chips: $990 Current Bet: $10  Hand: [Hidden]
Player 3: Chips: $980 Current Bet: $20  Hand: [Hidden]

Player 1's turn
Available actions:
  1. Fold 
  2. Call $1000
  3. ALL-IN ($1000)
Player 1 chooses to ALL-IN $1000

Round 


Press Enter to continue to next hand... 



=== Resetting chip counts (player went broke) ===

=== Starting Hand #4 ===
Chip counts: ['P0: $1000', 'P1: $1000', 'P2: $1000', 'P3: $1000']

Dealer: Player 1
Small Blind: Player 2 ($10)
Big Blind: Player 3 ($20)

Round Pre-flop

Community Cards: None yet
Player 0: Chips: $1000 Current Bet: $0  Hand: 7s, Ac
Player 1: Chips: $1000 Current Bet: $0  Hand: [Hidden]
Player 2: Chips: $990 Current Bet: $10  Hand: [Hidden]
Player 3: Chips: $980 Current Bet: $20  Hand: [Hidden]

Player 0's turn
Available actions:
  1. Fold 
  2. Call $20
  3. Raise ($40 to $1000)
  4. ALL-IN ($1000)
Player 0 chooses to CALL $20

Round Pre-flop

Community Cards: None yet
Player 0: Chips: $980 Current Bet: $20  Hand: [Hidden]
Player 1: Chips: $1000 Current Bet: $0  Hand: 9d, 9h
Player 2: Chips: $990 Current Bet: $10  Hand: [Hidden]
Player 3: Chips: $980 Current Bet: $20  Hand: [Hidden]

Player 1's turn
Available actions:
  1. Fold 
  2. Call $20
  3. Raise ($40 to $1000)
  4. ALL-IN ($1000)
Player 1 chooses to 


Press Enter to continue to next hand... 



=== Resetting chip counts (player went broke) ===

=== Starting Hand #5 ===
Chip counts: ['P0: $1000', 'P1: $1000', 'P2: $1000', 'P3: $1000']

Dealer: Player 1
Small Blind: Player 2 ($10)
Big Blind: Player 3 ($20)

Round Pre-flop

Community Cards: None yet
Player 0: Chips: $1000 Current Bet: $0  Hand: 3h, 8d
Player 1: Chips: $1000 Current Bet: $0  Hand: [Hidden]
Player 2: Chips: $990 Current Bet: $10  Hand: [Hidden]
Player 3: Chips: $980 Current Bet: $20  Hand: [Hidden]

Player 0's turn
Available actions:
  1. Fold 
  2. Call $20
  3. Raise ($40 to $1000)
  4. ALL-IN ($1000)
Player 0 chooses to RAISE $451

Round Pre-flop

Community Cards: None yet
Player 0: Chips: $549 Current Bet: $451  Hand: [Hidden]
Player 1: Chips: $1000 Current Bet: $0  Hand: 9h, 7d
Player 2: Chips: $990 Current Bet: $10  Hand: [Hidden]
Player 3: Chips: $980 Current Bet: $20  Hand: [Hidden]

Player 1's turn
Available actions:
  1. Fold 
  2. Call $451
  3. Raise ($902 to $1000)
  4. ALL-IN ($1000)
Player 1 choose

In [None]:
agents = train_agents(episodes=10000)
