In [54]:
import random
import itertools
import numpy as np
from collections import defaultdict, deque
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
import os

class Card:
    def __init__(self, rank, suit):
        self.rank = rank
        self.suit = suit

    def __repr__(self):
        return f"{self.rank}{self.suit}"

    def get_value(self):
        return {'2': 2, '3': 3, '4': 4, '5': 5, '6': 6, '7': 7,
                '8': 8, '9': 9, 'T': 10, 'J': 11, 'Q': 12, 'K': 13, 'A': 14}[self.rank]

class Deck:
    def __init__(self):
        ranks = '23456789TJQKA'
        suits = 'hdcs'
        self.cards = [Card(r, s) for r in ranks for s in suits]
        self.shuffle()

    def shuffle(self):
        random.shuffle(self.cards)

    def deal(self):
        return self.cards.pop() if self.cards else None

def evaluate_hand(cards):
    if len(cards) < 5:
        return (0, [])  # Not enough cards for evaluation
    
    best_rank, best_tiebreakers = -1, []
    for combo in itertools.combinations(cards, 5):
        vals = sorted([c.get_value() for c in combo], reverse=True)
        suits = [c.suit for c in combo]
        
        # Check for flush and straight
        flush = len(set(suits)) == 1
        uniq = sorted(set(vals), reverse=True)
        straight = False
        
        for i in range(len(uniq) - 4):
            if uniq[i] - uniq[i+4] == 4:
                straight = True
                high_card = uniq[i]
                break
                
        if set([14, 5, 4, 3, 2]).issubset(set(vals)):
            straight, high_card = True, 5
            
        counts = defaultdict(int)
        for v in vals: counts[v] += 1
        count_vals = sorted(counts.items(), key=lambda x: (-x[1], -x[0]))
        
        if flush and straight:
            current_rank, current_tiebreakers = 8, [high_card]
        elif count_vals[0][1] == 4:
            current_rank, current_tiebreakers = 7, [count_vals[0][0], count_vals[1][0]]
        elif count_vals[0][1] == 3 and count_vals[1][1] == 2:
            current_rank, current_tiebreakers = 6, [count_vals[0][0], count_vals[1][0]]
        elif flush:
            current_rank, current_tiebreakers = 5, vals[:5]
        elif straight:
            current_rank, current_tiebreakers = 4, [high_card]
        elif count_vals[0][1] == 3:
            current_rank, current_tiebreakers = 3, [count_vals[0][0]] + sorted(set(vals)-{count_vals[0][0]}, reverse=True)[:2]
        elif count_vals[0][1] == 2 and count_vals[1][1] == 2:
            pairs = sorted([count_vals[0][0], count_vals[1][0]], reverse=True)
            current_rank, current_tiebreakers = 2, pairs + [max(set(vals)-set(pairs))]
        elif count_vals[0][1] == 2:
            current_rank, current_tiebreakers = 1, [count_vals[0][0]] + sorted(set(vals)-{count_vals[0][0]}, reverse=True)[:3]
        else:
            current_rank, current_tiebreakers = 0, vals[:5]
            
        if current_rank > best_rank or (current_rank == best_rank and current_tiebreakers > best_tiebreakers):
            best_rank, best_tiebreakers = current_rank, current_tiebreakers
            
    return best_rank, best_tiebreakers

class PokerGameState:
    def __init__(self, num_players=4, initial_chips=1000):
        self.num_players = num_players
        self.initial_chips = initial_chips
        self.small_blind = 10
        self.big_blind = 20
        self.players_chips = [initial_chips] * num_players
        self.reset()

    def reset(self):
        """Full reset for a new hand while preserving chip counts"""
        self.current_bets = [0] * self.num_players
        self.pot = 0
        self.deck = Deck()
        self.community_cards = []
        self.player_hands = []
        self.folded = [False] * self.num_players
        self.round = 0
        self.actions = []
        self.last_raise = self.big_blind
        
        # Deal cards (with validation)
        for _ in range(self.num_players):
            card1, card2 = self.deck.deal(), self.deck.deal()
            if None in (card1, card2):
                self.deck = Deck()  # Reshuffle if out of cards
                card1, card2 = self.deck.deal(), self.deck.deal()
            self.player_hands.append([card1, card2])
        
        # Rotate dealer and set positions
        self.dealer_position = (self.dealer_position + 1) % self.num_players if hasattr(self, 'dealer_position') else 0
        self.current_player = (self.dealer_position + 3) % self.num_players  # UTG
        
        # Post blinds (with chip protection)
        sb_pos = (self.dealer_position + 1) % self.num_players
        bb_pos = (self.dealer_position + 2) % self.num_players
        
        sb_post = min(self.small_blind, self.players_chips[sb_pos])
        bb_post = min(self.big_blind, self.players_chips[bb_pos])
        
        self.players_chips[sb_pos] -= sb_post
        self.players_chips[bb_pos] -= bb_post
        self.current_bets[sb_pos] = sb_post
        self.current_bets[bb_pos] = bb_post
        self.pot = sb_post + bb_post
        
        self.players_acted_this_round = [False] * self.num_players
        self.num_checks = 0

    def get_legal_actions(self, player):
        if self.folded[player] or self.players_chips[player] <= 0:
            return []
            
        actions = [('fold', 0)]
        call_amt = max(self.current_bets) - self.current_bets[player]
        
        if call_amt == 0:
            actions.append(('check', 0))
        else:
            actions.append(('call', call_amt))
        
        if not self.all_players_checked():
            min_raise = max(self.last_raise, self.big_blind)
            max_raise = self.players_chips[player] - call_amt
            if max_raise > min_raise:
                actions.append(('raise', min_raise))
        
        if self.players_chips[player] > 0:
            actions.append(('all-in', self.players_chips[player]))
            
        return actions

    def all_players_checked(self):
        active_players = [i for i in range(self.num_players) if not self.folded[i] and self.players_chips[i] > 0]
        return all(self.players_acted_this_round[p] for p in active_players) and self.num_checks == len(active_players)

    def apply_action(self, player, action, amount=0):
        self.players_acted_this_round[player] = True
        
        if action == 'fold':
            self.folded[player] = True
            self.num_checks = 0
        elif action == 'call':
            amt = min(amount, self.players_chips[player])
            self.players_chips[player] -= amt
            self.current_bets[player] += amt
            self.pot += amt
            self.num_checks = 0
        elif action == 'raise':
            total = amount + max(0, max(self.current_bets) - self.current_bets[player])
            actual = min(total, self.players_chips[player])
            self.players_chips[player] -= actual
            self.current_bets[player] += actual
            self.pot += actual
            self.last_raise = amount
            self.num_checks = 0
        elif action == 'check':
            self.num_checks += 1
        elif action == 'all-in':
            amt = self.players_chips[player]
            self.players_chips[player] = 0
            self.current_bets[player] += amt
            self.pot += amt
            self.num_checks = 0
            
        self.actions.append((player, action, amount))
        
        # Check round completion
        active_players = [i for i in range(self.num_players) if not self.folded[i] and self.players_chips[i] > 0]
        round_complete = all(self.current_bets[p] == max(self.current_bets) for p in active_players)
        
        if round_complete or self.all_players_checked():
            self.advance_round()
            
        # Move to next active player
        self.move_to_next_player()

    def advance_round(self):
        self.round += 1
        self.current_bets = [0] * self.num_players
        self.players_acted_this_round = [False] * self.num_players
        self.num_checks = 0
        self.last_raise = self.big_blind
        
        if self.round == 1:  # Flop
            for _ in range(3):
                card = self.deck.deal()
                if card: self.community_cards.append(card)
        elif self.round in [2, 3]:  # Turn/River
            card = self.deck.deal()
            if card: self.community_cards.append(card)
            
        self.move_to_first_active_player()

    def move_to_next_player(self):
        start = self.current_player
        while True:
            self.current_player = (self.current_player + 1) % self.num_players
            if not self.folded[self.current_player] and self.players_chips[self.current_player] > 0:
                break
            if self.current_player == start:  # All players folded or bankrupt
                break

    def move_to_first_active_player(self):
        self.current_player = (self.dealer_position + 1) % self.num_players
        while self.folded[self.current_player] or self.players_chips[self.current_player] <= 0:
            self.current_player = (self.current_player + 1) % self.num_players

    def is_hand_over(self):
        active_players = [i for i in range(self.num_players) if not self.folded[i] and self.players_chips[i] > 0]
        return len(active_players) <= 1 or self.round >= 4

class CNN_DQN(nn.Module):
    def __init__(self, input_shape, output_size):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(input_shape[0], 32, 3, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 64, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Flatten()
        )
        self._conv_out = self._get_conv_out(input_shape)
        self.fc = nn.Sequential(
            nn.Linear(self._conv_out, 256),
            nn.ReLU(),
            nn.Linear(256, output_size)
        )

    def _get_conv_out(self, shape):
        with torch.no_grad():
            return self.conv(torch.zeros(1, *shape)).shape[1]

    def forward(self, x):
        return self.fc(self.conv(x))

class PokerAgent:
    def __init__(self, input_shape, action_size, device):
        self.device = device
        self.action_size = action_size
        self.model = CNN_DQN(input_shape, action_size).to(device)
        self.target_model = CNN_DQN(input_shape, action_size).to(device)
        self.target_model.load_state_dict(self.model.state_dict())
        self.optimizer = optim.Adam(self.model.parameters(), lr=0.0001)
        self.memory = deque(maxlen=50000)
        self.batch_size = 128
        self.gamma = 0.99
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.9995
        self.update_target_every = 100
        self.steps = 0

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        state = torch.FloatTensor(state).unsqueeze(0).to(self.device)
        with torch.no_grad():
            q_values = self.model(state)
        return torch.argmax(q_values).item()

    def replay(self):
        if len(self.memory) < self.batch_size:
            return None
            
        batch = random.sample(self.memory, self.batch_size)
        states, actions, rewards, next_states, dones = zip(*batch)
        
        states = torch.FloatTensor(np.array(states)).to(self.device)
        next_states = torch.FloatTensor(np.array(next_states)).to(self.device)
        actions = torch.LongTensor(actions).to(self.device)
        rewards = torch.FloatTensor(rewards).to(self.device)
        dones = torch.FloatTensor(dones).to(self.device)

        current_q = self.model(states).gather(1, actions.unsqueeze(1))
        next_q = self.target_model(next_states).max(1)[0].detach()
        target_q = rewards + (1 - dones) * self.gamma * next_q
        
        loss = nn.MSELoss()(current_q.squeeze(), target_q)
        
        self.optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
        self.optimizer.step()
        
        self.steps += 1
        if self.steps % self.update_target_every == 0:
            self.target_model.load_state_dict(self.model.state_dict())
            
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
            
        return loss.item()

def generate_state_tensor(game_state, player_idx):
    state = np.zeros((4, 13, 13), dtype=np.float32)
    
    # Private cards
    for i, card in enumerate(game_state.player_hands[player_idx]):
        if card:
            val = card.get_value() - 2
            suit = {'h':0, 'd':1, 'c':2, 's':3}[card.suit]
            state[0, suit, val] = 1.0
    
    # Community cards
    for i, card in enumerate(game_state.community_cards):
        if card:
            val = card.get_value() - 2
            suit = {'h':0, 'd':1, 'c':2, 's':3}[card.suit]
            state[1, suit, val] = 1.0
    
    # Betting info
    state[2, :, :] = min(1.0, sum(game_state.current_bets) / (game_state.big_blind * 20))
    
    # Player info
    state[3, 0, player_idx] = 1.0
    state[3, 1, 0] = game_state.players_chips[player_idx] / game_state.initial_chips
    
    return state

def train_agents(episodes=1000):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    input_shape = (4, 13, 13)
    action_size = 5
    agents = [PokerAgent(input_shape, action_size, device) for _ in range(4)]
    
    stats = {
        'rewards': [[] for _ in range(4)],
        'losses': [[] for _ in range(4)],
        'bankruptcies': [0]*4
    }
    
    action_name_map = {'fold':0, 'check':1, 'call':2, 'raise':3, 'all-in':4}
    
    for episode in tqdm(range(episodes), desc="Training"):
        game = PokerGameState()
        initial_chips = game.players_chips.copy()
        
        while not game.is_hand_over():
            p = game.current_player
            if game.folded[p] or game.players_chips[p] <= 0:
                game.move_to_next_player()
                continue
                
            state = generate_state_tensor(game, p)
            legal_actions = game.get_legal_actions(p)
            legal_indices = [action_name_map[a] for a, _ in legal_actions]
            
            action_idx = agents[p].act(state)
            
            # Find the corresponding legal action
            if action_idx in legal_indices:
                # Find position of this action in legal actions
                pos = legal_indices.index(action_idx)
                a_name, amount = legal_actions[pos]
            else:
                # Choose random legal action
                pos = random.randint(0, len(legal_actions)-1)
                a_name, amount = legal_actions[pos]
                
            game.apply_action(p, a_name, amount)
            
            next_state = generate_state_tensor(game, p)
            agents[p].remember(state, action_idx, 0, next_state, game.is_hand_over())
        
        # Calculate rewards
        for i, agent in enumerate(agents):
            chip_change = game.players_chips[i] - initial_chips[i]
            reward = np.tanh(chip_change / (game.big_blind * 5))
            
            if game.players_chips[i] <= 0:
                reward = -2.0
                stats['bankruptcies'][i] += 1
                game.players_chips[i] = 10  # Small rebuy
                
            if agent.memory:
                last_state, last_action, _, last_next_state, last_done = agent.memory[-1]
                agent.memory[-1] = (last_state, last_action, reward, last_next_state, True)
                stats['rewards'][i].append(reward)
            
            loss = agent.replay()
            if loss is not None:
                stats['losses'][i].append(loss)
    
    return agents, stats
def simulate_game(agents, num_hands=5):
    game = PokerGameState()
    for hand in range(num_hands):
        game.reset()
        print(f"\nHand {hand+1}:")
        print(f"Chips: {game.players_chips}")
        print(f"Dealer: {game.dealer_position}")
        
        while not game.is_hand_over():
            p = game.current_player
            if game.folded[p] or game.players_chips[p] <= 0:
                game.move_to_next_player()
                continue
                
            state = generate_state_tensor(game, p)
            legal_actions = game.get_legal_actions(p)
            action = agents[p].act(state)
            
            a_name, amount = legal_actions[action - [a for a,_ in legal_actions].index(['fold','check','call','raise','all-in'][action])]
            print(f"Player {p} {a_name.upper()} {amount if amount else ''}")
            game.apply_action(p, a_name, amount)
        
        print("\nFinal Stacks:", game.players_chips)
        print("Community:", game.community_cards)
        for i, hand in enumerate(game.player_hands):
            print(f"Player {i}: {hand} {'FOLDED' if game.folded[i] else ''}")

if __name__ == "__main__":
    trained_agents, stats = train_agents(episodes=1000)
    simulate_game(trained_agents, num_hands=3)

Training: 100%|██████████| 1000/1000 [01:28<00:00, 11.26it/s]


Hand 1:
Chips: [1000, 990, 970, 980]
Dealer: 1





ValueError: 'check' is not in list

In [53]:
trained_agents = train_cnn_agents(episodes=1000)



Training on cpu

Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Episode 1/1000 | Avg Reward: -2.000 | Avg Loss: nan | ε: 1.000 | Chips: [10, 10, 980, 10]
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 0 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 0 turn | Folded: False | Chips: 960
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Fold

Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 0 turn | Folded: False | Chips: 960
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 1 turn | Folded: False | Chips: 960
Player 2 turn | Folded: False | Chips: 940
Player 2 turn | Folded: False | Chips: 920
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 980
Player 0 turn | Folded: False | Chips: 960
Player 3 turn | Folded: False | Chips: 940
Player 3 turn | Folded: False | Chips: 940
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Pla

Episode 131/1000 | Avg Reward: -0.915 | Avg Loss: 0.094 | ε: 0.959 | Chips: [960, 990, 940, 960]
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 960
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 0 turn | Folded: False | C

Episode 171/1000 | Avg Reward: -0.829 | Avg Loss: 0.128 | ε: 0.921 | Chips: [1000, 980, 10, 980]
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 0 turn | Folded: False | Chips: 980
Player 0 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 3 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | 

Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Episode 211/1000 | Avg Reward: -0.980 | Avg Loss: 0.110 | ε: 0.885 | Chips: [1000, 10, 10, 960]
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 2 turn | Folded: False | Ch

Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 2 turn | Folded: False | Chips: 980
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 960
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 960
Player 0 turn | Folded: False | Chips: 960
Player 1 turn | Folded: False | Chips: 940
Player 2 turn | Folded: False | Chips: 940
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Pla

Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 0 turn | Folded: False | Chips: 980
Player 0 turn | Folded: False | Chips: 960
P

Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 960
Player 1 turn | Folded: False | Chips: 940
Player 2 turn | Folded: False | Chips: 940
Player 3 turn | Folded: False | Chips: 920
Player 3 turn | Folded: False | Chips: 920
Player 3 turn | Folded: False | Chips: 920
Player 1 turn | Folded: False | Chips: 920
Player 3 turn | Folded: False | Chips: 900
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 960
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 1 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 1000
Play

Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 0 turn | Folded: False | Chips: 960
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 960
Player 0 turn | Folded: False | Chips: 960
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 3 turn | Folded: False | Chips: 980
P

Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Episode 401/1000 | Avg Reward: -0.955 | Avg Loss: 0.059 | ε: 0.732 | Chips: [10, 10, 980, 1000]
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | 

Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 3 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 3 turn | Folded: False | Chips: 100

Episode 481/1000 | Avg Reward: -0.924 | Avg Loss: 0.050 | ε: 0.676 | Chips: [10, 10, 980, 920]
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 960
Player 0 turn | Folded: False | Chips: 960
Player 1 turn | Folded: False | Chips: 960
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 960
Player 3 turn | Folded: False | Chips: 960
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips

Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 0 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 0 turn | Folded: False | Chips: 960
Player 3 turn | Folded: False | Chips: 960
Player 0 turn | Folded: False | Chips: 940
Player 3 turn | Folded: False | Chips: 920
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
P

Episode 551/1000 | Avg Reward: -0.737 | Avg Loss: 0.048 | ε: 0.630 | Chips: [10, 960, 980, 1000]
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 960
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 980
Player 2 turn | Folded: False | Chips: 960
Player 3 turn | Folded: False | Chips: 960
Player 1 turn | Folded: False | Chips: 960
Player 2 turn | Folded: False | Chi

Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 960
Player 0 turn | Folded: False | Chips: 960
Player 2 turn | Folded: False | Chips: 940
Player 2 turn | Folded: False | Chips: 940
Player 2 turn | Folded: False | Chips: 940
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 2 turn | Folded: False | Chips: 960
Player 1 turn | Folded: False | Chips: 960
Episode 591/1000 | Avg Reward: -0.736 | Avg Lo

Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 3 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 3 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980

Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 0 turn | Folded: False | Chips: 980
Player 1 turn | Folded: False | Chips: 960
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 980
Player 0 turn | Folded: False | Chips: 960
Player 1 turn | Folded: False | Chips: 940
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 3 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 980
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Pla

Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 1 turn | Folded: False | Chips: 980
Player 2 turn | Folded: False | Chips: 960
Episode 701/1000 | Avg Reward: -0.668 | Avg Loss: 0.046 | ε: 0.542 | Chips: [1000, 960, 960, 1000]
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 960
Player 2 turn | Folded: False | Chips: 940
Player 2 turn | Folded: False | Chips: 940
Player 2 turn | Folded: False | Chips: 940
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 3 turn | Folded: False | C

Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 960
Player 1 turn | Folded: False | Chips: 940
Episode 741/1000 | Avg Reward: -0.655 | Avg Loss: 0.043 | ε: 0.521 | Chips: [1000, 10, 940, 10]
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Ch

Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 960
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 2 turn | Folded: False | Chips: 980
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 980
Player 1 turn | Folded: False | Chips: 960
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
P

Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 2 turn | Folded: False | Chips: 980
Player 0 turn | Folded: False | Chips: 980
Player 1 turn | Folded: False | Chips: 980
Player 0 turn | Folded: False | Chips: 960
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 2 turn | Folded: False | Chips: 980
Player 2 turn | Folded: False | Chips: 980
Player 0 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 1000
Pl

Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Episode 851/1000 | Avg Reward: -0.625 | Avg Loss: 0.042 | ε: 0.467 | Chips: [1000, 10, 980, 980]
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 960
Player 3 turn | Folded: False | Chips: 960
Player 1 turn | Folded: False | Chips: 960
Player 3 turn | Folded: False | Chips: 940
Player 1 turn | Folded: False | Chips: 940
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chi

Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 980
Player 1 turn | Folded: False | Chips: 960
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 2 turn | Folded: False | Chips: 980
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Episode 891/1000 | Avg Reward: -0.669 | Avg 

Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 2 turn | Folded: False | Chips: 980
Player 2 turn | Folded: False | Chips: 980
Player 1 turn | Folded: False | Chips: 980
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
E

Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 3 turn | Folded: False | Chips: 1000
Player 0 turn | Folded: False | Chips: 1000
Player 1 turn | Folded: False | Chips: 990
Player 2 turn | Folded: False | Chips: 980
Player 0 turn | Folded: False | Chips: 9

In [55]:
simulate_game(trained_agents, num_hands=20)


Hand 1:
Chips: [1000, 990, 970, 980]
Dealer: 1
Player 0 CALL 20
Player 1 FOLD 
Player 2 CALL 10
Player 3 FOLD 
Player 0 CHECK 
Player 0 FOLD 

Final Stacks: [980, 990, 960, 980]
Community: [2d, 4d, 9c, Ts, 4s]
Player 0: [9h, 5s] FOLDED
Player 1: [Ad, Qs] FOLDED
Player 2: [7s, 4c] 
Player 3: [Qc, Tc] FOLDED

Hand 2:
Chips: [960, 990, 960, 970]
Dealer: 2
Player 1 CALL 20
Player 2 CALL 20
Player 3 FOLD 
Player 1 CHECK 
Player 1 CHECK 
Player 1 CHECK 

Final Stacks: [960, 970, 940, 970]
Community: [7d, 9c, 4h, 7c, Kh]
Player 0: [Js, Ad] 
Player 1: [Kc, Th] 
Player 2: [Ah, Tc] 
Player 3: [5c, 5s] FOLDED

Hand 3:
Chips: [950, 950, 940, 970]
Dealer: 3
Player 2 CALL 20
Player 3 CALL 20
Player 0 CALL 10
Player 1 FOLD 
Player 2 FOLD 
Player 3 FOLD 

Final Stacks: [940, 950, 920, 950]
Community: [5d, Ad, 9c, 5s, 8c]
Player 0: [2d, 7d] 
Player 1: [Tc, Ah] FOLDED
Player 2: [6c, 9h] FOLDED
Player 3: [Ac, Qd] FOLDED

Hand 4:
Chips: [940, 940, 900, 950]
Dealer: 0
Player 3 FOLD 
Player 0 CALL 20
Playe

ValueError: 'call' is not in list

In [35]:
def verify_training_setup():
    print("\nTraining Setup Verification:")
    print(f"Batch size: {agents[0].batch_size}")
    print(f"Memory size: {len(agents[0].memory)}")
    print(f"Model architecture:\n{agents[0].model}")
    print(f"Optimizer: {agents[0].optimizer}")
verify_training_setup()


Training Setup Verification:


NameError: name 'agents' is not defined