# 🎯 DQN Baccarat Player – Google Colab Notebook

In [2]:
# 📚 Install Dependencies
!pip install gym numpy torch matplotlib

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [1]:
# 🧠 Baccarat Environment
import random
import numpy as np
import gym
from gym import spaces

class BaccaratEnv(gym.Env):
    def __init__(self):
        super(BaccaratEnv, self).__init__()
        self.action_space = spaces.Discrete(4)  # 0: Banker, 1: Player, 2: Tie, 3: Sit Out
        self.observation_space = spaces.Box(low=0, high=3, shape=(10,), dtype=np.int32)
        self.reset()

    def reset(self):
        self.past_results = [random.choice([0, 1]) for _ in range(10)]
        self.bankroll = 1000
        self.done = False
        return np.array(self.past_results)

    def step(self, action):
        if self.done:
            return self.past_results, 0, True, {}

        outcome = random.choices([0, 1, 2], weights=[0.4586, 0.4462, 0.0952])[0]
        reward = 0
        if action == 3:
            reward = 0
        elif action == outcome:
            if outcome == 0:
                reward = 0.95 * 10
            elif outcome == 1:
                reward = 10
            elif outcome == 2:
                reward = 80
        else:
            reward = -10

        self.bankroll += reward
        self.past_results.pop(0)
        self.past_results.append(outcome)

        if self.bankroll <= 0:
            self.done = True

        return np.array(self.past_results), reward, self.done, {"bankroll": self.bankroll}


In [3]:
# 🎯 DQN Agent
import torch
import torch.nn as nn
import torch.optim as optim
from collections import deque
import random

class DQN(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(DQN, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, output_dim)
        )

    def forward(self, x):
        return self.net(x.float())


In [5]:
# 🔁 Training Loop
env = BaccaratEnv()
model = DQN(10, 4)
target_model = DQN(10, 4)
target_model.load_state_dict(model.state_dict())

optimizer = optim.Adam(model.parameters(), lr=0.001)
loss_fn = nn.MSELoss()
replay_buffer = deque(maxlen=5000)

def select_action(state, epsilon):
    if random.random() < epsilon:
        return env.action_space.sample()
    else:
        with torch.no_grad():
            return torch.argmax(model(torch.tensor(state))).item()

def train_model():
    if len(replay_buffer) < 64:
        return
    batch = random.sample(replay_buffer, 64)
    states, actions, rewards, next_states, dones = zip(*batch)

    states = torch.tensor(states)
    actions = torch.tensor(actions)
    rewards = torch.tensor(rewards)
    next_states = torch.tensor(next_states)
    dones = torch.tensor(dones, dtype=torch.float32)

    q_values = model(states).gather(1, actions.unsqueeze(1)).squeeze()
    next_q_values = target_model(next_states).max(1)[0]
    expected_q = rewards + (1 - dones) * 0.99 * next_q_values

    loss = loss_fn(q_values, expected_q.detach())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()


In [None]:
# 🚀 Run Training
num_episodes = 5000
epsilon = 1.0
epsilon_decay = 0.999
min_epsilon = 0.1

episode_rewards = []

for episode in range(num_episodes):
    state = env.reset()
    total_reward = 0
    done = False

    while not done:
        action = select_action(state, epsilon)
        next_state, reward, done, info = env.step(action)
        replay_buffer.append((state, action, reward, next_state, done))
        train_model()
        state = next_state
        total_reward += reward

    epsilon = max(min_epsilon, epsilon * epsilon_decay)
    episode_rewards.append(total_reward)

    if episode % 100 == 0:
        target_model.load_state_dict(model.state_dict())
        print(f"Episode {episode}, Reward: {total_reward}, Bankroll: {info['bankroll']}")


  states = torch.tensor(states)


Episode 0, Reward: -1009.0, Bankroll: -9.0
Episode 100, Reward: -1003.0, Bankroll: -3.0
Episode 200, Reward: -1000.0, Bankroll: 0.0
Episode 300, Reward: -1006.5, Bankroll: -6.5
Episode 400, Reward: -1000.0, Bankroll: 0.0
Episode 500, Reward: -1000.5, Bankroll: -0.5
Episode 600, Reward: -1001.5, Bankroll: -1.5
Episode 700, Reward: -1003.0, Bankroll: -3.0
Episode 800, Reward: -1005.0, Bankroll: -5.0


In [None]:
# 📊 Plot Training Progress
import matplotlib.pyplot as plt
plt.plot(episode_rewards)
plt.xlabel("Episode")
plt.ylabel("Total Reward")
plt.title("DQN Baccarat Agent Training Progress")
plt.show()


In [None]:
# 🧠 Function to Suggest the Next Bet Based on Recent Outcomes
def suggest_bet(past_results):
    """
    past_results: list of 10 recent results (0=Banker, 1=Player, 2=Tie)
    returns: bet suggestion from the trained model
    """
    state = torch.tensor(past_results).unsqueeze(0)
    with torch.no_grad():
        q_values = model(state)
    action = torch.argmax(q_values).item()
    action_map = {0: 'Banker', 1: 'Player', 2: 'Tie', 3: 'Sit Out'}
    return action_map[action]

# 🕹️ Interactive Assistant Function
def run_assistant():
    print("🎲 Real-Time Baccarat Assistant")
    print("🔢 Enter last 10 outcomes using numbers: 0 = Banker, 1 = Player, 2 = Tie")
    print("💡 Type 'exit' to quit.")

    while True:
        user_input = input("Your last 10 outcomes: ")
        if user_input.lower() == 'exit':
            print("👋 Exiting assistant.")
            break
        try:
            past_results = list(map(int, user_input.strip().split()))
            if len(past_results) != 10 or any(x not in [0, 1, 2] for x in past_results):
                print("⚠️ Please enter exactly 10 numbers using 0, 1, or 2.")
                continue
            suggestion = suggest_bet(past_results)
            print(f"🎯 Model suggests: **{suggestion}**")
        except Exception as e:
            print(f"❌ Error: {e}")


In [None]:
run_assistant()
