In [None]:
!pip install treys



In [None]:
import random
from treys import Card, Evaluator
from collections import Counter
from itertools import combinations

# --- Card mapping ---
# rank_map: one-character rank -> numeric rank (2..14, with 'T' = 10 and 'A' = 14).
rank_map = {'2': 2, '3': 3, '4': 4, '5': 5, '6': 6, '7': 7,
            '8': 8, '9': 9, 'T': 10, 'J': 11, 'Q': 12, 'K': 13, 'A': 14}
# suit_map: suit symbol used in this notebook's card strings -> the suit letter
# that convert_to_treys() appends to the rank when building a treys card string.
suit_map = {'♠': 's', '♥': 'h', '♦': 'd', '♣': 'c'}

def convert_to_treys(card_str):
    """Translate a notebook card string into a treys card.

    The first character of card_str is taken as the rank and the second is
    looked up in suit_map; the two are concatenated and handed to Card.new.

    Raises:
        KeyError: if the second character is not a key of suit_map.
    """
    rank_char, suit_symbol = card_str[0], card_str[1]
    treys_code = f"{rank_char}{suit_map[suit_symbol]}"
    return Card.new(treys_code)

def hand_label(hand):
    """Return the shorthand label of a two-card starting hand.

    Args:
        hand: sequence of card strings whose first character is the rank and
            whose second character is the suit (e.g. 'A♠').

    Returns:
        The ranks ordered high-to-low, followed by 's' when the first two
        cards share a suit or 'o' when they do not (e.g. 'AKs', '72o').
        A pocket pair gets no suffix (e.g. 'AA'): a pair can never be suited,
        so the suffix carries no information and would stop the label from
        matching the conventional pair notation.
    """
    rank_order = '23456789TJQKA'
    ranks = sorted((card[0] for card in hand), key=rank_order.index, reverse=True)
    label = ''.join(ranks)

    # Pocket pair: conventional notation is just the two ranks.
    if len(ranks) == 2 and ranks[0] == ranks[1]:
        return label

    suited = hand[0][1] == hand[1][1]
    return label + ('s' if suited else 'o')

# Single treys Evaluator instance shared by Reward() and Multiplier().
evaluator = Evaluator()

def Reward(hand1, hand2, community):
    """Compare hand1 against hand2 on a shared board and return the outcome sign.

    Args:
        hand1: card strings (rank character + suit symbol) of the first hand.
        hand2: card strings of the opposing hand (a list, like community).
        community: list of community card strings.

    Returns:
        0 when hand2 does not qualify -- its best hand is a High Card, or a
        single pair whose rank is below 4 -- or when both hands score equally;
        1 when hand1's evaluator score is lower than hand2's (treys ranks
        stronger hands with lower scores); -1 when hand2's score is lower.
    """
    board = [convert_to_treys(c) for c in community]
    score1 = evaluator.evaluate(board, [convert_to_treys(c) for c in hand1])
    score2 = evaluator.evaluate(board, [convert_to_treys(c) for c in hand2])

    hand_type = evaluator.class_to_string(evaluator.get_rank_class(score2))

    if hand_type == "High Card":
        return 0

    # treys names the single-pair class "Pair"; comparing only against
    # "One Pair" left this qualification rule unreachable. Both spellings are
    # accepted so the check works whichever string the library returns.
    if hand_type in ("Pair", "One Pair"):
        rank_counts = Counter(c[0] for c in hand2 + community)
        paired_ranks = [rank_map[r] for r, n in rank_counts.items() if n >= 2]
        # A pair of 2s or 3s is not enough for hand2 to qualify.
        if max(paired_ranks, default=0) < 4:
            return 0

    if score1 < score2:
        return 1
    if score2 < score1:
        return -1
    return 0

def Multiplier(hand1, community):
    """Look up the payout multiplier for the hand class hand1 makes on the board.

    The hand is evaluated with the shared treys evaluator, its rank class is
    converted to a class name, and that name is looked up in the payout
    table below. Class names absent from the table pay 1.
    """
    payout_by_class = {
        "High Card": 1,
        "One Pair": 1,
        "Two Pair": 1,
        "Three of a Kind": 1,
        "Straight": 1,
        "Flush": 3,
        "Full House": 4,
        "Four of a Kind": 10,
        "Straight Flush": 25,
        "Royal Flush": 100,
    }

    hole_cards = [convert_to_treys(c) for c in hand1]
    board = [convert_to_treys(c) for c in community]
    rank_class = evaluator.get_rank_class(evaluator.evaluate(board, hole_cards))
    class_name = evaluator.class_to_string(rank_class)

    if class_name in payout_by_class:
        return payout_by_class[class_name]
    return 1

def finalReward(hand1, hand2, community, a):
    """Turn the showdown outcome and the chosen action into a scalar reward.

    Args:
        hand1, hand2, community: card-string collections forwarded to Reward()
            (and, for hand1/community, to Multiplier()).
        a: the action value that was taken.

    Returns:
        * Reward(...) == -1: 0 when a == -1, otherwise -a.
        * Reward(...) == 0: 1.
        * Reward(...) == 1: 1 + Multiplier(hand1, community) + a.
    """
    outcome = Reward(hand1, hand2, community)

    if outcome == -1:
        return 0 if a == -1 else -a

    if outcome == 0:
        return 1

    if outcome == 1:
        # NOTE(review): with a == -1 this evaluates to Multiplier(...) alone
        # (1 + m - 1) -- confirm that is the intended payoff for that action.
        return (1 + Multiplier(hand1, community)) + a

# Q-table: maps (state, action) -> running mean of the rewards sampled for it.
Q = {}
# Number of rewards folded into each Q entry so far, keyed like Q.
visit_counts = {}

GAMES_PER_HAND = 100  # simulated games per starting hand


def _update_q(state, action, reward):
    """Fold one sampled reward into the running mean stored at Q[(state, action)].

    The step size is 1 / (number of visits to this exact key). Weighting by
    the per-hand game index instead would shrink every key that is not
    visited on every game toward 0.
    """
    key = (state, action)
    n = visit_counts.get(key, 0) + 1
    visit_counts[key] = n
    previous = Q.get(key, 0)
    Q[key] = previous + (reward - previous) / n


# Uniform exploration of all starting hands
deck = [r + s for r in rank_map for s in suit_map]
starting_hands = list(combinations(deck, 2))

for hand1 in starting_hands:
    hand1 = list(hand1)
    # Both of these depend only on hand1, so compute them once per hand.
    t1 = tuple(sorted(hand1))
    undealt = [c for c in deck if c not in hand1]

    for _game in range(GAMES_PER_HAND):
        # Deal the rest of the cards (excluding hand1)
        remaining_deck = undealt[:]
        random.shuffle(remaining_deck)
        hand2 = [remaining_deck.pop(), remaining_deck.pop()]
        community = [remaining_deck.pop() for _ in range(5)]

        t2 = tuple(sorted(hand1 + community[:3]))
        t3 = tuple(sorted(hand1 + community))

        a = random.choice([0, 4])  # preflop: check or raise
        _update_q(t1, a, finalReward(hand1, hand2, community, a))

        if a == 0:
            a2 = random.choice([0, 2])  # after the first three community cards
            _update_q(t2, a2, finalReward(hand1, hand2, community, a2))

            if a2 == 0:
                a3 = random.choice([-1, 1])  # after all five community cards
                _update_q(t3, a3, finalReward(hand1, hand2, community, a3))



ModuleNotFoundError: No module named 'treys'

In [None]:
# Best preflop Q-value per state, kept in first-seen order. A dict gives O(1)
# "already handled" checks instead of rescanning a list for every Q key.
best_q_by_state = {}

for (state, action) in Q:
    if len(state) != 2:
        continue  # only preflop states

    # Each state shows up once per sampled action; evaluate it only once.
    if state in best_q_by_state:
        continue

    q_check = Q.get((state, 0), 0)
    q_raise = Q.get((state, 4), 0)
    best_q_by_state[state] = max(q_check, q_raise)

# Same shape as before: a list of (state, best_q) tuples.
best_action_q_values = list(best_q_by_state.items())

# Extract just the values and compute average (guarding the empty case, which
# would otherwise divide by zero).
if best_action_q_values:
    avg_q_value = sum(q for _, q in best_action_q_values) / len(best_action_q_values)
    print(f"Average Q-value of best preflop action: {avg_q_value:.2f}")
else:
    avg_q_value = float('nan')
    print("Average Q-value of best preflop action: n/a (Q has no preflop states)")

NameError: name 'Q' is not defined

In [None]:
# --- Q-values for preflop hands (2 cards only) ---
def print_preflop_q_values(hands_to_check=None):
    """Print the check and raise Q-values for each preflop hand label.

    Several two-card states (different suit combinations) share one label
    from hand_label(). The printed values are the mean over every such state
    found in Q, so no simulated data is dropped.

    Args:
        hands_to_check: optional collection of labels; when given (and
            non-empty) only those labels are printed. Labels must match the
            output of hand_label() exactly.
    """
    print("\nPreflop Q-values:")

    totals = {}          # label -> [sum of check Q, sum of raise Q, number of states]
    seen_states = set()  # each state appears once per sampled action in Q

    for (state, action) in Q:
        if len(state) != 2:
            continue  # only preflop
        if action not in (0, 4):
            continue
        if state in seen_states:
            continue
        seen_states.add(state)

        label = hand_label(state)
        if hands_to_check and label not in hands_to_check:
            continue

        acc = totals.setdefault(label, [0.0, 0.0, 0])
        # An action that was never sampled for this state contributes 0.
        acc[0] += Q.get((state, 0), 0)
        acc[1] += Q.get((state, 4), 0)
        acc[2] += 1

    # Sort for display: highest first rank first, then by label, descending.
    rank_order = '23456789TJQKA'
    ordered_labels = sorted(
        totals,
        key=lambda lbl: (rank_order.index(lbl[0]), lbl),
        reverse=True,
    )

    for label in ordered_labels:
        check_sum, raise_sum, n_states = totals[label]
        print(f"{label:>4} | Check: {check_sum / n_states:.2f} | Raise: {raise_sum / n_states:.2f}")

# Print Q-values for every preflop hand label:
print_preflop_q_values()

# Or restrict the report to specific labels (each entry must equal the string
# hand_label() produces for that hand):
# print_preflop_q_values(hands_to_check=['AA', 'AKs', '72o', 'JTo'])


Preflop Q-values:


NameError: name 'Q' is not defined

# Preflop Raise-Advantage Heatmap

In [None]:
import matplotlib.pyplot as plt
import numpy as np

def build_heatmap_matrix():
    """Build the 13x13 preflop raise-advantage matrix from the Q-table.

    Rows and columns are indexed by rank in the order 'AKQJT98765432'.
    Labels containing 's' are placed at (higher rank, lower rank); all other
    labels at (lower rank, higher rank), which puts pairs on the diagonal.

    Several two-card states (different suit combinations) map to the same
    cell, so each cell holds the mean of Q(raise) - Q(check) over every
    distinct state that maps to it. Cells with no data stay at 0.

    Returns:
        (heatmap, actions): the 13x13 numpy array and a dict mapping each
        populated (row, col) to its hand label.
    """
    rank_axis = 'AKQJT98765432'
    advantage_sum = np.zeros((13, 13))
    state_count = np.zeros((13, 13))
    actions = {}
    seen_states = set()  # each state appears once per sampled action in Q

    for (state, _action) in Q:
        if len(state) != 2:
            continue
        if state in seen_states:
            continue
        seen_states.add(state)

        label = hand_label(state)
        r1 = rank_axis.index(label[0])
        r2 = rank_axis.index(label[1])
        i, j = (r1, r2) if 's' in label else (r2, r1)

        q_raise = Q.get((state, 4), 0)
        q_check = Q.get((state, 0), 0)

        advantage_sum[i, j] += q_raise - q_check
        state_count[i, j] += 1
        actions[(i, j)] = label

    # Divide by the per-cell state count; empty cells divide 0 by 1 and stay 0.
    heatmap = advantage_sum / np.maximum(state_count, 1)
    return heatmap, actions

In [None]:
def plot_preflop_heatmap():
    """Draw the preflop raise-advantage matrix as an annotated heatmap.

    The matrix and labels come from build_heatmap_matrix(). Colors use the
    'coolwarm' map clipped to [-5, 5], both axes are ticked with the ranks
    'AKQJT98765432', and each populated cell is annotated with its label.
    """
    rank_labels = list('AKQJT98765432')
    tick_positions = np.arange(13)

    heatmap, actions = build_heatmap_matrix()

    fig, ax = plt.subplots(figsize=(10, 8))
    image = ax.matshow(heatmap, cmap='coolwarm', vmin=-5, vmax=5)
    plt.colorbar(image, label='Raise Advantage')

    # Axis ticks: one per rank on both axes.
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(rank_labels)
    ax.set_yticks(tick_positions)
    ax.set_yticklabels(rank_labels)

    # Hand labels, visited in row-major order.
    for (row, col) in sorted(actions):
        label = actions[(row, col)]
        if label:
            ax.text(col, row, label, va='center', ha='center', fontsize=6, color='black')

    ax.set_title("Preflop Raise Advantage Heatmap")
    plt.xlabel("Second Card")
    plt.ylabel("First Card")
    plt.show()


plot_preflop_heatmap()

NameError: name 'Q' is not defined