In [None]:
import json
import re
from typing import List, Dict, Any, Optional

In [2]:
def parse_pokerstars_hand_history(file_path: str) -> List[Dict[str, Any]]:
    """Parse a PokerStars hand history file and return structured data.

    The file is read as UTF-8, cut into one chunk per hand at every
    ``PokerStars [Zoom ]Hand #`` / ``PokerStars [Zoom ]Game #`` marker, and
    each chunk is handed to ``parse_single_hand``.

    Args:
        file_path: Path of the hand history text file.

    Returns:
        One dict per hand, in file order. Chunks for which
        ``parse_single_hand`` returns a falsy value are left out.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Split on the hand marker. "Zoom " is optional so that regular tables are
    # recognised as well; the marker itself is consumed, which leaves every
    # chunk starting with the hand id. Element 0 is whatever precedes the
    # first marker (normally empty) and is discarded.
    hand_texts = re.split(r'PokerStars (?:Zoom )?(?:Hand|Game) #', content)[1:]

    parsed_hands = []
    for hand_text in hand_texts:
        hand_data = parse_single_hand(hand_text)
        if hand_data:
            parsed_hands.append(hand_data)

    return parsed_hands

def parse_single_hand(hand_text: str) -> Optional[Dict[str, Any]]:
    """Parse a single poker hand.

    Args:
        hand_text: Text of one hand with the leading ``PokerStars ... Hand #``
            marker already removed, so the first line starts with the hand id.

    Returns:
        A dict with the keys initialised below (header fields, players,
        blinds, per-street actions, board cards, showdown, pot, rake and
        winners), or ``None`` when ``hand_text`` is empty or whitespace only.
        A field whose line is missing or does not match its pattern keeps its
        default value.
    """
    stripped_text = hand_text.strip()
    if not stripped_text:
        # str.split always returns at least one element, so emptiness has to
        # be checked on the text itself rather than on the resulting list.
        return None

    lines = stripped_text.split('\n')

    hand_data = {
        "hand_id": "",
        "game_type": "",
        "stakes": "",
        "datetime": "",
        "datetime_et": "",
        "table": "",
        "max_players": 0,
        "button_seat": 0,
        "players": [],
        "hero": "",
        "hole_cards": [],
        "small_blind": {},
        "big_blind": {},
        "preflop_actions": [],
        "flop": [],
        "flop_actions": [],
        "turn": "",
        "turn_actions": [],
        "river": "",
        "river_actions": [],
        "showdown": [],
        "pot": 0.0,
        "rake": 0.0,
        "winners": []
    }

    # --- Header line: hand id, game type/stakes, local and ET timestamps ---
    header = lines[0]
    hand_id_match = re.search(r'^(\d+):', header)
    if hand_id_match:
        hand_data["hand_id"] = hand_id_match.group(1)

    game_match = re.search(r'Hold\'em No Limit \((€[\d.]+)/(€[\d.]+)\)', header)
    if game_match:
        hand_data["game_type"] = "Hold'em No Limit"
        hand_data["stakes"] = f"{game_match.group(1)}/{game_match.group(2)}"

    date_match = re.search(r'- (\d{4}/\d{2}/\d{2} \d{1,2}:\d{2}:\d{2} \w+) \[(\d{4}/\d{2}/\d{2} \d{1,2}:\d{2}:\d{2} \w+)\]', header)
    if date_match:
        hand_data["datetime"] = date_match.group(1)
        hand_data["datetime_et"] = date_match.group(2)

    # --- Table line: table name, table size and button seat ---
    table_line = lines[1] if len(lines) > 1 else ""
    table_match = re.search(r"Table '([^']+)' (\d+)-max Seat #(\d+) is the button", table_line)
    if table_match:
        hand_data["table"] = table_match.group(1)
        hand_data["max_players"] = int(table_match.group(2))
        hand_data["button_seat"] = int(table_match.group(3))

    # Substrings that mark a line as a betting action to hand to parse_action.
    # NOTE(review): parse_action also understands "Uncalled bet ..." and
    # "... collected ... from pot" lines, but those contain none of these
    # markers and therefore never reach it from here - confirm whether they
    # should be recorded as street actions.
    action_markers = (": folds", ": calls", ": raises", ": checks", ": bets", "all-in")
    betting_streets = ("preflop", "flop", "turn", "river")

    # Tracks which part of the hand the current line belongs to; actions are
    # only stored once the hero's cards have been dealt ("preflop" onwards).
    current_section = "players"

    for raw_line in lines[2:]:
        line = raw_line.strip()

        # Seat listing at the top of the hand (summary seat lines have no
        # "in chips" and fall through to the winners branch below).
        if line.startswith("Seat ") and "in chips" in line:
            seat_match = re.match(r'Seat (\d+): ([^\(]+) \(€([\d.]+) in chips\)', line)
            if seat_match:
                hand_data["players"].append({
                    "seat": int(seat_match.group(1)),
                    "name": seat_match.group(2).strip(),
                    "chips": float(seat_match.group(3))
                })

        # Blinds
        elif "posts small blind" in line:
            blind_match = re.match(r'([^:]+): posts small blind €([\d.]+)', line)
            if blind_match:
                hand_data["small_blind"] = {
                    "player": blind_match.group(1).strip(),
                    "amount": float(blind_match.group(2))
                }

        elif "posts big blind" in line:
            blind_match = re.match(r'([^:]+): posts big blind €([\d.]+)', line)
            if blind_match:
                hand_data["big_blind"] = {
                    "player": blind_match.group(1).strip(),
                    "amount": float(blind_match.group(2))
                }

        # Hero's hole cards; from here on actions count as preflop.
        elif line.startswith("Dealt to "):
            hero_match = re.match(r'Dealt to ([^\[]+) \[([^\]]+)\]', line)
            if hero_match:
                hand_data["hero"] = hero_match.group(1).strip()
                hand_data["hole_cards"] = hero_match.group(2).strip().split()
                current_section = "preflop"

        # Board sections. A bare "*** FLOP ***" line (no cards) still switches
        # the section; it just leaves the flop list empty.
        elif line.startswith("*** FLOP ***"):
            flop_match = re.search(r'\[([^\]]+)\]', line)
            if flop_match:
                hand_data["flop"] = flop_match.group(1).strip().split()
            current_section = "flop"

        elif line.startswith("*** TURN ***"):
            # First bracket repeats the earlier board; the second is the new card.
            turn_match = re.search(r'\[([^\]]+)\] \[([^\]]+)\]', line)
            if turn_match:
                hand_data["turn"] = turn_match.group(2).strip()
            current_section = "turn"

        elif line.startswith("*** RIVER ***"):
            river_match = re.search(r'\[([^\]]+)\] \[([^\]]+)\]', line)
            if river_match:
                hand_data["river"] = river_match.group(2).strip()
            current_section = "river"

        elif line.startswith("*** SHOW DOWN ***"):
            current_section = "showdown"

        elif line.startswith("*** SUMMARY ***"):
            current_section = "summary"

        # Betting actions, stored under the street they occurred on.
        elif any(marker in line for marker in action_markers):
            action = parse_action(line)
            if action and current_section in betting_streets:
                hand_data[f"{current_section}_actions"].append(action)

        # Cards shown at showdown
        elif current_section == "showdown" and ": shows" in line:
            showdown_match = re.match(r'([^:]+): shows \[([^\]]+)\] \(([^\)]+)\)', line)
            if showdown_match:
                hand_data["showdown"].append({
                    "player": showdown_match.group(1).strip(),
                    "cards": showdown_match.group(2).strip().split(),
                    "hand": showdown_match.group(3).strip()
                })

        # Pot and rake from the summary
        elif line.startswith("Total pot"):
            pot_match = re.search(r'Total pot €([\d.]+)', line)
            if pot_match:
                hand_data["pot"] = float(pot_match.group(1))
                # Look for the rake separately so that any text between the
                # pot amount and "| Rake" (e.g. a main/side pot breakdown)
                # does not hide it.
                rake_match = re.search(r'\| Rake €([\d.]+)', line)
                if rake_match:
                    hand_data["rake"] = float(rake_match.group(1))

        # Winners from the summary seat lines. Both "... collected (€x)" and
        # "... showed [..] and won (€x)" are accepted; the name group is lazy
        # so it stops before "showed", and any number of "(position)" tags
        # may sit between the name and the verb.
        elif current_section == "summary" and line.startswith("Seat "):
            winner_match = re.match(
                r'Seat \d+: ([^\(]+?) (?:\([^\)]+\) )*(?:showed .+ and )?(?:won|collected) \(€([\d.]+)\)',
                line
            )
            if winner_match:
                hand_data["winners"].append({
                    "player": winner_match.group(1).strip(),
                    "amount": float(winner_match.group(2))
                })

    return hand_data

def parse_action(line: str) -> Optional[Dict[str, Any]]:
    """Parse a single action line into a dict.

    The first keyword found, checked in this order, decides how the line is
    read: ``: folds``, ``: calls``, ``: raises``, ``: bets``, ``: checks``,
    ``Uncalled bet``, ``collected ... from pot``.

    Args:
        line: One stripped line of a hand history.

    Returns:
        A dict with at least ``"player"`` and ``"action"``. Calls, bets,
        uncalled bets and collections add ``"amount"``; raises add
        ``"raise_amount"`` and ``"total"``. Calls, raises and bets also carry
        an ``"all_in"`` boolean. ``None`` is returned when no keyword is
        present, or when the keyword is present but the rest of the line does
        not have the expected shape.
    """
    # The "and is all-in" suffix is recorded for every kind of wager, not
    # just bets, so callers can tell an all-in call or raise apart too.
    all_in = "and is all-in" in line

    # Fold
    if ": folds" in line:
        return {"player": line.split(":")[0].strip(), "action": "folds"}

    # Call
    if ": calls" in line:
        match = re.match(r'([^:]+): calls €([\d.]+)', line)
        if not match:
            return None
        return {
            "player": match.group(1).strip(),
            "action": "calls",
            "amount": float(match.group(2)),
            "all_in": all_in
        }

    # Raise: "raises €<increment> to €<total>"
    if ": raises" in line:
        match = re.match(r'([^:]+): raises €([\d.]+) to €([\d.]+)', line)
        if not match:
            return None
        return {
            "player": match.group(1).strip(),
            "action": "raises",
            "raise_amount": float(match.group(2)),
            "total": float(match.group(3)),
            "all_in": all_in
        }

    # Bet
    if ": bets" in line:
        match = re.match(r'([^:]+): bets €([\d.]+)', line)
        if not match:
            return None
        return {
            "player": match.group(1).strip(),
            "action": "bets",
            "amount": float(match.group(2)),
            "all_in": all_in
        }

    # Check
    if ": checks" in line:
        return {"player": line.split(":")[0].strip(), "action": "checks"}

    # Uncalled bet returned to the bettor
    if "Uncalled bet" in line:
        match = re.match(r'Uncalled bet \(€([\d.]+)\) returned to (.+)', line)
        if not match:
            return None
        return {
            "action": "uncalled_bet",
            "amount": float(match.group(1)),
            "player": match.group(2).strip()
        }

    # Collected pot
    if "collected" in line and "from pot" in line:
        match = re.match(r'([^:]+) collected €([\d.]+) from pot', line)
        if not match:
            return None
        return {
            "player": match.group(1).strip(),
            "action": "collected",
            "amount": float(match.group(2))
        }

    return None


In [None]:
def main(input_file: str = "./notebooks/data.txt", output_file: str = "poker_hands.json") -> None:
    """Parse a hand history file and write the result as JSON.

    Args:
        input_file: Path of the hand history text file to read. Defaults to
            the path this notebook was written against.
        output_file: Path of the JSON file to create or overwrite. The parsed
            hands are stored under a top-level ``"hands"`` key.
    """
    print(f"Parsing {input_file}...")
    hands = parse_pokerstars_hand_history(input_file)

    print(f"Found {len(hands)} hands")

    # ensure_ascii=False keeps non-ASCII characters (e.g. the euro sign in
    # the stakes) readable in the output instead of \u escapes.
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump({"hands": hands}, f, indent=2, ensure_ascii=False)

    print(f"Successfully wrote {len(hands)} hands to {output_file}")


if __name__ == "__main__":
    main()

Parsing test.txt...
Found 194 hands
Successfully wrote 194 hands to poker_hands.json
