# Project 1: Poker Data
### September 27, 2015

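This notebook parses the hand (`hdb`), roster (`hroster`) and per-player (`pdb.*`) files of the IRC Poker Database into one dictionary per hand. Set `download_directory` in the cell below to the folder that contains the extracted database directories, then run the cell.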

In [3]:
# Parses the "hdb", "hroster" and "pdb.*" files from the IRC Poker Database: http://poker.cs.ualberta.ca/irc_poker_database.html
import xlrd
import os
import csv
from zipfile import ZipFile
import pprint
import re

# ---------------------------------------------------------------------------
# Local paths
# ---------------------------------------------------------------------------
# Folder that is walked for database directories (see loop_file_groups).
download_directory = "/Users/davidsmith/Downloads/"
# Single-file sample inputs, kept around for manual experiments:
#hdb_file = download_directory + "nolimit/199505/hdb"
#hroster_file = download_directory + "nolimit/199505/hroster"
#pdb_file = download_directory + "nolimit/199505/pdb/pdb.ShoelessJ"
#pdf_directory = download_directory + "/nolimit/199505/pdb/"
# Output path; nothing in this cell writes to it yet.
outfile = "{0}hands_output.txt".format(download_directory)

#def open_zip(datafile):
#    with ZipFile('{0}.zip'.format(datafile), 'r') as myzip:
#        myzip.extractall()

# ---------------------------------------------------------------------------
# Lookup tables shared by the parsers
# ---------------------------------------------------------------------------
# Stage label for each of the four "players/pot" columns of an hdb line,
# in column order.
pot_cats = ["flop", "turn", "river", "showdown"]

# Card rank letters that get a spelled-out value; any other rank character
# is stored as-is by the parsers.
deck = dict(A='ace', K='king', Q='queen', J='jack', T='10')

# Card suit letter -> suit name.
suits = dict(c='clubs', s='spades', h='hearts', d='diamonds')

# One-character betting codes found in pdb files -> readable action.
bet_action_codes = dict([
    ('-', 'no action'),
    ('B', 'blind bet'),
    ('f', 'fold'),
    ('k', 'check'),
    ('b', 'bet'),
    ('c', 'call'),
    ('r', 'raise'),
    ('A', 'all-in'),
    ('Q', 'quits game'),
    ('K', 'kicked from game'),
])

# Stage label for each of the four betting-round columns of a pdb line.
bet_action_cats = ["pre-flop", "flop", "turn", "river"]

# Matches directory paths that end in six digits (e.g. ".../199504").
folder_search_re = re.compile(r'\d{6}$', re.IGNORECASE)


def parse_hdb_file(hdb_file, hands):
    """Parse one ``hdb`` file and add a hand dict per line to ``hands``.

    Every non-blank line is read as fixed-width columns: a 9-character hand
    identifier, dealer, hand number, number of players, four
    ``players/pot`` pairs (labelled with ``pot_cats`` in order) and up to
    five two-character board cards.

    Args:
        hdb_file: "/"-separated path to the file.  The two directory names
            above the file (e.g. ``holdem`` and ``199504``) form the id
            prefix, and the first of them is stored as ``game_type``.
        hands: dict mapping hand id -> hand dict; updated in place.

    Returns:
        Tuple ``(hands, id_prefix)``.

    Raises:
        IndexError: if the path has fewer than three "/"-separated parts or
            a pot column does not contain a "/".
        KeyError: if a board card has an unknown suit letter.
    """
    # Column slices of the five board cards and the stage each slot
    # belongs to (slots 1-3 flop, 4 turn, 5 river).
    board_slots = (
        (58, 60, "flop"),
        (61, 63, "flop"),
        (64, 66, "flop"),
        (67, 69, "turn"),
        (70, 72, "river"),
    )
    pot_slots = ((23, 31), (31, 39), (39, 48), (48, 57))

    split_filename = hdb_file.split("/")
    game_type = split_filename[-3]
    id_prefix = game_type + "_" + split_filename[-2] + "_"

    # The with-statement closes the file; no explicit close() is needed.
    with open(hdb_file, "r") as hdb:
        for line in hdb:
            # Blank (e.g. trailing) lines carry no hand and would otherwise
            # fail while splitting the pot columns.
            if not line.strip():
                continue

            _id = id_prefix + line[:9]

            pots = []
            for stage, (start, end) in zip(pot_cats, pot_slots):
                parts = line[start:end].strip().split("/")
                pots.append({
                    "stage": stage,
                    "num_players": parts[0],
                    "starting_pot_size": parts[1],
                })

            board = []
            for start, end, stage in board_slots:
                card = line[start:end].strip()
                if not card:
                    continue
                # The stage comes from the slot position, not from looking
                # the card text up again, so repeated card strings cannot be
                # mislabelled.
                board.append({
                    "stage": stage,
                    "value": deck.get(card[0], card[0]),
                    "suit": suits[card[1]],
                })

            hands[_id] = {
                "_id": _id,
                "game_type": game_type,
                "dealer": line[10:13].strip(),
                "hand_num": line[14:19].strip(),
                "num_players": line[20:22].strip(),
                "pots": pots,
                "board": board,
            }
    return hands, id_prefix


def parse_hroster_file(hroster_file, id_prefix, hands):
    """Attach the player roster from an ``hroster`` file to each hand.

    Every non-blank line starts with the 9-character hand identifier; the
    player names are the whitespace-separated tokens from column 13 onward.
    For each line, ``hands[id]["players"]`` is set to a dict mapping
    username -> ``{"username": username}``.

    Args:
        hroster_file: path to the roster file.
        id_prefix: prefix prepended to the identifier to form the hand id.
        hands: dict mapping hand id -> hand dict; updated in place.

    Returns:
        The same ``hands`` dict.

    Raises:
        KeyError: if a roster line refers to a hand id missing from
            ``hands``.
    """
    # The with-statement closes the file; no explicit close() is needed.
    with open(hroster_file, "r") as hroster:
        for line in hroster:
            # Skip blank lines instead of failing on a bogus hand id.
            if not line.strip():
                continue
            _id = id_prefix + line[:9]
            # split() with no argument drops empty tokens, so repeated or
            # trailing spaces never produce a player named "".
            usernames = line[13:].split()
            hands[_id]["players"] = {
                name: {"username": name} for name in usernames
            }
    return hands

    
def parse_pdb_file(pdb_file, id_prefix, hands):
    """Merge one player's ``pdb.<username>`` file into ``hands``.

    Every non-blank line is read as: hand identifier in columns 10-18,
    position in columns 23-24, then whitespace-separated fields from column
    26 onward -- four betting-round strings (labelled with
    ``bet_action_cats`` in order), bankroll, action, winnings and,
    optionally, two hole cards.

    Args:
        pdb_file: path to the file; the username is the part of the file
            name after the ``pdb.`` prefix.
        id_prefix: prefix prepended to the identifier to form the hand id.
        hands: dict mapping hand id -> hand dict (each already holding a
            ``"players"`` dict); updated in place.

    Returns:
        The same ``hands`` dict.

    Raises:
        KeyError: if the hand or the player is missing from ``hands``, or a
            betting code / suit letter is unknown.
        ValueError: if a numeric field is missing or not an integer.
    """
    base_name = os.path.basename(pdb_file)
    if base_name.startswith("pdb."):
        # Take everything after the prefix so a name containing "." is kept
        # whole.
        username = base_name[len("pdb."):]
    else:
        username = pdb_file.split(".")[-1]

    # The with-statement closes the file; no explicit close() is needed.
    with open(pdb_file, "r") as pdb:
        for line in pdb:
            # Blank lines carry no data and would fail the unpacking below.
            if not line.strip():
                continue

            _id = id_prefix + line[10:19]
            position = line[23:25].strip(" ")
            fields = line[26:].split()

            # The first four fields are the betting rounds.  They are taken
            # by position; looking a field up by value could confuse a later
            # field (e.g. a hole card) with an identical betting string.
            bet_actions = []
            for stage, codes in zip(bet_action_cats, fields[:4]):
                bet_actions.append({
                    "actions": [bet_action_codes[code] for code in codes],
                    "stage": stage,
                })

            bankroll, action, winnings = fields[4:7]

            # Hole cards are recorded only when exactly nine fields are
            # present.
            player_cards = []
            if len(fields) == 9:
                for card in fields[7:9]:
                    player_cards.append({
                        "value": deck.get(card[0], card[0]),
                        "suit": suits[card[1]],
                    })

            player = hands[_id]["players"][username]
            player["bet_actions"] = bet_actions
            player["bankroll"] = int(bankroll)
            player["action"] = int(action)
            player["winnings"] = int(winnings)
            player["player_cards"] = player_cards
            player["position"] = int(position)
    return hands



def loop_pdb_files(pdb_file_directory, hands_col, id_prefix):
    """Parse every ``pdb.*`` file found under ``pdb_file_directory``.

    Args:
        pdb_file_directory: directory walked recursively for player files.
        hands_col: dict mapping hand id -> hand dict; updated in place by
            ``parse_pdb_file``.
        id_prefix: prefix used to build hand ids.

    Returns:
        The same ``hands_col`` dict.
    """
    for root, _dirs, files in os.walk(pdb_file_directory, topdown=False):
        for name in files:
            # Only player files are parsed; stray files in the directory
            # (e.g. ".DS_Store") would otherwise crash the parser.
            if not name.startswith("pdb."):
                continue
            pdb_file = os.path.join(root, name)
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("Processing " + pdb_file + "...")
            parse_pdb_file(pdb_file, id_prefix, hands_col)
    print("...Finished processing PDB files.")
    return hands_col


def loop_file_groups(base_directory=None):
    """Parse every database directory found under ``base_directory``.

    A database directory is one whose path ends in six digits (matched by
    ``folder_search_re``) and that contains an ``hdb`` and an ``hroster``
    file; its ``pdb/`` sub-directory is parsed as well.

    Args:
        base_directory: folder to walk; defaults to the module-level
            ``download_directory``.

    Returns:
        A list with one hand dict per parsed hand.
    """
    if base_directory is None:
        base_directory = download_directory

    hands = {}
    for root, _dirs, _files in os.walk(base_directory, topdown=False):
        if not folder_search_re.search(root):
            continue

        # Paths are built with "/" because parse_hdb_file splits the path
        # on "/" to derive the id prefix.
        hdb_file = root + "/" + "hdb"
        hroster_file = root + "/" + "hroster"
        pdb_directory = root + "/" + "pdb/"

        # Unrelated folders whose names happen to end in six digits are
        # skipped instead of aborting the whole run.
        if not (os.path.isfile(hdb_file) and os.path.isfile(hroster_file)):
            print("Skipping " + root + " (no hdb/hroster files)")
            continue

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Processing " + hdb_file + "...")
        hands, idp = parse_hdb_file(hdb_file, hands)
        print("Processing " + hroster_file + "...")
        hands = parse_hroster_file(hroster_file, idp, hands)
        print("Processing " + pdb_directory + "...")
        hands = loop_pdb_files(pdb_directory, hands, idp)

    # list(...) so callers get a real list on Python 3 as well.
    return list(hands.values())
            
    
# Parse everything under download_directory and report how many hands were
# collected.  print(...) with one argument works on Python 2 and 3.
hnds = loop_file_groups()
print(len(hnds))

# Still to do 9/28/15:
# Download the files directly from the Internet (http://poker.cs.ualberta.ca/IRCdata/)
# DONE Loop through hdb and hroster files, DONE --> be sure to tack YearMonth onto front of timestamp field
# Loop through tgz files
# Change working directory?

Processing /Users/davidsmith/Downloads/holdem/199504/hdb...
Processing /Users/davidsmith/Downloads/holdem/199504/hroster...
Processing /Users/davidsmith/Downloads/holdem/199504/pdb/...
Processing /Users/davidsmith/Downloads/holdem/199504/pdb/pdb.[[...
Processing /Users/davidsmith/Downloads/holdem/199504/pdb/pdb.[]...
Processing /Users/davidsmith/Downloads/holdem/199504/pdb/pdb.[]bot...
Processing /Users/davidsmith/Downloads/holdem/199504/pdb/pdb.][...
Processing /Users/davidsmith/Downloads/holdem/199504/pdb/pdb.^...
Processing /Users/davidsmith/Downloads/holdem/199504/pdb/pdb.^-__-^...
Processing /Users/davidsmith/Downloads/holdem/199504/pdb/pdb.^^^^^...
Processing /Users/davidsmith/Downloads/holdem/199504/pdb/pdb.^^^^^^...
Processing /Users/davidsmith/Downloads/holdem/199504/pdb/pdb.^awhyde...
Processing /Users/davidsmith/Downloads/holdem/199504/pdb/pdb.^jc...
Processing /Users/davidsmith/Downloads/holdem/199504/pdb/pdb.^jcc...
Processing /Users/davidsmith/Downloads/holdem/199504/pdb/