In [1]:
from pluribus.poker.deck import Deck
import math
import numpy as np
from itertools import combinations
import time
import dill as pickle

### Brief Look at Size of Problem

In [2]:
# additional resources:
# https://poker.cs.ualberta.ca/publications/2013-techreport-nl-size.pdf

In [3]:
# build the deck imported from pluribus.poker.deck
# (presumably the standard 52 cards; the "52 choose 2" count further down relies on that)
deck = Deck()

In [4]:
# Keep each card's stored `eval_card` number rather than the card object itself:
# when dealing with huge numbers of combos this is more performant, because the
# stored number can be evaluated directly.
cards = [card.eval_card for card in deck]

In [5]:
def get_card_combos(num_cards, cards):
    """
    Build every `num_cards`-sized combination of `cards`.

    cards: iterable of card values (Card.eval_card)
    returns: numpy array built from the list of combination tuples
    """
    # materialising the combinations as a list before handing them to numpy
    # was the most performant approach found so far
    all_combos = list(combinations(cards, num_cards))
    return np.asarray(all_combos)

In [6]:
def ncr(n,r):
    """
    helper function for calculating combination size
    n choose r

    n: total number of items (non-negative int)
    r: number of items chosen (non-negative int, r <= n)
    returns: the exact number of r-sized combinations as an int
    raises: ValueError (from math.factorial) if n, r or n - r is negative
    """
    # Floor division keeps everything in exact integer arithmetic. True division
    # would go through a float and silently lose precision once the result
    # exceeds 2**53. The quotient is always a whole number, so nothing is lost.
    return math.factorial(n) // (math.factorial(r) * math.factorial(n - r))

In [7]:
# time.perf_counter() is the clock meant for measuring short durations: it is
# monotonic and high resolution, unlike the wall-clock time.time()
start = time.perf_counter()
unique_starting_hands = get_card_combos(2, cards)  # every 2-card combination
end = time.perf_counter()
print(end - start)  # elapsed seconds

0.0016908645629882812


In [8]:
# number of rows should equal 52 choose 2 (see: ncr())
print(
    "unique starting_hands: ",
    len(unique_starting_hands),
)

unique starting_hands:  1326


In [9]:
# time.perf_counter() is the clock meant for measuring short durations: it is
# monotonic and high resolution, unlike the wall-clock time.time()
start = time.perf_counter()
# every 3-card combination drawn from the full `cards` list; starting hands are
# NOT factored in here (they are accounted for in the ncr() counts instead)
flops = get_card_combos(3, cards)
end = time.perf_counter()
print(end - start)  # elapsed seconds

0.012323856353759766


In [10]:
# the following counts treat Ah,Kh,Jh,7h as different from Ah,Kh,7h,Jh
# this might be fair to encode some sense of strategy

In [11]:
# each 2-card starting hand paired with any 3 of the remaining 50 cards
print(
    "unique flops with unique starting hands: ",
    ncr(52, 2) * ncr(50, 3),
)

unique flops with unique starting hands:  25989600


In [12]:
# starting hand x flop x one of the 47 cards left for the turn
print(
    "unique flops + turns: ",
    ncr(52, 2) * ncr(50, 3) * ncr(47, 1),
)

unique flops + turns:  1221511200


In [13]:
# starting hand x flop x turn (47 cards left) x river (46 cards left)
print(
    "unique flops + turns + rivers: ",
    ncr(52, 2) * ncr(50, 3) * ncr(47, 1) * ncr(46, 1),
)

unique flops + turns + rivers:  56189515200


In [14]:
# both the supplementary and this paper below mention

In [15]:
# we could also consider Ah,Kh,Jh,7h as not different than Ah,Kh,7h,Jh
# this would mean counting the board as a single unordered set of cards
# obviously, we have to consider starting hands as separate from the board
# as evaluating Ks,Jh on a Ah,Kh,7h board is different than Ah,Jh on a Ks,Kh,7h
# TODO (c): do we care about situations like this? --yes!

In [16]:
# example: starting hand x an unordered 3-card flop from the remaining 50 cards
print(
    "unique flops with unique starting hands: ",
    ncr(52, 2) * ncr(50, 3),
)

unique flops with unique starting hands:  25989600


In [17]:
# starting hand x an unordered 4-card board (flop + turn) from the remaining 50 cards
print(
    "unique flops + turns: ",
    ncr(52, 2) * ncr(50, 4),
)

unique flops + turns:  305377800


In [18]:
# starting hand x an unordered 5-card board (flop + turn + river) from the
# remaining 50 cards; the label previously said "flops + turns", copied from
# the 4-card cell
print(
    "unique flops + turns + rivers: ",
    ncr(52, 2) * ncr(50, 5),
)

unique flops + turns:  2809475760


In [19]:
# but, really the best way would be to work on how to get lossless 
# here is an example: https://poker.cs.ualberta.ca/publications/2013-techreport-nl-size.pdf
# it'll be a combination of treating strategically identical hands as the same, plus one of the methods above

In [20]:
# here's a demonstration of the size (not even the largest problem)
# TODO (c) we'll need to figure out how to apply lossless to each round
# even then we'll still be clustering 2,428,287,420!! (169, flop: 1,286,792, turn: 55,190,538, river: 2,428,287,420)
# unless we are using some sort of hip sampling trick

### Notes on the Size of the Problem as it relates to Clustering the Information Situations

- Turns out the second way depicted above will be fine (I think) for the imperfect recall paradigm
    - see here: pg.4 under heading "Computing the abstraction for round 1": http://www.cs.cmu.edu/~sandholm/gs3.aaai07.pdf
- Secondly, potentially use lossless as mentioned here
    - pg. 274 5th paragraph http://www.ifaamas.org/Proceedings/aamas2013/docs/p271.pdf
    - note also the two k-means algorithm efficiencies discussed in the paragraph
    - this is discussed in detail here: http://www.cs.cmu.edu/~sandholm/gs3.aaai07.pdf
- Thirdly, change the clustering problem to deal with indices of histograms, rather than the entire histogram
- I'm estimating 160 gigabytes to store the 2.5 billion x 8 numpy array needed to perform k means clustering on the river (using OCHS), or about 224 gigabytes to store the 3.5 billion X 8 numpy array needed to do the clustering for the river
    - However, on the turn the problem would be at a minimum 5.5 X 10^7 x 200, which is too big
    - can use sparse data here: https://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html