<div class="alert alert-block alert-info">
<h1> A small library for simple discrete Probability tasks like card games, urns etc.</h1>
<h2> classes: ProbDist(dict), Dist(Counter)</h2>
<h2> functions: P(event, space), such_that, cross, choose, balls, combos, select, joint</h2>
</div>

In [120]:
import itertools #for combos
from fractions import Fraction #for results of P
from math import factorial #for choose
import random #to sample randomly
from collections import Counter # for Dist class

# class: ProbDistribution 
## a dictionary which maps {outcome: probability} and normalizes the values so that they sum to 1

In [121]:
class ProbDist(dict):
    """A Probability Distribution; an {outcome: probability} mapping.

    The constructor accepts anything `dict.update` accepts (a mapping, an
    iterable of pairs, and/or keyword arguments) holding non-negative
    weights, and rescales them so that the stored values sum to 1.

    Raises:
        AssertionError: if any weight is negative.
        ZeroDivisionError: if there is at least one outcome but all weights
            are zero (the distribution cannot be normalized).
    """
    def __init__(self, mapping=(), **kwargs):
        self.update(mapping, **kwargs)
        # Validate the raw weights BEFORE normalizing: dividing by a negative
        # total would flip the signs and let all-negative input slip through.
        for outcome, weight in self.items():
            assert weight >= 0, "negative weight %r for outcome %r" % (weight, outcome)
        # Make probabilities sum to 1.0
        total = sum(self.values())
        for outcome in self:
            self[outcome] = self[outcome] / total
            

In [122]:
# Test case: Dr Gelb and Dr Blau, an example of Simpson's paradox.
# Outcome keys are two letters: S = success, F = fail, H = heart, B = band-aid.
# The keyword values are raw weights; ProbDist rescales them so they sum to 1.

DrGelb = ProbDist(SH=70, FH=20, SB=10, FB=0)
DrBlau = ProbDist(SH= 2, FH= 8, SB=81, FB=9)
print(DrGelb)
print(DrBlau)

{'SH': 0.7, 'FH': 0.2, 'SB': 0.1, 'FB': 0.0}
{'SH': 0.02, 'FH': 0.08, 'SB': 0.81, 'FB': 0.09}


## use case: Simpson's Paradox

In [123]:
# Compare heart operations: share of successes among heart outcomes, SH / (SH + FH), per doctor
print(DrGelb['SH']/(DrGelb['SH']+DrGelb['FH']) > DrBlau['SH']/(DrBlau['SH']+DrBlau['FH']))
# Compare band-aid operations: share of successes among band-aid outcomes, SB / (SB + FB), per doctor
print(DrGelb['SB']/(DrGelb['SB']+DrGelb['FB']) > DrBlau['SB']/(DrBlau['SB']+DrBlau['FB']))
print('Dr Gelb is more successful if both parts are taken seperately')

True
True
Dr Gelb is more successful if both parts are taken seperately


In [124]:
# Compare all operations: total success probability (SH + SB) of each doctor
print((DrGelb['SH']+DrGelb['SB']) > (DrBlau['SH']+DrBlau['SB']))
print('yet if we take Total Propabilies NOT so !')

False
yet if we take Total Propabilies NOT so !


# class: Distribution
## similar to ProbDist yet NO normalization AND very versatile constructor

In [125]:
class Dist(Counter): 
    """A Distribution of {outcome: frequency} pairs.

    A plain `collections.Counter` subclass that adds no behavior of its own:
    it can be built from an iterable of outcomes (repetition gives the
    frequency), from an {outcome: frequency} mapping, and/or from keyword
    arguments. Frequencies are stored as given; nothing is normalized.
    """

In [126]:
#test cases: 
## A set of equiprobable outcomes:
Dist({1, 2, 3, 4, 5, 6})


Dist({1: 1, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1})

In [127]:
#test cases: 
##A collection of outcomes, with repetition indicating frequency:
Dist('THHHTTHHT')


Dist({'T': 4, 'H': 5})

In [128]:
#test cases: 
## A set of equiprobable outcomes:
Dist({1, 2, 3, 4, 5, 6})


Dist({1: 1, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1})

In [129]:
#test cases: 
## A mapping of {outcome: frequency} pairs:
Dist({'H': 5, 'T': 4})

Dist({'H': 5, 'T': 4})

In [130]:
#test cases: 
## Keyword arguments:
Dist(H=5, T=4) == Dist({'H': 5}, T=4) == Dist('TTTT', H=5)


True

# function: P(event,space)

In [131]:
def P(event, space): 
    """The probability of an event, given a sample space.

    event: a collection of outcomes, or a predicate that is true of outcomes
           in the event.
    space: a set of equiprobable outcomes, or a ProbDist of
           {outcome: probability}.

    For a ProbDist the result is the sum of the probabilities of the outcomes
    in the event. Otherwise the result is an exact Fraction
    |event & space| / |space|; in that case `event` (after a predicate has
    been resolved) and `space` must both support the `&` set operation."""
    weighted = isinstance(space, ProbDist)
    if callable(event):
        if weighted:
            # Sum directly instead of going through such_that(): that helper
            # builds a renormalized ProbDist, which divides by zero when the
            # predicate only matches outcomes of probability 0.
            return sum(space[o] for o in space if event(o))
        event = such_that(event, space)
    if weighted:
        return sum(space[o] for o in space if o in event)
    return Fraction(len(event & space), len(space))


In [132]:
#test case for P(event,space)
D     = {1, 2, 3, 4, 5, 6} # a sample space
even  = {   2,    4,    6} # an event

P(even, D)

Fraction(1, 2)

# such_that : outcomes in the sample space for which the predicate is true

In [133]:
def such_that(predicate, space): 
    """Restrict a sample space to the outcomes that satisfy `predicate`.

    A ProbDist input yields a new ProbDist built from the surviving
    {outcome: probability} entries (the ProbDist constructor renormalizes
    them); any other iterable yields a plain set of the surviving outcomes."""
    if not isinstance(space, ProbDist):
        return set(filter(predicate, space))
    kept = {}
    for outcome in space:
        if predicate(outcome):
            kept[outcome] = space[outcome]
    return ProbDist(kept)

In [134]:
#test case, "B"oys and "G"irls, first letter is OLDER
#e.g. 'BG' to denote the outcome in which the older child is a boy and the younger a girl.

S = {'BG', 'BB', 'GB', 'GG'}

In [135]:
def two_boys(outcome):
    "True when the outcome contains exactly two 'B' entries (both children are boys)."
    boys = outcome.count('B')
    return boys == 2

def older_is_a_boy(outcome):
    "True when the outcome starts with 'B' (the first letter denotes the older child)."
    first_is_boy = outcome.startswith('B')
    return first_is_boy

P(two_boys, such_that(older_is_a_boy, S))

Fraction(1, 2)

In [136]:
def at_least_one_boy(outcome):
    "True when 'B' occurs anywhere in the outcome."
    has_boy = 'B' in outcome
    return has_boy

P(two_boys, such_that(at_least_one_boy, S))

Fraction(1, 3)

In [137]:
such_that(at_least_one_boy, S)

{'BB', 'BG', 'GB'}

# cross function (Cartesian product)

In [138]:
def cross(A, B):
    "Every distinct `a + b` obtained by pairing one item of collection A with one item of B, as a set."
    combined = set()
    for a in A:
        for b in B:
            combined.add(a + b)
    return combined


In [139]:
#test case for "cross"
suits = u'♥♠♦♣'
ranks = u'AKQJT98765432'

myDeck = cross(suits,ranks)
print(myDeck)

{'♥T', '♥2', '♠8', '♠A', '♠J', '♦7', '♥6', '♣T', '♣A', '♦5', '♠2', '♥A', '♣K', '♥J', '♠4', '♦6', '♠9', '♦2', '♥3', '♠7', '♦J', '♥5', '♠5', '♣8', '♥7', '♦K', '♦9', '♣3', '♣4', '♦A', '♣6', '♠K', '♥K', '♠Q', '♠6', '♣9', '♦4', '♣J', '♠3', '♦3', '♥4', '♥8', '♥9', '♦8', '♠T', '♦Q', '♣7', '♥Q', '♣2', '♣Q', '♣5', '♦T'}


In [140]:
# Test case for "cross": collect every card of the club suit (♣).
# Each card string is suit + rank, so s[0] is the suit symbol.
mycross = {s for s in myDeck if s[0]==u'♣'}
len(mycross)

13

In [141]:
#test case for "cross"
bgbirthdays = cross('BG', '1234567')
print(bgbirthdays)

{'G5', 'G3', 'G7', 'B3', 'B6', 'B5', 'G2', 'G1', 'G6', 'B4', 'B1', 'B7', 'G4', 'B2'}


In [142]:
#test case for "cross"
S3 = cross(bgbirthdays, bgbirthdays);len(S3)

196

# choose function (binomial coefficient)

In [92]:
def choose(n, c):
    """Number of ways to choose c items from a list of n items.

    This is the binomial coefficient n! / ((n - c)! * c!), computed with
    exact integer arithmetic. Choosing more items than are available
    (c > n >= 0) is impossible, so the result is 0 in that case.

    Raises:
        ValueError: if n or c is negative (propagated from math.factorial).
    """
    if 0 <= n < c:
        # factorial(n - c) would reject the negative argument; there are
        # simply no such selections.
        return 0
    return factorial(n) // (factorial(n - c) * factorial(c))

In [93]:
choose(9, 6)

84

# fill an urn : many balls 

In [94]:
def balls(color, n):
    "The set of labels color+'1' ... color+str(n): n numbered balls of one color."
    labels = set()
    for number in range(1, n + 1):
        labels.add(color + str(number))
    return labels


In [95]:
# Test case: an urn with 6 blue, 9 red and 8 white numbered balls
urn = balls('B', 6) | balls('R', 9) | balls('W', 8)
print(len(urn))
# random.sample needs a sequence: sampling straight from a set is deprecated
# since Python 3.9 and raises TypeError from 3.11 on. Sorting gives a
# deterministic order, so a seeded run is reproducible.
random.sample(sorted(urn), 7)

23


['B5', 'R5', 'W8', 'R9', 'R7', 'W3', 'W7']

# combos => fully generate all combinations

In [96]:
def combos(items, n):
    "The set of all n-item combinations of `items`, each rendered as one space-separated str."
    rendered = set()
    for combo in itertools.combinations(items, n):
        rendered.add(' '.join(combo))
    return rendered

In [97]:
# Test case: every 5-card hand that can be dealt from the 52-card deck
Hands = combos(myDeck, 5)
print(len(Hands))
# random.sample needs a sequence: sampling straight from a set is deprecated
# since Python 3.9 and raises TypeError from 3.11 on. Sorting gives a
# deterministic order, so a seeded run is reproducible.
random.sample(sorted(Hands), 7)

2598960


['♠J ♥6 ♦J ♦3 ♦Q',
 '♠A ♦9 ♥4 ♦8 ♣2',
 '♥2 ♠4 ♥3 ♦4 ♠T',
 '♥6 ♥J ♠Q ♥4 ♠T',
 '♥2 ♠J ♦7 ♣K ♦J',
 '♣K ♦2 ♥K ♠T ♣Q',
 '♥J ♠9 ♥K ♣2 ♣Q']

In [99]:
#test case
flush = {hand for hand in Hands if any(hand.count(suit) == 5 for suit in suits)}
P(flush, Hands)

Fraction(33, 16660)

In [100]:
four_kind = {hand for hand in Hands if any(hand.count(rank) == 4 for rank in ranks)}

P(four_kind, Hands)

Fraction(1, 4165)

## use case: hands which contain 3 aces

In [146]:
def _3aces(sample): return sample.count('A') == 3

In [147]:
P(_3aces, Hands)

Fraction(94, 54145)

In [158]:
# Test case 1: probability of exactly three aces in a 6-card hand.
# NOTE: this rebinds `Hands`, which held the 5-card hands until here,
# to the 6-card hands; cells run after this one see the 6-card space.
Hands = combos(myDeck, 6)
exact = P(_3aces, Hands)
print(exact)

184/54145


In [149]:
_3acesin4 = choose(4,3);print(_3acesin4)

4


In [152]:
_3nonacesin48 = choose(48,3);print(_3nonacesin48)

17296


In [171]:
# Test case 2: every way of drawing 6 balls from the urn
U6 = combos(urn, 6)
# random.sample needs a sequence: sampling straight from a set is deprecated
# since Python 3.9 and raises TypeError from 3.11 on. Sorting gives a
# deterministic order, so a seeded run is reproducible.
random.sample(sorted(U6), 5)

['R4 R9 B4 R6 R3 B1',
 'R4 W4 R9 R2 B5 R1',
 'R6 R8 B5 W2 R7 R1',
 'B6 B2 R6 R8 B3 B1',
 'W3 R6 W2 W5 W8 R1']

## function: select n balls of a given color in an urn

In [178]:
def select(color, n, space=None):
    """The subset of the sample space with exactly `n` balls of given `color`.

    color: the text identifying a color inside an outcome string (e.g. 'R');
           occurrences are counted with str.count, so it must not appear in
           the outcomes for any other reason.
    n:     the required number of occurrences.
    space: an iterable of outcome strings. When omitted, the module-level
           `U6` is looked up at call time, so the function always sees the
           current `U6` and can be defined before `U6` exists.
    """
    if space is None:
        # Resolved on each call rather than frozen into the signature when
        # the function was defined.
        space = U6
    return {s for s in space if s.count(color) == n}

In [180]:
#test case
U6 = combos(urn, 6)
N = len(U6)

assert N * P(select('R', 6), U6) == choose(9, 6)

In [174]:
N * P(select('B', 3) & select('W', 2) & select('R', 1), U6) == choose(6, 3) * choose(8, 2) * choose(9, 1)

True

In [175]:
N * P(select('W', 4), U6) == choose(8, 4) * choose(6 + 9, 2)  # (6 + 9 non-white balls)

True

In [176]:
P(select('W', 3), U6)

Fraction(3640, 14421)

## function: joint gives the distribution of two independent distributions.

In [182]:
def joint(A, B, combine='{}{}'.format):
    """The joint distribution of two independent distributions.

    A, B:    {outcome: frequency} mappings.
    combine: callable merging one outcome of A and one of B into the joint
             outcome; by default their string forms are concatenated.

    Returns a Dist whose entries have the form
    {combine(a, b): frequency(a) * frequency(b)}. When several (a, b) pairs
    combine to the same joint outcome, their frequencies are added together.
    """
    result = Dist()
    for a in A:
        for b in B:
            # Accumulate instead of assigning: a plain dict comprehension
            # would keep only the last pair for a repeated key and silently
            # drop the weight of the others.
            result[combine(a, b)] += A[a] * B[b]
    return result

In [185]:
#test case
die6 = Dist({6: 1/6, '-': 5/6})

two6s = joint(die6, die6)
two6s

Dist({'66': 0.027777777777777776,
      '6-': 0.1388888888888889,
      '-6': 0.1388888888888889,
      '--': 0.6944444444444445})

In [186]:
#bernoulli chain, length 2, p = 1/6

In [192]:
# A 32-card deck: 4 suits x 8 ranks.
# NOTE(review): the rank letters look like German Skat names
# (D = Dame/queen, Z = Zehn/ten) - confirm.
suits = u'♥♠♦♣'
skatranks = u'AKDJZ987'

my32Deck = cross(suits,skatranks)
print(my32Deck)

{'♠8', '♦Z', '♠A', '♠J', '♦7', '♣A', '♥A', '♣K', '♥J', '♠Z', '♠9', '♦J', '♠7', '♣8', '♥7', '♦K', '♦9', '♥Z', '♣D', '♦A', '♠K', '♥K', '♣9', '♣J', '♥D', '♦D', '♥8', '♥9', '♦8', '♣Z', '♣7', '♠D'}


In [195]:
my8Hands = combos(my32Deck,8)

In [197]:
len(my8Hands)

10518300

In [199]:
type(my8Hands)

set

In [200]:
help(set)

Help on class set in module builtins:

class set(object)
 |  set() -> new empty set object
 |  set(iterable) -> new set object
 |  
 |  Build an unordered collection of unique elements.
 |  
 |  Methods defined here:
 |  
 |  __and__(self, value, /)
 |      Return self&value.
 |  
 |  __contains__(...)
 |      x.__contains__(y) <==> y in x.
 |  
 |  __eq__(self, value, /)
 |      Return self==value.
 |  
 |  __ge__(self, value, /)
 |      Return self>=value.
 |  
 |  __getattribute__(self, name, /)
 |      Return getattr(self, name).
 |  
 |  __gt__(self, value, /)
 |      Return self>value.
 |  
 |  __iand__(self, value, /)
 |      Return self&=value.
 |  
 |  __init__(self, /, *args, **kwargs)
 |      Initialize self.  See help(type(self)) for accurate signature.
 |  
 |  __ior__(self, value, /)
 |      Return self|=value.
 |  
 |  __isub__(self, value, /)
 |      Return self-=value.
 |  
 |  __iter__(self, /)
 |      Implement iter(self).
 |  
 |  __ixor__(self, value, /)
 |      Re

In [202]:
my1Ass = {s for s in my8Hands if s.count('A') == 1}

In [203]:
len(my1Ass)

4736160

In [204]:
mysol = len(my1Ass)/len(my8Hands)*3/(32-8)

In [205]:
mysol

0.05628476084538376

In [206]:
253/4495

0.05628476084538376

In [207]:
choose(18,6)


18564

In [209]:
Iknowit = Dist({1: 10/18, '-': 8/18})

In [210]:
def YN(n, Iknowit):
    """Joint distribution of n independent draws from the distribution `Iknowit`.

    n:       number of draws; must be at least 1.
    Iknowit: an {outcome: frequency} mapping describing a single draw.

    For n == 1 the given distribution itself is returned; otherwise the
    result is joint(Iknowit, YN(n - 1, Iknowit)).

    Raises:
        ValueError: if n < 1 (the recursion would never reach its base case).
    """
    if n < 1:
        raise ValueError("n must be at least 1, got %r" % (n,))
    if n == 1:
        return Iknowit
    else:
        return joint(Iknowit, YN(n - 1, Iknowit))

In [212]:
mycur = YN(6,Iknowit)
mycur

Dist({'111111': 0.029401194111858146,
      '11111-': 0.02352095528948651,
      '1111-1': 0.02352095528948651,
      '1111--': 0.018816764231589213,
      '111-11': 0.02352095528948651,
      '111-1-': 0.018816764231589206,
      '111--1': 0.018816764231589206,
      '111---': 0.015053411385271368,
      '11-111': 0.02352095528948651,
      '11-11-': 0.018816764231589206,
      '11-1-1': 0.018816764231589206,
      '11-1--': 0.015053411385271364,
      '11--11': 0.018816764231589206,
      '11--1-': 0.015053411385271363,
      '11---1': 0.015053411385271363,
      '11----': 0.012042729108217092,
      '1-1111': 0.023520955289486518,
      '1-111-': 0.018816764231589213,
      '1-11-1': 0.018816764231589213,
      '1-11--': 0.015053411385271368,
      '1-1-11': 0.018816764231589213,
      '1-1-1-': 0.015053411385271364,
      '1-1--1': 0.015053411385271364,
      '1-1---': 0.012042729108217092,
      '1--111': 0.018816764231589213,
      '1--11-': 0.015053411385271364,
      '1--1-1': 

In [227]:
# Total probability of the outcomes that contain three, four, five or six '1' entries.
# Kept as an explicit running sum so the floating-point addition order is unchanged.
temp = 0.0
for s in mycur:
    if s.count('1') in (3, 4, 5, 6):
        temp += mycur[s]
print(temp)

0.753846617028043
