In [71]:
import src.thinkbayes as tb
import src.cookie2 as c
import src.monty as m
import src.m_and_m as mnm

## Probability Mass Function

Maps values to their associated probabilities

In [42]:
# Model a six-sided die: each face from 1 to 6 gets the same probability, 1/6.
die_pmf = tb.Pmf()
face_probability = 1 / 6.0
for face in range(1, 7):
    die_pmf.Set(face, face_probability)

die_pmf.Print()

1 0.166666666667
2 0.166666666667
3 0.166666666667
4 0.166666666667
5 0.166666666667
6 0.166666666667


In [43]:
# Build a word-frequency distribution: Incr adds 1 to a word's entry each
# time that word occurs in the list, so repeated words accumulate counts.
word_list = ["the", "cat", "sat", "on", "the", "mat"]
word_pmf = tb.Pmf()
for word in word_list:
    word_pmf.Incr(word, 1)

print("Words")
# Print() writes the entries to stdout itself and returns None, so it must
# not be wrapped in print (doing so emits a stray "None" after the table).
word_pmf.Print()

# Normalize() rescales the counts into probabilities; the value it returns
# is the total before rescaling, which here is the number of words.
print("\nTotal number of words")
print(word_pmf.Normalize())

# After normalizing, Prob gives the probability of any word in the distribution.
print("\nProbability of word 'the'")
print(word_pmf.Prob("the"))

Words
cat 1
mat 1
on 1
sat 1
the 2
None

Total number of words
6

Probability of word 'the'
0.333333333333


## The Cookie Problem

- Bowl 1 contains 30 vanilla cookies, 10 chocolate cookies
- Bowl 2 contains 20 vanilla cookies, 20 chocolate cookies
- You choose a bowl at random and then pick a cookie from that bowl, also at random. The cookie is vanilla. What's the probability that you picked from Bowl 1?

In [72]:
# Prior: both bowls are equally likely to have been chosen.
cookie_pmf = tb.Pmf()
for bowl in ("Bowl 1", "Bowl 2"):
    cookie_pmf.Set(bowl, 0.5)

# Likelihood of drawing a vanilla cookie from each bowl
# (30 of 40 cookies in Bowl 1, 20 of 40 in Bowl 2).
vanilla_likelihood = (("Bowl 1", 0.75), ("Bowl 2", 0.5))
for bowl, likelihood in vanilla_likelihood:
    cookie_pmf.Mult(bowl, likelihood)

# Rescale so the posterior probabilities sum to 1.
cookie_pmf.Normalize()

for bowl in ("Bowl 1", "Bowl 2"):
    print(cookie_pmf.Prob(bowl))

0.6
0.4


# The Bayesian Framework

## Cookie Problem (revisited)

In [77]:
hypos = ["Bowl 1", "Bowl 2"]
cookie_pmf = c.Cookie(hypos)

# Cookies drawn one after another from the same bowl.
dataset = ["vanilla", "chocolate", "vanilla"]
for draw in dataset:
    print("Data: {}".format(draw))
    cookie_pmf.Update(draw)
    # Report the probability of each bowl after taking this draw into account.
    for bowl, posterior in cookie_pmf.Items():
        print("{} {}".format(bowl, posterior))
    print("")

Data: vanilla
Bowl 2 0.4
Bowl 1 0.6

Data: chocolate
Bowl 2 0.571428571429
Bowl 1 0.428571428571

Data: vanilla
Bowl 2 0.470588235294
Bowl 1 0.529411764706



## Monty Hall Problem

In [57]:
# One hypothesis per letter of "ABC"; the observed datum is "B".
hypos = "ABC"
data = "B"
monty_pmf = m.Monty(hypos)
monty_pmf.Update(data)

# Show the probability of each hypothesis after the update.
for door, posterior in monty_pmf.Items():
    print("{} {}".format(door, posterior))

A 0.333333333333
C 0.666666666667
B 0.0


### Using the generalized Suite class

In [60]:
# A Suite subclass only has to supply Likelihood; the base class uses it
# to compute the posterior probabilities.
class Monty(tb.Suite):
    """Suite for the Monty Hall problem, defined solely by its Likelihood."""

    def Likelihood(self, data, hypo):
        """Return the likelihood of `data` under hypothesis `hypo`.

        data: the observed value (the notebook passes a door letter, "B").
        hypo: the hypothesis being scored (one letter of "ABC").

        Returns 0 when the hypothesis equals the observation, 0.5 when the
        hypothesis is "A", and 1 otherwise. The hard-coded "A" presumably
        encodes the contestant's initial pick -- confirm against src/monty.py.
        """
        if hypo == data:
            return 0
        if hypo == "A":
            return 0.5
        return 1

# Build the suite over the hypotheses "A", "B", "C", update it with the
# observation "B", and print the resulting distribution.
door_hypotheses = "ABC"
observed = "B"
monty_suite = Monty(door_hypotheses)
monty_suite.Update(observed)
monty_suite.Print()

A 0.333333333333
B 0.0
C 0.666666666667


## The M&M problem

In [69]:
# The M_and_M class implements the two hypotheses:
#   hypothesis A: bag1 = mix94, bag2 = mix96
#   hypothesis B: bag1 = mix96, bag2 = mix94
m_and_m_suite = mnm.M_and_M("AB")

# Observations as (bag, color) pairs: a yellow M&M from bag1,
# then a green one from bag2.
observations = [("bag1", "yellow"), ("bag2", "green")]
for observation in observations:
    m_and_m_suite.Update(observation)

m_and_m_suite.Print()

A 0.740740740741
B 0.259259259259


# Exercises

Exercise 2-1

Reimplement the cookie problem such that we sample from the bowls without replacement.

You can find the implementation in `src/cookie2.py`. The solution implements a new `Bowl` class that is initialized with the number of vanilla and chocolate cookies in the bowl.

A `Bowl` has a `mix` property which normalizes the number of vanilla and chocolate cookies into a proportion for use in the `Cookie.Update` method.

It also has a `decrement` method for when a cookie is taken from the bowl without replacement.

This object is used by `Cookie` when `Update` is called with a new data point. A bowl is randomly chosen and a cookie of the type given by `data` (either `vanilla` or `chocolate`) is removed from the bowl.