http://www.greenteapress.com/thinkbayes/thinkbayes.pdf

In [50]:
from thinkbayes2 import Pmf, Suite

The Cookie Problem:

There are two bowls
Bowl 1: 30 vanilla and 10 chocolate
Bowl 2: 20 vanilla and 20 chocolate

You pick one cookie and it is vanilla. What is the probability that it came from Bowl 1?

Likelihood for Bowl 1 = P(Vanilla|Bowl 1) = 0.75 <br>
Likelihood for Bowl 2 = P(Vanilla|Bowl 2) = 0.5

In [5]:
# Prior: before seeing any cookie, each bowl is equally likely to be the source.
priors = (('Bowl 1', 0.5), ('Bowl 2', 0.5))

# Likelihoods: P(vanilla | bowl) for each bowl.
likelihoods = (('Bowl 1', 0.75), ('Bowl 2', 0.5))

pmf = Pmf()
for bowl, prior in priors:
    pmf.Set(bowl, prior)

# Bayesian update: multiply each prior by its corresponding likelihood.
for bowl, likelihood in likelihoods:
    pmf.Mult(bowl, likelihood)

# Normalize the distribution so the probabilities add up to 1.0.
pmf.Normalize()

print(pmf.Prob('Bowl 1'))

0.6000000000000001


In [71]:
# define a class for the cookie problem

class Cookie(Pmf):
    """Distribution over bowls for the cookie problem."""

    def __init__(self, hypos):
        """
        Build a uniform prior over `hypos` and record each bowl's cookie mix.

        Every hypothesis in `hypos` is given the same weight and the
        distribution is then normalized. `self.mixes` maps a bowl name to
        the probability of drawing each flavour from that bowl.
        """
        super().__init__()
        self.mixes = {
            'Bowl 1': {'vanilla': 0.75, 'chocolate': 0.25},
            'Bowl 2': {'vanilla': 0.5, 'chocolate': 0.5},
        }
        for bowl in hypos:
            self.Set(bowl, 1)
        self.Normalize()

    def Likelihood(self, data, hypo):
        """
        Return P(data | hypo), read directly from `self.mixes`.

        For example, with hypo='Bowl 1' and data='vanilla' the result is the
        vanilla probability stored for 'Bowl 1'.
        """
        return self.mixes[hypo][data]

    def Update(self, data):
        """
        Apply one Bayesian update for the observation `data`.

        Each hypothesis's current probability is multiplied by the likelihood
        of `data` under that hypothesis; the distribution is then normalized.
        """
        for bowl in self.Values():
            self.Mult(bowl, self.Likelihood(data, bowl))
        self.Normalize()
        
        
hypos = ['Bowl 1', 'Bowl 2']

# Case 1: a single vanilla cookie is observed.
pmf = Cookie(hypos)
pmf.Update('vanilla')
for item in pmf.Items():
    # Each item is a (hypothesis, probability) pair.
    print(*item)

print('')

# Case 2: start again from the uniform prior and observe three cookies in turn.
dataset = ['vanilla', 'chocolate', 'vanilla']
pmf = Cookie(hypos)
for data in dataset:
    pmf.Update(data)
for item in pmf.Items():
    print(*item)

Bowl 1 0.6000000000000001
Bowl 2 0.4

Bowl 1 0.5294117647058824
Bowl 2 0.4705882352941176


Monty Hall problem:

There are three doors: a car is behind one of them and there is no prize behind the other two. You pick door A, and then Monty opens a different door that doesn't have a car behind it.

What is the probability it is behind each door?

In [72]:
class Monty(Pmf):
    """Distribution over which door hides the car in the Monty Hall problem.

    The player's own pick is hard-coded as door 'A' in `Likelihood`.
    """

    def __init__(self, hypos):
        """
        Initialize the class by setting each hypothesis in `hypos` with equal probability.

        Each hypothesis is the name of a door that might hide the car.
        """
        Pmf.__init__(self)
        for hypo in hypos:
            self.Set(hypo, 1)
        self.Normalize()

    def Likelihood(self, data, hypo):
        """
        Return the likelihood that Monty opens door `data` if the car is behind door `hypo`.

        data: the door Monty opens to reveal nothing behind it
        hypo: the door assumed to hide the car

        Returns 0, 0.5 or 1 depending on the case (see the comments below).
        """
        if hypo == data:
            # Monty never opens the door with the car behind it.
            return 0
        if hypo == 'A':
            # The car is behind our own door, so Monty may open either of the
            # other two doors; each is modelled as equally likely.
            return 0.5
        # The car is behind neither our door (A) nor the door Monty opened, so
        # it is behind the remaining door and Monty was forced to open `data`.
        return 1

    def Update(self, data):
        """
        Perform Bayesian updating for the observation `data`.

        For each hypothesis, get the likelihood of the data via Likelihood()
        and multiply the prior by it. Finally, normalize the probabilities so
        they add up to one.
        """
        for hypo in self.Values():
            like = self.Likelihood(data, hypo)
            self.Mult(hypo, like)
        self.Normalize()
    
hypos = ['A', 'B', 'C']
# After we choose Door A, Monty reveals nothing is behind Door B.
data = 'B'

pmf = Monty(hypos)
pmf.Update(data)

for item in pmf.Items():
    # Each item is a (door, probability) pair.
    print(*item)

A 0.3333333333333333
B 0.0
C 0.6666666666666666


In [76]:
class Monty(Suite):
    """Monty Hall problem expressed as a Suite; only the likelihood is defined here.

    The player's own pick is hard-coded as door 'A' in `Likelihood`.
    """

    def Likelihood(self, data, hypo):
        """
        Return the likelihood that Monty opens door `data` if the car is behind door `hypo`.

        data: the door Monty opens to reveal nothing behind it
        hypo: the door assumed to hide the car

        Returns 0, 0.5 or 1 depending on the case (see the comments below).
        """
        if hypo == data:
            # Monty never opens the door with the car behind it.
            return 0
        if hypo == 'A':
            # The car is behind our own door, so Monty may open either of the
            # other two doors; each is modelled as equally likely.
            return 0.5
        # The car is behind neither our door (A) nor the door Monty opened, so
        # it is behind the remaining door and Monty was forced to open `data`.
        return 1
    
# The car may be behind any of the three doors; Monty then opens door B.
doors = ['A', 'B', 'C']
suite = Monty(doors)
suite.Update('B')
suite.Print()

A 0.3333333333333333
B 0.0
C 0.6666666666666666


The M&M Problem:

In 1995 blue M&M's were first introduced. 

Pre-1995 mix:  30% Brown, 20% Yellow, 20% Red, 10% Green, 10% Orange, 10% Tan
Post-1995 mix:  24% Blue, 20% Green, 16% Orange, 14% Yellow, 13% Red, 13% Brown

There is one bag of M&Ms from 1994 and one from 1996, but we don't know which is which. We take one M&M from each bag.
Bag 1: pulled a yellow
Bag 2: pulled a green

What is the probability that the yellow one came from the 1994 bag? I.e., what is the probability that Bag 1 is from 1994?

Hypotheses:
A: Bag 1 is 1994, Bag 2 is 1996
B: Bag 1 is 1996, Bag 2 is 1994

In [81]:
class M_and_M(Suite):
    """Suite for the M&M problem: which of two bags is from 1994 and which from 1996."""

    # Colour mix of a 1994 bag, in percent (no blue).
    mix94 = dict(
        brown=30,
        yellow=20,
        red=20,
        green=10,
        orange=10,
        tan=10,
    )

    # Colour mix of a 1996 bag, in percent (blue added, no tan).
    mix96 = dict(
        blue=24,
        green=20,
        orange=16,
        yellow=14,
        red=13,
        brown=13,
    )

    # Hypothesis A: bag 1 is from 1994 and bag 2 is from 1996; B is the reverse.
    hypoA = dict(bag1=mix94, bag2=mix96)
    hypoB = dict(bag1=mix96, bag2=mix94)
    hypotheses = dict(A=hypoA, B=hypoB)

    def Likelihood(self, data, hypo):
        """
        Return the likelihood of drawing a colour from a bag under hypothesis `hypo`.

        data: a tuple (bag, color), e.g. ("bag1", "yellow")
        hypo: a key of `hypotheses` ("A" or "B"), which fixes the mix each bag has

        The result is the percentage of `color` in the mix that `hypo` assigns
        to `bag`, or 0 if that mix does not contain the colour at all.
        Raises KeyError if `hypo` or `bag` is unknown.
        """
        bag, color = data
        mix = self.hypotheses[hypo][bag]
        # A colour missing from a mix (e.g. blue in 1994) cannot have come from
        # that bag, so its likelihood is 0 rather than a KeyError.
        return mix.get(color, 0)
    
suite = M_and_M('AB')

# One M&M was drawn from each bag; apply the two observations in order.
for observation in [('bag1', 'yellow'), ('bag2', 'green')]:
    suite.Update(observation)

suite.Print()

A 0.7407407407407407
B 0.2592592592592592
