In [1]:
from thinkbayes2 import Suite, Pmf

  return f(*args, **kwds)


## The cookie problem

In [2]:
pmf = Pmf()
pmf.Set('Bowl 1', 0.5)
pmf.Set('Bowl 2', 0.5)

pmf.Mult('Bowl 1', 0.75)
pmf.Mult('Bowl 2', 0.5)

pmf.Normalize()
pmf.Print()

Bowl 1 0.6000000000000001
Bowl 2 0.4


### As a Bayesian framework

In [3]:
class Cookie(Pmf):
    
    def __init__(self, hypos):
        """
        Initializes the prior hypothesis
        hypos - list of string names for each hypothesis
        """
        Pmf.__init__(self)
        for hypo in hypos:
            self.Set(hypo, 1)
        self.Normalize()
    
    def Update(self, data):
        """
        Multiplies the prior distribution with the likelihood
        data - string of cookie flavor
        """
        for hypo in self.Values():
            like = self.Likelihood(data, hypo)
            self.Mult(hypo, like)
        self.Normalize()
    
    mixes = {
        'Bowl 1':dict(vanilla=0.75, chocolate=0.25), # Likelihood
        'Bowl 2':dict(vanilla=0.5, chocolate=0.5)
    }
    
    def Likelihood(self, data, hypo):
        """
        Returns likelihood given the data
        """
        mix = self.mixes[hypo]
        like = mix[data]
        return like

In [5]:
pmf = Cookie(['Bowl 1', 'Bowl 2'])
pmf.Update('vanilla')
pmf.Print()

Bowl 1 0.6000000000000001
Bowl 2 0.4


## Monty Hall Problem

In [20]:
class Monty(Pmf):
    
    def __init__(self, hypos):
        """
        Initializes the prior probabilities
        """
        Pmf.__init__(self)
        for hypo in hypos:
            self.Set(hypo, 1)
        self.Normalize()
    
    def Update(self, data):
        """
        Multiplies the prior with the likelihood
        """
        for hypo in self.Values():
            like = self.Likelihood(data, hypo)
            self.Mult(hypo, like)
        self.Normalize()
    
    def Likelihood(self, data, hypo):
        """
        Likelihood of that Monty will open door B given the hypothesis
        """
        if hypo == data:
            return 0 # If person picks A and car is behind B, no chance Monty will open B
        elif hypo == 'A':
            return 0.5 # If person picks A and car is behind A, equal chance Monty will pick B
        else:
            return 1.0 # If person picks A, and the car is in C, Monty will only pick B

In [21]:
pmf = Monty('ABC')
pmf.Update('B')
pmf.Print()

A 0.3333333333333333
B 0.0
C 0.6666666666666666


### Encapsulating the framework

In [22]:
class Monty(Suite):
    
    def Likelihood(self, data, hypo):
        if hypo == data:
            return 0
        elif hypo == 'A':
            return 0.5
        else:
            return 1

In [23]:
suite = Monty('ABC')
suite.Update('B')
suite.Print()

A 0.3333333333333333
B 0.0
C 0.6666666666666666


## The M&M problem

In [27]:
class M_and_M(Suite):
    
    mix94 = dict(
        brown=30,
        yellow=20,
        red=20,
        green=10,
        orange=10,
        tan=10
    )
    
    mix96 = dict(
        blue=24,
        green=20,
        orange=16,
        yellow=14,
        red=13,
        brown=13
    ) 
    
    hypoA = dict(bag1=mix94, bag2=mix96) # Hypothesis
    hypoB = dict(bag1=mix96, bag2=mix94) # Hypothesis
    
    hypotheses = dict(A=hypoA, B=hypoB)
    
    def Likelihood(self, data, hypo):
        """
        data - a tuple defining the bag an m&m is drawn from, and its color
        hypo - the hypothesis string 
        """
        bag, color = data
        mix = self.hypotheses[hypo][bag]
        like = mix[color]
        return like

In [28]:
suite = M_and_M('AB')

suite.Update(('bag1', 'yellow'))
suite.Update(('bag2', 'green'))

suite.Print()

A 0.7407407407407407
B 0.2592592592592592


## Exercises

### Exercise 1
> This one is from one of my favorite books, David MacKay's "Information Theory, Inference, and Learning Algorithms":
> 
> Elvis Presley had a twin brother who died at birth.  What is the probability that Elvis was an identical twin?"
>   
> To answer this one, you need some background information: According to the Wikipedia article on twins:  ``Twins are estimated to be approximately 1.9% of the world population, with monozygotic twins making up 0.2% of the total---and 8% of all twins.''

In [36]:
pmf = Pmf(dict(fraternal=0.92, identical=0.08)) # Given that Elvis was a twin, the prior of being identical

# Data: Twin was a male

pmf['fraternal'] *= 0.5 # 50% chance of being a male given he is a fraternal twin
pmf['identical'] *= 1.0 # 100% chance of being a male given he is an identical twin

pmf.Normalize()
pmf.Print()

fraternal 0.8518518518518517
identical 0.14814814814814814


### Exercise 2
> Let's consider a more general version of the Monty Hall problem where Monty is more unpredictable. As before, Monty never opens the door you chose (let's call it A) and never opens the door with the prize. So if you choose the door with the prize, Monty has to decide which door to open. Suppose he opens B with probability p and C with probability 1-p. If you choose A and Monty opens B, what is the probability that the car is behind A, in terms of p? What if Monty opens C?
> 
> Hint: you might want to use SymPy to do the algebra for you.

In [70]:
from sympy import symbols

p = symbols('p')

pmf = Pmf('ABC')
pmf['A'] *= p # The probability Monty picks B if the car is in A
pmf['B'] *= 0 # The probability Monty picks B if the car is in B
pmf['C'] *= 1 # The probability Monty picks B if the car is in C

pmf.Normalize()

pmf['A'].simplify()

1.0*p/(p + 1)

In [71]:
pmf['A'].evalf(subs={p:0.5})

0.333333333333333

In [72]:
p = symbols('p')

pmf = Pmf('ABC')
pmf['A'] *= 1-p # The probability Monty picks C if the car is in A
pmf['B'] *= 1 # The probability Monty picks C if the car is in B
pmf['C'] *= 0 # The probability Monty picks C if the car is in C

pmf.Normalize()
pmf['A'].simplify()

0.333333333333333*(p - 1)/(0.333333333333333*p - 0.666666666666667)

In [73]:
pmf['A'].evalf(subs={p:0.5})

0.333333333333333

### Exercise 3 
>According to the CDC, ``Compared to nonsmokers, men who smoke are about 23 times more likely to develop lung cancer and women who smoke are about 13 times more likely.''  Also, among adults in the U.S. in 2014:
>
> Nearly 19 of every 100 adult men (18.8%)
> Nearly 15 of every 100 adult women (14.8%)
>
>If you learn that a woman has been diagnosed with lung cancer, and you know nothing else about her, what is the probability that she is a smoker?

In [75]:
pmf = Pmf(dict(smoker=0.148, nonsmoker=0.852)) # Prior distribution

pmf['smoker'] *= 13 # Change in factor when we learn she is diagnosed with lung cancer
pmf['nonsmoker'] *= 1

pmf.Normalize()
pmf.Print()

nonsmoker 0.30691642651296835
smoker 0.6930835734870318


### Exercise 4
>In Section 2.3 I said that the solution to the cookie problem generalizes to the case where we draw multiple cookies with replacement.
>
>But in the more likely scenario where we eat the cookies we draw, the likelihood of each draw depends on the previous draws.
>
>Modify the solution in this chapter to handle selection without replacement. Hint: add instance variables to Cookie to represent the hypothetical state of the bowls, and modify Likelihood accordingly. You might want to define a Bowl object.

In [76]:
class Bowl:
    """ A bowl object """
    
    def __init__(self, n_vanilla, n_chocolate):
        self.n_vanilla = n_vanilla
        self.n_chocolate = n_chocolate
        self.pct_vanilla = None
        self.pct_chocolate = None
    
    def grab_cookie(self, cookie_str):
        """Removes 1 cookie from the bowl"""
        if ((cookie_str == 'vanilla') and (self.n_vanilla > 0)):
            self.n_vanilla -= 1
        elif ((cookie_str == 'chocolate') and (self.n_chocolate > 0)):
            self.n_chocolate -= 1
        self.update_percentages()
    
    def update_percentages(self):
        self.pct_vanilla = self.n_vanilla / (self.n_chocolate + self.n_vanilla)
        self.pct_chocolate = 1.0 - self.pct_vanilla
        
class Cookie(Pmf):
    """A map from string bowl ID to probability"""
    
    def __init__(self, hypos):
        """ Initialize self.
        hypos: sequence of string bowl IDs
        """
        Pmf.__init__(self)
        for hypo in hypos:
            self.Set(hypo, 1)
        self.Normalize()
    
    def Update(self, data):
        """Updates the PMF with new data.
        
        data: string cookie type
        """
        for hypo in self.Values():
            like = self.Likelihood(data, hypo)
            self.Mult(hypo, like)
        self.Normalize()
    
    mixes = {
        'Bowl 1': Bowl(n_vanilla=30, n_chocolate=10),
        'Bowl 2': Bowl(n_vanilla=20, n_chocolate=20)
    }
    
    def Likelihood(self, data, hypo):
        """ The likelihood of the data under the hypothesis
        
        data: string cookie type
        hypo: string bowl ID
        """
        for bowl, bowl_object in self.mixes.items():
            bowl_object.update_percentages()
        bowl_1_mix = dict(
            vanilla=self.mixes['Bowl 1'].pct_vanilla,
            chocolate=self.mixes['Bowl 1'].pct_chocolate
        )
        bowl_2_mix = dict(
            vanilla=self.mixes['Bowl 2'].pct_vanilla,
            chocolate=self.mixes['Bowl 2'].pct_chocolate
        )
        bowls = dict()
        bowls['Bowl 1'] = bowl_1_mix
        bowls['Bowl 2'] = bowl_2_mix
        mix = bowls[hypo]
        like = mix[data]
        return like

In [77]:
pmf = Cookie(['Bowl 1', 'Bowl 2'])
for cookie in ['vanilla', 'chocolate','vanilla', 'vanilla']:
    pmf.Update(cookie)
    pmf.Print()

Bowl 1 0.6000000000000001
Bowl 2 0.4
Bowl 1 0.4285714285714286
Bowl 2 0.5714285714285714
Bowl 1 0.5294117647058824
Bowl 2 0.4705882352941176
Bowl 1 0.6279069767441862
Bowl 2 0.37209302325581395
