# Basic usage of Pmf class

## Import Pmf

In [1]:
from thinkbayes import Pmf

## Distribution of rolling a 6-sided fair die

In [2]:
# Build the distribution of outcomes for a fair six-sided die
pmf = Pmf()

face_prob = 1 / 6.0  # every face is equally likely
for face in range(1, 7):
    pmf.Set(face, face_prob)

# Word Counter

## Create word list and call Incr

In [3]:
word_list = ["Hello", "World", "Bye", "World"]

pmf = Pmf()
for word in word_list:
    # Incr adds 1 to the word's entry, creating the entry if the word is new
    pmf.Incr(word, 1)

# At this point the values are raw counts, not normalized (they won't add up to 1)

## Normalization

In [4]:
# Value stored for "World" while the entries are still raw counts
before = pmf.Prob('World')
print("Before normalization:", before)

# Rescale the entries so that they add up to 1
pmf.Normalize()

after = pmf.Prob("World")
print("After normalization:", after)

Before normalization: 2
After normalization: 0.5


# The Cookie Problem

## Set prior distribution

In [5]:
# Prior: both bowls are equally likely before any cookie is seen
pmf = Pmf()
for bowl in ('Bowl 1', 'Bowl 2'):
    pmf.Set(bowl, 0.5)

## Update distribution

In [6]:
# Multiply each prior by its corresponding likelihood
for bowl, likelihood in (('Bowl 1', 0.75), ('Bowl 2', 0.5)):
    pmf.Mult(bowl, likelihood)

## Normalization

In [7]:
# Since the hypotheses are mutually exclusive and collectively exhaustive, we can re-normalize
# so that the probabilities add up to 1 again.
# The value displayed for this cell (0.625) matches the total before normalizing:
# 0.5 * 0.75 + 0.5 * 0.5.
pmf.Normalize()

0.625

## Posterior distribution

In [8]:
# Posterior probability of 'Bowl 1' after the update: 0.375 / 0.625 = 0.6
pmf.Prob('Bowl 1')

0.6000000000000001

# The Bayesian Framework

## Rewrite cookie problem in form of class to generalize it

In [42]:
class Cookie(Pmf):
    """
        Cookie problem expressed as a Pmf over bowl hypotheses.

        Each hypothesis names a bowl; observing a cookie flavour updates
        the probability of every bowl.
    """

    # Probability of each flavour for each bowl, keyed by hypothesis name
    mixes = {
        'Bowl 1': {'vanilla': 0.75, 'chocolate': 0.25},
        'Bowl 2': {'vanilla': 0.5, 'chocolate': 0.5},
    }

    def __init__(self, hypos):
        """
            Gives every hypothesis in hypos the same prior probability.

            hypos: iterable of hypothesis names (keys of `mixes`)
        """
        Pmf.__init__(self)

        # Equal weights first, then rescale so that they add up to 1
        for name in hypos:
            self.Set(name, 1)
        self.Normalize()

    def Likelihood(self, data, hypo):
        """
            Returns the probability of flavour `data` under bowl `hypo`.

            Raises KeyError if either name is missing from `mixes`.
        """
        return self.mixes[hypo][data]

    def Update(self, data):
        """
            Multiplies each hypothesis by the likelihood of `data` and
            renormalizes.

            data: the observed flavour, e.g. 'vanilla'
        """
        for name in self.Values():
            self.Mult(name, self.Likelihood(data, name))
        self.Normalize()

    def Print(self):
        """
            Prints each hypothesis and its probability, one pair per line.
        """
        for name, probability in Pmf.Items(self):
            print(name, probability)

In [43]:
# One hypothesis per bowl; Cookie assigns them equal priors
hypos = ['Bowl 1', 'Bowl 2']
pmf = Cookie(hypos)

In [44]:
# Observe a single vanilla cookie and update every hypothesis
pmf.Update('vanilla')

# Show the resulting probability of each bowl
pmf.Print()

Bowl 1 0.6000000000000001
Bowl 2 0.4


In [45]:
# Further observations, applied on top of the earlier vanilla update
dataset = ['vanilla', 'chocolate', 'vanilla']

for flavour in dataset:
    pmf.Update(flavour)  # one update per observed cookie

pmf.Print()

Bowl 1 0.627906976744186
Bowl 2 0.37209302325581395


# The Monty Hall Problem

In [52]:
class Monty(Pmf):
    """
        Monty Hall problem expressed as a Pmf over door hypotheses.
    """

    def __init__(self, hypos):
        """
            Gives every hypothesis in hypos the same prior probability.

            hypos: iterable of hypothesis names (e.g. the string 'ABC')
        """
        Pmf.__init__(self)

        # Equal weights first, then rescale so that they add up to 1
        for door in hypos:
            self.Set(door, 1)
        self.Normalize()

    def Likelihood(self, data, hypo):
        """
            Returns the likelihood of observing `data` under `hypo`:
            0 when hypo equals data, 0.5 when hypo is 'A', 1 otherwise.

            NOTE(review): presumably this encodes "the contestant picked
            door A and Monty opened door `data`" -- confirm.
        """
        if hypo == data:
            return 0
        return 0.5 if hypo == 'A' else 1

    def Update(self, data):
        """
            Multiplies each hypothesis by the likelihood of `data` and
            renormalizes.
        """
        for door in self.Values():
            self.Mult(door, self.Likelihood(data, door))
        self.Normalize()

    def Print(self):
        """
            Prints each hypothesis and its probability, one pair per line.
        """
        for door, probability in Pmf.Items(self):
            print(door, probability)

In [53]:
# Iterating the string yields one hypothesis per door: 'A', 'B', 'C'
hypos = 'ABC'
pmf = Monty(hypos)

In [54]:
# The observation is door 'B'; hypothesis 'B' therefore gets likelihood 0
data = 'B'
pmf.Update(data)

In [55]:
# Show the posterior probability of each door
pmf.Print()

A 0.3333333333333333
B 0.0
C 0.6666666666666666


# Encapsulating the framework

In [56]:
class Suite(Pmf):
    """
        Represents a suite of hypotheses and their probabilities.

        Interface outline only: the methods below contain just their
        docstrings.
    """
    
    def __init__(self, hypo=tuple()):
        """
            Initializes the distribution.

            hypo: the hypotheses to include (defaults to an empty tuple)
        """
        
    def Update(self, data):
        """
            Updates each hypothesis based on the data.

            data: the observation to update with
        """
        
    def Print(self):
        """
            Prints the hypotheses and their probabilities.
        """

- The actual implementation is in thinkbayes.py

## Using Suite class

In [60]:
from thinkbayes import Suite

In [61]:
class Monty(Suite):
    """
        Monty Hall problem on top of Suite; only the likelihood is
        defined here.
    """

    def Likelihood(self, data, hypo):
        """
            Returns the likelihood of observing `data` under `hypo`:
            0 when hypo equals data, 0.5 when hypo is 'A', 1 otherwise.
        """
        if hypo == data:
            like = 0
        elif hypo == 'A':
            like = 0.5
        else:
            like = 1

        return like

In [62]:
# Same problem as before, now with the constructor, Update and Print inherited from Suite
suite = Monty('ABC')
suite.Update('B')  # the observation is door 'B'
suite.Print()

A 0.3333333333333333
B 0.0
C 0.6666666666666666


# The M&M problem

In [63]:
class MnM(Suite):
    """
        M&M problem: hypotheses 'A' and 'B' assign the two colour mixes
        to bag1 and bag2 in opposite ways.
    """

    # Colour mixes; the values of each mix add up to 100
    mix94 = {
        'brown': 30,
        'yellow': 20,
        'red': 20,
        'green': 10,
        'orange': 10,
        'tan': 10,
    }

    mix96 = {
        'blue': 24,
        'green': 20,
        'orange': 16,
        'yellow': 14,
        'red': 13,
        'brown': 13,
    }

    # Hypothesis A: bag1 uses mix94 and bag2 uses mix96; hypothesis B is the reverse
    hypoA = {'bag1': mix94, 'bag2': mix96}
    hypoB = {'bag1': mix96, 'bag2': mix94}

    # Hypothesis name -> its bag-to-mix assignment
    hypotheses = {'A': hypoA, 'B': hypoB}

    def Likelihood(self, data, hypo):
        """
            Returns the mix value of the observed colour under `hypo`.

            data: a (bag, color) pair, e.g. ('bag1', 'yellow')
            hypo: hypothesis name, a key of `hypotheses`
        """
        bag, color = data
        return self.hypotheses[hypo][bag][color]

In [64]:
suite = MnM('AB')

# Each observation is a (bag, color) pair
observations = [('bag1', 'yellow'), ('bag2', 'green')]
for observation in observations:
    suite.Update(observation)

suite.Print()

A 0.7407407407407407
B 0.2592592592592592
