In [None]:
# O'Reilly Bayes 
# Chapter 2 - Baye's Theorem

In [None]:
# Diachronic Bayes
# diachronic means related to change over time
# probability of hypothese H, given body of data D
# P(H|D) = P(H)P(D|H) / P(D)
# P(H) is the probability of the hypothesis before we see the data
# P(H|D) is the probability of the hypothesis afer we see the data called the posterior
# P(D|H) is the probability of the data under the hypothesis called the likelihood
# P(D) is the total probability of the data, under any hypothesis
# can simplify by specifying hypotheses that are mutually exclusive and collectively exhaustive

In [2]:
import pandas as pd

In [17]:
# Cookie problem / Urn Problem
table = pd.DataFrame(index=['Bowl 1', 'Bowl 2'])

In [6]:
table['prior'] = 1/2, 1/2
table

Unnamed: 0,prior,likelihood
Bowl 1,0.5,0.75
Bowl 2,0.5,0.5


In [4]:
table['likelihood'] = 3/4, 1/2
table

Unnamed: 0,prior,likelihood
Bowl 1,0.5,0.75
Bowl 2,0.5,0.5


In [11]:
#unnormalized posteriors
table['unnorm']=table['prior']*table['likelihood']
table

Unnamed: 0,prior,likelihood,unnorm
Bowl 1,0.5,0.75,0.375
Bowl 2,0.5,0.5,0.25


In [None]:
# I am not here to work for dementia.  She can go to hell.  It is not an umbrella.  Elizabeth is NOT dementia.
# Elizabeth's mother is not dementia.  I am not on a team... I am just used as a doormat and fed.

In [13]:
prob_data = table['unnorm'].sum()
prob_data

0.625

In [16]:
# dividing the "unnormalized posteriors" by the sum of the unnormalized posteriors, creates a percentage out
# of 100, and makes the posteriors add to one, thus normalized.  The total probablity of the data is called the
# normalizing constant.
table['posterior'] = table['unnorm'] / prob_data
table

Unnamed: 0,prior,likelihood,unnorm,posterior
Bowl 1,0.5,0.75,0.375,0.6
Bowl 2,0.5,0.5,0.25,0.4


In [None]:
# Dice problem
# A Bayes table can also solve for multiple hypotheses

In [18]:
table2 = pd.DataFrame(index=[6,8,12])

In [19]:
from fractions import Fraction

In [21]:
table2['prior'] = Fraction(1,3)
table2['likelihood'] = Fraction(1,6), Fraction(1,8), Fraction (1,12)
table2

Unnamed: 0,prior,likelihood
6,1/3,1/6
8,1/3,1/8
12,1/3,1/12


In [8]:
def update(table):
    """Compute the posterior probabilities."""
    table['unnorm'] = table['prior']*table['likelihood']
    prob_data = table['unnorm'].sum()
    table['posterior'] = table['unnorm'] / prob_data
    return prob_data

In [24]:
prob_data = update(table2)

In [None]:
# The Monty Hall Problem
# Three doors, one with a car and two with goats
# Assumptions about Monty Hall's behavior
# a) Monty always opens a door and then offers you the option to switch
# b) He never opens the door you picked or the door with your car
# c) If you choose the door with the car, he chooses one of the other doors at random

In [25]:
table3 = pd.DataFrame(index=['Door 1', 'Door 2', 'Door 3'])
table3['prior'] = Fraction(1,3)
table3

Unnamed: 0,prior
Door 1,1/3
Door 2,1/3
Door 3,1/3


In [26]:
table3['likelihood'] = Fraction(1,2), 1, 0
table3

Unnamed: 0,prior,likelihood
Door 1,1/3,1/2
Door 2,1/3,1
Door 3,1/3,0


In [27]:
update(table3)
table3

Unnamed: 0,prior,likelihood,unnorm,posterior
Door 1,1/3,1/2,1/6,1/3
Door 2,1/3,1,1/3,2/3
Door 3,1/3,0,0,0


In [None]:
# Exercise 2-1
# There are two coins in a box.  One is a normal coin with heads on one side and tails on another, and one is
# a trick coin with heads on both sides.  You choose a coin at random and see that one of the sides is heads.
# What is the probability that you chose the trick coin?

In [35]:
# coin chosen, heads
# answering question what is probability that chosen trick coin, given heads on selected coin
# This is a simpler version of the dice problem
table4 = pd.DataFrame(index=['normal', 'trick'])
table4['prior'] = 0.5
table4['likelihood'] = 0.5, 1
table4

Unnamed: 0,prior,likelihood
normal,0.5,0.5
trick,0.5,1.0


In [38]:
prob_data = update(table4)
table4

Unnamed: 0,prior,likelihood,unnorm,posterior
normal,0.5,0.5,0.25,0.333333
trick,0.5,1.0,0.5,0.666667


In [39]:
# Thus there is a 66% chance that you have chosen the trick coin based on the data of finding a heads "value" on
# your selected coin

In [45]:
#Exercise 2-2
#Suppose you meet someone and learn why they have two children.  You ask if either child is a girl and 
# they say yes.  What is the probability that both children are girls?

# hypothesis 1 - neither child is a girl 
# hypothesis 2 - child a is a girl, child b is a boy
# hypothesis 3 - child a is a boy, child b is a girl
# hypothesis 4 - both children are girls

table5 = pd.DataFrame(index=['A,B = boys', 'A=girl,B=boy', 'A=boy,B=girl', 'A,B = girls'])
table5['prior']= 0.25
table5['likelihood']= 0,1/2, 1/2, 1/4
update(table5)
table5

Unnamed: 0,prior,likelihood,unnorm,posterior
"A,B = boys",0.25,0.0,0.0,0.0
"A=girl,B=boy",0.25,0.5,0.125,0.4
"A=boy,B=girl",0.25,0.5,0.125,0.4
"A,B = girls",0.25,0.25,0.0625,0.2


In [None]:
# Exercise 2-3
# There are many variations of the Monty Hall Problem.  For example, suppose Monty always chooses Door 2 if he can,
# and only chooses Door 3 if he has to (because the car is behind door 2).

# If you choose Door 1 and Monty opens Door 2, what is the probability that the car is behind Door 3?
# If you choose Door 1 and Monty opens Door 3, what is the probability that the car is behind Door 2?
# 

In [None]:
# The Monty Hall Problem
# Three doors, one with a car and two with goats
# Assumptions about Monty Hall's behavior
# a) Monty always opens a door and then offers you the option to switch
# b) He never opens the door you picked or the door with your car
# c) If you choose the door with the car, he chooses door two unless our selection or the car is behind 2

In [24]:
# Door 1 chosen
# Data = car behind 3 given hypothesis
# Hypothesis:
# a) If the car is behind door 1, Monty chooses Door 2, so the prob he opens 2 is 1
# b) If the car is behind door 2, Monty chooses Door 3, so the prob he opens 2 is 0
# c) If the car is behind door 3, Monty chooses Door 2, so the prob he opens 2 is 1
# Well I can't be past 101, because you don't let me on the path.  I just skip around as if this was a tour because you won't
# hire me, so I cannot sustain anything.  I didn't fail FlatIron, I couldn't pay to keep going.  I pawned the laptop for shelter.
# I was late to LA because I got here blindly after doing a lot of vanlife alone and confused. Only a game of bowling with some
# folks in Utah and some swimming as my only diversion.

In [13]:
# Probability of car behind door 3, given
table6 = pd.DataFrame(index=['Door 1', 'Door 2', 'Door 3'])
table6['prior'] = 1/3
table6['likelihood'] = 1, 0, 1
update(table6)
table6

Unnamed: 0,prior,likelihood,unnorm,posterior
Door 1,0.333333,1,0.333333,0.5
Door 2,0.333333,0,0.0,0.0
Door 3,0.333333,1,0.333333,0.5


In [None]:
# The likelihood of the car being behind 3, given monty selected door two is 50%

In [None]:
# door 1 chosen
# Data = car behind 2 given hypothesis
# Hypothesis:
# a) If the car is behind door 1, Monty chooses Door 2, so the prob he opens 3 is 0
# b) If the car is behind door 2, Monty chooses Door 3, so the prob he opens 3 is 1
# c) If the car is behind door 3, Monty chooses Door 2, so the prob he opens 3 is 0

In [14]:
# Probability of car behind door 3, given
table7 = pd.DataFrame(index=['Door 1', 'Door 2', 'Door 3'])
table7['prior'] = 1/3
table7['likelihood'] = 0, 1, 0
update(table7)
table7

Unnamed: 0,prior,likelihood,unnorm,posterior
Door 1,0.333333,0,0.0,0.0
Door 2,0.333333,1,0.333333,1.0
Door 3,0.333333,0,0.0,0.0


In [15]:
# The probability of the car being behind 2, given that Monty opens door 3 is 100%.

In [25]:
# Exercise 2-4
# M&Ms are small candy-coated chocolates that come in a variety of colors
# In 1994, the color mix in a bag was 30% brown, 20% yellow, 20% red, 10% green, 10% orange, 10% tan
# In 1996, it was 24% blue, 20% green, 16% orange, 14% yellow, 13% red, 13% brown
# suppose a friend has two bags, one from '94 and one from '96.  Gives me two M&Ms, one from each bag,
# what is the probability that they yellow one came from the 1994 bag?

# Fact: two selected from two bags
# Data: yellow M&M selected
# Hypothesis 1) yellow M&M selected from 1994 bag (6 colors)
# Hypothesis 2) yellow M&M selected from 1996 bag (6 colors)

# Priors for each year determined by the number of colors, before you know the percentage of colors? Both have 6 colors
# but different colors


In [40]:
# Data: yellow selected, Hypothesis: a certain year bag was chosen
# This yields the probability that you select one yellow M&M and get a yellow from a certain bag. 
# Sometimes you got to look at potential, not current.  You must consider what my life not only is now, but has been for the
# last 10 years. Bowling doesn't exactly lead to Simpson's Paradox.
# (but you also have selected a green M&M from the other bag)
table7 = pd.DataFrame(index=['1994 bag', '1996 bag'])
table7['prior'] = 1/6, 1/6 
table7['likelihood'] = 0.2, 0.14
update(table7)
table7

Unnamed: 0,prior,likelihood,unnorm,posterior
1994 bag,0.166667,0.2,0.033333,0.588235
1996 bag,0.166667,0.14,0.023333,0.411765


In [39]:
# This yields the likelihood that you select one M&M and get a green. 
# (but you also have selected a yellow M&M from the other bag)
table8 = pd.DataFrame(index=['1994 bag', '1996 bag'])
table8['prior'] = 1/6, 1/6 
table8['likelihood'] = 0.1, 0.20
table8

Unnamed: 0,prior,likelihood
1994 bag,0.166667,0.1
1996 bag,0.166667,0.2


In [27]:
# So there are two possibilities to get a yellow and a green, 
# a) you selected the yellow from 1994 and the green from 1996 OR
# b) you selected the yellow from 1996 and the green from 1994
# we are only interested in the case a, the probability that you selected a yellow from 1994 given that you selected a 
# green from 1996

In [37]:
# I think this need to be multiplied, as the chance that we pull a yellow from a bag, given that we have pulled a 
# green is less than the probability of just one of these things occurring.
# probability green M&M from 1996 bag is 20%
# probabilty yellow M&M from 1994 bag given green from '96 

FinalProb = (.2) * (0.588235)
FinalProb

0.117647