In [None]:
#O'Reilly: Think Bayes: Bayesian Statistics in Python
# Chapter 1: Probability

In [None]:
# Example with GSS (General Social Survey) Data from http://gss.norc.org

In [1]:
#import packages
import pandas as pd
import numpy as np


In [2]:
# Read the GSS extract, taking the first CSV column as the row index,
# then preview the first few rows.
gss = pd.read_csv(
    'gss_bayes.csv',
    index_col=0,
)
gss.head()


Unnamed: 0_level_0,year,age,sex,polviews,partyid,indus10
caseid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,1974,21.0,1,4.0,2.0,4970.0
2,1974,41.0,1,5.0,0.0,9160.0
5,1974,58.0,2,6.0,1.0,2670.0
6,1974,30.0,1,5.0,4.0,6870.0
7,1974,48.0,1,5.0,4.0,7860.0


In [5]:
## Fraction of bankers
# Boolean Series: True where the respondent's industry code (indus10) is 6870,
# the code this notebook uses to identify bankers.
banker = gss['indus10'].eq(6870)
banker.head()

caseid
1    False
2    False
5    False
6     True
7    False
Name: indus10, dtype: bool

In [6]:
banker.sum()

728

In [7]:
banker.mean()

0.014769730168391155

In [9]:
def prob(A):
    """Compute the probability of A.

    A must provide a ``mean()`` method (in this notebook, a boolean pandas
    Series); the value of that call is returned unchanged, i.e. the
    fraction of entries that are True.
    """
    fraction_true = A.mean()
    return fraction_true

In [10]:
prob(banker)

0.014769730168391155

In [11]:
# Respondents whose sex column equals 2 are treated as female here.
female = gss['sex'].eq(2)
prob(female)

0.5378575776019476

In [13]:
# polviews is on a 7-point scale; values of 3 or below are treated as liberal.
liberal = gss['polviews'].le(3)

In [14]:
prob(liberal)

0.27374721038750255

In [15]:
# partyid values of 1 or below are treated as Democrat in this notebook
# (NOTE(review): confirm the partyid coding against the GSS codebook).
democrat = gss['partyid'].le(1)

In [16]:
prob(democrat)

0.3662609048488537

In [17]:
# Conjunction, both A and B are True, commutative
prob (banker & democrat)

0.004686548995739501

In [19]:
# Conditional probability: the probability of A given B. Not commutative.

# P(democrat | liberal): keep only the liberal rows of `democrat`,
# then take the fraction of them that are True.
selected = democrat.loc[liberal]
prob(selected)

0.5206403320240125

In [20]:
# P(female | banker): restrict `female` to the banker rows first.
selected = female.loc[banker]
prob(selected)

0.7706043956043956

In [21]:
def conditional(proposition, given):
    """Compute the probability of `proposition` conditioned on `given`.

    `proposition` is indexed with `given` (boolean Series in this notebook),
    so only the entries where `given` holds are kept, and that subset is
    passed to prob().
    """
    subset = proposition[given]
    return prob(subset)

In [22]:
conditional(liberal, given=female)

0.27581004111500884

In [23]:
conditional(female, given=banker)

0.7706043956043956

In [None]:
#Condition and Conjunction

In [24]:
conditional (female, given= liberal & democrat)

0.576085409252669

In [25]:
conditional (liberal & female, given = banker)

0.17307692307692307

In [None]:
#Laws of Probability
# Theorem 1: computing conditional probability as a ratio of two probabilities
# P(A|B) = P(A and B) / P(B)
# Theorem 2: P(A and B) = P(B) P(A|B)
# Theorem 3: 
# P(A and B) = P(B and A) , commutative property
# apply theorem 2
# P(B)P(A|B) = P(A)P(B|A)
# P(A|B) = P(A)P(B|A)/P(B), Bayes's Theorem

In [26]:
conditional (liberal, given = banker)

0.2239010989010989

In [27]:
# Bayes's theorem: P(liberal | banker) = P(liberal) * P(banker | liberal) / P(banker)
prior = prob(liberal)
likelihood = conditional(banker, given=liberal)
prior * likelihood / prob(banker)

0.2239010989010989

In [None]:
# Law of Total Probability
#P(A) = P(B1 and A) + P(B2 and A)
#This theorem only applies if B1 and B2 are mutually exclusive and collectively exhaustive

In [29]:
# Respondents whose sex column equals 1 are treated as male here.
male = gss['sex'].eq(1)

In [30]:
prob(male & banker) + prob (female & banker)

0.014769730168391155

In [31]:
# Total probability split by sex:
# P(banker) = P(male) P(banker | male) + P(female) P(banker | female)
male_term = prob(male) * conditional(banker, given=male)
female_term = prob(female) * conditional(banker, given=female)
male_term + female_term

0.014769730168391153

In [32]:
# summarizing: P(A) = sum over i of P(B_i) P(A|B_i)

In [33]:
# Count respondents at each polviews value, listed in order of the value.
B = gss['polviews']
polviews_counts = B.value_counts()
polviews_counts.sort_index()

1.0     1442
2.0     5808
3.0     6243
4.0    18943
5.0     7940
6.0     7319
7.0     1595
Name: polviews, dtype: int64

In [34]:
# One term of the total-probability sum: P(B == i) * P(banker | B == i)
i = 4
is_level = (B == i)
prob(is_level) * conditional(banker, given=is_level)

0.005822682085615744

In [35]:
# Law of total probability over the seven polviews values (1 through 7):
# P(banker) = sum of P(B == level) * P(banker | B == level)
sum(
    prob(B == level) * conditional(banker, given=(B == level))
    for level in range(1, 8)
)

0.014769730168391157

In [37]:
#Exercises
#Exercise 1-1
# Compute a) the probability that Linda is a female banker, b) the probability that Linda is a liberal female banker,
# and c) the probability that Linda is a liberal female banker and a Democrat

In [38]:
prob(female & banker)

0.011381618989653074

In [39]:
prob (liberal & female & banker)

0.002556299452221546

In [40]:
prob (liberal & female & banker & democrat)

0.0012375735443294787

In [43]:
#Exercise 1-2
# Use conditional to compute the following
# What is the probability that a respondent is liberal, given that they are a democrat?
# What is the probability that a respondent is a Democrat, given that they are liberal?

In [46]:
conditional (liberal, given=democrat)

0.3891320002215698

In [47]:
conditional (democrat, given=liberal)

0.5206403320240125

In [45]:
# who will you give my mic to today that is more deserving than me?  You give everything to everyone else
# and I get nothing forever, you give me hell by asking me to limit myself to only what serves you.  I have paid
# and you take more and more and more, I cannot even clothe myself.  You will leave me naked.  Nuns have more
# than me.

In [48]:
# Exercise 1-3
# young is under 30
# old is 65 or older

In [49]:
# Young: age strictly below 30.
young = gss['age'].lt(30)
prob(young)

0.19435991073240008

In [52]:
# Old: age 65 or above (the boundary value 65 is included).
old = gss['age'].ge(65)
prob(old)

0.17328058429701765

In [None]:
# using prob and conditional, to compute the following probabilities
# What is the probability that a randomly chosen respondent is a young liberal?
# What is the probability that a young person is a liberal?
# What fraction of respondents are old conservatives?
# What fraction of conservatives are old?

In [53]:
# polviews values of 5 or above are treated as conservative.
conservative = gss['polviews'].ge(5)
prob(conservative)

0.3419354838709677

In [54]:
prob (young & liberal)

0.06579427875836884

In [55]:
conditional (liberal, given=young)

0.338517745302714

In [56]:
prob (old & conservative)

0.06701156421180766

In [57]:
conditional (old, given=conservative)

0.19597721609113564