In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the customer records from the CSV that sits next to the notebook.
# The displayed frame has ArtBooks, HistoryBooks, TableBooks and Purchase columns.
csv_path = 'ArtHistBooks.csv'
books = pd.read_csv(csv_path)
books

Unnamed: 0,ArtBooks,HistoryBooks,TableBooks,Purchase
0,0,0,1,0
1,0,1,0,0
2,0,0,0,0
3,1,0,1,0
4,1,1,1,0
...,...,...,...,...
995,1,1,0,1
996,0,1,0,0
997,1,0,1,0
998,1,1,0,0


### Convert the book columns into binary (0/1) indicator variables

In [3]:
# Collapse each book column to a 0/1 flag: 1 when the value is positive, else 0.
for flag_col in ('ArtBooks', 'HistoryBooks', 'TableBooks'):
    books[flag_col] = books[flag_col].gt(0).astype('int64')
books

Unnamed: 0,ArtBooks,HistoryBooks,TableBooks,Purchase
0,0,0,1,0
1,0,1,0,0
2,0,0,0,0
3,1,0,1,0
4,1,1,1,0
...,...,...,...,...
995,1,1,0,1
996,0,1,0,0
997,1,0,1,0
998,1,1,0,0


### No prior information: use a uniform prior

$\theta_{prior} \sim Beta(1,1) = Unif(0,1)$

$\therefore \alpha_{prior} = \beta_{prior} = 1$

In [4]:
# Uniform prior for every category: Beta(alpha = 1, beta = 1) = Unif(0,1)

# alpha parameters, listed in Art, History, Table order
a_prior_art = a_prior_history = a_prior_table = 1
a_prior = [a_prior_art, a_prior_history, a_prior_table]

# beta parameters, same order
b_prior_art = b_prior_history = b_prior_table = 1
b_prior = [b_prior_art, b_prior_history, b_prior_table]

$\hat{\theta}_{prior} = E(\theta) = \dfrac{\alpha_{prior}}{\alpha_{prior} + \beta_{prior}} = \frac{1}{2}$

In [5]:
# Prior mean alpha / (alpha + beta) for each category, in Art, History, Table order.
theta_prior = [alpha / (alpha + beta) for alpha, beta in zip(a_prior, b_prior)]
theta_prior

[0.5, 0.5, 0.5]

$X = \sum_{i=1}^{N} x_i$

In [6]:
# Column totals of the three book columns, one total per category.
X_art, X_history, X_table = (
    books[col].sum() for col in ('ArtBooks', 'HistoryBooks', 'TableBooks')
)

In [7]:
# Per-category totals collected in Art, History, Table order.
X = [X_art, X_history, X_table]

$\alpha_{post} = \alpha_{prior} + \sum_{i=1}^{N} x_i$

In [8]:
# Posterior alpha for each category: alpha_post = alpha_prior + X
a_post_art, a_post_history, a_post_table = (
    alpha + total for alpha, total in zip(a_prior, X)
)

In [9]:
# Posterior alpha parameters in Art, History, Table order.
a_post = [a_post_art, a_post_history, a_post_table]

$\beta_{post} = N - \sum_{i=1}^{N} x_i + \beta_{prior}$

In [10]:
# Posterior beta for each category: beta_post = N - sum(x) + beta_prior.
# N is taken from the data (number of rows in `books`) rather than typed in
# as 1000, so the update stays correct if the CSV changes.
n_obs = len(books)
b_post_art = n_obs - X_art + b_prior_art
b_post_history = n_obs - X_history + b_prior_history
b_post_table = n_obs - X_table + b_prior_table

In [11]:
# Posterior beta parameters in Art, History, Table order.
b_post = [b_post_art, b_post_history, b_post_table]

$\hat{\theta}_{post} = E(\theta \mid x) = \dfrac{\alpha_{post}}{\alpha_{post} + \beta_{post}}$

In [12]:
# Posterior mean alpha_post / (alpha_post + beta_post) for each category.
theta_art = a_post_art / (a_post_art + b_post_art)
theta_history = a_post_history / (a_post_history + b_post_history)
theta_table = a_post_table / (a_post_table + b_post_table)

# Backward-compatible alias: the History mean was previously stored under the
# name `theta_post`, and other cells may still read it from there.
theta_post = theta_history

In [13]:
# Posterior means in Art, History, Table order. Built from the posterior
# parameter lists rather than from a scalar that is itself called `theta_post`,
# so re-running this cell cannot nest the list inside itself.
theta_post = [alpha / (alpha + beta) for alpha, beta in zip(a_post, b_post)]

Likelihood: $p(x \mid \theta) = \prod_{i=1}^{N}\theta^{x_i} (1-\theta)^{1-x_i} = \theta^{\sum_i x_i}(1-\theta)^{N - \sum_i x_i}$, so $\sum_{i=1}^{N} x_i \sim Bin(N, \theta)$

In [14]:
# Likelihood theta^X * (1 - theta)^(N - X) for each category, evaluated at the
# prior mean. The counts come from X and N from len(books) instead of typed-in
# numbers, so the exponents always match the data.
# With theta = 0.5 every value equals 0.5 ** N, which for N = 1000 is about
# 9.3e-302 -- close to the float64 underflow limit; use log-likelihoods if N grows.
n_obs = len(books)
likelihood_art, likelihood_history, likelihood_table = (
    float(theta ** total * (1 - theta) ** (n_obs - total))
    for theta, total in zip(theta_prior, X)
)
likelihood = [likelihood_art, likelihood_history, likelihood_table]
likelihood

[9.332636185032189e-302, 9.332636185032189e-302, 9.332636185032189e-302]

In [15]:
# Summary table: one row per book category, one column per quantity computed above.
summary_columns = {
    'X_i': X,
    'a_prior': a_prior,
    'b_prior': b_prior,
    'theta_prior': theta_prior,
    'likelihood': likelihood,
    'a_post': a_post,
    'b_post': b_post,
    'theta_post': theta_post,
}
df = pd.DataFrame(summary_columns, index=["Art", "History", "Table"])
df

Unnamed: 0,X_i,a_prior,b_prior,theta_prior,likelihood,a_post,b_post,theta_post
Art,301,1,1,0.5,9.332636e-302,302,700,0.301397
History,543,1,1,0.5,9.332636e-302,544,458,0.542914
Table,380,1,1,0.5,9.332636e-302,381,621,0.38024


### Part 2: Purchase probability for each combination of book categories

In [16]:
# The eight possible (Art, History, Table) 0/1 combinations, written one
# combination per tuple and then transposed into one list per category.
art_cases, history_cases, table_cases = (
    list(flags)
    for flags in zip(
        (0, 0, 0),
        (1, 0, 0),
        (0, 1, 0),
        (0, 0, 1),
        (1, 1, 0),
        (1, 0, 1),
        (0, 1, 1),
        (1, 1, 1),
    )
)

In [26]:
# One row per combination of the three 0/1 category flags.
case_columns = dict(Art=art_cases, History=history_cases, Table=table_cases)
cases = pd.DataFrame(case_columns)
cases

Unnamed: 0,Art,History,Table
0,0,0,0
1,1,0,0
2,0,1,0
3,0,0,1
4,1,1,0
5,1,0,1
6,0,1,1
7,1,1,1


In [27]:
# Give every case the same Beta(1, 1) prior. Assigning a scalar broadcasts it
# to all rows, so the number of cases is not hard-coded here.
cases['a_prior'] = 1
cases['b_prior'] = 1
cases

Unnamed: 0,Art,History,Table,a_prior,b_prior
0,0,0,0,1,1
1,1,0,0,1,1
2,0,1,0,1,1
3,0,0,1,1,1
4,1,1,0,1,1
5,1,0,1,1,1
6,0,1,1,1,1
7,1,1,1,1,1


In [28]:
# Number of rows of `books` that match each Art/History/Table combination.
# The flags are read by column label and the loop runs over however many rows
# `cases` has; integer keys such as `row[0]` on a label-indexed Series are
# deprecated positional lookups in recent pandas.
X_cases = []
for art, history, table in cases[['Art', 'History', 'Table']].itertuples(index=False, name=None):
    in_case = (
        (books['ArtBooks'] == art)
        & (books['HistoryBooks'] == history)
        & (books['TableBooks'] == table)
    )
    X_cases.append(int(in_case.sum()))
X_cases

[193, 76, 251, 134, 100, 54, 121, 71]

In [29]:
# Total of the Purchase column within each Art/History/Table combination.
# The flags are read by column label and the loop runs over however many rows
# `cases` has; integer keys such as `row[0]` on a label-indexed Series are
# deprecated positional lookups in recent pandas.
purchases = []
for art, history, table in cases[['Art', 'History', 'Table']].itertuples(index=False, name=None):
    in_case = (
        (books['ArtBooks'] == art)
        & (books['HistoryBooks'] == history)
        & (books['TableBooks'] == table)
    )
    # int(...) keeps plain Python ints in the list instead of numpy scalars.
    purchases.append(int(books.loc[in_case, 'Purchase'].sum()))

purchases

[4, 12, 10, 3, 16, 4, 14, 26]

In [30]:
# Attach the per-case row count (N) and purchase total (X) as new columns.
case_counts = {'N': X_cases, 'X': purchases}
cases = cases.assign(**case_counts)
cases

Unnamed: 0,Art,History,Table,a_prior,b_prior,N,X
0,0,0,0,1,1,193,4
1,1,0,0,1,1,76,12
2,0,1,0,1,1,251,10
3,0,0,1,1,1,134,3
4,1,1,0,1,1,100,16
5,1,0,1,1,1,54,4
6,0,1,1,1,1,121,14
7,1,1,1,1,1,71,26


In [31]:
# Rows in each case that are not counted in X.
cases['N-X'] = cases['N'].sub(cases['X'])
cases

Unnamed: 0,Art,History,Table,a_prior,b_prior,N,X,N-X
0,0,0,0,1,1,193,4,189
1,1,0,0,1,1,76,12,64
2,0,1,0,1,1,251,10,241
3,0,0,1,1,1,134,3,131
4,1,1,0,1,1,100,16,84
5,1,0,1,1,1,54,4,50
6,0,1,1,1,1,121,14,107
7,1,1,1,1,1,71,26,45


In [32]:
# Sanity check: the eight flag combinations are meant to partition the data,
# so the case sizes must add up to the number of rows in `books`.
assert sum(X_cases) == len(books), "case counts do not add up to the number of rows in books"
sum(X_cases)

1000

In [33]:
# Posterior parameters per case: a_post = a_prior + X, b_post = b_prior + (N - X)
cases['a_post'] = cases['a_prior'].add(cases['X'])
cases['b_post'] = cases['b_prior'].add(cases['N-X'])
cases

Unnamed: 0,Art,History,Table,a_prior,b_prior,N,X,N-X,a_post,b_post
0,0,0,0,1,1,193,4,189,5,190
1,1,0,0,1,1,76,12,64,13,65
2,0,1,0,1,1,251,10,241,11,242
3,0,0,1,1,1,134,3,131,4,132
4,1,1,0,1,1,100,16,84,17,85
5,1,0,1,1,1,54,4,50,5,51
6,0,1,1,1,1,121,14,107,15,108
7,1,1,1,1,1,71,26,45,27,46


In [34]:
# Posterior mean per case: a_post / (a_post + b_post).
# The column is named 'theta_post' because it is computed from the posterior
# parameters; it was previously labelled 'theta_prior'.
# NOTE(review): any later cell that reads cases['theta_prior'] must use
# cases['theta_post'] instead.
cases['theta_post'] = cases['a_post'] / (cases['a_post'] + cases['b_post'])
cases

Unnamed: 0,Art,History,Table,a_prior,b_prior,N,X,N-X,a_post,b_post,theta_prior
0,0,0,0,1,1,193,4,189,5,190,0.025641
1,1,0,0,1,1,76,12,64,13,65,0.166667
2,0,1,0,1,1,251,10,241,11,242,0.043478
3,0,0,1,1,1,134,3,131,4,132,0.029412
4,1,1,0,1,1,100,16,84,17,85,0.166667
5,1,0,1,1,1,54,4,50,5,51,0.089286
6,0,1,1,1,1,121,14,107,15,108,0.121951
7,1,1,1,1,1,71,26,45,27,46,0.369863
