In [1]:
import pandas as pd
import numpy as np

In [2]:
books = pd.read_csv('ArtHistBooks.csv')
books

Unnamed: 0,ArtBooks,HistoryBooks,TableBooks,Purchase
0,0,0,1,0
1,0,1,0,0
2,0,0,0,0
3,1,0,1,0
4,1,1,1,0
...,...,...,...,...
995,1,1,0,1
996,0,1,0,0
997,1,0,1,0
998,1,1,0,0


### Change into categorical variables

In [3]:
books['ArtBooks'] = books['ArtBooks'].apply(lambda x:1 if x > 0 else 0)
books['HistoryBooks'] = books['HistoryBooks'].apply(lambda x:1 if x > 0 else 0)
books['TableBooks'] = books['TableBooks'].apply(lambda x:1 if x > 0 else 0)
books

Unnamed: 0,ArtBooks,HistoryBooks,TableBooks,Purchase
0,0,0,1,0
1,0,1,0,0
2,0,0,0,0
3,1,0,1,0
4,1,1,1,0
...,...,...,...,...
995,1,1,0,1
996,0,1,0,0
997,1,0,1,0
998,1,1,0,0


### Part 1: No info on prior  

$\theta_{prior} \sim Beta(1,1) = Unif(0,1)$

$\therefore \alpha_{prior} = \beta_{prior} = 1$

In [4]:
# Beta(alpha = 1, beta = 1) = Unif(0,1)

a_prior_art = 1
a_prior_history = 1
a_prior_table = 1
a_prior = [a_prior_art, a_prior_history, a_prior_table]

b_prior_art = 1
b_prior_history = 1
b_prior_table = 1
b_prior = [b_prior_art, b_prior_history, b_prior_table]

$\theta_{prior} = \dfrac{\alpha_{prior}}{\alpha_{prior} + \beta_{prior}} = \frac{1}{2}$

In [5]:
theta_prior = [a_prior_art / (a_prior_art + b_prior_art), a_prior_history / (a_prior_history + b_prior_history), 
               a_prior_table / (a_prior_table + b_prior_table)]
theta_prior

[0.5, 0.5, 0.5]

$X_i = \Sigma{x_i}$

In [6]:
X_art = books['ArtBooks'].sum()
X_history = books['HistoryBooks'].sum()
X_table = books['TableBooks'].sum()

In [7]:
X = [X_art, X_history, X_table]

$\alpha_{post} = \alpha_{prior} + \Sigma{x_i}$

In [8]:
a_post_art = a_prior_art + X_art # alpha_post = a_prior + X
a_post_history = a_prior_history + X_history
a_post_table = a_prior_table + X_table

In [9]:
a_post = [a_post_art, a_post_history, a_post_table]

$\beta_{post} = N - \Sigma{x_i} + \beta_{prior}$

In [10]:
b_post_art = 1000 - X_art + b_prior_art # beta = N - sigma(x) + beta prior
b_post_history = 1000 - X_history + b_prior_history
b_post_table = 1000 - X_table + b_prior_table

In [11]:
b_post = [b_post_art, b_post_history, b_post_table]

$\hat{\theta_{post}} = E(\theta | x) = \dfrac{\alpha_{post}}{\alpha_{post} + \beta_{post}}$

In [12]:
theta_art = a_post_art / (a_post_art + b_post_art)
theta_post = a_post_history / (a_post_history + b_post_history)
theta_table = a_post_table / (a_post_table + b_post_table)

In [13]:
theta_post = [theta_art, theta_post, theta_table]

Likelihood : $p(x|\theta) = \prod_{i=1}^{N}\theta ^ {x_i} (1-\theta)^{1-x_i}   : Bin(\theta, N)$

In [14]:
likelihood_art = ((0.5) ** 302) * (0.5 ** (1000 - 302)) # OR, 0.5 ** 1000
likelihood_history = ((0.5) ** 458) * (0.5 ** (1000 - 458))
likelihood_table = ((0.5) ** 380) * (0.5 ** (1000 - 380))
likelihood = [likelihood_art, likelihood_history, likelihood_table]
likelihood

[9.332636185032189e-302, 9.332636185032189e-302, 9.332636185032189e-302]

In [15]:
df = pd.DataFrame({'X_i': X, 'a_prior': a_prior, 'b_prior' : b_prior, 'theta_prior': theta_prior,
                   'likelihood': likelihood,'a_post': a_post, 'b_post': b_post, 'theta_post': theta_post})
df.index = ["Art", "History", "Table"]
df

Unnamed: 0,X_i,a_prior,b_prior,theta_prior,likelihood,a_post,b_post,theta_post
Art,301,1,1,0.5,9.332636e-302,302,700,0.301397
History,543,1,1,0.5,9.332636e-302,544,458,0.542914
Table,380,1,1,0.5,9.332636e-302,381,621,0.38024


### Part 2: No info on prior

$\theta_{prior} \sim Beta(1,1) = Unif(0,1)$

$\therefore \alpha_{prior} = \beta_{prior} = 1$

In [16]:
art_cases = [0,1,0,0,1,1,0,1]
history_cases = [0,0,1,0,1,0,1,1]
table_cases = [0,0,0,1,0,1,1,1]

In [17]:
cases = pd.DataFrame({'Art': art_cases, 'History': history_cases, 'Table': table_cases})
cases

Unnamed: 0,Art,History,Table
0,0,0,0
1,1,0,0
2,0,1,0
3,0,0,1
4,1,1,0
5,1,0,1
6,0,1,1
7,1,1,1


In [18]:
cases['a_prior'] = [1] * 8
cases['b_prior'] = [1] * 8
cases['theta_prior'] = [0.5] * 8
cases

Unnamed: 0,Art,History,Table,a_prior,b_prior,theta_prior
0,0,0,0,1,1,0.5
1,1,0,0,1,1,0.5
2,0,1,0,1,1,0.5
3,0,0,1,1,1,0.5
4,1,1,0,1,1,0.5
5,1,0,1,1,1,0.5
6,0,1,1,1,1,0.5
7,1,1,1,1,1,0.5


In [19]:
X_cases = []
for i in range(0,8):
    curr_case = cases.loc[i]
    case = len(books[(books['ArtBooks'] == curr_case[0]) & (books['HistoryBooks'] == curr_case[1]) 
                     & (books['TableBooks'] == curr_case[2])])
    X_cases.append(case)
X_cases    

[193, 76, 251, 134, 100, 54, 121, 71]

In [20]:
purchases = []
for i in range(0,8):
    curr_case = cases.loc[i]
    target_case = books[(books['ArtBooks'] == curr_case[0]) & (books['HistoryBooks'] == curr_case[1]) 
                     & (books['TableBooks'] == curr_case[2])]
    purchase = target_case[['Purchase']].sum().values[0]
    purchases.append(purchase)
    
purchases

[4, 12, 10, 3, 16, 4, 14, 26]

In [21]:
cases = cases.assign(N = X_cases, X = purchases)
cases['N-X'] = cases['N'] - cases['X']
cases['a_post'] = cases['a_prior'] + cases['X']
cases['b_post'] = cases['b_prior'] + cases['N-X']
cases['theta_post'] = cases['a_post'] / (cases['a_post'] + cases['b_post'])
cases

Unnamed: 0,Art,History,Table,a_prior,b_prior,theta_prior,N,X,N-X,a_post,b_post,theta_post
0,0,0,0,1,1,0.5,193,4,189,5,190,0.025641
1,1,0,0,1,1,0.5,76,12,64,13,65,0.166667
2,0,1,0,1,1,0.5,251,10,241,11,242,0.043478
3,0,0,1,1,1,0.5,134,3,131,4,132,0.029412
4,1,1,0,1,1,0.5,100,16,84,17,85,0.166667
5,1,0,1,1,1,0.5,54,4,50,5,51,0.089286
6,0,1,1,1,1,0.5,121,14,107,15,108,0.121951
7,1,1,1,1,1,0.5,71,26,45,27,46,0.369863


### Part 3: Compute (1) with beta priors that show strong weighting for low likelihood of purchase

Beta(5, 95): $mean = \frac{5}{5+95} = 0.05$

In [26]:
weighted_a_prior = [5] * 3
weighted_b_prior = [95] * 3
weighted_theta_prior = [0.05] * 3
weighted_a_post = [weighted_a_prior[i] + X[i] for i in range(0,3)]
weighted_b_post = [weighted_b_prior[i] + X[i] for i in range(0,3)]
weighted_theta_post = [weighted_a_post[i] / (weighted_a_post[i] + weighted_b_post[i]) for i in range(0,3)]

In [27]:
weighted_df = pd.DataFrame({'X_i': X, 'a_prior': weighted_a_prior, 'b_prior' : weighted_b_prior, 
                            'theta_prior': weighted_theta_prior, 'a_post': weighted_a_post, 
                            'b_post': weighted_b_post, 'theta_post': weighted_theta_post})
weighted_df.index = ["Art", "History", "Table"]
weighted_df

Unnamed: 0,X_i,a_prior,b_prior,theta_prior,a_post,b_post,theta_post
Art,301,5,95,0.05,306,396,0.435897
History,543,5,95,0.05,548,638,0.462057
Table,380,5,95,0.05,385,475,0.447674


### Part 4: Compute (1) with beta priors that show strong weighting for low likelihood of purchase

Beta(5, 95): $mean = \frac{5}{5+95} = 0.05$

In [28]:
weighted_cases = cases.copy()
weighted_cases['a_prior'] = [5] * 8
weighted_cases['b_prior'] = [95] * 8
weighted_cases['theta_prior'] = [0.05] * 8
weighted_cases = weighted_cases.assign(N = X_cases, X = purchases)
weighted_cases['N-X'] = weighted_cases['N'] - weighted_cases['X']
weighted_cases['a_post'] = weighted_cases['a_prior'] + weighted_cases['X']
weighted_cases['b_post'] = weighted_cases['b_prior'] + weighted_cases['N-X']
weighted_cases['theta_post'] = weighted_cases['a_post'] / (weighted_cases['a_post'] + weighted_cases['b_post'])
weighted_cases

Unnamed: 0,Art,History,Table,a_prior,b_prior,theta_prior,N,X,N-X,a_post,b_post,theta_post
0,0,0,0,5,95,0.05,193,4,189,9,284,0.030717
1,1,0,0,5,95,0.05,76,12,64,17,159,0.096591
2,0,1,0,5,95,0.05,251,10,241,15,336,0.042735
3,0,0,1,5,95,0.05,134,3,131,8,226,0.034188
4,1,1,0,5,95,0.05,100,16,84,21,179,0.105
5,1,0,1,5,95,0.05,54,4,50,9,145,0.058442
6,0,1,1,5,95,0.05,121,14,107,19,202,0.085973
7,1,1,1,5,95,0.05,71,26,45,31,140,0.181287
