In [22]:
import pandas as pd
import numpy as np
import scipy.stats as stats


clicks_data = pd.read_excel("clicks.dataset.2.xlsx")
volumes_data = pd.read_excel("volumes.dataset.2.xlsx")



In [23]:
# Load data
clicks_data

Unnamed: 0,ad,1,2,3,4,5
0,clicks,52,38,51,45,25
1,exposures,1000,1000,1000,1000,1000


In [24]:
volumes_data

Unnamed: 0,cust,ad,volume
0,1,1,32
1,2,1,54
2,3,1,31
3,4,1,24
4,5,1,42
...,...,...,...
206,207,5,47
207,208,5,61
208,209,5,63
209,210,5,102


In [25]:


# Extract clicks and exposures
clicks = clicks_data.iloc[0, 1:].values
exposures = clicks_data.iloc[1, 1:].values

# Compute click-through rates (CTR)
ctr = clicks / exposures

# Compute volumes and average volumes per click
volumes = volumes_data.groupby('ad')['volume'].sum().values
avg_volumes = volumes / clicks

# Bayesian posterior probability computations
num_draws = 100000
campaigns = len(clicks)

# Initialize matrices to store draws
ctr_draws = np.zeros((num_draws, campaigns))
avg_volume_draws = np.zeros((num_draws, campaigns))

# Generate draws from beta distribution for CTR
for i in range(campaigns):
    alpha = clicks[i] + 1
    beta = exposures[i] - clicks[i] + 1
    ctr_draws[:, i] = stats.beta.rvs(a=alpha, b=beta, size=num_draws)

# Generate draws from t distribution for average volume per click
for i in range(campaigns):
    n = clicks[i]
    sample_mean = avg_volumes[i]
    sample_std = np.std(volumes_data[volumes_data['ad'] == i+1]['volume'])
    std_error = sample_std / np.sqrt(n)
    df = n - 1
    avg_volume_draws[:, i] = sample_mean + std_error * stats.t.rvs(df=df, size=num_draws)

# Compute expected volume per exposure (EVI) draws
evi_draws = ctr_draws * avg_volume_draws

# Compute posterior probabilities
ctr_prob = (ctr_draws == ctr_draws.max(axis=1)[:, None]).mean(axis=0)
avg_volume_prob = (avg_volume_draws == avg_volume_draws.max(axis=1)[:, None]).mean(axis=0)
evi_prob = (evi_draws == evi_draws.max(axis=1)[:, None]).mean(axis=0)

# Create a DataFrame to display the results
results = pd.DataFrame({
    'Campaign': range(1, campaigns + 1),
    'CTR': ctr,
    'Average Volume per Click': avg_volumes,
    'Posterior Probability of Highest CTR': ctr_prob,
    'Posterior Probability of Highest Average Volume per Click': avg_volume_prob,
    'Posterior Probability of Highest EVI': evi_prob
})


results


Unnamed: 0,Campaign,CTR,Average Volume per Click,Posterior Probability of Highest CTR,Posterior Probability of Highest Average Volume per Click,Posterior Probability of Highest EVI
0,1,0.052,42.076923,0.47051,0.0,0.16904
1,2,0.038,57.289474,0.01627,1e-05,0.19974
2,3,0.051,41.803922,0.39374,0.0,0.13126
3,4,0.045,47.888889,0.11947,0.0,0.1594
4,5,0.025,90.64,1e-05,0.99999,0.34056


In [26]:
results[['Campaign','Posterior Probability of Highest CTR']]

Unnamed: 0,Campaign,Posterior Probability of Highest CTR
0,1,0.47051
1,2,0.01627
2,3,0.39374
3,4,0.11947
4,5,1e-05


In [27]:
results[['Campaign','Posterior Probability of Highest Average Volume per Click']]

Unnamed: 0,Campaign,Posterior Probability of Highest Average Volume per Click
0,1,0.0
1,2,1e-05
2,3,0.0
3,4,0.0
4,5,0.99999


In [28]:
sorted_results = results[['Campaign', 'Posterior Probability of Highest EVI']].sort_values(by='Posterior Probability of Highest EVI', ascending=False)

# Display the sorted DataFrame
print(sorted_results)

   Campaign  Posterior Probability of Highest EVI
4         5                               0.34056
1         2                               0.19974
0         1                               0.16904
3         4                               0.15940
2         3                               0.13126
