# Marketing and customer analytics (MCAN) Group Assignment
## Q4 Display Advertising Assessment
## Group 9
* Name | Email | PGID


---


* Pradeep Kara | Pradeep_Kara_ampba2025S@isb.edu |  12410055
* Siddhesh Vanikar | Siddhesh_Vanikar_ampba2025S@isb.edu |  12410069
* Himanshu Dubey | Himanshu_Dubey_ampba2025S@isb.edu | 12410051
* Anagh Mahajan | Anagh_Mahajan_ampba2025S@isb.edu | 12410067

In [12]:
!pip install openpyxl
import pandas as pd
import numpy as np
from scipy.stats import beta, t

In [7]:
!git clone https://github.com/harss123/Display-Advertising-Assessment.git

fatal: destination path 'Display-Advertising-Assessment' already exists and is not an empty directory.


In [26]:
# Load data directly from GitHub raw links
clicks_url = "https://raw.githubusercontent.com/harss123/Display-Advertising-Assessment/main/clicks.dataset.2.xlsx"
volumes_url = "https://raw.githubusercontent.com/harss123/Display-Advertising-Assessment/main/volumes.dataset.2.xlsx"


In [27]:
clicks_df = pd.read_excel(clicks_url, header=None) # header=None to avoid using first row as header
volumes_df = pd.read_excel(volumes_url)

In [28]:
# Set column names for clicks_df
clicks_df.columns = ['metric'] + [str(i) for i in range(1, 6)]  # assuming 5 ad campaigns

In [29]:
# Extract click data
clicks = clicks_df.loc[0, '1':'5'].astype(int).values
exposures = clicks_df.loc[1, '1':'5'].astype(int).values

In [30]:
# Compute alpha and beta for Beta distribution (CTR)
alpha = clicks + 1
beta_params = exposures - clicks + 1

In [31]:
# Number of simulations
n_sim = 100000

In [32]:
# Simulate CTR draws
ctr_draws = np.array([
    beta.rvs(a=a, b=b, size=n_sim)
    for a, b in zip(alpha, beta_params)
])

In [33]:
# Compute posterior probability of highest CTR
ctr_highest_probs = np.mean(ctr_draws.argmax(axis=0) == np.arange(5)[:, None], axis=1)


In [34]:
# Post-click volume stats
volume_stats = volumes_df.groupby("ad")["volume"].agg(['mean', 'std', 'count']).reindex([1, 2, 3, 4, 5])
means = volume_stats['mean'].values
stds = volume_stats['std'].values
counts = volume_stats['count'].values
dfs = counts - 1
ses = stds / np.sqrt(counts)

In [35]:
# Simulate post-click volume draws from t-distribution
volume_draws = np.array([
    t.rvs(df=df, size=n_sim) * se + mean
    for df, se, mean in zip(dfs, ses, means)
])

In [36]:
# Compute posterior probability of highest volume
volume_highest_probs = np.mean(volume_draws.argmax(axis=0) == np.arange(5)[:, None], axis=1)


In [37]:
# Compute EVI draws (CTR * volume)
evi_draws = ctr_draws * volume_draws

In [38]:
# Compute posterior probability of highest EVI
evi_highest_probs = np.mean(evi_draws.argmax(axis=0) == np.arange(5)[:, None], axis=1)

In [39]:
# Output results
for i in range(5):
    print(f"Ad {i+1}:")
    print(f"  P(highest CTR) = {ctr_highest_probs[i]:.4f}")
    print(f"  P(highest Volume) = {volume_highest_probs[i]:.4f}")
    print(f"  P(highest EVI) = {evi_highest_probs[i]:.4f}\n")

Ad 1:
  P(highest CTR) = 0.0014
  P(highest Volume) = 0.0000
  P(highest EVI) = 0.0000

Ad 2:
  P(highest CTR) = 0.0276
  P(highest Volume) = 0.0000
  P(highest EVI) = 0.0067

Ad 3:
  P(highest CTR) = 0.0202
  P(highest Volume) = 0.0000
  P(highest EVI) = 0.0007

Ad 4:
  P(highest CTR) = 0.0803
  P(highest Volume) = 0.0000
  P(highest EVI) = 0.0080

Ad 5:
  P(highest CTR) = 0.8704
  P(highest Volume) = 1.0000
  P(highest EVI) = 0.9846

