In [1]:
import pandas as pd

# Read the two semicolon-delimited exports, control arm first, then test arm.
control, test = (
    pd.read_csv(csv_path, sep=';')
    for csv_path in ("control_group.csv", "test_group.csv")
)


In [2]:
# Label every row with the experiment arm it belongs to. The column is added
# to the original frames in place so it stays visible on `control` and `test`.
for arm_frame, arm_label in ((control, 'control'), (test, 'test')):
    arm_frame['group'] = arm_label

# Stack both arms into a single frame with a fresh 0..n-1 index.
df = pd.concat((control, test), ignore_index=True)


In [5]:
# Give the metric columns short, code-friendly names.
# The keys must match the raw headers exactly, including the leading '#':
# DataFrame.rename silently skips keys that are not present, so a key such as
# ' of Purchase' would leave the column untouched without any error.
df = df.rename(columns={
    'Spend [USD]': 'Spend',
    '# of Impressions': 'Impressions',
    '# of Website Clicks': 'Website_Clicks',
    '# of Purchase': 'Purchases',
})


In [7]:
# Show the current column labels of the control frame as a plain list.
print(list(control.columns))


['Campaign Name', 'Date', 'Spend [USD]', '# of Impressions', 'Reach', '# of Website Clicks', '# of Searches', '# of View Content', '# of Add to Cart', '# of Purchase', 'group']


In [8]:
def clean_columns(df):
    """Normalise the column labels of ``df`` in place and return ``df``.

    Each label is stripped of surrounding whitespace, then every ``#``, ``[``
    and ``]`` character is removed, and finally each remaining space is
    replaced by an underscore. Because stripping happens before ``#`` is
    removed, a label such as ``'# of Purchase'`` becomes ``'_of_Purchase'``
    (with a leading underscore).

    The same DataFrame object that was passed in is mutated and returned.
    """
    labels = df.columns.str.strip()
    # Drop the punctuation characters literally (no regex interpretation).
    for unwanted in ('#', '[', ']'):
        labels = labels.str.replace(unwanted, '', regex=False)
    df.columns = labels.str.replace(' ', '_')
    return df

# Normalise the headers of both arms (clean_columns mutates and returns its input).
control, test = clean_columns(control), clean_columns(test)


In [9]:
# Short names for the two metrics used in the conversion analysis.
# Once clean_columns has run, the headers are '_of_Purchase' and
# '_of_Website_Clicks', so the raw '# of ...' keys alone would match nothing
# and DataFrame.rename would silently do nothing. Both spellings are listed
# so the rename works whether or not the frame has been cleaned yet.
metric_renames = {
    '_of_Purchase': 'Purchases',
    '_of_Website_Clicks': 'Website_Clicks',
    '# of Purchase': 'Purchases',
    '# of Website Clicks': 'Website_Clicks',
}
control = control.rename(columns=metric_renames)
test = test.rename(columns=metric_renames)


In [12]:
# Show the current column labels of the combined frame as a plain list.
print(list(df.columns))


['Campaign_Name', 'Date', 'Spend_USD', '_of_Impressions', 'Reach', '_of_Website_Clicks', '_of_Searches', '_of_View_Content', '_of_Add_to_Cart', '_of_Purchase', 'group']


In [13]:
# Rename the click and purchase columns that the conversion analysis uses.
# `df` is concatenated before clean_columns is applied to `control`/`test`,
# so on a fresh top-to-bottom run it still carries the raw '# of ...' headers;
# if it was rebuilt after cleaning it carries the '_of_...' form instead.
# DataFrame.rename ignores keys that are absent, so listing both spellings
# makes this cell produce 'Website_Clicks' and 'Purchases' in either state.
df = df.rename(columns={
    '_of_Website_Clicks': 'Website_Clicks',
    '_of_Purchase': 'Purchases',
    '# of Website Clicks': 'Website_Clicks',
    '# of Purchase': 'Purchases',
})


In [14]:
# Per-row conversion rate: purchases divided by website clicks.
df['conversion_rate'] = df['Purchases'].div(df['Website_Clicks'])


In [15]:
# Total clicks and purchases per experiment arm, plus the pooled conversion
# rate (total purchases / total clicks) for each arm.
summary = (
    df.groupby('group')[['Website_Clicks', 'Purchases']]
    .sum()
    .assign(
        conversion_rate=lambda totals: totals['Purchases'] / totals['Website_Clicks']
    )
)
print(summary)


         Website_Clicks  Purchases  conversion_rate
group                                              
control        154303.0    15161.0         0.098255
test           180970.0    15637.0         0.086407


In [16]:
from statsmodels.stats.proportion import proportions_ztest

# Successes (purchases) and trials (website clicks) per group, in the row
# order of `summary`.
counts, nobs = (
    summary[metric].values for metric in ('Purchases', 'Website_Clicks')
)

# Two-sample z-test on the conversion proportions, using the function's
# default settings.
z_stat, p_val = proportions_ztest(counts, nobs)
print("Z-statistic:", z_stat)
print("P-value:", p_val)


Z-statistic: 11.8386745814439
P-value: 2.4631475628426749e-32
