# A/B Test with BigQuery and GA4

### Import Libraries

In [1]:
# Basics
import numpy as np
import pandas as pd
import warnings
import os 
warnings.filterwarnings('ignore')

# Connector to BigQuery
from google.cloud import bigquery
from google.oauth2 import service_account

# A/B Test Analytics Libraries
import scipy.stats as stats
from scipy.stats import chi2_contingency
from statsmodels.stats import proportion

# Data Viz
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

### Connect to bigquery

In [2]:
#gcp service acount key
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'C:/Users/charl\OneDrive/Desktop/my-awesome-site-key.json'
# initiates BQ service
bigquery_client = bigquery.Client()

In [3]:
QUERY = """
SELECT
 *
FROM
  `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_20210131`
   WHERE user_pseudo_ID IS NOT NULL
  """

In [4]:
#save bigquery results into pandas df
Query_Results = bigquery_client.query(QUERY)
df = Query_Results.to_dataframe()

### Data Prep

In [5]:
df['purchase'] = np.where(df['event_name']!= 'purchase', 1, 0)

In [6]:
population_users=df[['user_pseudo_id', 'purchase']]
population_users=population_users.groupby(['user_pseudo_id']).sum().reset_index()

# A/B Test

### Step 1: Get Null Hypothesis

* The null hypothesis is the claim that the effect being studied does not exist. 

* Type I error, or a false positive, is the rejection of the null hypothesis when it is actually true. 

* Type II error, or a false negative, is the failure to reject a null hypothesis that is actually false.

In [7]:
#The null is the population mean does not change

population_mean = population_users.purchase.mean()
population_std = population_users.purchase.std()

### Step 2 Statistical Significance of Test

Statistical significance refers to the claim that a set of observed data are not the result of chance but can instead be attributed to a specific cause.  

Statistical Significance,Alpha, is the probability of rejecting the null hypothesis when the null hypothesis is true

In [8]:
alpha=.05

### Step 3 Power of Test

Power is the likelihood of discovering an effect, provided that the effect exists. To put it another way, power is the likelihood of rejecting a false null hypothesis. 

In [9]:
power_test = 0.8

### Step 4 Effect Size of Test

Effect Size represents the magnitude of difference between averages of test and control group. It is the variance in averages between test and control groups divided by the standard deviation of the control.

In [11]:
target=12

effect_size=(target-population_mean)/population_std

### Step 5 Test Group Size

In [12]:
test_size=.5
ratio= (1-test_size)/test_size

### Step 5 Sample Size of Test

In [13]:
from statsmodels.stats import power
ss_analysis = power.TTestIndPower()

ss_result = ss_analysis.solve_power(effect_size=effect_size, power= power_test ,alpha=alpha, ratio=ratio, nobs1=None) 
print(ss_result)

1561.0429968972683


### Step 6 Run the Test

Get Total Population

In [14]:
population=df[['user_pseudo_id', 'purchase']]
population=population.groupby(['user_pseudo_id']).sum().reset_index()

#Assign random value to users
population['index_col'] = population.index

population['Sample']="Out of Sample"
population.loc[population.index_col <=(ss_result),'Sample'] = 'In Sample'

Get Sample

In [15]:
#get sample
sample=population[population["Sample"] == 'In Sample']

#test group size
test_size=.5

sample['Group']='Control'
sample.loc[sample.index_col <=(ss_result*test_size),'Group'] = 'Test'

### Step 8 Statistical Inference

* p-value : A p-value, or probability value, is a number describing how likely it is that your data would have occurred by random chance (i.e., that the null hypothesis is true).

In [16]:
test_results = sample[sample.Group == 'Test'].purchase.mean()
control_results = sample[sample.Group == 'Control'].purchase.mean()

test_std = sample[sample.Group == 'Test'].purchase.std()
control_std = sample[sample.Group == 'Control'].purchase.std()

z-test

z-test is a statistical test used to determine whether two population means are different when the variances are known and the sample size is large(sample_size>30).

In [17]:
from scipy.stats import norm

z_score = (test_results - control_results) / np.sqrt(test_std**2 + control_std**2)
print(f"z-score is {z_score:0.3f}, with p-value {norm().sf(z_score):0.3f}")


z-score is -0.023, with p-value 0.509


2 Sample T-Test

A t-test is an inferential statistic used to determine if there is a statistically significant difference between the means of two variables. 1 tail looks for a change in 1 direction, two tail t-test looks for a change in either direction

In [18]:
from scipy import stats

t_stat, p_val = stats.ttest_ind(sample[sample.Group == 'Control'].purchase,
                                sample[sample.Group == 'Test'].purchase) 

if p_val < 0.05:
    print("We reject the null hypothesis")
else:
    print("We fail to reject the null hypothesis")  

We fail to reject the null hypothesis


1 Sample T-Test

In [19]:
# perform one sample t-test 
t_statistic, p_value = stats.ttest_1samp(a= sample.purchase, popmean=population_mean) 
print(t_statistic , p_value)

-0.6838105401919836 0.494196301692688
