### 1. Loading Libraries

In [1]:
# Computation
import numpy as np
import pandas as pd 

# Visualization
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt

# Statistics
from scipy.stats import shapiro
import scipy.stats as stats

# Utils
import warnings
import os

%matplotlib inline

In [4]:
# Packages
from scipy.stats import shapiro
import scipy.stats as stats

In [2]:
mpl.style.use("fivethirtyeight")

### 2. Loading Data

In [19]:
df = pd.read_csv('../data/final_df.csv')

### 3. A/B Testing

Assumptions:

1. Check normality
2. If Normal Distribution, check homogeneity

Steps:

1. Split & Define Control Group & Test Group
2. Apply Shapiro Test for normality
3. If parametric apply Levene Test for homogeneity of variances
4. If Parametric + homogeneity of variances apply T-Test
5. If Parametric - homogeneity of variances apply Welch Test
6. If Non-parametric apply Mann Whitney U Test directly

In [22]:
# Split A/B
dataframe = df
group = 'version'
target = 'retention_1'

groupA = dataframe[dataframe[group] == "gate_30"][target]
groupB = dataframe[dataframe[group] == "gate_40"][target]

In [23]:
groupA.shape, groupB.shape

((44699,), (45489,))

#### Shapiro-Wilk Test for normality

* `H_0`: The null hypothesis for this test is that the data are normally distributed.
* `H_A`: The alternative hypothesis is that the data is **not** normally distributed.

If the chosen alpha level is 0.05 and the p-value is less than 0.05, then the null hypothesis that the data are normally distributed is rejected. If the p-value is greater than 0.05, then the null hypothesis is not rejected.

In our case, both groupA and groupB are **not** normally distributed as the p-values are both less than 0.05.

In [26]:
shapiro(groupA)



ShapiroResult(statistic=0.6324949264526367, pvalue=0.0)

In [27]:
shapiro(groupB)



ShapiroResult(statistic=0.6315157413482666, pvalue=0.0)

In [24]:
# Assumption: Normality
ntA = shapiro(groupA)[1] < 0.05
ntB = shapiro(groupB)[1] < 0.05

# H0: Distribution is Normal! - False
# H1: Distribution is not Normal! - True



#### Mann-Whitney rank test

The Mann–Whitney U test (also called the Mann–Whitney–Wilcoxon (MWW), Wilcoxon rank-sum test, or Wilcoxon–Mann–Whitney test) is a **`nonparametric`** test:

* `H_0`: null hypothesis that, for randomly selected values X and Y from two populations, the probability of X being greater than Y is equal to the probability of Y being greater than X.
* `H_A`: alternative hypothesis that, for randomly selected values X and Y from two populations, the probability of X being greater than Y is **not** equal to the probability of Y being greater than X.

In our case, p-value < 0.05, we reject the null hypothesis.

In [28]:
stats.mannwhitneyu(groupA, groupB)

MannwhitneyuResult(statistic=1010642683.0, pvalue=0.03696119270061944)

In [None]:
# Non-Parametric Test
ttest = stats.mannwhitneyu(groupA, groupB)[1] 

# H0: M1 == M2 - False
# H1: M1 != M2 - True

#### A/B Testing function

In [30]:
# A/B Testing Function - Quick Solution
def AB_Test(dataframe, group, target):
    
    # Packages
    from scipy.stats import shapiro
    import scipy.stats as stats
    
    # Split A/B
    groupA = dataframe[dataframe[group] == "gate_30"][target]
    groupB = dataframe[dataframe[group] == "gate_40"][target]
    
    # Assumption: Normality
    ntA = shapiro(groupA)[1] < 0.05
    ntB = shapiro(groupB)[1] < 0.05
    # H0: Distribution is Normal! - False
    # H1: Distribution is not Normal! - True
    
    if (ntA == False) & (ntB == False): # "H0: Normal Distribution"
        # Parametric Test
        # Assumption: Homogeneity of variances
        leveneTest = stats.levene(groupA, groupB)[1] < 0.05
        # H0: Homogeneity: False
        # H1: Heterogeneous: True
        
        if leveneTest == False:
            # Homogeneity
            ttest = stats.ttest_ind(groupA, groupB, equal_var=True)[1]
            # H0: M1 == M2 - False
            # H1: M1 != M2 - True
        else:
            # Heterogeneous
            ttest = stats.ttest_ind(groupA, groupB, equal_var=False)[1]
            # H0: M1 == M2 - False
            # H1: M1 != M2 - True
    else:
        # Non-Parametric Test
        ttest = stats.mannwhitneyu(groupA, groupB)[1] 
        # H0: M1 == M2 - False
        # H1: M1 != M2 - True
        
    # Result
    temp = pd.DataFrame({
        "AB Hypothesis":[ttest < 0.05], 
        "p-value":[ttest]
    })
    temp["Test Type"] = np.where((ntA == False) & (ntB == False), "Parametric", "Non-Parametric")
    temp["AB Hypothesis"] = np.where(temp["AB Hypothesis"] == False, "Fail to Reject H0", "Reject H0")
    temp["Comment"] = np.where(temp["AB Hypothesis"] == "Fail to Reject H0", "A/B groups are similar!", "A/B groups are not similar!")
    
    # Columns
    if (ntA == False) & (ntB == False):
        temp["Homogeneity"] = np.where(leveneTest == False, "Yes", "No")
        temp = temp[["Test Type", "Homogeneity","AB Hypothesis", "p-value", "Comment"]]
    else:
        temp = temp[["Test Type","AB Hypothesis", "p-value", "Comment"]]
    
    # Print Hypothesis
    print("# A/B Testing Hypothesis")
    print("H0: A == B")
    print("H1: A != B", "\n")
    
    return temp

In [31]:
AB_Test(dataframe=df, group = "version", target = "sum_gamerounds")

# A/B Testing Hypothesis
H0: A == B
H1: A != B 



Unnamed: 0,Test Type,AB Hypothesis,p-value,Comment
0,Non-Parametric,Reject H0,0.025446,A/B groups are not similar!


In [32]:
AB_Test(dataframe=df, group = "version", target = "retention_1")

# A/B Testing Hypothesis
H0: A == B
H1: A != B 



Unnamed: 0,Test Type,AB Hypothesis,p-value,Comment
0,Non-Parametric,Reject H0,0.036961,A/B groups are not similar!


In [33]:
AB_Test(dataframe=df, group = "version", target = "retention_7")

# A/B Testing Hypothesis
H0: A == B
H1: A != B 



Unnamed: 0,Test Type,AB Hypothesis,p-value,Comment
0,Non-Parametric,Reject H0,0.000796,A/B groups are not similar!
