In [21]:
import pandas as pd
from scipy.stats import mannwhitneyu
import warnings
warnings.filterwarnings('ignore')

# Load the raw feature ON/OFF export; every missing value is replaced with 0.
df = pd.read_csv('email_feature_raw_data.csv').fillna(0)

## Feature ON Vs. Feature OFF in KR and DE

In [22]:
def test_significance(country, col):
    """Function to test the significance of 2 groups

    Args:
        country (str): country code
        col (str): column names
    """
    off = df[(df['country'] == country) & (df['feature_on_off'] == 'OFF')].groupby(['uid']).mean().reset_index()[['rev','spend','num_installs']]
    on = df[(df['country'] == country) & (df['feature_on_off'] == 'ON')].groupby(['uid']).mean().reset_index()[['rev','spend','num_installs']]
    data1 = on[[col]]
    data2 = off[[col]]
    
    _, p_value = mannwhitneyu(data1, data2, alternative='two-sided')
    
    return p_value

In [23]:
# Run the ON-vs-OFF test for each metric, one country at a time.
# Order inside each triple: revenue, installs, spend.
arpu_kr_p_value, anipu_kr_p_value, aspu_kr_p_value = [
    test_significance('KR', metric) for metric in ('rev', 'num_installs', 'spend')
]
arpu_de_p_value, anipu_de_p_value, aspu_de_p_value = [
    test_significance('DE', metric) for metric in ('rev', 'num_installs', 'spend')
]

# Each result is indexable; element 0 holds the p-value that gets printed.
print('---------KR---------')
for label, result in (
    ('ARPU p-value:', arpu_kr_p_value),
    ('ANIPU p-value:', anipu_kr_p_value),
    ('ASPU p-value:', aspu_kr_p_value),
):
    print(label, result[0])
print('\n')
print('---------DE---------')
for label, result in (
    ('ARPU p-value:', arpu_de_p_value),
    ('ANIPU p-value:', anipu_de_p_value),
    ('ASPU p-value:', aspu_de_p_value),
):
    print(label, result[0])

---------KR---------
ARPU p-value: 0.7643202353764017
ANIPU p-value: 0.6707058963463418
ASPU p-value: 0.5448189937814227


---------DE---------
ARPU p-value: 0.19534020211174785
ANIPU p-value: 0.2098068654142219
ASPU p-value: 0.9943619002867307


- **KR**:
   - **ARPU**
   - Null Hypothesis:
      - users who were exposed to the new signup page had a similar ARPU as those who had the feature off
   - Alternative Hypothesis:
      - Users who were exposed to the new signup page had a different ARPU as those who had the feature off
   - p-value: 0.764
      - Since the p-value (0.764) is not less than our significance level of 0.05, we fail to reject the null hypothesis
      - This means we do not have sufficient evidence to say that ARPU differs between the two groups (the Mann-Whitney U test compares the groups' distributions rather than their means)
   - **ASPU**
   - Null Hypothesis:
      - users who were exposed to the new signup page had a similar ASPU as those who had the feature off
   - Alternative Hypothesis:
      - Users who were exposed to the new signup page had a different ASPU as those who had the feature off
   - p-value: 0.545
      - Since the p-value (0.545) is not less than our significance level of 0.05, we fail to reject the null hypothesis
      - This means we do not have sufficient evidence to say that ASPU differs between the two groups (the Mann-Whitney U test compares the groups' distributions rather than their means)
   - **ANIPU**
   - Null Hypothesis:
      - users who were exposed to the new signup page had a similar ANIPU as those who had the feature off
   - Alternative Hypothesis:
      - Users who were exposed to the new signup page had a different ANIPU as those who had the feature off
   - p-value: 0.671
      - Since the p-value (0.671) is not less than our significance level of 0.05, we fail to reject the null hypothesis
      - This means we do not have sufficient evidence to say that ANIPU differs between the two groups (the Mann-Whitney U test compares the groups' distributions rather than their means)

- **DE**:
   - **ARPU**
   - Null Hypothesis:
      - users who were exposed to the new signup page had a similar ARPU as those who had the feature off
   - Alternative Hypothesis:
      - Users who were exposed to the new signup page had a different ARPU as those who had the feature off
   - p-value: 0.195
      - Since the p-value (0.195) is not less than our significance level of 0.05, we fail to reject the null hypothesis
      - This means we do not have sufficient evidence to say that ARPU differs between the two groups (the Mann-Whitney U test compares the groups' distributions rather than their means)
   - **ASPU**
   - Null Hypothesis:
      - users who were exposed to the new signup page had a similar ASPU as those who had the feature off
   - Alternative Hypothesis:
      - Users who were exposed to the new signup page had a different ASPU as those who had the feature off
   - p-value: 0.994
      - Since the p-value (0.994) is not less than our significance level of 0.05, we fail to reject the null hypothesis
      - This means we do not have sufficient evidence to say that ASPU differs between the two groups (the Mann-Whitney U test compares the groups' distributions rather than their means)
   - **ANIPU**
   - Null Hypothesis:
      - users who were exposed to the new signup page had a similar ANIPU as those who had the feature off
   - Alternative Hypothesis:
      - Users who were exposed to the new signup page had a different ANIPU as those who had the feature off
   - p-value: 0.210
      - Since the p-value (0.210) is not less than our significance level of 0.05, we fail to reject the null hypothesis
      - This means we do not have sufficient evidence to say that ANIPU differs between the two groups (the Mann-Whitney U test compares the groups' distributions rather than their means)
    

## Feature ON Signups

In [33]:
# Load the sign-up screen export; every missing value is replaced with 0.
# NOTE: this rebinds the notebook-level `df`, so from here on `df` no longer
# refers to the feature ON/OFF data loaded at the top of the notebook.
df = pd.read_csv('email_signup_screens_raw.csv').fillna(0)

def test_significance(country, col, event_type1, event_type2):
    """Function to test the significance of 2 groups

    Args:
        country (str): country code
        col (str): column names
        event_type1 (str): signup screen
        event_type2 (str): signup screen
    """
    a = df[(df['country'] == country) & (df['event_type'] == event_type1)].groupby(['uid']).mean().reset_index()[['rev','num_installs','spend']]
    b = df[(df['country'] == country) & (df['event_type'] == event_type2)].groupby(['uid']).mean().reset_index()[['rev','num_installs','spend']]

    data1=a[[col]]
    data2=b[[col]]
    
    _, p_value = mannwhitneyu(data1, data2, alternative='two-sided')
    
    return p_value

In [39]:
_METRICS = ('rev', 'num_installs', 'spend')
_EMAIL = 'SIGNUP_SCREEN_EMAIL_CLICK'
_FACEBOOK = 'SIGNUP_SCREEN_FACEBOOK_CLICK'
_GOOGLE = 'SIGNUP_SCREEN_GOOGLE_CLICK'
_KAKAO = 'SIGNUP_SCREEN_KAKAO_CLICK'


def _compare_screens(country, screen_a, screen_b):
    """Run test_significance for rev, num_installs and spend (in that order)."""
    return [test_significance(country, metric, screen_a, screen_b) for metric in _METRICS]


def _print_p_values(arpu, anipu, aspu):
    """Print element 0 of each result under its metric label."""
    for label, result in (
        ('ARPU p-value:', arpu),
        ('ANIPU p-value:', anipu),
        ('ASPU p-value:', aspu),
    ):
        print(label, result[0])


# KR: e-mail against each of the other sign-up buttons.
arpu_kr_p_value, anipu_kr_p_value, aspu_kr_p_value = _compare_screens('KR', _EMAIL, _FACEBOOK)
arpu_kr_goog_p_value, anipu_kr_goog_p_value, aspu_kr_goog_p_value = _compare_screens('KR', _EMAIL, _GOOGLE)
arpu_kr_kk_p_value, anipu_kr_kk_p_value, aspu_kr_kk_p_value = _compare_screens('KR', _EMAIL, _KAKAO)

# KR: the remaining pairs among Facebook, Google and Kakao.
arpu_kr_fbg_p_value, anipu_kr_fbg_p_value, aspu_kr_fbg_p_value = _compare_screens('KR', _FACEBOOK, _GOOGLE)
arpu_kr_fbk_p_value, anipu_kr_fbk_p_value, aspu_kr_fbk_p_value = _compare_screens('KR', _FACEBOOK, _KAKAO)
arpu_kr_gk_p_value, anipu_kr_gk_p_value, aspu_kr_gk_p_value = _compare_screens('KR', _GOOGLE, _KAKAO)

# DE: e-mail against Google.
arpu_de_p_value, anipu_de_p_value, aspu_de_p_value = _compare_screens('DE', _EMAIL, _GOOGLE)

# Under the e-mail header the triples are printed in the order
# e-mail vs Facebook, e-mail vs Google, e-mail vs Kakao.
print('---------KR signup screen email---------')
_print_p_values(arpu_kr_p_value, anipu_kr_p_value, aspu_kr_p_value)
_print_p_values(arpu_kr_goog_p_value, anipu_kr_goog_p_value, aspu_kr_goog_p_value)
_print_p_values(arpu_kr_kk_p_value, anipu_kr_kk_p_value, aspu_kr_kk_p_value)

# Facebook vs Google, then Facebook vs Kakao.
print('---------KR signup screen FB---------')
_print_p_values(arpu_kr_fbg_p_value, anipu_kr_fbg_p_value, aspu_kr_fbg_p_value)
_print_p_values(arpu_kr_fbk_p_value, anipu_kr_fbk_p_value, aspu_kr_fbk_p_value)

# Google vs Kakao.
print('---------KR signup screen google---------')
_print_p_values(arpu_kr_gk_p_value, anipu_kr_gk_p_value, aspu_kr_gk_p_value)
print('\n')
print('---------DE signup email---------')
_print_p_values(arpu_de_p_value, anipu_de_p_value, aspu_de_p_value)

---------KR signup screen email---------
ARPU p-value: 0.5613702592685887
ANIPU p-value: 0.5970310367569236
ASPU p-value: 0.29128663132406163
ARPU p-value: 0.7347523916920529
ANIPU p-value: 0.9480116391472009
ASPU p-value: 0.7750436494507966
ARPU p-value: 0.40594362245091997
ANIPU p-value: 0.42395231845092773
ASPU p-value: 0.7198169676274526
---------KR signup screen FB---------
ARPU p-value: 0.6879393075273876
ANIPU p-value: 0.5336107106545557
ASPU p-value: 0.36143195892588464
ARPU p-value: 0.12843897157538484
ANIPU p-value: 0.13477596257353092
ASPU p-value: 0.19717306459684192
---------KR signup screen google---------
ARPU p-value: 0.034859445837015605
ANIPU p-value: 0.11822741327884949
ASPU p-value: 0.20449636432216955


---------DE signup email---------
ARPU p-value: 0.00033268564359943654
ANIPU p-value: 0.00019781740041833256
ASPU p-value: 0.1835013679198042
