In [1]:
import pandas as pd
from scipy.stats import mannwhitneyu
import warnings

# Silence every warning for the rest of the session.
warnings.filterwarnings('ignore')

# Load the feature ON/OFF export and replace missing values with 0.
df = pd.read_csv('feature_on_off_sept20.csv').fillna(0)

In [4]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

Unnamed: 0,uid,country,date,feature_on_off,rev,num_installs,spend,is_d1_ret,is_d7_ret
0,b04a520b-e241-47ee-908b-db05542d9345,KR,2023-09-17,ON,20.37,5,0.0,1,0
1,fc8c5f3f-9304-418d-bc5c-e16346a5cfd2,KR,2023-09-17,OFF,0.0,0,0.0,0,0
2,2b0eceb8-23b0-4e5e-9a76-ac56693efec2,KR,2023-09-17,ON,3.6,1,0.0,0,0
3,438851c2-5623-46c6-a12f-6918007cdb16,KR,2023-09-17,OFF,0.0,0,0.0,0,0
4,b5295845-0d2d-4282-acde-ce028ec86890,KR,2023-09-17,ON,0.0,0,0.0,0,0


## Feature ON Vs. Feature OFF in KR and DE

In [5]:
def test_significance(country, col):
    """Function to test the significance of 2 groups

    Args:
        country (str): country code
        col (str): column names
    """
    off = df[(df['country'] == country) & (df['feature_on_off'] == 'OFF')].groupby(['uid']).mean().reset_index()[['rev','spend','num_installs','is_d1_ret','is_d7_ret']]
    on = df[(df['country'] == country) & (df['feature_on_off'] == 'ON')].groupby(['uid']).mean().reset_index()[['rev','spend','num_installs','is_d1_ret','is_d7_ret']]
    data1 = on[[col]]
    data2 = off[[col]]
    
    _, p_value = mannwhitneyu(data1, data2, alternative='two-sided')
    
    return p_value

In [6]:
# Two-sided Mann-Whitney p-values, feature ON vs. OFF, one per metric.
# Metric order in each unpacking: rev, num_installs, spend, is_d1_ret, is_d7_ret.
(arpu_kr_p_value, anipu_kr_p_value, aspu_kr_p_value,
 d1_kr_p_value, d7_kr_p_value) = (
    test_significance('KR', metric)
    for metric in ('rev', 'num_installs', 'spend', 'is_d1_ret', 'is_d7_ret')
)

(arpu_de_p_value, anipu_de_p_value, aspu_de_p_value,
 d1_de_p_value, d7_de_p_value) = (
    test_significance('DE', metric)
    for metric in ('rev', 'num_installs', 'spend', 'is_d1_ret', 'is_d7_ret')
)

# Each result is a one-element array, hence the [0].
print('---------KR---------')
print('ARPU p-value:', arpu_kr_p_value[0])
print('ANIPU p-value:', anipu_kr_p_value[0])
print('ASPU p-value:', aspu_kr_p_value[0])
print('d1_ret p-value:', d1_kr_p_value[0])
print('d7_ret p-value:', d7_kr_p_value[0])
print('\n')
print('---------DE---------')
print('ARPU p-value:', arpu_de_p_value[0])
print('ANIPU p-value:', anipu_de_p_value[0])
print('ASPU p-value:', aspu_de_p_value[0])
print('d1_ret p-value:', d1_de_p_value[0])
print('d7_ret p-value:', d7_de_p_value[0])

---------KR---------
ARPU p-value: 0.03188906818819176
ANIPU p-value: 0.035313522063377685
ASPU p-value: 0.6164659869685916
d1_ret p-value: 0.40056743444823206
d7_ret p-value: 1.0


---------DE---------
ARPU p-value: 0.3668853441821587
ANIPU p-value: 0.3281878340270178
ASPU p-value: 0.4705874610383869
d1_ret p-value: 0.9183966241999324
d7_ret p-value: 1.0


- **KR**:
   - **ARPU**
   - Null Hypothesis:
      - users who were exposed to the new signup page had a similar ARPU as those who had the feature off
   - Alternative Hypothesis:
      - Users who were exposed to the new signup page had a different ARPU as those who had the feature off
   - p-value: 0.032
      - Since the p-value (0.032) is less than our significance level 0.05, we reject the null hypothesis
      - This means we have sufficient evidence to say that the ARPU distributions differ between the groups
   - **ASPU**
   - Null Hypothesis:
      - users who were exposed to the new signup page had a similar ASPU as those who had the feature off
   - Alternative Hypothesis:
      - Users who were exposed to the new signup page had a different ASPU as those who had the feature off
   - p-value: 0.616
      - Since the p-value (0.616) is not less than our significance level 0.05, we fail to reject the null hypothesis
      - This means we do not have sufficient evidence to say that the ASPU distributions differ between the groups
   - **ANIPU**
   - Null Hypothesis:
      - users who were exposed to the new signup page had a similar ANIPU as those who had the feature off
   - Alternative Hypothesis:
      - Users who were exposed to the new signup page had a different ANIPU as those who had the feature off
   - p-value: 0.035
      - Since the p-value (0.035) is less than our significance level 0.05, we reject the null hypothesis
      - This means we have sufficient evidence to say that the ANIPU distributions differ between the groups
   
   - **Retention**
   - Null Hypothesis:
      - users who were exposed to the new signup page had a similar d1_retention as those who had the feature off
   - Alternative Hypothesis:
      - Users who were exposed to the new signup page had a different d1_Retention as those who had the feature off

- **DE**:
   - **ARPU**
   - Null Hypothesis:
      - users who were exposed to the new signup page had a similar ARPU as those who had the feature off
   - Alternative Hypothesis:
      - Users who were exposed to the new signup page had a different ARPU as those who had the feature off

   - **ASPU**
   - Null Hypothesis:
      - users who were exposed to the new signup page had a similar ASPU as those who had the feature off
   - Alternative Hypothesis:
      - Users who were exposed to the new signup page had a different ASPU as those who had the feature off

   - **ANIPU**
   - Null Hypothesis:
      - users who were exposed to the new signup page had a similar ANIPU as those who had the feature off
   - Alternative Hypothesis:
      - Users who were exposed to the new signup page had a different ANIPU as those who had the feature off
    

## Feature ON Signups

In [9]:
# Replace the notebook-level frame with the per-signup-option export,
# substituting 0 for missing values.
df = pd.read_csv('feature_on_off_diff_signup_page_sept20.csv').fillna(0)

def test_significance(country, col, event_type1, event_type2):
    """Function to test the significance of 2 groups

    Args:
        country (str): country code
        col (str): column names
        event_type1 (str): signup screen
        event_type2 (str): signup screen
    """
    a = df[(df['country'] == country) & (df['event_type'] == event_type1)].groupby(['uid']).mean().reset_index()[['rev','num_installs','spend','is_d1_ret','is_d7_ret']]
    b = df[(df['country'] == country) & (df['event_type'] == event_type2)].groupby(['uid']).mean().reset_index()[['rev','num_installs','spend','is_d1_ret','is_d7_ret']]

    data1=a[[col]]
    data2=b[[col]]
    
    _, p_value = mannwhitneyu(data1, data2, alternative='two-sided')
    
    return p_value


In [11]:
# Event-type labels of the signup options being compared.
SIGNUP_EMAIL = 'SIGNUP_SCREEN_EMAIL_CLICK'
SIGNUP_FACEBOOK = 'SIGNUP_SCREEN_FACEBOOK_CLICK'
SIGNUP_GOOGLE = 'SIGNUP_SCREEN_GOOGLE_CLICK'
SIGNUP_KAKAO = 'SIGNUP_SCREEN_KAKAO_CLICK'


def _signup_p_values(country, event_type1, event_type2):
    """Return p-values for rev, num_installs, spend, is_d1_ret, is_d7_ret (in that order)."""
    return tuple(
        test_significance(country, metric, event_type1, event_type2)
        for metric in ('rev', 'num_installs', 'spend', 'is_d1_ret', 'is_d7_ret')
    )


# ---- KR: every pairwise comparison of the four signup options ----
# email vs. facebook
(arpu_kr_p_value, anipu_kr_p_value, aspu_kr_p_value,
 d1_ret_kr_p_value, d7_ret_kr_p_value) = _signup_p_values('KR', SIGNUP_EMAIL, SIGNUP_FACEBOOK)

# email vs. google
(arpu_kr_goog_p_value, anipu_kr_goog_p_value, aspu_kr_goog_p_value,
 d1_ret_goog_p_value, d7_ret_goog_p_value) = _signup_p_values('KR', SIGNUP_EMAIL, SIGNUP_GOOGLE)

# email vs. kakao
(arpu_kr_kk_p_value, anipu_kr_kk_p_value, aspu_kr_kk_p_value,
 d1_ret_kk_p_value, d7_ret_kk_p_value) = _signup_p_values('KR', SIGNUP_EMAIL, SIGNUP_KAKAO)

# facebook vs. google
(arpu_kr_fbg_p_value, anipu_kr_fbg_p_value, aspu_kr_fbg_p_value,
 d1_ret_fbg_p_value, d7_ret_fbg_p_value) = _signup_p_values('KR', SIGNUP_FACEBOOK, SIGNUP_GOOGLE)

# facebook vs. kakao
(arpu_kr_fbk_p_value, anipu_kr_fbk_p_value, aspu_kr_fbk_p_value,
 d1_ret_fbk_p_value, d7_ret_fbk_p_value) = _signup_p_values('KR', SIGNUP_FACEBOOK, SIGNUP_KAKAO)

# google vs. kakao
(arpu_kr_gk_p_value, anipu_kr_gk_p_value, aspu_kr_gk_p_value,
 d1_ret_gk_p_value, d7_ret_gk_p_value) = _signup_p_values('KR', SIGNUP_GOOGLE, SIGNUP_KAKAO)

# ---- DE ----
# The retention p-values below were previously computed with country 'KR'
# (copy-paste slip); every DE variable now really uses the DE rows.
# email vs. google
(arpu_de_p_value, anipu_de_p_value, aspu_de_p_value,
 d1_ret_de_p_value, d7_ret_de_p_value) = _signup_p_values('DE', SIGNUP_EMAIL, SIGNUP_GOOGLE)

# email vs. facebook
(arpu_de_efb_value, anipu_de_efb_value, aspu_de_efb_value,
 d1_ret_de_efb_value, d7_ret_de_efb_value) = _signup_p_values('DE', SIGNUP_EMAIL, SIGNUP_FACEBOOK)

# facebook vs. google
(arpu_de_fbg_value, anipu_de_fbg_value, aspu_de_fbg_value,
 d1_ret_de_fbg_value, d7_ret_de_fbg_value) = _signup_p_values('DE', SIGNUP_FACEBOOK, SIGNUP_GOOGLE)

# Each result is a one-element array, hence the [0].
print('---------KR signup screen email---------')
# email vs. facebook
print('ARPU p-value:', arpu_kr_p_value[0])
print('ANIPU p-value:', anipu_kr_p_value[0])
print('ASPU p-value:', aspu_kr_p_value[0])

# email vs. google
print('ARPU p-value:', arpu_kr_goog_p_value[0])
print('ANIPU p-value:', anipu_kr_goog_p_value[0])
print('ASPU p-value:', aspu_kr_goog_p_value[0])

# email vs. kakao
print('ARPU p-value:', arpu_kr_kk_p_value[0])
print('ANIPU p-value:', anipu_kr_kk_p_value[0])
print('ASPU p-value:', aspu_kr_kk_p_value[0])
# NOTE(review): these two retention values are the email vs. facebook results;
# the email vs. google / kakao retention p-values are computed but never printed.
print('d1_ret p-value:', d1_ret_kr_p_value[0])
print('d7_ret p-value:', d7_ret_kr_p_value[0])

print('---------KR signup screen FB---------')
# facebook vs. google
print('ARPU p-value:', arpu_kr_fbg_p_value[0])
print('ANIPU p-value:', anipu_kr_fbg_p_value[0])
print('ASPU p-value:', aspu_kr_fbg_p_value[0])

# facebook vs. kakao
print('ARPU p-value:', arpu_kr_fbk_p_value[0])
print('ANIPU p-value:', anipu_kr_fbk_p_value[0])
print('ASPU p-value:', aspu_kr_fbk_p_value[0])

# NOTE(review): retention shown here is facebook vs. kakao only.
print('d1_ret p-value:', d1_ret_fbk_p_value[0])
print('d7_ret p-value:', d7_ret_fbk_p_value[0])

print('---------KR signup screen google---------')

# google vs. kakao
print('ARPU p-value:', arpu_kr_gk_p_value[0])
print('ANIPU p-value:', anipu_kr_gk_p_value[0])
print('ASPU p-value:', aspu_kr_gk_p_value[0])

print('d1_ret p-value:', d1_ret_gk_p_value[0])
print('d7_ret p-value:', d7_ret_gk_p_value[0])

print('\n')
print('---------DE signup email---------')
# email vs. google
print('ARPU p-value:', arpu_de_p_value[0])
print('ANIPU p-value:', anipu_de_p_value[0])
print('ASPU p-value:', aspu_de_p_value[0])

# email vs. facebook
print('ARPU p-value:', arpu_de_efb_value[0])
print('ANIPU p-value:', anipu_de_efb_value[0])
print('ASPU p-value:', aspu_de_efb_value[0])

# facebook vs. google
print('ARPU p-value:', arpu_de_fbg_value[0])
print('ANIPU p-value:', anipu_de_fbg_value[0])
print('ASPU p-value:', aspu_de_fbg_value[0])


---------KR signup screen email---------
ARPU p-value: 0.0008158218213419495
ANIPU p-value: 0.0009028317769984234
ASPU p-value: 0.2147538802428015
ARPU p-value: 0.04041792373967355
ANIPU p-value: 0.032878836767651395
ASPU p-value: 0.3004523900502889
ARPU p-value: 0.008353573999444709
ANIPU p-value: 0.05058598375826673
ASPU p-value: 0.1502393162311649
d1_ret p-value: 0.005000099693913881
d7_ret p-value: 1.0
---------KR signup screen FB---------
ARPU p-value: 0.01923186501556598
ANIPU p-value: 0.02511117271754731
ASPU p-value: 0.03829125190366146
ARPU p-value: 3.220769477117707e-11
ANIPU p-value: 1.7075897529210215e-09
ASPU p-value: 0.017548371202310897
d1_ret p-value: 2.6937662454435503e-06
d7_ret p-value: 1.0
---------KR signup screen google---------
ARPU p-value: 8.139018569422819e-18
ANIPU p-value: 4.144134327334353e-14
ASPU p-value: 0.321986454470666
d1_ret p-value: 2.5793273211169714e-09
d7_ret p-value: 1.0


---------DE signup email---------
ARPU p-value: 1.2564694838347848e-11
AN