In [1]:
import numpy as np
import pandas as pd
import warnings

import statsmodels.stats.api as sms

from scipy import stats as st


In [2]:
warnings.filterwarnings("ignore", category=FutureWarning)

In [3]:
# Load the experiment data from the CSV file (path is relative to the
# notebook's working directory).
df = pd.read_csv(filepath_or_buffer='data/cookie_cats.csv')

In [4]:
# Show the loaded frame; the rendered output below elides the middle rows.
df

Unnamed: 0,userid,version,sum_gamerounds,retention_1,retention_7
0,116,gate_30,3,False,False
1,337,gate_30,38,True,False
2,377,gate_40,165,True,False
3,483,gate_40,1,False,False
4,488,gate_40,179,True,True
...,...,...,...,...,...
90184,9999441,gate_40,97,True,False
90185,9999479,gate_40,30,False,False
90186,9999710,gate_30,28,True,False
90187,9999768,gate_40,51,True,False


In [5]:
# Inspect the column dtypes of the loaded frame.
df.dtypes

userid             int64
version           object
sum_gamerounds     int64
retention_1         bool
retention_7         bool
dtype: object

In [6]:
# Per column: is there at least one missing value?
df.isnull().any()

userid            False
version           False
sum_gamerounds    False
retention_1       False
retention_7       False
dtype: bool

#### - Checking that there are no duplicates by 'userid'

In [7]:
# Number of rows per userid (value_counts sorts most frequent first by default).
session_counts = df['userid'].value_counts()
# How many userids appear on more than one row.
multi_users = (session_counts > 1).sum()

print(f'There is {multi_users} users that occur multiple times in the dataset')

There is 0 users that occur multiple times in the dataset


#### - Calculating the main conversion rates

In [8]:
def std_p(x):
    """Standard deviation of one group's values, as computed by np.std."""
    return np.std(x)


def se_p(x):
    """Standard error of the mean of one group's values, via scipy.stats.sem."""
    return st.sem(x)


# Aggregate the 7-day retention flag per game version in one pass, then give
# the resulting columns descriptive names.
conversion_rates = df.groupby('version')['retention_7'].agg(['mean', std_p, se_p])
conversion_rates.columns = ['conversion_rate', 'std_deviation', 'std_error']
conversion_rates

Unnamed: 0_level_0,conversion_rate,std_deviation,std_error
version,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
gate_30,0.190201,0.39246,0.001856
gate_40,0.182,0.385845,0.001809


### - Testing (Z-Test)

H0: the 'gate_30' version maintains retention at the same level as the 'gate_40' version seven days after installing the game.  
H1: the 'gate_30' version maintains better or worse retention than the 'gate_40' version seven days after installing the game.

In [9]:
# Count the userid entries belonging to each game version.
number_of_observations = df['userid'].groupby(df['version']).count()

In [10]:
# Group sizes: gate_30 is used as the control arm, gate_40 as the treatment arm.
n_control = number_of_observations['gate_30']
n_treat = number_of_observations['gate_40']
display(n_control, n_treat)

44700

45489

In [11]:
# Contingency table of counts: one row per game version, one column per
# retention_7 value (False / True).
observed_values = pd.crosstab(df["version"], df["retention_7"])
# Pick the retained-user counts by the column label True. Indexing the row
# with a bare integer (`[1]`) depends on pandas' deprecated positional fallback
# for non-integer indexes, so select by label instead.
successes = (observed_values.loc['gate_30', True], observed_values.loc['gate_40', True])
successes

(8502, 8279)

In [12]:
# Significance level used for the z-test decision and the confidence intervals.
alpha = 0.05

In [13]:
# Two-sample z-test for equality of the retention proportions
# (successes and group sizes are ordered control, treatment).
z_stat, pval = sms.proportions_ztest(
    count=successes,
    nobs=[n_control, n_treat],
    alternative='two-sided',
)

In [14]:
# Confidence interval for each group's retention rate. The call returns
# (lower bounds, upper bounds), each ordered (control, treatment).
(lower_con, lower_treat), (upper_con, upper_treat) = sms.proportion_confint(
    count=successes,
    nobs=[n_control, n_treat],
    alpha=alpha,
    method='normal',
)

In [15]:
# Report the test statistic, the p-value and the per-group intervals.
# NOTE(review): the "95%" label is hard-coded; it matches alpha = 0.05 only.
print(f'z statistic: {z_stat:.2f}')
print(f'p-value: {pval:.3f}')
print(f'95% Confidence Interval for the control group: [{lower_con:.3f}, {upper_con:.3f}]')
print(f'95% Confidence Interval for the treatment group: [{lower_treat:.3f}, {upper_treat:.3f}]')

# Decision: reject H0 when the p-value falls below the significance level.
print("Reject H0" if pval < alpha else "H0 fail to reject")

z statistic: 3.16
p-value: 0.002
95% Confidence Interval for the control group: [0.187, 0.194]
95% Confidence Interval for the treatment group: [0.178, 0.186]
Reject H0


#### Conclusion:  
Based on the results of the conducted test, we can conclude that the difference between the conversion rates for the Control (gate_30) and Test (gate_40) groups is statistically significant. Additionally, the calculated confidence intervals for the two groups do not overlap. Furthermore, comparing the conversion rates of both groups, we can infer that the implemented change resulted in poorer user retention in the Test group. Therefore, there is no justification for implementing this change for all users.

### - Testing (Chi-square Test)

H0: the 'retention_7' value doesn't depend on the game version  
H1: there is a dependency between the game version and the 'retention_7' value

In [16]:
# Significance level for the chi-square test (same value as used for the z-test).
alpha = 0.05

In [17]:
# Chi-square test of independence on the version x retention_7 table.
# correction=True is SciPy's default (continuity correction when the table
# has one degree of freedom); it is spelled out here to make that visible.
res = st.chi2_contingency(observed_values, correction=True)


In [18]:
# Report the chi-square statistic and its p-value.
print(f'statistic: {res.statistic:.2f}')
print(f'p-value: {res.pvalue:.3f}')

# Decision: reject H0 when the p-value falls below the significance level.
print("Reject H0" if res.pvalue < alpha else "H0 fail to reject")

statistic: 9.96
p-value: 0.002
Reject H0


#### Conclusion:  
According to the test results, there is a dependency between the game version and the 'retention_7' value.