In [38]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from scipy import stats
from scipy.stats import skew, kurtosis
from scipy.stats import ttest_ind, mannwhitneyu
import statsmodels.api as sm
%matplotlib inline
from Functions import clean_duplicates, clean_nan_variations, split_dataframe_by_variation, check_for_duplicates, calculate_error_rate

import pandas as pd
from scipy.stats import chi2_contingency
from statsmodels.stats.proportion import proportions_ztest

# Importing data

In [2]:
df1 = pd.read_csv("../Data/Raw/df_final_demo.txt")

In [3]:
df2 = pd.read_csv("../Data/Raw/df_final_experiment_clients.txt")

In [4]:
df3 = pd.read_csv("../Data/Raw/df_final_web_data_pt_1.txt")

In [5]:
df4 = pd.read_csv("../Data/Raw/df_final_web_data_pt_2.txt")

# Preparing data

In [6]:
df34 = pd.concat([df3, df4], ignore_index=True)

In [7]:
merged_df = pd.merge(df1, df2, on='client_id', how='inner')

In [8]:
df = pd.merge(merged_df, df34, on='client_id', how='inner')

# EDA - Exploratory Data Analysis

## Initial Exploration

In [9]:
df1.head()

Unnamed: 0,client_id,clnt_tenure_yr,clnt_tenure_mnth,clnt_age,gendr,num_accts,bal,calls_6_mnth,logons_6_mnth
0,836976,6.0,73.0,60.5,U,2.0,45105.3,6.0,9.0
1,2304905,7.0,94.0,58.0,U,2.0,110860.3,6.0,9.0
2,1439522,5.0,64.0,32.0,U,2.0,52467.79,6.0,9.0
3,1562045,16.0,198.0,49.0,M,2.0,67454.65,3.0,6.0
4,5126305,12.0,145.0,33.0,F,2.0,103671.75,0.0,3.0


In [10]:
df1.dtypes

client_id             int64
clnt_tenure_yr      float64
clnt_tenure_mnth    float64
clnt_age            float64
gendr                object
num_accts           float64
bal                 float64
calls_6_mnth        float64
logons_6_mnth       float64
dtype: object

In [11]:
df2.head()

Unnamed: 0,client_id,Variation
0,9988021,Test
1,8320017,Test
2,4033851,Control
3,1982004,Test
4,9294070,Control


In [12]:
df34.head()

Unnamed: 0,client_id,visitor_id,visit_id,process_step,date_time
0,9988021,580560515_7732621733,781255054_21935453173_531117,step_3,2017-04-17 15:27:07
1,9988021,580560515_7732621733,781255054_21935453173_531117,step_2,2017-04-17 15:26:51
2,9988021,580560515_7732621733,781255054_21935453173_531117,step_3,2017-04-17 15:19:22
3,9988021,580560515_7732621733,781255054_21935453173_531117,step_2,2017-04-17 15:19:13
4,9988021,580560515_7732621733,781255054_21935453173_531117,step_3,2017-04-17 15:18:04


## Exploring numerical and categorical variables

In [13]:
df.head(20)

Unnamed: 0,client_id,clnt_tenure_yr,clnt_tenure_mnth,clnt_age,gendr,num_accts,bal,calls_6_mnth,logons_6_mnth,Variation,visitor_id,visit_id,process_step,date_time
0,836976,6.0,73.0,60.5,U,2.0,45105.3,6.0,9.0,Test,427070339_1413275162,228976764_46825473280_96584,confirm,2017-04-02 11:51:13
1,836976,6.0,73.0,60.5,U,2.0,45105.3,6.0,9.0,Test,427070339_1413275162,228976764_46825473280_96584,confirm,2017-04-02 11:47:50
2,836976,6.0,73.0,60.5,U,2.0,45105.3,6.0,9.0,Test,427070339_1413275162,228976764_46825473280_96584,confirm,2017-04-02 11:46:45
3,836976,6.0,73.0,60.5,U,2.0,45105.3,6.0,9.0,Test,427070339_1413275162,228976764_46825473280_96584,step_3,2017-04-02 11:23:08
4,836976,6.0,73.0,60.5,U,2.0,45105.3,6.0,9.0,Test,427070339_1413275162,228976764_46825473280_96584,step_2,2017-04-02 11:22:24
5,836976,6.0,73.0,60.5,U,2.0,45105.3,6.0,9.0,Test,427070339_1413275162,228976764_46825473280_96584,step_1,2017-04-02 11:21:38
6,836976,6.0,73.0,60.5,U,2.0,45105.3,6.0,9.0,Test,427070339_1413275162,228976764_46825473280_96584,start,2017-04-02 11:21:28
7,836976,6.0,73.0,60.5,U,2.0,45105.3,6.0,9.0,Test,427070339_1413275162,104438405_2368283624_817211,start,2017-03-29 11:02:44
8,836976,6.0,73.0,60.5,U,2.0,45105.3,6.0,9.0,Test,427070339_1413275162,104438405_2368283624_817211,start,2017-03-29 11:01:40
9,836976,6.0,73.0,60.5,U,2.0,45105.3,6.0,9.0,Test,427070339_1413275162,104438405_2368283624_817211,start,2017-03-29 10:59:43


In [14]:
df.select_dtypes("number").nunique().sort_values(ascending=False)

client_id           70609
bal                 70333
clnt_tenure_mnth      482
clnt_age              165
clnt_tenure_yr         54
logons_6_mnth           9
num_accts               8
calls_6_mnth            8
dtype: int64

In [15]:
display(df)

Unnamed: 0,client_id,clnt_tenure_yr,clnt_tenure_mnth,clnt_age,gendr,num_accts,bal,calls_6_mnth,logons_6_mnth,Variation,visitor_id,visit_id,process_step,date_time
0,836976,6.0,73.0,60.5,U,2.0,45105.30,6.0,9.0,Test,427070339_1413275162,228976764_46825473280_96584,confirm,2017-04-02 11:51:13
1,836976,6.0,73.0,60.5,U,2.0,45105.30,6.0,9.0,Test,427070339_1413275162,228976764_46825473280_96584,confirm,2017-04-02 11:47:50
2,836976,6.0,73.0,60.5,U,2.0,45105.30,6.0,9.0,Test,427070339_1413275162,228976764_46825473280_96584,confirm,2017-04-02 11:46:45
3,836976,6.0,73.0,60.5,U,2.0,45105.30,6.0,9.0,Test,427070339_1413275162,228976764_46825473280_96584,step_3,2017-04-02 11:23:08
4,836976,6.0,73.0,60.5,U,2.0,45105.30,6.0,9.0,Test,427070339_1413275162,228976764_46825473280_96584,step_2,2017-04-02 11:22:24
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
449826,5602139,21.0,254.0,59.5,F,3.0,157498.73,7.0,7.0,,962766819_57063135389,158544612_4105226940_43043,confirm,2017-04-05 11:48:02
449827,5602139,21.0,254.0,59.5,F,3.0,157498.73,7.0,7.0,,962766819_57063135389,158544612_4105226940_43043,step_3,2017-04-05 11:44:07
449828,5602139,21.0,254.0,59.5,F,3.0,157498.73,7.0,7.0,,962766819_57063135389,158544612_4105226940_43043,step_2,2017-04-05 11:43:10
449829,5602139,21.0,254.0,59.5,F,3.0,157498.73,7.0,7.0,,962766819_57063135389,884309593_49820690623_654151,step_1,2017-04-05 11:41:32


# Cleaning Data

In [16]:
df_cleaned = clean_duplicates(df)

In [17]:
df_cleaned.info()

<class 'pandas.core.frame.DataFrame'>
Index: 96773 entries, 410021 to 118359
Data columns (total 14 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   client_id         96773 non-null  int64  
 1   clnt_tenure_yr    96751 non-null  float64
 2   clnt_tenure_mnth  96751 non-null  float64
 3   clnt_age          96749 non-null  float64
 4   gendr             96751 non-null  object 
 5   num_accts         96751 non-null  float64
 6   bal               96751 non-null  float64
 7   calls_6_mnth      96751 non-null  float64
 8   logons_6_mnth     96751 non-null  float64
 9   Variation         69110 non-null  object 
 10  visitor_id        96773 non-null  object 
 11  visit_id          96773 non-null  object 
 12  process_step      96773 non-null  object 
 13  date_time         96773 non-null  object 
dtypes: float64(7), int64(1), object(6)
memory usage: 11.1+ MB


In [18]:
df_no_nan = clean_nan_variations(df_cleaned)
df_no_nan

Unnamed: 0,client_id,clnt_tenure_yr,clnt_tenure_mnth,clnt_age,gendr,num_accts,bal,calls_6_mnth,logons_6_mnth,Variation,visitor_id,visit_id,process_step,date_time
315552,7179755,28.0,343.0,67.5,M,3.0,352322.35,2.0,5.0,Control,167765295_97487764427,264484508_5982901710_928530,start,2017-03-15 00:19:28
66111,4192640,14.0,174.0,54.0,M,2.0,97914.40,3.0,6.0,Test,692067844_75217592829,706721307_85347845958_18583,start,2017-03-15 00:43:23
47610,6752370,38.0,464.0,22.0,M,2.0,56574.43,2.0,5.0,Test,258848572_66112715827,147442660_10728929690_338280,start,2017-03-15 05:37:35
133132,9584408,7.0,87.0,48.5,U,2.0,71834.25,2.0,5.0,Control,748244138_48778380454,484298588_81471639218_981974,start,2017-03-15 06:03:24
23242,2685910,12.0,149.0,57.5,F,2.0,135363.65,6.0,9.0,Test,321566510_66607009808,538067801_99657933298_140950,start,2017-03-15 07:12:47
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
189150,1558312,23.0,276.0,64.0,F,2.0,27918.86,2.0,5.0,Control,397562195_28573798537,175513303_32086468382_224926,start,2017-06-20 23:13:35
277305,1545935,4.0,59.0,24.0,F,3.0,52280.32,5.0,8.0,Test,976873968_42239039722,10056419_61656665964_803695,start,2017-06-20 23:13:46
275362,6800679,15.0,189.0,46.0,M,3.0,158703.72,6.0,9.0,Test,185313147_57950282409,401866349_68590569683_395732,start,2017-06-20 23:14:06
230919,291330,29.0,349.0,68.0,M,2.0,62155.41,6.0,9.0,Control,992344309_24884468623,91879336_25502670457_529422,start,2017-06-20 23:21:34


In [19]:
df_no_nan['date_time'] = pd.to_datetime(df_no_nan['date_time'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_no_nan['date_time'] = pd.to_datetime(df_no_nan['date_time'])


In [20]:
df_no_nan.info()

<class 'pandas.core.frame.DataFrame'>
Index: 69110 entries, 315552 to 118359
Data columns (total 14 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   client_id         69110 non-null  int64         
 1   clnt_tenure_yr    69090 non-null  float64       
 2   clnt_tenure_mnth  69090 non-null  float64       
 3   clnt_age          69088 non-null  float64       
 4   gendr             69090 non-null  object        
 5   num_accts         69090 non-null  float64       
 6   bal               69090 non-null  float64       
 7   calls_6_mnth      69090 non-null  float64       
 8   logons_6_mnth     69090 non-null  float64       
 9   Variation         69110 non-null  object        
 10  visitor_id        69110 non-null  object        
 11  visit_id          69110 non-null  object        
 12  process_step      69110 non-null  object        
 13  date_time         69110 non-null  datetime64[ns]
dtypes: datetime64[ns](1),

In [21]:
rename_genders = {'F': 'Female', 'M': 'Male', 'U': 'Unknown'}
df_no_nan['gendr'].replace(rename_genders, inplace=True)
df_no_nan = df_no_nan[df_no_nan['gendr'] != 'X']
print(df_no_nan['gendr'].value_counts())

gendr
Male       23673
Unknown    22898
Female     22517
Name: count, dtype: int64


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_no_nan['gendr'].replace(rename_genders, inplace=True)


In [22]:
df_no_nan['clnt_age'].value_counts()

clnt_age
62.5    897
58.5    897
59.5    895
57.5    854
52.5    843
       ... 
89.5      4
92.5      4
96.0      2
95.5      1
94.5      1
Name: count, Length: 158, dtype: int64

In [23]:
bins = [0, 18, 65, np.inf]
names = ['1: 0-18', '2: 19-65', '3: 65+']
df_no_nan['age_group'] = pd.cut(df_no_nan['clnt_age'], bins, labels=names)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_no_nan['age_group'] = pd.cut(df_no_nan['clnt_age'], bins, labels=names)


In [24]:
df_no_nan['age_group'].value_counts()

age_group
2: 19-65    57758
3: 65+      11153
1: 0-18       175
Name: count, dtype: int64

# Spliting Data

In [25]:
df_split = split_dataframe_by_variation(df_no_nan)
df_control = df_split['Control']
df_test = df_split['Test']

In [26]:
df_control = df_control.sort_values(by=['client_id', 'visit_id', 'process_step', 'date_time'])
df_control

Unnamed: 0,client_id,clnt_tenure_yr,clnt_tenure_mnth,clnt_age,gendr,num_accts,bal,calls_6_mnth,logons_6_mnth,Variation,visitor_id,visit_id,process_step,date_time,age_group
302517,1028,12.0,145.0,36.0,Male,3.0,103520.22,1.0,4.0,Control,42237450_62128060588,557292053_87239438319_391157,start,2017-04-08 18:51:28,2: 19-65
132464,1104,5.0,66.0,48.0,Unknown,3.0,154643.94,6.0,9.0,Control,194240915_18158000533,543158812_46395476577_767725,start,2017-06-12 07:49:18,2: 19-65
132465,1104,5.0,66.0,48.0,Unknown,3.0,154643.94,6.0,9.0,Control,194240915_18158000533,643221571_99977972121_69283,start,2017-06-20 22:31:33,2: 19-65
284963,1186,8.0,99.0,22.0,Unknown,2.0,31662.52,0.0,3.0,Control,446844663_31615102958,507052512_11309370126_442139,start,2017-04-08 15:59:16,2: 19-65
284962,1186,8.0,99.0,22.0,Unknown,2.0,31662.52,0.0,3.0,Control,446844663_31615102958,795373564_99931517312_810896,start,2017-04-08 18:05:02,2: 19-65
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
216462,9997470,6.0,82.0,36.0,Unknown,2.0,53543.03,0.0,3.0,Control,91394485_75296404278,655572400_94971272893_411965,start,2017-04-07 16:11:03,2: 19-65
216477,9997470,6.0,82.0,36.0,Unknown,2.0,53543.03,0.0,3.0,Control,395791369_55562604618,761490147_96352537762_21814,start,2017-05-09 15:55:48,2: 19-65
216461,9997470,6.0,82.0,36.0,Unknown,2.0,53543.03,0.0,3.0,Control,395791369_55562604618,904791598_9725982898_416914,start,2017-04-20 20:04:38,2: 19-65
143012,9998346,50.0,602.0,61.5,Female,2.0,149881.38,6.0,9.0,Control,292425655_16607136645,189177304_69869411700_783154,start,2017-03-29 15:25:00,2: 19-65


In [27]:
df_test = df_test.sort_values(by=['client_id', 'visit_id', 'process_step', 'date_time'])
df_test

Unnamed: 0,client_id,clnt_tenure_yr,clnt_tenure_mnth,clnt_age,gendr,num_accts,bal,calls_6_mnth,logons_6_mnth,Variation,visitor_id,visit_id,process_step,date_time,age_group
255461,555,3.0,46.0,29.5,Unknown,2.0,25454.66,2.0,6.0,Test,402506806_56087378777,637149525_38041617439_716659,start,2017-04-15 12:57:56,2: 19-65
9749,647,12.0,151.0,57.5,Male,2.0,30525.80,0.0,4.0,Test,66758770_53988066587,40369564_40101682850_311847,start,2017-04-12 15:41:28,2: 19-65
83864,934,9.0,109.0,51.0,Female,2.0,32522.88,0.0,3.0,Test,810392784_45004760546,7076463_57954418406_971348,start,2017-04-18 02:36:30,2: 19-65
105341,1336,48.0,576.0,42.0,Male,4.0,130537.18,6.0,9.0,Test,920624746_32603333901,583743392_96265099036_939815,start,2017-05-08 06:05:12,2: 19-65
105336,1336,48.0,576.0,42.0,Male,4.0,130537.18,6.0,9.0,Test,920624746_32603333901,614001770_19101025926_112779,confirm,2017-05-08 08:21:38,2: 19-65
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20600,9999626,9.0,113.0,35.0,Male,2.0,36642.88,6.0,9.0,Test,52633065_71189986073,182723364_27031318473_880901,start,2017-05-14 09:07:51,2: 19-65
310194,9999729,10.0,124.0,31.0,Female,3.0,107059.74,6.0,9.0,Test,843385170_36953471821,493310979_9209676464_421146,start,2017-04-20 14:21:27,2: 19-65
310199,9999729,10.0,124.0,31.0,Female,3.0,107059.74,6.0,9.0,Test,834634258_21862004160,870243567_56915814033_814203,start,2017-05-08 16:08:25,2: 19-65
310190,9999729,10.0,124.0,31.0,Female,3.0,107059.74,6.0,9.0,Test,604429154_69247391147,99583652_41711450505_426179,start,2017-04-05 13:40:49,2: 19-65


# Checking (again) for duplicates!!!!

In [28]:
num_duplicates_test = check_for_duplicates(df_test)
num_duplicates_test

0

In [29]:
num_duplicates_control = check_for_duplicates(df_control)
num_duplicates_control

0

# Comparing the Test Group with the Control Group

## Completion Rate:

In [30]:
confirm_test = df[(df['Variation'] == 'Test') & (df['process_step'] == 'confirm')]
confirm_control = df[(df['Variation'] == 'Control') & (df['process_step'] == 'confirm')]

num_confirm_test = confirm_test['client_id'].nunique()
num_confirm_control = confirm_control['client_id'].nunique()

total_test_users = df[df['Variation'] == 'Test']['client_id'].nunique()
total_control_users = df[df['Variation'] == 'Control']['client_id'].nunique()

completion_rate_test = num_confirm_test / total_test_users
completion_rate_control = num_confirm_control / total_control_users

print(f"Completion Rate for Test Group: {completion_rate_test:.2%}")
print(f"Completion Rate for Control Group: {completion_rate_control:.2%}")

Completion Rate for Test Group: 69.29%
Completion Rate for Control Group: 65.59%


- For the Test group about 69.29% of users completed the process by reaching the 'confirm' step. In contrast, in the Control group about 65.59% of users reached the 'confirm' step.
- The Test group had a higher completion rate. However, to determine if this difference is statistically significant, we need to perform a hypothesis test, such as a two-proportion z-test. The hypothesis test will tell us if the observed difference in completion rates is likely due to the changes made in the Test group or if it could just be due to random chance.

### The difference in completion rates between the Test and Control groups is statistically significant:

Null Hypothesis (H0): The completion rate for the Test group is equal to the completion rate for the Control group (ptest = pcontrol).

Alternative Hypothesis (Ha): The completion rate for the Test group is not equal to the completion rate for the Control group (ptest ≠ pcontrol).

In [31]:
total_test_users = df[df['Variation'] == 'Test']['client_id'].nunique()
total_control_users = df[df['Variation'] == 'Control']['client_id'].nunique()

In [32]:
p_test = 0.6929
p_control = 0.6559
n_test = total_test_users
n_control = total_control_users

P = ((p_test * n_test) + (p_control * n_control)) / (n_test + n_control)

Z = (p_test - p_control) / ((P * (1 - P) * (1/n_test + 1/n_control))**0.5)

p_value = stats.norm.sf(abs(Z)) * 2

print(f'Z-statistic: {Z}')
print(f'P-value: {p_value}')

Z-statistic: 8.860230552406993
P-value: 7.984913117796951e-19


- With a Z-statistic of approximately 8.86 and an extremely low p-value (7.98e-19), we can conclude that the difference in completion rates between the Test and Control groups is statistically significant.
- The p-value is a measure of how unusual or extreme the data is, under the assumption that the null hypothesis is true. Such a low p-value indicates that the probability of observing such a large or larger difference in our completion rates, assuming the null hypothesis is true, is extremely low.
- **Therefore, we have sufficient evidence to reject the null hypothesis and accept the alternative hypothesis that there is a real difference in the completion rates between the two groups. This suggests that the change or treatment applied to the Test group had a positive effect on the likelihood of users completing the process**

### Time Spent on Each Step:

In [33]:
df_test.sort_values(by=['client_id', 'date_time'], inplace=True)
df_control.sort_values(by=['client_id', 'date_time'], inplace=True)

df_test['time_diff'] = df_test.groupby('client_id')['date_time'].diff()
df_control['time_diff'] = df_control.groupby('client_id')['date_time'].diff()

df_test['time_diff'].fillna(pd.Timedelta(seconds=0), inplace=True)
df_control['time_diff'].fillna(pd.Timedelta(seconds=0), inplace=True)

average_time_per_step_df_test = df_test[df_test['process_step'] != 'start'].groupby('process_step')['time_diff'].mean()
average_time_per_step_df_control = df_control[df_control['process_step'] != 'start'].groupby('process_step')['time_diff'].mean()

average_time_per_step_df_test, average_time_per_step_df_control

(process_step
 confirm   9 days 01:56:31.437111571
 step_1    1 days 20:10:24.776025236
 step_2    0 days 03:12:06.426900584
 step_3    0 days 17:15:40.884328358
 Name: time_diff, dtype: timedelta64[ns],
 process_step
 confirm   17 days 11:56:00.867801047
 step_1     2 days 03:47:46.364912280
 step_2     0 days 00:03:52.350877192
 step_3     0 days 17:40:02.635838150
 Name: time_diff, dtype: timedelta64[ns])

- The time to 'confirm' in the Control group is markedly longer than in the Test group, which might indicate that users in the Control group encounter more issues.
- 'step_1' also takes longer in the Control group compared to the Test group, although the difference is not as pronounced as with the 'confirm' step.
- 'step_2' is reached much faster in the Control group, which could suggest that this step is more straightforward or requires less time to complete than in the Test group.
- The time for 'step_3' is relatively similar between the two groups, indicating a comparable process at this stage.

### Error Rates KPI:

In [34]:
# Define a mapping for the process steps if they are not inherently ordered
step_order = {
    'start': 0,  # assuming 'start' is the first step
    'step_1': 1,
    'step_2': 2,
    'step_3': 3,
    'confirm': 4  # assuming 'confirm' is the last step
}

In [37]:
error_rate_test = calculate_error_rate(df_test, step_order)
error_rate_control = calculate_error_rate(df_control, step_order)

print(f"Error Rate for Test Group: {error_rate_test:.4f}")
print(f"Error Rate for Control Group: {error_rate_control:.4f}")

Error Rate for Test Group: 0.0045
Error Rate for Control Group: 0.0031


- The error rates for both groups are relatively low, which is a good sign. It means that users are not often going backwards in the process, which could suggest that the process is relatively clear and users are not often confused.

- The Test group has a slightly higher error rate than the Control group. If the Test group was exposed to a new version or variation of the process, the increase in error rate could indicate that the new process is potentially more confusing or less intuitive than the Control group's process. However, the absolute difference is quite small (0.14%), so it might not be practically significant.

### Hypothesis Testing - Completion Rate:

- Null Hypothesis (H0): The completion rate for the Test group is equal to or less than the completion rate for the Control group increased by 5%.
- Alternative Hypothesis (Ha): The completion rate for the Test group is greater than the completion rate for the Control group increased by 5%                                                                                            

In [47]:
n_test = df_test['client_id'].nunique()
n_control = df_control['client_id'].nunique()

# Correctly calculate x_test and x_control
x_test = df_test[df_test['process_step'] == 'confirm']['client_id'].nunique()
x_control = df_control[df_control['process_step'] == 'confirm']['client_id'].nunique()

# Output the calculated values
print(f"n_test (Total users in Test group): {n_test}")
print(f"n_control (Total users in Control group): {n_control}")
print(f"x_test (Completions in Test group): {x_test}")
print(f"x_control (Completions in Control group): {x_control}")

n_test (Total users in Test group): 26843
n_control (Total users in Control group): 23387
x_test (Completions in Test group): 2300
x_control (Completions in Control group): 610


In [48]:
# The control rate is increased by 5%
control_rate_adjusted = (x_control / n_control) + 0.05

# Calculate the test statistic and p-value
test_stat, p_value = proportions_ztest(count=[x_test, x_control],
                                       nobs=[n_test, n_control],
                                       value=control_rate_adjusted - (x_control / n_control),
                                       alternative='larger')

print(f'Test Statistic: {test_stat}')
print(f'P-value: {p_value}')

Test Statistic: 4.594219146573653
P-value: 2.171863550287616e-06


- **Test Statistic:** The value of 4.594 is how many standard deviations away your observed difference in completion rates is from the null hypothesis' expectation. The positive value indicates that the observed difference is in the direction of the Test group having a higher completion rate than what the null hypothesis specifies.
- **P-value:** The p-value tells you the probability of observing a test statistic as extreme as, or more extreme than, what was actually observed, under the assumption that the null hypothesis is true. A p-value of is extremely small, far below the commonly used significance level of 0.05. This indicates that the observed difference in completion rates is very unlikely to have occurred by chance if the null hypothesis were true.

# Conclusion:

Given the very small p-value, we have strong evidence against the null hypothesis. Therefore, we would reject the null hypothesis in favor of the alternative hypothesis. This suggests that the completion rate for the Test group is significantly greater than the Control group's rate increased by 5%. In practical terms, whatever changes or features were introduced in the Test group appear to have had a positive impact on the completion rate, significantly exceeding the threshold for cost-effectiveness set before the test.