### Hypothesis Testing

#### Import Zone

In [1]:
import pandas as pd
import statsmodels.api as sm
import numpy as np
from functools import reduce
import scipy.stats as stats
from scipy.stats import chi2_contingency
from scipy.stats import geom
from scipy.stats import poisson
from scipy.stats import expon
from scipy.stats import norm
from scipy.stats import binom

In [2]:
df_final_experiment_clients = pd.read_csv('C:/Users/user/Jupyter/IronHack/Work_Python/Week5/MiniProject/df_final_experiment_clients.csv',sep=';')

In [3]:
df_concat_final_web_data = pd.read_csv('C:/Users/user/Jupyter/IronHack/Work_Python/Week5/MiniProject/df_concat_final_web_data.csv',sep=';')

In [4]:
df_final_demo = pd.read_csv('C:/Users/user/Jupyter/IronHack/Work_Python/Week5/MiniProject/df_final_demo.csv',sep=';')

#### Some previous calculations

In [5]:
# List of dataframes
interactions_and_testA_B_df_1 = [df_final_experiment_clients,df_concat_final_web_data]

# Merging
interactions_df_1 = reduce(lambda left, right: pd.merge(left, right, on='client_id', how='left'), interactions_and_testA_B_df_1)
interactions_df_1.head()

Unnamed: 0,client_id,Variation,visitor_id,visit_id,process_step,date_time
0,9988021,Test,580560515_7732621733,781255054_21935453173_531117,3_step_3,2017-04-17 15:27:07
1,9988021,Test,580560515_7732621733,781255054_21935453173_531117,2_step_2,2017-04-17 15:26:51
2,9988021,Test,580560515_7732621733,781255054_21935453173_531117,3_step_3,2017-04-17 15:19:22
3,9988021,Test,580560515_7732621733,781255054_21935453173_531117,2_step_2,2017-04-17 15:19:13
4,9988021,Test,580560515_7732621733,781255054_21935453173_531117,3_step_3,2017-04-17 15:18:04


In [6]:
# List of dataframes
interactions_and_testA_B_df_2 = [df_final_demo,df_final_experiment_clients]

# Merging
interactions_df_2 = reduce(lambda left, right: pd.merge(left, right, on='client_id', how='left'), interactions_and_testA_B_df_2)
interactions_df_2.head()

Unnamed: 0,client_id,clnt_tenure_yr,clnt_tenure_mnth,clnt_age,gendr,num_accts,bal,calls_6_mnth,logons_6_mnth,Variation
0,836976,6.0,73.0,60.5,U,2,45105.3,6,9,Test
1,2304905,7.0,94.0,58.0,U,2,110860.3,6,9,Control
2,1439522,5.0,64.0,32.0,U,2,52467.79,6,9,Test
3,1562045,16.0,198.0,49.0,M,2,67454.65,3,6,Test
4,5126305,12.0,145.0,33.0,F,2,103671.75,0,3,Control


In [7]:
# groupby processstep by Control 
filtered_df_3 = interactions_df_1[(interactions_df_1['Variation'] == 'Control') & (interactions_df_1['process_step'] == '0_start')]
completion_rate__control_denominator = filtered_df_3.groupby(['Variation', 'process_step'])['client_id'].nunique()
filtered_df_4 = interactions_df_1[(interactions_df_1['Variation'] == 'Control') & (interactions_df_1['process_step'] == '4_confirm')]
completion_rate_control_numerator = filtered_df_4.groupby(['Variation', 'process_step'])['client_id'].nunique()

In [8]:
# groupby processstep by Test
filtered_df_1 = interactions_df_1[(interactions_df_1['Variation'] == 'Test') & (interactions_df_1['process_step'] == '0_start')]
completion_rate_test_denominator = filtered_df_1.groupby(['Variation', 'process_step'])['client_id'].nunique()
filtered_df_2 = interactions_df_1[(interactions_df_1['Variation'] == 'Test') & (interactions_df_1['process_step'] == '4_confirm')]
completion_rate_test_numerator = filtered_df_2.groupby(['Variation', 'process_step'])['client_id'].nunique()

In [9]:
completion_rate_control = completion_rate_control_numerator.values / completion_rate__control_denominator.values
completion_rate_test = completion_rate_test_numerator.values / completion_rate_test_denominator.values

In [10]:
# control
control_confirm_clients = completion_rate_control_numerator
total_control_users = completion_rate__control_denominator
# test
test_confirm_clients = completion_rate_test_numerator
total_test_users = completion_rate_test_denominator

total_control_users.values, total_test_users.values


(array([23397], dtype=int64), array([26679], dtype=int64))

#### COMPLETION RATE: Confirmed if the difference in completion rate of the the new design and the old design is statistically significant.

In [None]:
# H0: There is no difference in completion rates between the new design and the old design.
# H1: There is a difference in completion rates between the new design and the old design.

In [11]:
# Create a contingency table
contingency_table = [[completion_rate_test * len(completion_rate_test), (1 - completion_rate_test) * len(completion_rate_test)],
                     [completion_rate_control * len(completion_rate_control), (1 - completion_rate_control) * len(completion_rate_control)]]

# Perform chi-square test
chi2_stat, p_value, dof, expected = chi2_contingency(contingency_table)

# Interpret the results
alpha = 0.05
print("Chi-square statistic:", chi2_stat)
print("P-value:", p_value)
if p_value < alpha:
    print("Reject the null hypothesis. There is a significant difference between completion rates.")
else:
    print("Fail to reject the null hypothesis. There is no significant difference between completion rates.")


Chi-square statistic: 0.0
P-value: 1.0
Fail to reject the null hypothesis. There is no significant difference between completion rates.


#### COMPLETION RATE WITH A COST-EFFECTIVENESS THRESHOLD: Carried out an analysis ensuring that the observed increase in completion rate from the A/B test meets or exceeds this 5% threshold.

In [76]:
# Calculate the completetion rate for the control grupo increased by 5%
control_completion_rate_with_threshold = completion_rate_control + 0.05

# Perform the two-proportion z-test
z_score, p_value = sm.stats.proportions_ztest([completion_rate_control, completion_rate_test], [total_test_users, total_control_users], alternative='larger')

# Interpret the results
alpha = 0.05  # Significance level
print("Z Score:", z_score)
print("P-value:", p_value)

if p_value < alpha:
    print("Reject the null hypothesis. There is evidence to suggest that the completion rates for the Test group is greater than completion rates for the Control Group.")
else:
    print("Fail to reject the null hypothesis. There is no significant evidence to suggest that the completion rates for the Test group is greater than completion rates for the Control Group.")


Z Score: [0.]
P-value: [0.5]
Fail to reject the null hypothesis. There is no significant evidence to suggest that the completion rates for the Test group is greater than completion rates for the Control Group.


#### OTHER HYPOTHESIS TEST.

-You might want to test whether the average age of clients engaging with the new process is the same as those engaging with the old process

In [46]:
# H0: average age of clients interacting with the new process = average age of clients interacting with the previous process.
# H1: average age of clients interacting with the new process != average age of clients interacting with the previous process

In [72]:
control_age = interactions_df_2[interactions_df_2['Variation'] == 'Control']['clnt_age']
test_age = interactions_df_2[interactions_df_2['Variation'] == 'Test']['clnt_age']

# Hypothesis test for age (Student's t-test)
t_statistic_age, p_value_age = stats.ttest_ind(test_age, control_age)

# Interpret the results
alpha = 0.05

print("Hypothesis test for age:")
print("Test statistic (t):", t_statistic_age)
print("P-value:", p_value_age)
if p_value_age < alpha:
    print("We reject the null hypothesis. There are significant differences in age between the groups.")
else:
    print("We cannot reject the null hypothesis. There are no significant differences in age between the groups.")


Hypothesis test for age:
Test statistic (t): -2.416145610586789
P-value: 0.0156893118497501
We reject the null hypothesis. There are significant differences in age between the groups.



-You might want to test if the average client tenure (how long they’ve been with Vanguard) of those engaging with the new process is the same as those engaging with the old process

In [None]:
# H0: The average tenure of clients interacting with the new process = average tenure of clients interacting with the previous process.
# H1: The average tenure of clients interacting with the new process != average tenure of clients interacting with the previous process.

In [73]:
control_since = interactions_df_2[interactions_df_2['Variation'] == 'Control']['clnt_tenure_yr']
test_since = interactions_df_2[interactions_df_2['Variation'] == 'Test']['clnt_tenure_yr']

# Hypothesis test for customer tenure (Student's t-test)
t_statistic_since, p_value_since = stats.ttest_ind(test_since, control_since)

# Interpret the results
alpha = 0.05

print("\nHypothesis test for customer tenure:")
print("Test statistic (t):", t_statistic_since)
print("P-value:", p_value_since)
if p_value_since < alpha:
    print("We reject the null hypothesis. There are significant differences in customer tenure between the groups.")
else:
    print("We cannot reject the null hypothesis. There are no significant differences in customer tenure between the groups.")


Hypothesis test for customer tenure:
Test statistic (t): -1.7148872757395706
P-value: 0.0863720328483434
We cannot reject the null hypothesis. There are no significant differences in customer tenure between the groups.


#### Evaluated the experiment by answering questions relating to:


	-Design Effectiveness
		1) Was the experiment well-structured?
		ANSWER: Yes, clients from both samples were directed into the same steps and the samples were very homogenous

		2) Were clients randomly and equally divided between the old and new designs?
        ANSWER: I don't know how the samples were selected by they were homogenous

		3) Were there any biases?
		ANSWER: Can't identified any
	
	-Duration
		1) Was the timeframe of the experiment (from 3/15/2017 to 6/20/2017) adequate to gather meaningful data and insights?
		ANSWER: As far as we got data to compare we can reach some insights. Don't see how a longer test would provided better results (more accurate)

	-Additional Data Needs
		1) What other data, if available, could enhance the analysis?
		ANSWER: Gender data has a third of its value in Unknown category