In [83]:


import numpy as np
import pandas as pd
from scipy.stats import t
from scipy.stats import ttest_ind
import scipy.stats as stats
from scipy.stats import norm
import matplotlib.pyplot as plt
%matplotlib inline

Paired T-test: mean difference accross one populations e.g. one population trying two difference shoes. Take a sample and for each subject in 
the sample perform two measurements, then calculate a mean difference. Calculate the difference for each subject in the sample then take the mean of all them. 

Two sample T-test: two independent populations therefore two independent samples, take two statistics and compare both means, the difference of the means separately. mu1 - mu2

Conditions for constructing a t interval:
- The data is a random sample from the population of interest
- The sampling distribution of the mean is approx. normal
    - n >= 30 (central limit theorem)
    - orig. distribution normal
    - approx. symmetric (not skewed)
- Individual observations can be considered independent
    - sample with replacement
    - if sampling without replacement, then we need to follow the 10% rule: the sample n needs to be 10% or less than the whole population
Reference: Conditions for inference on a mean https://www.khanacademy.org/math/ap-statistics/xfb5d8e68:inference-quantitative-means/one-sample-t-interval-mean/a/reference-conditions-inference-one-mean




In [84]:
df = pd.read_csv("query_results.csv")

In [85]:
df.head()

Unnamed: 0,id,group,total_spent_per_user
0,1014313,B,0.0
1,1029532,B,0.0
2,1018168,A,0.0
3,1029599,A,0.0
4,1025920,B,0.0


# $H_0$: The control and treatment group's average spent will remain the same. 

In [86]:
def get_mean_and_se(df, col):

    mean = df[col].mean()
    se = df[col].std()/np.sqrt(len(df))
    return mean, se

In [88]:
def critical_value(sig, dof, test_kind='two-tail'):
    '''Calculate critical value for various tests.

    Parameters:
    -----------
    sig: float
        Significance level between 0 and 1 e.g. 0.05
    dof: int
        Degrees of freedom i.e. number of data points - 1
    test_kind: string
        Type of test i.e. left, right, two-tail. 
        Default is two-tail. 

    Returns: 
    --------
    critical_value: float 
        The critical value.'''
    from scipy.stats import t
    if test_kind == 'left':
        crit = t.ppf(q=sig, df=dof)
    elif test_kind == 'right':
        crit = t.ppf(q=1-sig, df=dof)
    else:
        print(f'{sig=} {dof=}')
        crit = t.ppf(q=1-sig/2, df=dof)
    return crit

In [90]:
def confidence_interval(mean, se, critical_value):
    ''' '''

    lower = mean - critical_value * se
    upper = mean + critical_value * se
    return lower, upper 

## Hypothesis Test for a Mean

One-sample two-sided t-test

In [98]:
def t_test_statistic_1_samp (sample, sample_col, hypothesis_mean):
    '''Calculates a t-test for one sample.
       T = (x̄ - μ₀) / (s / √n)
     
    Parameters:
    -----------
    sample: df
        This is you sample dataframe.
    sample_col: string
        This is the column to be aggregated. 
    hypothesis_mean: float
        μ₀: This is the hypothesized population mean, which is the value you want to test against.
    
    Returns:
    --------
    T: float
        This is the calculated t-value, which measures the difference between the sample mean (x̄) 
        and the hypothesized population mean (μ₀) in units of the standard error of the sample mean.
        The resulting t-value is compared to a t-distribution to determine the probability of 
        observing such a large difference between the sample mean and the hypothesized population mean 
        by chance alone. If this probability is low enough (usually set at a significance level of 0.05), 
        we reject the null hypothesis that the sample mean is not significantly different 
        from the hypothesized population mean.
    
    '''
    sample_n = len(sample)
    mean, se = get_mean_and_se(sample, sample_col)

    t_1_samp = (mean - hypothesis_mean) / (se*np.sqrt(sample_n) / np.sqrt(sample_n))
    return t_1_samp

In [93]:
def t_test_statistic_2_samp (sample_1, sample_2, col_1, col_2, hypothesis_mean=0):
    '''Calculates a t-test for two independent samples. which is a statistical test used to determine 
    whether two groups of data are significantly different from each other.
    
    Parameters:
    -----------
    sample_1: dataframe
        This is the first dataframe used for computing the mean and standard error.
    sample_2: dataframe
        This is the second dataframe used for computing the mean and standard error.
    col_1: string
        This is the column used for analysis within the first dataframe.
    col_2: string
        This is the column used for analysis within the second dataframe. 
    hypothsis_mean: float
        Represents the null hypothesis mean, which is the value being tested against the sample means to 
        determine whether the difference between them is statistically significant. 
        The default is 0. 


    Returns:
    --------
    
    '''
    mean_1, se_1 = get_mean_and_se(sample_1, col_1)
    mean_2, se_2 = get_mean_and_se(sample_2, col_2)

    t = ((mean_1 - mean_2) - hypothesis_mean) / np.sqrt((se_1**2 + se_2**2))
    return t


In [None]:
def confidence_interval_diff_mean(control_sample, treamtent_sample, col_1, col_2, sig):
    ''' '''

    control_mean, se_1 = get_mean_and_se(control_sample, col_1)
    treatment_mean, se_2 = get_mean_and_se(treamtent_sample, col_2)
    dof = (len(control_sample) - 1) + (len(treamtent_sample) - 1)
    test_kind = 'two-tail'


    sample_stat = treatment_mean - control_mean
    se = np.sqrt(se_1**2 + se_2**2)

    crit_value = critical_value(sig, dof, test_kind='two-tail')

    lower = sample_stat - crit_value * se
    upper = sample_stat + crit_value * se

    print(f'{sample_stat=}, {se=}, {crit_value=}')

    return lower, upper 

In [None]:
df_a = df[df["group"]=="A"]

df_b = df[df["group"]=="B"]

mean_a, std_a = get_mean_and_se(df_a, "total_spent_per_user")

mean_b, std_b = get_mean_and_se(df_b, "total_spent_per_user")

sig = 0.05

print(mean_a, mean_b)


critical_a = critical_value(sig, dof=len(df_a) - 1)
critical_b = critical_value(sig, dof=len(df_b) - 1)

confidence_interval_a = confidence_interval(mean_a, std_a, critical_a)
confidence_interval_b = confidence_interval(mean_b, std_b, critical_b)


print(f'Group A: {np.round(confidence_interval_a, 3)}')
print('Group B: {:.4}, {:.4}'.format(*confidence_interval_b))


t = t_test_statistic_2_samp(df_a, df_b, 'total_spent_per_user', 'total_spent_per_user', 0)

# Conduct t-test assuming unequal variance
t_stat, p_val = ttest_ind(df_a['total_spent_per_user'], df_b['total_spent_per_user'], equal_var=False)

# Print results
print("t-statistic: {:.3f}".format(t_stat))
print("p-value: {:.3f}".format(p_val))

# Check significance level at 5%
if p_val < 0.05:
    print("Conclusion: Reject the null hypothesis")
else:
    print("Conclusion: Fail to reject the null hypothesis")


ci_diff_mean = confidence_interval_diff_mean(df_a, df_b, 'total_spent_per_user', 'total_spent_per_user', 0.05)

print(ci_diff_mean)


sig=0.05 dof=48941
sample_stat=0.016348477956938723, se=0.23214055875284462, crit_value=1.9600124577813611
(-0.4386499091549627, 0.47134686506884016)


(nan, nan)

4.800000000000001