In [23]:
from math import ceil

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import researchpy as rp
import scipy.stats as stats
import seaborn as sns
import statsmodels.stats.api as sms

%matplotlib inline

# Read the CSV into `df` and print its first five rows as plain text.
df = pd.read_csv(filepath_or_buffer='airport_abandoned_cart_03_2022.csv')
print(df.head(n=5))

  test_group  renter_id  first_send_rentals  second_send_rentals  \
0       Test    7947534                   0                    0   
1       Test    8254530                   0                    0   
2       Test    8803501                   0                    0   
3       Test    8990899                   0                    0   
4       Test    8999151                   0                    0   

   third_send_rentals  total_rental_count  
0                   0                   0  
1                   0                   0  
2                   0                   0  
3                   0                   0  
4                   0                   0  


In [24]:
# Row-wise total of the three per-send rental columns.
df['total_one_week_rental_count'] = (
    df['first_send_rentals']
    .add(df['second_send_rentals'])
    .add(df['third_send_rentals'])
)

In [25]:
# Number of rows for each distinct value of `test_group`.
df.loc[:, 'test_group'].value_counts()

Control    47495
Test       14751
Name: test_group, dtype: int64

In [26]:
# Flag rows whose one-week rental total is strictly positive, stored as 0/1.
converted = df['total_one_week_rental_count'].gt(0)
df['converted'] = converted.astype('int')

In [27]:
def test_homogeneity_of_variance(df, test_column, test_groups, p_value = 0.05):
    print(test_column)
    x, pval = stats.levene(df[test_column][df[test_groups] == 'Control'],
                           df[test_column][df[test_groups] == 'Test'],
                           center='mean')

    if pval > p_value:
        print('P-Value = {}. Groups have equal variance'.format(pval), "\n")
    else:
        print('P-Value = {}. Groups do not have equal variance'.format(pval), "\n")

# Columns whose variance is compared between the groups in `test_group`.
test_value_columns = [
    'first_send_rentals',
    'second_send_rentals',
    'third_send_rentals',
    'total_one_week_rental_count',
    'converted',
]
for column in test_value_columns:
    test_homogeneity_of_variance(df=df, test_column=column, test_groups='test_group')
    

first_send_rentals
P-Value = 2.2741479754307204e-78. Groups do not have equal variance 

second_send_rentals
P-Value = 1.9492005462513068e-42. Groups do not have equal variance 

third_send_rentals
P-Value = 2.9169342836078895e-09. Groups do not have equal variance 

total_one_week_rental_count
P-Value = 1.8762109167794478e-103. Groups do not have equal variance 

converted
P-Value = 1.288782085293977e-255. Groups do not have equal variance 



In [28]:
# Per-group totals: number of renter rows plus the sum of each rental/conversion column.
stats_per_renter = df.groupby('test_group').agg(
    {
        'renter_id': 'count',
        'first_send_rentals': 'sum',
        'second_send_rentals': 'sum',
        'third_send_rentals': 'sum',
        'total_one_week_rental_count': 'sum',
        'converted': 'sum',
    }
)

# Divide each total by the group's renter count. New columns are appended in
# the order listed; `converted` is overwritten in place, so it changes from a
# count into a rate.
stats_per_renter = stats_per_renter.assign(
    first_send_rentals_per_renter=lambda totals: totals['first_send_rentals'] / totals['renter_id'],
    second_send_rentals_per_renter=lambda totals: totals['second_send_rentals'] / totals['renter_id'],
    third_send_rentals_per_renter=lambda totals: totals['third_send_rentals'] / totals['renter_id'],
    one_week_rentals_per_renter=lambda totals: totals['total_one_week_rental_count'] / totals['renter_id'],
    converted=lambda totals: totals['converted'] / totals['renter_id'],
)
stats_per_renter.style.format('{:.2f}')

Unnamed: 0_level_0,renter_id,first_send_rentals,second_send_rentals,third_send_rentals,total_one_week_rental_count,converted,first_send_rentals_per_renter,second_send_rentals_per_renter,third_send_rentals_per_renter,one_week_rentals_per_renter
test_group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Control,47495.0,3896.0,2518.0,1842.0,8256.0,0.14,0.08,0.05,0.04,0.17
Test,14751.0,685.0,477.0,459.0,1621.0,0.09,0.05,0.03,0.03,0.11


In [17]:
# Total of the `converted` flag within each `test_group` value.
df['converted'].groupby(df['test_group']).sum()

test_group
Control    12487
Test        1294
Name: converted, dtype: int64

In [18]:
# t-test with equal_variances=False on `first_send_rentals`, Control vs. Test.
# `rp` (researchpy) is imported with the other dependencies in the top import cell.
rp.ttest(
    group1=df.loc[df['test_group'] == 'Control', 'first_send_rentals'],
    group1_name="Control",
    group2=df.loc[df['test_group'] == 'Test', 'first_send_rentals'],
    group2_name="Test",
    equal_variances=False,
)

(   Variable        N      Mean        SD        SE  95% Conf.  Interval
 0   Control  47495.0  0.278071  0.801740  0.003679   0.270861  0.285282
 1      Test  14751.0  0.046438  0.316677  0.002607   0.041327  0.051548
 2  combined  62246.0  0.223179  0.723825  0.002901   0.217493  0.228865,
                    Welch's t-test     results
 0  Difference (Control - Test) =       0.2316
 1           Degrees of freedom =   59144.1696
 2                            t =      51.3700
 3        Two side test p value =       0.0000
 4       Difference < 0 p value =       1.0000
 5       Difference > 0 p value =       0.0000
 6                    Cohen's d =       0.3230
 7                    Hedge's g =       0.3230
 8                Glass's delta =       0.2889
 9                  Pearson's r =       0.2067)

In [19]:
# t-test with equal_variances=False on `second_send_rentals`, Control vs. Test.
rp.ttest(
    group1=df.loc[df['test_group'] == 'Control', 'second_send_rentals'],
    group1_name="Control",
    group2=df.loc[df['test_group'] == 'Test', 'second_send_rentals'],
    group2_name="Test",
    equal_variances=False,
)

(   Variable        N      Mean        SD        SE  95% Conf.  Interval
 0   Control  47495.0  0.053016  0.339820  0.001559   0.049960  0.056072
 1      Test  14751.0  0.032337  0.209855  0.001728   0.028950  0.035724
 2  combined  62246.0  0.048116  0.314046  0.001259   0.045648  0.050583,
                    Welch's t-test     results
 0  Difference (Control - Test) =       0.0207
 1           Degrees of freedom =   40266.7026
 2                            t =       8.8851
 3        Two side test p value =       0.0000
 4       Difference < 0 p value =       1.0000
 5       Difference > 0 p value =       0.0000
 6                    Cohen's d =       0.0659
 7                    Hedge's g =       0.0659
 8                Glass's delta =       0.0609
 9                  Pearson's r =       0.0442)

In [20]:
# t-test with equal_variances=False on `total_one_week_rental_count`, Control vs. Test.
rp.ttest(
    group1=df.loc[df['test_group'] == 'Control', 'total_one_week_rental_count'],
    group1_name="Control",
    group2=df.loc[df['test_group'] == 'Test', 'total_one_week_rental_count'],
    group2_name="Test",
    equal_variances=False,
)

(   Variable        N      Mean        SD        SE  95% Conf.  Interval
 0   Control  47495.0  0.369871  0.910575  0.004178   0.361681  0.378060
 1      Test  14751.0  0.109891  0.448792  0.003695   0.102648  0.117134
 2  combined  62246.0  0.308261  0.832228  0.003336   0.301723  0.314799,
                    Welch's t-test     results
 0  Difference (Control - Test) =       0.2600
 1           Degrees of freedom =   50795.6739
 2                            t =      46.6097
 3        Two side test p value =       0.0000
 4       Difference < 0 p value =       1.0000
 5       Difference > 0 p value =       0.0000
 6                    Cohen's d =       0.3152
 7                    Hedge's g =       0.3152
 8                Glass's delta =       0.2855
 9                  Pearson's r =       0.2025)

In [22]:
# t-test with equal_variances=False on `total_rental_count`, Control vs. Test.
rp.ttest(
    group1=df.loc[df['test_group'] == 'Control', 'total_rental_count'],
    group1_name="Control",
    group2=df.loc[df['test_group'] == 'Test', 'total_rental_count'],
    group2_name="Test",
    equal_variances=False,
)

(   Variable        N      Mean        SD        SE  95% Conf.  Interval
 0   Control  47495.0  0.468049  1.068044  0.004901   0.458444  0.477655
 1      Test  14751.0  0.323707  0.881343  0.007257   0.309483  0.337931
 2  combined  62246.0  0.433843  1.028699  0.004123   0.425762  0.441925,
                    Welch's t-test     results
 0  Difference (Control - Test) =       0.1443
 1           Degrees of freedom =   29377.3846
 2                            t =      16.4840
 3        Two side test p value =       0.0000
 4       Difference < 0 p value =       1.0000
 5       Difference > 0 p value =       0.0000
 6                    Cohen's d =       0.1406
 7                    Hedge's g =       0.1406
 8                Glass's delta =       0.1351
 9                  Pearson's r =       0.0957)