In [1]:
import pandas as pd
import pingouin as pg

Example 1

In [2]:
# Build a small A/B dataset: ten conversion-rate observations per group.
rates_a = [0.1, 0.15, 0.08, 0.12, 0.11, 0.15, 0.14, 0.13, 0.09, 0.16]
rates_b = [0.21, 0.22, 0.20, 0.19, 0.18, 0.24, 0.23, 0.25, 0.17, 0.22]
data = pd.DataFrame({
    "conversion_rate": rates_a + rates_b,
    "group": ["A"] * len(rates_a) + ["B"] * len(rates_b),
})

# Select each group's observations by label with a single .loc lookup.
conv_a = data.loc[data["group"] == "A", "conversion_rate"]
conv_b = data.loc[data["group"] == "B", "conversion_rate"]

# Independent-samples t-test (two-sided) of group A against group B.
results = pg.ttest(conv_a, conv_b, alternative="two-sided")

print(results)

               T  dof alternative         p-val           CI95%   cohen-d  \
T-test -7.350368   18   two-sided  8.010860e-07  [-0.11, -0.06]  3.287184   

             BF10  power  
T-test  1.311e+04    1.0  


Example 2

In [3]:
# Time-on-site observations for two groups of ten users each.
site_time_a = [5, 7, 8, 5, 6, 7, 5, 6, 7, 8]
site_time_b = [10, 12, 11, 13, 12, 10, 11, 12, 13, 14]
data_var = pd.DataFrame({
    "time_on_site": site_time_a + site_time_b,
    "group": ["A"] * len(site_time_a) + ["B"] * len(site_time_b),
})

# Levene's test: checks whether the two groups have equal variances.
levene_results = pg.homoscedasticity(
    data_var,
    dv="time_on_site",
    group="group",
    method="levene",
)
print(levene_results)

          W  pval  equal_var
levene  0.0   1.0       True


Example 3

In [6]:
# Repeated-measures A/B data: users 1-10 each have one usability score
# under the "Before" condition and one under the "After" condition.
scores_before = [78, 75, 80, 82, 85, 86, 88, 90, 87, 89]
scores_after = [80, 83, 85, 87, 86, 89, 91, 93, 90, 92]
user_ids = list(range(1, 11))

data_repeated = pd.DataFrame({
    "user_id": user_ids * 2,  # same ids repeated once per condition
    "usability_score": scores_before + scores_after,
    "condition": ["Before"] * len(scores_before) + ["After"] * len(scores_after),
})

# One-way repeated-measures ANOVA with `condition` as the within-subject factor.
anova_results = pg.rm_anova(
    data=data_repeated,
    dv="usability_score",
    within="condition",
    subject="user_id",
)
print(anova_results)

      Source  ddof1  ddof2          F     p-unc       ng2  eps
0  condition      1      9  33.906977  0.000252  0.144256  1.0


Example 4

In [7]:
# Ten paired observations of time spent on site and purchase amount.
# Both columns are evenly spaced sequences (5..50 step 5 and 20..200 step 20),
# so every purchase amount is exactly four times the matching time spent.
data_corr = pd.DataFrame({
    "time_spent": list(range(5, 51, 5)),
    "purchase_amount": list(range(20, 201, 20)),
})

# Correlation between the two columns (pg.corr called with its defaults).
time_spent = data_corr["time_spent"]
purchase_amount = data_corr["purchase_amount"]
correlation_results = pg.corr(time_spent, purchase_amount)
print(correlation_results)

          n    r       CI95%  p-val BF10  power
pearson  10  1.0  [1.0, 1.0]    0.0  inf      1


Example 5

In [8]:
# Completion times for the same twenty users on process A and on process B;
# position i in both lists belongs to the same user.
times_a = [120, 110, 123, 130, 115, 125, 120, 115, 110, 118,
           122, 125, 130, 140, 135, 150, 145, 132, 138, 140]
times_b = [115, 105, 110, 120, 110, 115, 112, 108, 105, 111,
           117, 110, 115, 125, 120, 130, 128, 125, 120, 125]

data_paired = pd.DataFrame({
    "user_id": range(1, len(times_a) + 1),
    "time_A": times_a,
    "time_B": times_b,
})

# Paired-samples t-test: each user's time_A is compared with their own time_B.
paired_results = pg.ttest(
    data_paired["time_A"],
    data_paired["time_B"],
    paired=True,
)
print(paired_results)

              T  dof alternative         p-val          CI95%   cohen-d  \
T-test  9.56901   19   two-sided  1.065003e-08  [8.48, 13.22]  1.114615   

             BF10    power  
T-test  1.182e+06  0.99711  


Example 6

In [9]:
# Sales figures under two promotional strategies, ten observations each.
sales_a = [20, 30, 15, 10, 50, 25, 40, 30, 20, 55]
sales_b = [60, 45, 40, 35, 25, 65, 70, 50, 45, 30]
data_sales = pd.DataFrame({
    "sales": sales_a + sales_b,
    "strategy": ["A"] * len(sales_a) + ["B"] * len(sales_b),
})

# Select each strategy's sales by label with a single .loc lookup.
strategy_a_sales = data_sales.loc[data_sales["strategy"] == "A", "sales"]
strategy_b_sales = data_sales.loc[data_sales["strategy"] == "B", "sales"]

# Two-sided Mann-Whitney U test comparing strategy A with strategy B.
mwu_results = pg.mwu(strategy_a_sales, strategy_b_sales, alternative="two-sided")
print(mwu_results)

     U-val alternative     p-val   RBC   CLES
MWU   20.5   two-sided  0.027832  0.59  0.205
