## T-test

In [1]:
import numpy as np
import pandas as pd
import pingouin as pg

# Fix the global NumPy seed so the simulated sample is reproducible
np.random.seed(123)

# Bivariate normal sample: means 4 and 5, unit variances, covariance 0.6, 30 draws
mean = [4, 5]
cov = [(1, 0.6), (0.6, 1)]
n = 30
x, y = np.random.multivariate_normal(mean, cov, size=n).T

# T-test comparing x against y
pg.ttest(x, y)

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,-3.400706,58,two-sided,0.001222,"[-1.68, -0.43]",0.878059,26.155,0.916807


## Pearson's correlation

In [2]:
# Pearson correlation between x and y (method spelled out explicitly)
pg.corr(x, y, method="pearson")

Unnamed: 0,n,r,CI95%,p-val,BF10,power
pearson,30,0.594785,"[0.3, 0.79]",0.000527,69.723,0.950373


## Robust correlation

In [3]:
# Introduce an outlier by overwriting one observation of x in place.
# NOTE: this mutation persists, so every later cell that uses x sees the outlier.
x[5] = 18
# Use the robust biweight midcorrelation
pg.corr(x, y, method="bicor")

Unnamed: 0,n,r,CI95%,p-val,power
bicor,30,0.575834,"[0.27, 0.78]",0.000869,0.932799


## Test the normality of the data

In [4]:
# Univariate normality test on x alone
x_normality = pg.normality(x)
print(x_normality)

# Multivariate normality test on the (x, y) pairs, stacked as two columns
xy_pairs = np.column_stack((x, y))
print(pg.multivariate_normality(xy_pairs))

          W          pval  normal
0  0.615321  1.128494e-07   False
HZResults(hz=1.6967733646126668, pval=0.00018201726664169367, normal=False)


## One-way ANOVA using a pandas DataFrame

In [5]:
# Load the 'mixed_anova' example dataset
df = pg.read_dataset("mixed_anova")

# One-way ANOVA of Scores with Group as the between factor (detailed table)
aov = pg.anova(
    data=df,
    dv="Scores",
    between="Group",
    detailed=True,
)
print(aov)

   Source          SS   DF        MS         F   p-unc       np2
0   Group    5.459963    1  5.459963  5.243656  0.0232  0.028616
1  Within  185.342729  178  1.041251       NaN     NaN       NaN


## Repeated measures ANOVA

In [6]:
# Repeated measures ANOVA of Scores across Time, with Subject identifying
# the repeated units; `df` is the 'mixed_anova' dataset loaded earlier
pg.rm_anova(
    data=df,
    dv="Scores",
    within="Time",
    subject="Subject",
    detailed=True,
)

Unnamed: 0,Source,SS,DF,MS,F,p-unc,ng2,eps
0,Time,7.628428,2,3.814214,3.912796,0.022629,0.039981,0.998751
1,Error,115.027023,118,0.974805,,,,


## Post-hoc tests corrected for multiple comparisons

In [7]:
# Pairwise post hoc tests across Time, FDR-corrected ('fdr_bh'),
# with Hedges' g as the effect size
posthoc = pg.pairwise_tests(
    data=df,
    dv="Scores",
    within="Time",
    subject="Subject",
    parametric=True,
    padjust="fdr_bh",
    effsize="hedges",
)

# Pretty-print the table with three decimals
pg.print_table(posthoc, floatfmt=".3f")


POST HOC TESTS

Contrast    A        B        Paired    Parametric         T     dof  alternative      p-unc    p-corr  p-adjust      BF10    hedges
----------  -------  -------  --------  ------------  ------  ------  -------------  -------  --------  ----------  ------  --------
Time        August   January  True      True          -1.740  59.000  two-sided        0.087     0.131  fdr_bh       0.582    -0.328
Time        August   June     True      True          -2.743  59.000  two-sided        0.008     0.024  fdr_bh       4.232    -0.483
Time        January  June     True      True          -1.024  59.000  two-sided        0.310     0.310  fdr_bh       0.232    -0.170



## Two-way mixed ANOVA

In [8]:
# Two-way mixed ANOVA: Group as the between factor, Time as the within factor.
# NOTE: this rebinds `aov`, replacing the one-way ANOVA table from the earlier cell.
aov = pg.mixed_anova(
    data=df,
    dv="Scores",
    between="Group",
    within="Time",
    subject="Subject",
    correction=False,
    effsize="np2",
)
pg.print_table(aov)


ANOVA SUMMARY

Source          SS    DF1    DF2     MS      F    p-unc    np2      eps
-----------  -----  -----  -----  -----  -----  -------  -----  -------
Group        5.460      1     58  5.460  5.052    0.028  0.080  nan
Time         7.628      2    116  3.814  4.027    0.020  0.065    0.999
Interaction  5.167      2    116  2.584  2.728    0.070  0.045  nan



## Pairwise correlations between columns of a dataframe

In [9]:
# Re-seed before drawing Z so this cell is reproducible on its own.
# This restarts the global NumPy stream with the same seed (123) that was
# used when x and y were generated.
np.random.seed(123)
z = np.random.normal(5, 1, 30)

# Assemble the three variables into one DataFrame.
# x still contains the outlier assigned in the robust-correlation cell (x[5] = 18).
data = pd.DataFrame({'X': x, 'Y': y, 'Z': z})

# Pearson correlation for every pair among the listed columns
pg.pairwise_corr(data, columns=['X', 'Y', 'Z'], method='pearson')

Unnamed: 0,X,Y,method,alternative,n,r,CI95%,p-unc,BF10,power
0,X,Y,pearson,two-sided,30,0.365979,"[0.01, 0.64]",0.046703,1.5,0.524973
1,X,Z,pearson,two-sided,30,0.251234,"[-0.12, 0.56]",0.180509,0.534,0.272089
2,Y,Z,pearson,two-sided,30,0.020115,"[-0.34, 0.38]",0.915975,0.228,0.050775


## Pairwise T-test between columns of a dataframe

In [10]:
# Paired T-tests between the columns of `data`.
# NOTE(review): `stars=False` presumably turns off significance-star annotations
# in the returned table — confirm against the pingouin documentation.
data.ptests(paired=True, stars=False)

Unnamed: 0,X,Y,Z
X,-,0.226,0.165
Y,-1.238,-,0.658
Z,-1.424,-0.447,-


## Multiple linear regression

In [11]:
# Regress Y on the two predictors X and Z
predictors = data[["X", "Z"]]
outcome = data["Y"]
pg.linear_regression(predictors, outcome)

Unnamed: 0,names,coef,se,T,pval,r2,adj_r2,CI[2.5%],CI[97.5%]
0,Intercept,4.650341,0.840938,5.529945,7e-06,0.139448,0.075703,2.924879,6.375803
1,X,0.142874,0.068405,2.088663,0.046298,0.139448,0.075703,0.002519,0.283229
2,Z,-0.069407,0.16697,-0.415682,0.680928,0.139448,0.075703,-0.412001,0.273188


## Mediation analysis

In [12]:
# Mediation model with X as the predictor, Z as the mediator and Y as the outcome.
# The seed is fixed so the resampling-based results are repeatable.
pg.mediation_analysis(
    data=data,
    x="X",
    m="Z",
    y="Y",
    seed=42,
    n_boot=1000,
)

Unnamed: 0,path,coef,se,pval,CI[2.5%],CI[97.5%],sig
0,Z ~ X,0.102926,0.07494,0.180509,-0.050581,0.256433,No
1,Y ~ Z,0.01821,0.171044,0.915975,-0.332158,0.368577,No
2,Total,0.135731,0.065225,0.046703,0.002123,0.269339,Yes
3,Direct,0.142874,0.068405,0.046298,0.002519,0.283229,Yes
4,Indirect,-0.007144,0.025426,0.898,-0.069465,0.028967,No


## Contingency analysis

In [13]:
# Load the 'chi2_independence' example dataset.
# NOTE: this rebinds `data`, replacing the X/Y/Z DataFrame built in an earlier cell;
# re-running those earlier cells afterwards requires rebuilding that frame first.
data = pg.read_dataset('chi2_independence')
# Chi-squared test of independence between the 'sex' and 'target' columns;
# the call returns three objects, unpacked here
expected, observed, stats = pg.chi2_independence(data, x='sex', y='target')
# Display the table of test statistics as the cell output
stats

Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,22.717227,1.0,1.876778e-06,0.273814,0.997494
1,cressie-read,0.666667,22.931427,1.0,1.678845e-06,0.275102,0.997663
2,log-likelihood,0.0,23.557374,1.0,1.212439e-06,0.278832,0.998096
3,freeman-tukey,-0.5,24.219622,1.0,8.595211e-07,0.282724,0.998469
4,mod-log-likelihood,-1.0,25.071078,1.0,5.525544e-07,0.287651,0.998845
5,neyman,-2.0,27.457956,1.0,1.605471e-07,0.301032,0.999481
