In [1]:
using Distributions
#using StatsPlots
#default(fmt=:png)

"""
    pvalue_welch_t(m, X̄, U, n, Ȳ, V; Δμ=0.0, roundfunc=identity, dffunc=degree_of_freedom)
    pvalue_welch_t(X, Y; Δμ=0.0, roundfunc=identity, dffunc=degree_of_freedom)

Return the two-sided P-value of Welch's t-test for the hypothesis that the
difference of the two population means equals `Δμ`.

The first form takes summary statistics: sample sizes `m`, `n`, sample means
`X̄`, `Ȳ`, and sample variances `U`, `V`. The second form computes them from
the samples `X` and `Y` with `length`, `mean` and `var`.

# Keywords
- `Δμ`: hypothesized difference of means.
- `roundfunc`: function applied to the estimated degrees of freedom before the
  t-distribution is constructed (e.g. `round`).
- `dffunc`: function `(m, U, n, V) -> degrees of freedom`.
"""
function pvalue_welch_t(m, X̄, U, n, Ȳ, V; Δμ=0.0, roundfunc=identity, dffunc=degree_of_freedom)
    tstat = tvalue_welch(m, X̄, U, n, Ȳ, V; Δμ=Δμ)
    ν = roundfunc(dffunc(m, U, n, V))
    # Twice the upper-tail probability at |t|.
    return 2 * ccdf(TDist(ν), abs(tstat))
end

function pvalue_welch_t(X, Y; Δμ=0.0, roundfunc=identity, dffunc=degree_of_freedom)
    m, X̄, U = length(X), mean(X), var(X)
    n, Ȳ, V = length(Y), mean(Y), var(Y)
    return pvalue_welch_t(m, X̄, U, n, Ȳ, V; Δμ=Δμ, roundfunc=roundfunc, dffunc=dffunc)
end

"""
    tvalue_welch(m, X̄, U, n, Ȳ, V; Δμ=0.0)
    tvalue_welch(X, Y; Δμ=0.0)

Return the Welch t-statistic: the difference of the sample means minus `Δμ`,
divided by the standard error `√(U/m + V/n)`.

The first form takes sample sizes `m`, `n`, sample means `X̄`, `Ȳ` and sample
variances `U`, `V`; the second form computes them from the samples `X`, `Y`.
"""
function tvalue_welch(m, X̄, U, n, Ȳ, V; Δμ=0.0)
    stderr_diff = √(U/m + V/n)
    return (X̄ - Ȳ - Δμ) / stderr_diff
end

function tvalue_welch(X, Y; Δμ=0.0)
    m, n = length(X), length(Y)
    return tvalue_welch(m, mean(X), var(X), n, mean(Y), var(Y); Δμ=Δμ)
end

"""
    degree_of_freedom(m, U, n, V)
    degree_of_freedom(X, Y)

Return the Welch–Satterthwaite approximate degrees of freedom

    (U/m + V/n)^2 / ((U/m)^2/(m-1) + (V/n)^2/(n-1))

from sample sizes `m`, `n` and sample variances `U`, `V`, or from the samples
`X`, `Y` themselves.
"""
function degree_of_freedom(m, U, n, V)
    return (U/m + V/n)^2 / ((U/m)^2/(m-1) + (V/n)^2/(n-1))
end

function degree_of_freedom(X, Y)
    m, n = length(X), length(Y)
    return degree_of_freedom(m, var(X), n, var(Y))
end

"""
    degree_of_freedom_not_Satterthwaite(m, U, n, V)
    degree_of_freedom_not_Satterthwaite(X, Y)

Return an alternative degrees-of-freedom estimate that uses `m+1`, `n+1` where
`degree_of_freedom` uses `m-1`, `n-1`, and then subtracts 2:

    (U/m + V/n)^2 / ((U/m)^2/(m+1) + (V/n)^2/(n+1)) - 2

NOTE(review): this looks like the formula Stata reports as "Welch's degrees of
freedom" — confirm against the Stata documentation.
"""
function degree_of_freedom_not_Satterthwaite(m, U, n, V)
    return (U/m + V/n)^2 / ((U/m)^2/(m+1) + (V/n)^2/(n+1)) - 2
end

function degree_of_freedom_not_Satterthwaite(X, Y)
    m, n = length(X), length(Y)
    return degree_of_freedom_not_Satterthwaite(m, var(X), n, var(Y))
end

# Example with two small integer samples of sizes 4 and 6.
X = [80, 87, 78, 72]
Y = [119, 78, 95, 124, 85, 92]
@show X Y
# Show the estimated degrees of freedom, then the p-value computed with the
# fractional degrees of freedom and with the degrees of freedom rounded to an
# integer. In the recorded output the two p-values fall on opposite sides of 0.05.
@show degree_of_freedom(X, Y);
@show pvalue_welch_t(X, Y);
@show pvalue_welch_t(X, Y; roundfunc=round);

X = [80, 87, 78, 72]
Y = [119, 78, 95, 124, 85, 92]
degree_of_freedom(X, Y) = 6.50027434076389
pvalue_welch_t(X, Y) = 0.050836543911200315
pvalue_welch_t(X, Y; roundfunc = round) = 0.048148917771018214


In [2]:
# Second integer example (sizes 4 and 6).
X = [8, 21, 22, 30]
Y = [5, 42, 43, 83, 83, 119]
@show X Y
# Fractional vs. rounded degrees of freedom; in the recorded output the exact
# p-value is just above 0.05 and the rounded-df p-value is below it.
@show degree_of_freedom(X, Y);
@show pvalue_welch_t(X, Y);
@show pvalue_welch_t(X, Y; roundfunc=round);

X = [8, 21, 22, 30]
Y = [5, 42, 43, 83, 83, 119]
degree_of_freedom(X, Y) = 5.737389602352075
pvalue_welch_t(X, Y) = 0.050133803058240396
pvalue_welch_t(X, Y; roundfunc = round) = 0.048306826291412475


In [3]:
# Build integer samples from floating-point values: multiply by 20, add 25,
# round to Int and sort. The recorded output shows the resulting X and Y are
# [8, 21, 22, 30] and [5, 42, 43, 83, 83, 119].
X = sort(round.(Int, 20*[-0.8392321454441508, -0.13632577223881945, -0.1907178263261891, 0.25603978994522353] .+ 25))
Y = sort(round.(Int, 20*[-0.9932984206053254, 2.913878236092646, 2.8891275856381764, 4.716892383695555, 0.8317174086782941, 0.8766881245471467] .+ 25))
@show X Y
# Degrees of freedom and p-values with fractional and rounded degrees of freedom.
@show degree_of_freedom(X, Y);
@show pvalue_welch_t(X, Y);
@show pvalue_welch_t(X, Y; roundfunc=round);

X = [8, 21, 22, 30]
Y = [5, 42, 43, 83, 83, 119]
degree_of_freedom(X, Y) = 5.737389602352075
pvalue_welch_t(X, Y) = 0.050133803058240396
pvalue_welch_t(X, Y; roundfunc = round) = 0.048306826291412475


In [4]:
# Floating-point samples of sizes 4 and 6 (no scaling or rounding applied).
X = [-0.8392321454441508, -0.13632577223881945, -0.1907178263261891, 0.25603978994522353]
Y = [-0.9932984206053254, 2.913878236092646, 2.8891275856381764, 4.716892383695555, 0.8317174086782941, 0.8766881245471467]
@show X Y
# Degrees of freedom and p-values with fractional and rounded degrees of freedom.
@show degree_of_freedom(X, Y);
@show pvalue_welch_t(X, Y);
@show pvalue_welch_t(X, Y; roundfunc=round);

X = [-0.8392321454441508, -0.13632577223881945, -0.1907178263261891, 0.25603978994522353]
Y = [-0.9932984206053254, 2.913878236092646, 2.8891275856381764, 4.716892383695555, 0.8317174086782941, 0.8766881245471467]
degree_of_freedom(X, Y) = 5.727616620520906
pvalue_welch_t(X, Y) = 0.05170351982251145
pvalue_welch_t(X, Y; roundfunc = round) = 0.04978472882544727


In [5]:
# Another floating-point example. In the recorded output the degrees of freedom
# (about 4.46) round down, and the exact p-value is below 0.05 while the
# rounded-df p-value is above it.
X = [0.8115162043087973, -0.8404363548455143, 1.6727606354453446, 0.533847620210447]
Y = [1.6339095622225246, 2.065488181473473, 2.0345943286959765, 1.7579073472598687, 3.3451457705490997, 1.8267931745140344]
@show X Y
@show degree_of_freedom(X, Y);
@show pvalue_welch_t(X, Y);
@show pvalue_welch_t(X, Y; roundfunc=round);

X = [0.8115162043087973, -0.8404363548455143, 1.6727606354453446, 0.533847620210447]
Y = [1.6339095622225246, 2.065488181473473, 2.0345943286959765, 1.7579073472598687, 3.3451457705490997, 1.8267931745140344]
degree_of_freedom(X, Y) = 4.463541829417354
pvalue_welch_t(X, Y) = 0.04833942741104785
pvalue_welch_t(X, Y; roundfunc = round) = 0.0542844038045889


In [6]:
# using HypothesisTests

# Randomly search (at most 10^6 draws) for integer-rounded normal samples of
# sizes 4 and 6 for which the p-value computed with the fractional degrees of
# freedom and the p-value computed with the rounded degrees of freedom lie on
# opposite sides of 0.05. If no draw qualifies, X and Y hold the last draw.
for _ in 1:10^6
    # `global` makes these assignments update the top-level X and Y even when
    # the code is run non-interactively (e.g. as a script), where a plain
    # assignment inside a top-level loop would create loop-local variables and
    # leave the X and Y reported after the loop unchanged.
    global X = round.(Int, rand(Normal(80, 10), 4))
    global Y = round.(Int, rand(Normal(80, 20), 6))
    # Keep only draws whose degrees of freedom have a fractional part within
    # 0.05 of 0.5.
    abs(rem(degree_of_freedom(X, Y), 1) - 0.5) ≥ 0.05 && continue
    # A negative product means the two p-values straddle 0.05: stop searching.
    (pvalue_welch_t(X, Y) - 0.05) * (pvalue_welch_t(X, Y; roundfunc=round) - 0.05) < 0 && break
end

# Report the samples left in X and Y by the search loop above, together with
# their degrees of freedom and both p-values.
@show X Y
@show degree_of_freedom(X, Y);
@show pvalue_welch_t(X, Y);
@show pvalue_welch_t(X, Y; roundfunc=round);
# Disabled cross-check; it needs the commented-out `using HypothesisTests` above.
# UnequalVarianceTTest(X, Y)

X = [103, 85, 75, 87]
Y = [83, 67, 55, 73, 69, 75]
degree_of_freedom(X, Y) = 5.543070191690063
pvalue_welch_t(X, Y) = 0.051515311832126216
pvalue_welch_t(X, Y; roundfunc = round) = 0.048218711286918195


## R

In [7]:
# Cross-check against R: copy X and Y into the R session and run t.test on them.
# The recorded output is headed "Welch Two Sample t-test" and reports a
# fractional df.
using RCall
X = [80, 87, 78, 72]
Y = [119, 78, 95, 124, 85, 92]
@rput X Y
R"""t.test(X, Y)"""

RObject{VecSxp}

	Welch Two Sample t-test

data:  X and Y
t = -2.3903, df = 6.5003, p-value = 0.05084
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -39.26251228   0.09584561
sample estimates:
mean of x mean of y 
 79.25000  98.83333 



In [8]:
# The same quantities computed with the functions defined in this notebook, for
# comparison with the t, df and p-value in the R output above.
@show tvalue_welch(X, Y);
@show degree_of_freedom(X, Y);
@show pvalue_welch_t(X, Y);

tvalue_welch(X, Y) = -2.3902810031928428
degree_of_freedom(X, Y) = 6.50027434076389
pvalue_welch_t(X, Y) = 0.050836543911200315


## Stata

https://www.statology.org/welchs-t-test-stata/

<img src="IMG_5204.png">

In [9]:
# Summary statistics for two groups: sample size, mean, and variance given as
# the square of a standard deviation.
# NOTE(review): values presumably transcribed from the Stata output in the
# image/link above — confirm.
m, X̄, U = 12, 21.00, 2.730301^2
n, Ȳ, V = 12, 22.75, 3.250874^2
# Echo N, mean, standard deviation and standard error of the mean per group.
@show (; N=m, Mean=X̄, Std=√U, SE=√(U/m));
@show (; N=n, Mean=Ȳ, Std=√V, SE=√(V/n));
@show tvalue_welch(m, X̄, U, n, Ȳ, V);
println()
# Degrees of freedom and p-value with the default degree_of_freedom formula.
@show degree_of_freedom(m, U, n, V);
@show pvalue_welch_t(m, X̄, U, n, Ȳ, V);
println()
# The same with the alternative degrees-of-freedom formula.
@show degree_of_freedom_not_Satterthwaite(m, U, n, V);
@show pvalue_welch_t(m, X̄, U, n, Ȳ, V; dffunc=degree_of_freedom_not_Satterthwaite);

(; N = m, Mean = X̄, Std = √U, SE = √(U / m)) = (N = 12, Mean = 21.0, Std = 2.730301, SE = 0.7881700086593522)
(; N = n, Mean = Ȳ, Std = √V, SE = √(V / n)) = (N = 12, Mean = 22.75, Std = 3.250874, SE = 0.9384464895007777)
tvalue_welch(m, X̄, U, n, Ȳ, V) = -1.4279678837572058

degree_of_freedom(m, U, n, V) = 21.36240412790263
pvalue_welch_t(m, X̄, U, n, Ȳ, V) = 0.16775823959526698

degree_of_freedom_not_Satterthwaite(m, U, n, V) = 23.246477605703106
pvalue_welch_t(m, X̄, U, n, Ȳ, V; dffunc = degree_of_freedom_not_Satterthwaite) = 0.16659877018134617


In [10]:
# Symbolically factor (m + n) minus the alternative degrees-of-freedom formula.
# Note: @syms rebinds U, V, m, n to symbolic variables, replacing any numeric
# values assigned to these names in earlier cells.
using SymPy
@syms U V m n
expr = m + n - degree_of_freedom_not_Satterthwaite(m, U, n, V)
# The recorded result is a squared expression divided by a sum of terms with
# positive coefficients.
factor(expr)

                            2
 /     2            2      \ 
 \- U*n  - U*n + V*m  + V*m/ 
-----------------------------
 2  3    2  2    2  3    2  2
U *n  + U *n  + V *m  + V *m 

In [11]:
# Symbolically factor (m + n - 2) minus the degree_of_freedom formula.
# Note: @syms rebinds U, V, m, n to symbolic variables.
using SymPy
@syms U V m n
expr = m + n - 2 - degree_of_freedom(m, U, n, V)
# The recorded result has a squared expression in the numerator.
factor(expr)

                            2
 /     2            2      \ 
 \- U*n  + U*n + V*m  - V*m/ 
-----------------------------
 2  3    2  2    2  3    2  2
U *n  - U *n  + V *m  - V *m 

In [12]:
# Same symbolic comparison for a variant of the formula that divides by m and n
# (instead of m-1, n-1 or m+1, n+1) and is compared with m + n.
# Note: @syms rebinds U, V, m, n to symbolic variables.
using SymPy
@syms U V m n
expr = m + n - (U/m + V/n)^2 / ((U/m)^2/m + (V/n)^2/n)
# The recorded result has a squared expression in the numerator.
factor(expr)

               2
/     2      2\ 
\- U*n  + V*m / 
----------------
  2  3    2  3  
 U *n  + V *m   

https://stats.oarc.ucla.edu/stata/output/t-test/

<img src="IMG_5209.jpeg" width=60%>

In [13]:
# Summary statistics for two groups: sample size, mean, and variance given as
# the square of a standard deviation.
# NOTE(review): values presumably transcribed from the Stata output in the
# image/link above — confirm.
m, X̄, U =  91, 50.12088, 10.30516^2
n, Ȳ, V = 109, 54.99083, 8.133715^2
# Echo N, mean, standard deviation and standard error of the mean per group.
@show (; N=m, Mean=X̄, Std=√U, SE=√(U/m));
@show (; N=n, Mean=Ȳ, Std=√V, SE=√(V/n));
# Welch t-statistic, degrees of freedom and two-sided p-value.
@show tvalue_welch(m, X̄, U, n, Ȳ, V);
@show degree_of_freedom(m, U, n, V);
@show pvalue_welch_t(m, X̄, U, n, Ȳ, V);

(; N = m, Mean = X̄, Std = √U, SE = √(U / m)) = (N = 91, Mean = 50.12088, Std = 10.30516, SE = 1.0802742967993244)
(; N = n, Mean = Ȳ, Std = √V, SE = √(V / n)) = (N = 109, Mean = 54.99083, Std = 8.133715, SE = 0.7790686023497558)
tvalue_welch(m, X̄, U, n, Ȳ, V) = -3.6564108141106053
degree_of_freedom(m, U, n, V) = 169.70738506446347
pvalue_welch_t(m, X̄, U, n, Ȳ, V) = 0.00034088152683966214


## SAS

https://support.sas.com/documentation/onlinedoc/stat/132/ttest.pdf

<img src="IMG_5210.jpeg" width=60%>
<img src="IMG_5211.jpeg" width=50%>

In [14]:
# Summary statistics for two groups of size 7: mean, and variance given as the
# square of a standard deviation.
# NOTE(review): values presumably transcribed from the SAS output in the
# images/link above — confirm.
m, X̄, U = 7, 76.8571, 2.5448^2
n, Ȳ, V = 7, 82.7143, 3.1472^2
# Echo N, mean, standard deviation and standard error of the mean per group.
@show (; N=m, Mean=X̄, Std=√U, SE=√(U/m));
@show (; N=n, Mean=Ȳ, Std=√V, SE=√(V/n));
@show tvalue_welch(m, X̄, U, n, Ȳ, V);
println()
# Degrees of freedom and two-sided p-value with the default formula.
@show degree_of_freedom(m, U, n, V);
@show pvalue_welch_t(m, X̄, U, n, Ȳ, V);

(; N = m, Mean = X̄, Std = √U, SE = √(U / m)) = (N = 7, Mean = 76.8571, Std = 2.5448, SE = 0.9618439909138814)
(; N = n, Mean = Ȳ, Std = √V, SE = √(V / n)) = (N = 7, Mean = 82.7143, Std = 3.1472, SE = 1.18952978945464)
tvalue_welch(m, X̄, U, n, Ȳ, V) = -3.828869171023611

degree_of_freedom(m, U, n, V) = 11.496287043561036
pvalue_welch_t(m, X̄, U, n, Ȳ, V) = 0.00258864569606385


## SPSS

https://www.stats-guild.com/analytics/15678

<img src="IMG_5212.jpeg" width=100%>

In [15]:
# Summary statistics for two groups: sample size, mean, and variance given as
# the square of a standard deviation.
# NOTE(review): values presumably transcribed from the SPSS output in the
# image/link above — confirm.
m, X̄, U = 115, 3054.17, 751.684^2
n, Ȳ, V =  74, 2773.16, 660.340^2
# Echo N, mean, standard deviation and standard error of the mean per group.
@show (; N=m, Mean=X̄, Std=√U, SE=√(U/m));
@show (; N=n, Mean=Ȳ, Std=√V, SE=√(V/n));
# Welch t-statistic, degrees of freedom and two-sided p-value.
@show tvalue_welch(m, X̄, U, n, Ȳ, V);
@show degree_of_freedom(m, U, n, V);
@show pvalue_welch_t(m, X̄, U, n, Ȳ, V);

(; N = m, Mean = X̄, Std = √U, SE = √(U / m)) = (N = 115, Mean = 3054.17, Std = 751.684, SE = 70.0948944277312)
(; N = n, Mean = Ȳ, Std = √V, SE = √(V / n)) = (N = 74, Mean = 2773.16, Std = 660.34, SE = 76.76296576809364)
tvalue_welch(m, X̄, U, n, Ȳ, V) = 2.703288302330104
degree_of_freedom(m, U, n, V) = 169.86547915064858
pvalue_welch_t(m, X̄, U, n, Ȳ, V) = 0.007563638533469261
