In [1]:
import numpy as np
from scipy import stats

In [2]:
help(stats.mannwhitneyu)

Help on function mannwhitneyu in module scipy.stats._mannwhitneyu:

mannwhitneyu(x, y, use_continuity=True, alternative='two-sided', axis=0, method='auto', *, nan_policy='propagate', keepdims=False)
    Perform the Mann-Whitney U rank test on two independent samples.
    
    The Mann-Whitney U test is a nonparametric test of the null hypothesis
    that the distribution underlying sample `x` is the same as the
    distribution underlying sample `y`. It is often used as a test of
    difference in location between distributions.
    
    Parameters
    ----------
    x, y : array-like
        N-d arrays of samples. The arrays must be broadcastable except along
        the dimension given by `axis`.
    use_continuity : bool, optional
        Whether a continuity correction (1/2) should be applied.
        Default is True when `method` is ``'asymptotic'``; has no effect
        otherwise.
    alternative : {'two-sided', 'less', 'greater'}, optional
        Defines the alternative hypoth

In [3]:
X = np.random.randint(0, 100, 100)
print(X)
Y1 = X * 1.15
Y2 = X * 0.85
print(Y1)
print(Y2)

[29  3 63 81 47 97 11 24 59 17 53 71 86 29 31 32 12 73 15 88 13 23 99  5
  1 54 10 18 45 44 21 75 94 14 34 67 82 16 24 96 27 69 78 79 24 78 16 26
 65 85 96 26 91 54 64 55 38 56 92 81 52 61 62 74 53  9 27 16 37 48 80 12
 56 41 74 97 36 52 76 12 90 69 79 74 17 88 23  5 44 63 64 41 17 71 33 54
 86 83 57 48]
[ 33.35   3.45  72.45  93.15  54.05 111.55  12.65  27.6   67.85  19.55
  60.95  81.65  98.9   33.35  35.65  36.8   13.8   83.95  17.25 101.2
  14.95  26.45 113.85   5.75   1.15  62.1   11.5   20.7   51.75  50.6
  24.15  86.25 108.1   16.1   39.1   77.05  94.3   18.4   27.6  110.4
  31.05  79.35  89.7   90.85  27.6   89.7   18.4   29.9   74.75  97.75
 110.4   29.9  104.65  62.1   73.6   63.25  43.7   64.4  105.8   93.15
  59.8   70.15  71.3   85.1   60.95  10.35  31.05  18.4   42.55  55.2
  92.    13.8   64.4   47.15  85.1  111.55  41.4   59.8   87.4   13.8
 103.5   79.35  90.85  85.1   19.55 101.2   26.45   5.75  50.6   72.45
  73.6   47.15  19.55  81.65  37.95  62.1   98.9   95.45  65

In [4]:
def u2_zscore(U1, p, x, y) -> tuple[float, float]:
    n1, n2 = len(x), len(y)
    U2 = n1 * n2 - U1

    mu_U = n1 * n2 / 2
    sigma_U = np.sqrt(n1 * n2 * (n1 + n2 + 1) / 12)

    z_score = (U1 - mu_U) / sigma_U
    return U2, z_score

In [5]:
U1, p = stats.mannwhitneyu(X, X, alternative="two-sided")
U2, zscore = u2_zscore(U1, p, X, X)
print(f"{U1=:.2f}, {U2=:.2f}, {zscore=:.4f}, {p=:.4f}")

U1, p = stats.mannwhitneyu(X, Y1, alternative="two-sided")
U2, zscore = u2_zscore(U1, p, X, Y1)
print(f"{U1=:.2f}, {U2=:.2f}, {zscore=:.4f}, {p=:.4f}")

U1, p = stats.mannwhitneyu(X, Y2, alternative="two-sided")
U2, zscore = u2_zscore(U1, p, X, Y2)
print(f"{U1=:.2f}, {U2=:.2f}, {zscore=:.4f}, {p=:.4f}")


U1=5000.00, U2=5000.00, zscore=0.0000, p=1.0000
U1=4293.50, U2=5706.50, zscore=-1.7263, p=0.0845
U1=5816.00, U2=4184.00, zscore=1.9938, p=0.0463


In [15]:
import random

sentence = [ "I", "have", "read", "a", "very", "informative", "article", "written", "by", "Neel Nanda"]
print(" ".join(sentence))
# print(" ".join(random.shuffle(sentence)))
random.shuffle(sentence)
print(" ".join(sentence))

I have read a very informative article written by Neel Nanda
Neel Nanda have by read I very article written a informative
