In [1]:
from sortedcontainers import SortedList
from math import sqrt
from scipy.stats import mannwhitneyu,norm

In [2]:
def add(X,Ux,Y,Uy,T,val) :
    """Insert ``val`` into sample ``X`` and update the running U statistics.

    Parameters
    ----------
    X : sorted container receiving ``val``; must provide ``bisect_left``,
        ``bisect_right``, ``add`` and ``len`` (mutated in place).
    Ux : current U statistic of ``X``, or ``None`` while ``X`` is empty.
    Y : sorted container holding the other sample (only read).
    Uy : current U statistic of ``Y``; passed through unchanged while
        ``Y`` is empty.
    T : running tie term, the sum of ``t**3 - t`` over all tie groups of
        the pooled sample.
    val : the observation to insert.

    Returns
    -------
    tuple ``(X, Ux, Y, Uy, T)`` with the updated state.
    """
    # Number of observations already equal to val, in X and in Y.
    ties_x = X.bisect_right(val) - X.bisect_left(val)
    below_y = Y.bisect_left(val)
    ties_y = Y.bisect_right(val) - below_y
    t = ties_x + ties_y
    # Growing a tie group from t to t+1 members raises t**3 - t by
    # (t+1)**3 - (t+1) - (t**3 - t) = 3*(t*t + t).
    T = T + 3*(t*t + t)
    if Ux is None :
        Ux = 0.0
    # val scores 1 against every smaller Y observation and 1/2 against ties.
    Ux += below_y + 0.5*ties_y
    X.add(val)
    if len(Y) > 0 :
        # The two U statistics always sum to len(X)*len(Y).
        Uy = len(X)*len(Y) - Ux
    return (X,Ux,Y,Uy,T)

In [3]:
def remove(X,Ux,Y,Uy,T,val) :
    """Delete one occurrence of ``val`` from ``X`` and update the statistics.

    This is the inverse of ``add``: calling ``add`` and then ``remove`` with
    the same value restores ``Ux``, ``Uy`` and ``T``.

    Parameters
    ----------
    X : sorted container to remove ``val`` from; must provide
        ``bisect_left``, ``bisect_right``, ``remove`` and ``len``
        (mutated in place).
    Ux : current U statistic of ``X``.
    Y : sorted container holding the other sample (only read).
    Uy : current U statistic of ``Y``.
    T : running tie term, the sum of ``t**3 - t`` over all tie groups of
        the pooled sample.
    val : the observation to delete.

    Returns
    -------
    tuple ``(X, Ux, Y, Uy, T)`` with the updated state. ``Ux`` is ``None``
    once ``X`` is empty.

    Raises
    ------
    Whatever ``X.remove`` raises when ``val`` is absent; nothing has been
    modified at that point.
    """
    X.remove(val)
    below_y = Y.bisect_left(val)
    ties_y = Y.bisect_right(val) - below_y
    # Size of val's tie group AFTER the removal, counted over both samples.
    # The copies held in Y must be included, exactly as add() counts them,
    # otherwise T is left too large whenever val is tied with a Y value.
    t = X.bisect_right(val) - X.bisect_left(val) + ties_y
    # Shrinking a tie group from t+1 to t members lowers t**3 - t by
    # 3*(t*t + t).
    T = T - 3*(t*t + t)
    if len(X) == 0 :
        Ux = None
        # With X empty no (x, y) pair exists, so Uy falls back to the value
        # it has when only Y has ever received data, instead of going stale.
        Uy = 0.0 if len(Y) > 0 else None
    else :
        # Undo val's contribution: 1 per smaller Y value, 1/2 per tie.
        Ux -= below_y + 0.5*ties_y
        if len(Y) > 0 :
            Uy = len(X)*len(Y) - Ux
    return (X,Ux,Y,Uy,T)

In [4]:
class seq_mann_whitney_U :
    """Two-sample Mann-Whitney U statistic maintained incrementally.

    Observations are added to, or removed from, sample ``X`` or sample ``Y``
    one at a time; the U statistics and the tie term are updated on each
    call rather than recomputed from scratch.

    Attributes (all public, read by the notebook cells):
      X, Y   -- ``SortedList`` containers holding the two samples.
      Ux, Uy -- U statistic of each sample, or ``None`` while undefined.
      T      -- running tie term, sum of ``t**3 - t`` over tie groups.

    Every mutator returns ``self`` so calls can be chained.
    """
    def __init__(self) :
        self.X = SortedList()
        self.Y = SortedList()
        self.Ux = None
        self.Uy = None
        self.T = 0.0

    def add_x(self,val) :
        """Add observation ``val`` to sample X."""
        self.X,self.Ux,self.Y,self.Uy,self.T = add(self.X,self.Ux,self.Y,self.Uy,self.T,val)
        return self

    def add_y(self,val) :
        """Add observation ``val`` to sample Y."""
        # Same update as add_x with the roles of the two samples swapped.
        self.Y,self.Uy,self.X,self.Ux,self.T = add(self.Y,self.Uy,self.X,self.Ux,self.T,val)
        return self

    def remove_x(self,val) :
        """Remove one occurrence of ``val`` from sample X."""
        self.X,self.Ux,self.Y,self.Uy,self.T = remove(self.X,self.Ux,self.Y,self.Uy,self.T,val)
        return self

    def remove_y(self,val) :
        """Remove one occurrence of ``val`` from sample Y."""
        # Same update as remove_x with the roles of the two samples swapped.
        self.Y,self.Uy,self.X,self.Ux,self.T = remove(self.Y,self.Uy,self.X,self.Ux,self.T,val)
        return self

    def asymptotic_p(self) :
        """Two-sided p-value from the normal approximation (no continuity correction).

        Returns ``None`` while either U statistic is undefined, and NaN when
        the tie-corrected variance is not positive (every pooled observation
        shares one value), because the z-score is then 0/0.
        """
        if self.Ux is None or self.Uy is None :
            return None
        nx = len(self.X)
        ny = len(self.Y)
        n = nx + ny
        mu = nx*ny/2
        # Using the larger statistic keeps the z-score non-negative, so
        # doubling the upper tail gives the two-sided p-value.
        U = max(self.Ux, self.Uy)
        # mu/6 == nx*ny/12; the T term is the correction for ties.
        variance = (mu/6)*(n+1-self.T/(n*(n-1)))
        if variance <= 0 :
            return float("nan")
        return 2*norm.sf((U - mu)/sqrt(variance))

In [5]:
# Fresh tracker: both samples empty, Ux and Uy undefined, T = 0.
S = seq_mann_whitney_U()

In [6]:
# Small hand-checkable example with no repeated values: five Y and four X observations.
Y = [19,22,16,29,24]
X = [20,11,17,12]

# Feed the tracker one observation at a time: all of Y first, then all of X.
for y in Y :
    S.add_y(y)
for x in X :
    S.add_x(x)

In [7]:
# Report both U statistics, the tie term and the two-sided asymptotic p-value.
print(S.Ux)
print(S.Uy)
print(S.T)
print(S.asymptotic_p())

3.0
17.0
0.0
0.0864107329737


In [8]:
from scipy.stats import mannwhitneyu

In [9]:
# SciPy reference without continuity correction. Y is passed first here, and the
# statistic it reports equals S.Uy printed above.
mannwhitneyu(Y,X,method="asymptotic",use_continuity=False)

MannwhitneyuResult(statistic=17.0, pvalue=0.0864107329737)

In [10]:
from numpy.random import uniform,poisson,seed

In [11]:
# Seed NumPy's global random generator so the random samples below are reproducible.
seed(1)

In [12]:
# Two samples of 100 draws each, uniform between 0 and 1.
X = uniform(0,1,100)
Y = uniform(0,1,100)

In [13]:
# Start from an empty tracker and stream in the uniform samples, Y first, then X.
S = seq_mann_whitney_U()
for y in Y :
    S.add_y(y)
for x in X :
    S.add_x(x)

In [14]:
# Report both U statistics, the tie term and the two-sided asymptotic p-value.
print(S.Ux)
print(S.Uy)
print(S.T)
print(S.asymptotic_p())

5024.0
4976.0
0.0
0.9532377881057771


In [15]:
# SciPy reference without continuity correction. X is passed first, and the
# statistic it reports equals S.Ux printed above.
mannwhitneyu(X,Y,method="asymptotic",use_continuity=False)

MannwhitneyuResult(statistic=5024.0, pvalue=0.9532377881057771)

In [16]:
# Re-seed, then draw two samples of 100 Poisson counts each (rate 1).
# The draws are integers, so this case exercises the tie term T.
seed(1)
X = poisson(1,100)
Y = poisson(1,100)

In [17]:
# Start from an empty tracker and stream in the Poisson samples, Y first, then X.
S = seq_mann_whitney_U()
for y in Y :
    S.add_y(y)
for x in X :
    S.add_x(x)

In [18]:
# Report both U statistics, the tie term and the two-sided asymptotic p-value.
print(S.Ux)
print(S.Uy)
print(S.T)
print(S.asymptotic_p())

4588.5
5411.5
814770.0
0.2887206312324161


In [19]:
# SciPy reference for the tied data, without continuity correction; X is passed first.
mannwhitneyu(X,Y,method="asymptotic",use_continuity=False)

MannwhitneyuResult(statistic=4588.5, pvalue=0.2887206312324161)

In [20]:
# Two small samples of 10 uniform draws each. There is no re-seed in this cell, so
# the values continue the random stream started by the most recent seed(1) call.
X = uniform(0,1,10)
Y = uniform(0,1,10)

In [21]:
# Start from an empty tracker and stream in the small uniform samples, Y first, then X.
S = seq_mann_whitney_U()
for y in Y :
    S.add_y(y)
for x in X :
    S.add_x(x)

In [22]:
# Report both U statistics, the tie term and the two-sided asymptotic p-value.
print(S.Ux)
print(S.Uy)
print(S.T)
print(S.asymptotic_p())

70.0
30.0
0.0
0.1305700181157362


In [23]:
# SciPy reference for the small samples, without continuity correction; X is passed first.
mannwhitneyu(X,Y,method="asymptotic",use_continuity=False)

MannwhitneyuResult(statistic=70.0, pvalue=0.1305700181157362)

In [24]:
# Append two copies of 0.5 to each sample, creating ties both within and
# across the two samples.
# NOTE: this rebinds X and Y from their previous values, so re-running the cell
# appends two more 0.5 entries each time.
X = list(X) + [0.5,0.5]
Y = list(Y) + [0.5,0.5]

In [25]:
# Start from an empty tracker and stream in the samples that now contain ties.
S = seq_mann_whitney_U()
for y in Y :
    S.add_y(y)
for x in X :
    S.add_x(x)

In [26]:
# Report both U statistics, the tie term and the two-sided asymptotic p-value.
print(S.Ux)
print(S.Uy)
print(S.T)
print(S.asymptotic_p())

96.0
48.0
60.0
0.16493532127717858


In [27]:
# SciPy reference for the samples with ties, without continuity correction; X is passed first.
mannwhitneyu(X,Y,method="asymptotic",use_continuity=False)

MannwhitneyuResult(statistic=96.0, pvalue=0.16493532127717858)