# Online Mean and Variance

sources:

- https://datagenetics.com/blog/november22017/index.html

- https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance

- https://natural-blogarithm.com/post/variance-welford-vs-numpy/



In [1]:
import numpy as np

# Demonstration: running ("progressive") mean and population variance of a
# sequence, computed three ways each so the results can be compared:
#   1. directly with numpy on every prefix X[:i]   (reference, O(k^2) total)
#   2. from cumulative sums                        (vectorised, O(k))
#   3. with a one-pass online update               (O(1) work per new sample)
#
# Fixed sample data keeps the printed output reproducible; substitute any
# 1-D array (e.g. np.random.rand(5)) to experiment with other inputs.
X = np.array([1, 3, 2, 2, 4, 0])
k = len(X)
N = np.arange(1, k + 1)  # 1-based sample counts: N[i] == i + 1
print('index :', N)
print('data : ', X)

# --- Mean, method 1: numpy mean of every prefix ---------------------------
avg_X = np.array([np.mean(X[:i]) for i in range(1, k + 1)])
print('Progressive Mean (numpy direct) :      ', avg_X)

# --- Mean, method 2: cumulative sum divided by the sample count -----------
sum_X = np.cumsum(X)

print('Cumulative Sum (numpy direct) :        ', sum_X)
avg_X = sum_X / N
print('Progressive Mean (from numpy cumsum) : ', avg_X)

# --- Mean, method 3: online update ----------------------------------------
# mean_n = mean_{n-1} + (x_n - mean_{n-1}) / n
# Only the previous mean and the count are needed, never the full history.
avg_X = np.zeros(k)
avg_X[0] = X[0]
for i in range(1, k):
    n = i + 1  # number of samples seen once X[i] is included
    avg_X[i] = avg_X[i - 1] + ((X[i] - avg_X[i - 1]) / n)
print('Progressive Mean (online wolfram) :    ', avg_X)

# --- Variance, method 1: numpy population variance (ddof=0) of every prefix
var_X = np.array([np.var(X[:i]) for i in range(1, k + 1)])
print('Progressive Variance (numpy direct) :  ', var_X)

# --- Variance, method 2: E[x^2] - E[x]^2 from cumulative sums --------------
# Simple, but it subtracts two potentially large, nearly equal numbers and can
# lose precision (catastrophic cancellation) when the mean is large relative
# to the spread of the data.
sqs_X = np.cumsum(X ** 2)
print('Squared Cumulative Sum (numpy):        ', sqs_X)

var_X = (sqs_X / N) - (avg_X ** 2)
print('Progressive Variance (numpy sqs) :     ', var_X)

# --- Variance, method 3: online (Welford-style) update ---------------------
# var_n = ((n - 1) * var_{n-1} + (x_n - mean_{n-1}) * (x_n - mean_n)) / n
# Relies on avg_X holding the running means computed above. var_X[0] stays 0
# because a single sample has zero variance.
var_X = np.zeros(k)
for i in range(1, k):
    n = i + 1  # number of samples seen once X[i] is included
    var_X[i] = (var_X[i - 1] * i + (X[i] - avg_X[i - 1]) * (X[i] - avg_X[i])) / n
print('Progressive Variance (online) :        ', var_X)

index : [1 2 3 4 5 6]
data :  [1 3 2 2 4 0]
Progressive Mean (numpy direct) :       [1.  2.  2.  2.  2.4 2. ]
Cumulative Sum (numpy direct) :         [ 1  4  6  8 12 12]
Progressive Mean (from numpy cumsum) :  [1.  2.  2.  2.  2.4 2. ]
Progressive Mean (online wolfram) :     [1.  2.  2.  2.  2.4 2. ]
Progressive Variance (numpy direct) :   [0.         1.         0.66666667 0.5        1.04       1.66666667]
Squared Cumulative Sum (numpy):         [ 1 10 14 18 34 34]
Progressive Variance (numpy sqs) :      [0.         1.         0.66666667 0.5        1.04       1.66666667]
Progressive Variance (online) :         [0.         1.         0.66666667 0.5        1.04       1.66666667]
