## Single-Pass Online Statistics Algorithms ##
Nikolai Shokhirev, http://www.numericalexpert.com/

### Supplement to the article ###
**Single-Pass Online Statistics Algorithms**: http://www.numericalexpert.com/articles/single_pass_stat/

In [1]:
import numpy as np

In [2]:
from online_statistics import *

#### Helper functions ####

In [3]:
def run_win2pass(X, n):
    """Print windowed moments of X obtained with the two-pass routine.

    Emits a header line, then one line per index of X holding
    win_moment(X, n, i, k) for k = 1, 2, 3, 4 (labelled Mean, M2, M3, M4
    by the header). Every value, including the last one on a line, is
    followed by a single space.

    X -- indexable sequence of samples
    n -- window size forwarded to win_moment
    """
    print('Mean,  M2,  M3,  M4')
    orders = (1, 2, 3, 4)
    for pos in range(len(X)):
        row = [win_moment(X, n, pos, order) for order in orders]
        # The trailing space before the newline keeps the line format
        # "v1 v2 v3 v4 " used by this report.
        print(*row, end=' \n')

In [4]:
def run_win1pass(X, n):
    """Print windowed moments of X obtained with the single-pass tracker.

    A WindowedStat(n) instance is fed the samples one at a time; after
    each push its m, m2, m3 and m4 attributes are printed on one line,
    under a 'Mean,  M2,  M3,  M4' header.

    X -- iterable of samples
    n -- window size passed to WindowedStat
    """
    tracker = WindowedStat(n)
    print('Mean,  M2,  M3,  M4')
    for sample in X:
        tracker.push(sample)
        moments = (tracker.m, tracker.m2, tracker.m3, tracker.m4)
        print(*moments)

In [5]:
def run_cum2pass(X):
    """Print cumulative moments of X obtained with the two-pass routine.

    Emits a header line, then one line per index of X holding
    cum_moment(X, i, k) for k = 1, 2, 3, 4 (labelled Mean, M2, M3, M4
    by the header). Every value, including the last one on a line, is
    followed by a single space.

    X -- indexable sequence of samples
    """
    print('Mean,  M2,  M3,  M4')
    orders = (1, 2, 3, 4)
    for pos in range(len(X)):
        row = [cum_moment(X, pos, order) for order in orders]
        # Keep the "v1 v2 v3 v4 " layout, trailing space included.
        print(*row, end=' \n')

In [6]:
def run_cum1pass(X):
    """Print cumulative moments of X obtained with the single-pass tracker.

    A CumulativeStat instance is fed the samples one at a time; after
    each push its m, m2, m3 and m4 attributes are printed on one line,
    under a 'Mean,  M2,  M3,  M4' header.

    X -- iterable of samples
    """
    tracker = CumulativeStat()
    print('Mean,  M2,  M3,  M4')
    for sample in X:
        tracker.push(sample)
        moments = (tracker.m, tracker.m2, tracker.m3, tracker.m4)
        print(*moments)

In [7]:
def run_wincov2pass(X, Y, n):
    """Print the windowed covariance of X and Y from the two-pass routine.

    After a 'Covariance' header, prints win_cov(X, Y, n, i) on its own
    line for every index i of X.

    X, Y -- indexable sample sequences
    n    -- window size forwarded to win_cov
    """
    print('Covariance')
    for pos in range(len(X)):
        window_cov = win_cov(X, Y, n, pos)
        print(window_cov)

In [8]:
def run_wincov1pass(X, Y, n):
    """Print the windowed covariance of X and Y from the single-pass tracker.

    A WindowedCovariance(n) instance receives the (x, y) pairs one at a
    time; its cov attribute is printed after each push, under a
    'Covariance' header. Iteration stops at the shorter of X and Y.

    X, Y -- iterables of samples
    n    -- window size passed to WindowedCovariance
    """
    tracker = WindowedCovariance(n)
    print('Covariance')
    for pair in zip(X, Y):
        tracker.push(*pair)
        print(tracker.cov)

In [9]:
def run_cumcov2pass(X, Y):
    """Print the cumulative covariance of X and Y from the two-pass routine.

    After a 'Covariance' header, prints cum_cov(X, Y, i) on its own line
    for every index i of X.

    X, Y -- indexable sample sequences
    """
    print('Covariance')
    for pos in range(len(X)):
        running_cov = cum_cov(X, Y, pos)
        print(running_cov)

In [10]:
def run_cumcov1pass(X, Y):
    """Print the cumulative covariance of X and Y from the single-pass tracker.

    A CumulativeCovariance instance receives the (x, y) pairs one at a
    time; its cov attribute is printed after each push, under a
    'Covariance' header. Iteration stops at the shorter of X and Y.

    X, Y -- iterables of samples
    """
    tracker = CumulativeCovariance()
    print('Covariance')
    for pair in zip(X, Y):
        tracker.push(*pair)
        print(tracker.cov)

### Tests ###

In [11]:
# Test data: ten increasing samples shared by the windowed and
# cumulative moment tests.
X = np.array(
    [0.2, 1.0, 1.4, 2.0, 2.7, 3.8, 5.4, 7.5, 11, 15],
    dtype=np.float64,
)

#### Windowed statistics ####

In [12]:
# Single-pass windowed algorithm on the test data X
n = 3 # window size
run_win1pass(X, n)

Mean,  M2,  M3,  M4
0.2 0.0 -3.46944695195e-18 1.73472347598e-18
0.6 0.16 -2.94902990916e-17 0.0256
0.866666666667 0.248888888889 -0.0474074074074 0.0929185185185
1.46666666667 0.168888888889 0.0165925925926 0.0427851851852
2.03333333333 0.282222222222 0.0140740740741 0.119474074074
2.83333333333 0.548888888889 0.107407407407 0.451918518519
3.96666666667 1.22888888889 0.302592592593 2.26525185185
5.56666666667 2.29555555556 0.569259259259 7.90436296296
7.96666666667 5.33555555556 3.63325925926 42.7022296296
11.1666666667 9.38888888889 2.34259259259 132.226851852


In [13]:
# Two-pass windowed algorithm on the test data X
# NOTE(review): the preceding single-pass run uses n = 3 while this cell
# uses n = 4, so the two outputs are not directly comparable (they agree
# only while the window is not yet full). The matching n = 4 single-pass
# run is the next cell. Confirm whether n = 3 was intended here.
n = 4 # window size
run_win2pass(X, n)

Mean,  M2,  M3,  M4
0.2 0.0 0.0 0.0 
0.6 0.16 1.38777878078e-17 0.0256 
0.866666666667 0.248888888889 -0.0474074074074 0.0929185185185 
1.15 0.4275 -0.05775 0.33523125 
1.775 0.411875 0.07115625 0.278795703125 
2.475 0.796875 0.24703125 1.11778945313 
3.475 1.646875 0.87328125 4.70922695313 
4.85 3.2625 1.92 17.99750625 
6.925 7.256875 8.44846875 94.1580144531 
9.725 13.276875 14.23396875 287.829289453 


In [14]:
# Single-pass windowed algorithm on the test data X
n = 4 # window size
run_win1pass(X, n)

Mean,  M2,  M3,  M4
0.2 0.0 -3.46944695195e-18 1.73472347598e-18
0.6 0.16 -2.94902990916e-17 0.0256
0.866666666667 0.248888888889 -0.0474074074074 0.0929185185185
1.15 0.4275 -0.05775 0.33523125
1.775 0.411875 0.07115625 0.278795703125
2.475 0.796875 0.24703125 1.11778945313
3.475 1.646875 0.87328125 4.70922695313
4.85 3.2625 1.92 17.99750625
6.925 7.256875 8.44846875 94.1580144531
9.725 13.276875 14.23396875 287.829289453


In [15]:
# Two-pass windowed algorithm on the test data X, same window size as
# the single-pass run directly above, for a side-by-side comparison
n = 4 # window size
run_win2pass(X, n)

Mean,  M2,  M3,  M4
0.2 0.0 0.0 0.0 
0.6 0.16 1.38777878078e-17 0.0256 
0.866666666667 0.248888888889 -0.0474074074074 0.0929185185185 
1.15 0.4275 -0.05775 0.33523125 
1.775 0.411875 0.07115625 0.278795703125 
2.475 0.796875 0.24703125 1.11778945313 
3.475 1.646875 0.87328125 4.70922695313 
4.85 3.2625 1.92 17.99750625 
6.925 7.256875 8.44846875 94.1580144531 
9.725 13.276875 14.23396875 287.829289453 


#### Cumulative statistics ####

In [16]:
# Single-pass cumulative algorithm on the test data X.
# The first 4 rows coincide with the windowed test above (n = 4).
run_cum1pass(X)

Mean,  M2,  M3,  M4
0.2 0.0 -3.46944695195e-18 1.73472347598e-18
0.6 0.16 -2.94902990916e-17 0.0256
0.866666666667 0.248888888889 -0.0474074074074 0.0929185185185
1.15 0.4275 -0.05775 0.33523125
1.46 0.7264 -0.006768 1.00290112
1.85 1.36583333333 0.4725 3.82608958333
2.35714285714 2.71387755102 2.53686297376 16.5681936693
3.0 5.2675 8.79825 66.084625
3.88888888889 11.0032098765 34.6653305898 337.810830818
5.0 21.014 96.9546 1240.0949


In [17]:
# Two-pass cumulative algorithm on the same data, for comparison with
# the single-pass cumulative run
run_cum2pass(X)

Mean,  M2,  M3,  M4
0.2 0.0 0.0 0.0 
0.6 0.16 1.38777878078e-17 0.0256 
0.866666666667 0.248888888889 -0.0474074074074 0.0929185185185 
1.15 0.4275 -0.05775 0.33523125 
1.46 0.7264 -0.006768 1.00290112 
1.85 1.36583333333 0.4725 3.82608958333 
2.35714285714 2.71387755102 2.53686297376 16.5681936693 
3.0 5.2675 8.79825 66.084625 
3.88888888889 11.0032098765 34.6653305898 337.810830818 
5.0 21.014 96.9546 1240.0949 


### Covariance ###

In [18]:
# Test data: two independently perturbed copies of the ramp 0..9, so that
# X and Y are strongly correlated.
# The generator is seeded so that a fresh "Restart & Run All" reproduces
# the same series; outputs recorded from an unseeded run will differ.
np.random.seed(0)
ramp = np.arange(10)
# Y is drawn before X, so the order of the random draws is fixed.
Y = ramp + np.random.uniform(size=10)
X = ramp + np.random.uniform(size=10)

In [19]:
# Single-pass windowed covariance of X and Y
n = 4  # window size
run_wincov1pass(X, Y, n)

Covariance
0.0
0.162176504834
0.454873810488
1.10303001434
1.30642641714
1.1964980382
1.12359078133
0.960414905236
1.27182856029
1.34879684567


In [20]:
# Two-pass windowed covariance of X and Y, same window size as the
# single-pass run, for comparison
n = 4  # window size
run_wincov2pass(X, Y, n)

Covariance
0.0
0.162176504834
0.454873810488
1.10303001434
1.30642641714
1.1964980382
1.12359078133
0.960414905236
1.27182856029
1.34879684567
