In [23]:
import itertools
import pandas as pd
from scipy import stats
from odapi.connectors import Irceline
from odapi.toolbox.statistics import StatisticalTest
from odapi.settings import settings
settings.logger.setLevel(50)

In [2]:
client = Irceline()

In [3]:
client.meta.columns

Index(['serieid', 'siteid', 'measureid', 'serieunits', 'measurekey',
       'measurename', 'sitekey', 'sitename', 'seriekey', 'molarmass', 'factor',
       'sitelocation', 'sitetype', 'lat', 'lon', 'nuts1id', 'nuts2id',
       'nuts3id', 'nuts1name', 'nuts2name', 'nuts3name', 'lauid', 'launame',
       'started', 'stopped'],
      dtype='object')

In [4]:
sel = client.select(measurekey='NO2', sitekey='41')[["serieid", "sitekey", "measurekey"]]

In [5]:
sel.sample(2)

Unnamed: 0,serieid,sitekey,measurekey
105,6639,41WOL1,NO2
98,6516,41B006,NO2


In [6]:
t0 = "2019-01-01"
t1 = "2020-01-01"
t2 = "2019-10-01"

In [7]:
recs = client.get_records(sel, start=t0, stop=t1)

In [8]:
recs.sample(2)

Unnamed: 0,serieid,start,stop,value
3007,6504,2019-05-06 06:00:00+00:00,2019-05-06 07:00:00+00:00,92.0
3396,6639,2019-05-22 11:00:00+00:00,2019-05-22 12:00:00+00:00,16.5


In [9]:
data = recs.merge(sel).pivot_table(index='start', columns='sitekey', values='value')

In [10]:
data.sample(5)

sitekey,41B001,41B004,41B006,41B008,41B011,41MEU1,41N043,41R001,41R002,41R012,41WOL1
start,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2019-07-03 19:00:00+00:00,17.0,10.5,,,5.5,16.0,6.0,27.0,10.5,6.5,17.5
2019-09-25 23:00:00+00:00,24.5,9.0,5.0,19.5,1.5,24.0,11.0,8.5,11.0,3.0,4.5
2019-11-02 21:00:00+00:00,29.5,15.5,,,1.0,10.5,13.5,3.5,12.5,1.5,4.5
2019-11-24 03:00:00+00:00,43.5,37.0,35.5,45.0,30.0,36.5,35.5,35.5,37.0,28.0,37.5
2019-02-15 03:00:00+00:00,67.5,81.0,42.5,62.5,62.5,66.5,73.5,75.0,63.0,26.0,57.5


In [11]:
def t_test(ref, exp, **params):
    res = stats.ttest_ind(ref, exp, equal_var=False, nan_policy='omit')
    rep = {k: getattr(res, k) for k in res._fields}
    rep["class"] = res.__class__.__name__
    rep["params"] = params
    return rep

In [12]:
def ks_test(ref, exp, **params):
    res = stats.ks_2samp(ref, exp, **params)
    rep = {k: getattr(res, k) for k in res._fields}
    rep["class"] = res.__class__.__name__
    rep["params"] = params
    return rep

In [13]:
def dispatch(ref, exp, callback, mode='product', **params):
    if mode == 'product':
        for x, y in itertools.product(ref, exp):
            res = callback(ref[x], exp[y], **params)
            res.update({'ref_key': x, 'exp_key': y})
            yield res
    elif mode == 'pairwise':
        for x in set(ref.columns).intersection(set(exp.columns)):
            res = callback(ref[x], exp[x], **params)
            res.update({'ref_key': x, 'exp_key': x})
            yield res

In [14]:
def apply_test(ref, exp, test_func=t_test, mode='product', **params):
    return pd.DataFrame([r for r in dispatch(ref, exp, callback=test_func, mode=mode, **params)])

In [15]:
ref = data.loc[:,['41R002', '41R012']]
exp = data.loc[t2:t1,:]

In [16]:
m = 'product'
t = apply_test(ref, exp, test_func=t_test, mode=m)
ks = apply_test(ref, exp, test_func=ks_test, mode=m)

In [17]:
test = pd.concat([t, ks])

In [18]:
test

Unnamed: 0,statistic,pvalue,class,params,ref_key,exp_key
0,-30.85706,1.8501019999999998e-185,Ttest_indResult,{},41R002,41B001
1,4.543881,5.708183e-06,Ttest_indResult,{},41R002,41B004
2,24.070985,3.157105e-119,Ttest_indResult,{},41R002,41B006
3,-23.870924,3.499832e-116,Ttest_indResult,{},41R002,41B008
4,34.512547,4.0771100000000003e-227,Ttest_indResult,{},41R002,41B011
5,19.398188,1.302347e-80,Ttest_indResult,{},41R002,41MEU1
6,-4.691089,2.823173e-06,Ttest_indResult,{},41R002,41N043
7,10.273145,1.932855e-24,Ttest_indResult,{},41R002,41R001
8,1.250522,0.2111897,Ttest_indResult,{},41R002,41R002
9,56.073081,0.0,Ttest_indResult,{},41R002,41R012


In [19]:
s = test.pivot_table(index='exp_key', columns=['class', 'ref_key'], values=["pvalue", "statistic"])

In [20]:
s = s.reorder_levels([2,1,0], axis=1).sort_index(axis=1)

In [21]:
s

ref_key,41R002,41R002,41R002,41R002,41R012,41R012,41R012,41R012
class,Ks_2sampResult,Ks_2sampResult,Ttest_indResult,Ttest_indResult,Ks_2sampResult,Ks_2sampResult,Ttest_indResult,Ttest_indResult
Unnamed: 0_level_2,pvalue,statistic,pvalue,statistic,pvalue,statistic,pvalue,statistic
exp_key,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3
41B001,1.0,0.297176,1.8501019999999998e-185,-30.85706,0.0,0.651889,0.0,-76.424326
41B004,1.0,0.062928,5.708183e-06,4.543881,2.700991e-272,0.421289,6.280976e-271,-39.524806
41B006,1.0,0.21937,3.157105e-119,24.070985,1.0,0.24479,1.184674e-98,-21.932094
41B008,1.0,0.258664,3.499832e-116,-23.870924,0.0,0.602412,0.0,-65.671792
41B011,1.0,0.346192,4.0771100000000003e-227,34.512547,1.0,0.119164,1.1126979999999999e-20,-9.396097
41MEU1,1.0,0.182582,1.302347e-80,19.398188,1.0,0.350761,2.335742e-207,-33.104862
41N043,1.0,0.053374,2.823173e-06,-4.691089,0.0,0.466798,0.0,-46.966817
41R001,1.0,0.117919,1.932855e-24,10.273145,1.190893e-220,0.37917,2.424245e-233,-35.840144
41R002,1.0,0.054077,0.2111897,1.250522,0.0,0.468391,0.0,-44.95518
41R012,0.0,0.467471,0.0,56.073081,1.0,0.056212,0.01898931,2.346787


In [24]:
StatisticalTest.apply(ref, exp, mode='product', test='KS-Test')

Unnamed: 0,ref_key,exp_key,test,ref_count,exp_count,ref_mean,exp_mean,ref_std,exp_std,class,params,statistic,pvalue
0,41R002,41B001,KS-Test,8138,2166,33.239739,46.369575,33.239739,17.392898,Ks_2sampResult,{},0.297176,1.0
1,41R002,41B004,KS-Test,8138,2104,33.239739,31.339354,33.239739,16.761214,Ks_2sampResult,{},0.062928,1.0
2,41R002,41B006,KS-Test,8138,2082,33.239739,23.821326,33.239739,15.250386,Ks_2sampResult,{},0.21937,1.0
3,41R002,41B008,KS-Test,8138,2113,33.239739,44.082821,33.239739,18.668751,Ks_2sampResult,{},0.258664,1.0
4,41R002,41B011,KS-Test,8138,2153,33.239739,19.383186,33.239739,16.060619,Ks_2sampResult,{},0.346192,1.0
5,41R002,41MEU1,KS-Test,8138,2152,33.239739,26.425418,33.239739,13.284941,Ks_2sampResult,{},0.182582,1.0
6,41R002,41N043,KS-Test,8138,2168,33.239739,35.300046,33.239739,18.123405,Ks_2sampResult,{},0.053374,1.0
7,41R002,41R001,KS-Test,8138,2156,33.239739,29.144481,33.239739,15.917796,Ks_2sampResult,{},0.117919,1.0
8,41R002,41R002,KS-Test,8138,2071,33.239739,32.735635,33.239739,15.837368,Ks_2sampResult,{},0.054077,1.0
9,41R002,41R012,KS-Test,8138,2165,33.239739,15.205312,33.239739,11.590578,Ks_2sampResult,{},0.467471,0.0
