In [1]:
import itertools
import pandas as pd
from scipy import stats
from odapi.connectors import Irceline
from odapi.settings import settings
settings.logger.setLevel(50)

In [2]:
client = Irceline()

In [3]:
client.meta.columns

Index(['serieid', 'siteid', 'measureid', 'serieunits', 'measurekey',
       'measurename', 'sitekey', 'sitename', 'seriekey', 'molarmass', 'factor',
       'sitelocation', 'sitetype', 'lat', 'lon', 'nuts1id', 'nuts2id',
       'nuts3id', 'nuts1name', 'nuts2name', 'nuts3name', 'lauid', 'launame',
       'started', 'stopped'],
      dtype='object')

In [4]:
sel = client.select(measurekey='NO2', sitekey='41')[["serieid", "sitekey", "measurekey"]]

In [5]:
sel.sample(2)

Unnamed: 0,serieid,sitekey,measurekey
97,6508,41B004,NO2
104,6622,41R012,NO2


In [6]:
t0 = "2019-01-01"
t1 = "2020-01-01"
t2 = "2019-10-01"

In [7]:
recs = client.get_records(sel, start=t0, stop=t1)

In [8]:
recs.sample(2)

Unnamed: 0,serieid,start,stop,value
5956,6516,2019-12-11 08:00:00+00:00,2019-12-11 09:00:00+00:00,37.0
4224,6639,2019-06-25 23:00:00+00:00,2019-06-26 00:00:00+00:00,16.5


In [9]:
data = recs.merge(sel).pivot_table(index='start', columns='sitekey', values='value')

In [10]:
data.sample(5)

sitekey,41B001,41B004,41B006,41B008,41B011,41MEU1,41N043,41R001,41R002,41R012,41WOL1
start,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2019-01-22 05:00:00+00:00,44.0,,,48.0,31.0,35.0,47.0,28.5,32.0,11.5,24.0
2019-07-31 11:00:00+00:00,29.0,10.0,,,0.0,10.5,12.5,9.5,14.0,5.0,5.0
2019-12-08 10:00:00+00:00,30.0,13.0,7.0,23.0,2.0,7.0,15.5,13.5,17.0,6.5,9.0
2019-09-05 16:00:00+00:00,58.5,12.0,,,8.0,10.0,7.5,30.5,26.5,8.5,19.0
2019-01-15 20:00:00+00:00,44.0,26.5,,35.5,7.5,,21.5,18.5,24.5,12.0,19.0


In [11]:
def t_test(ref, exp, **params):
    res = stats.ttest_ind(ref, exp, equal_var=False, nan_policy='omit')
    rep = {k: getattr(res, k) for k in res._fields}
    rep["test"] = "T-Test"
    return rep

In [12]:
def ks_test(ref, exp, **params):
    res = stats.ks_2samp(ref, exp, **params)
    rep = {k: getattr(res, k) for k in res._fields}
    rep["test"] = "KS-Test"
    return rep

In [13]:
def dispatch(ref, exp, callback, mode='product', **params):
    if mode == 'product':
        for x, y in itertools.product(ref, exp):
            res = callback(ref[x], exp[y], **params)
            res.update({
                'ref_key': x, 'exp_key': y,
                #'ref_count': ref[x].count(), 'exp_count': exp[y].count(),
                #'ref_mean': ref[x].mean(), 'exp_mean': exp[y].mean(),
                #'ref_std': ref[x].std(), 'exp_std': exp[y].std()
            })
            yield res

In [14]:
def apply_test(ref, exp, test_func=t_test, mode='product', **params):
    return pd.DataFrame([r for r in dispatch(ref, exp, callback=test_func, mode=mode, **params)])

In [15]:
ref = data.loc[:,['41R002', '41R012']]
exp = data.loc[t2:t1,:]

In [16]:
t = apply_test(ref, exp, test_func=t_test)
ks = apply_test(ref, exp, test_func=ks_test)

In [17]:
test = pd.concat([t, ks])

In [18]:
s = test.pivot_table(index='exp_key', columns=['test', 'ref_key'], values=["pvalue", "statistic"])

In [19]:
s = s.reorder_levels([2,1,0], axis=1).sort_index(axis=1)

In [20]:
s

ref_key,41R002,41R002,41R002,41R002,41R012,41R012,41R012,41R012
test,KS-Test,KS-Test,T-Test,T-Test,KS-Test,KS-Test,T-Test,T-Test
Unnamed: 0_level_2,pvalue,statistic,pvalue,statistic,pvalue,statistic,pvalue,statistic
exp_key,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3
41B001,1.0,0.297176,1.8501019999999998e-185,-30.85706,0.0,0.651889,0.0,-76.424326
41B004,1.0,0.062928,5.708183e-06,4.543881,2.700991e-272,0.421289,6.280976e-271,-39.524806
41B006,1.0,0.21937,3.157105e-119,24.070985,1.0,0.24479,1.184674e-98,-21.932094
41B008,1.0,0.258664,3.499832e-116,-23.870924,0.0,0.602412,0.0,-65.671792
41B011,1.0,0.346192,4.0771100000000003e-227,34.512547,1.0,0.119164,1.1126979999999999e-20,-9.396097
41MEU1,1.0,0.182582,1.302347e-80,19.398188,1.0,0.350761,2.335742e-207,-33.104862
41N043,1.0,0.053374,2.823173e-06,-4.691089,0.0,0.466798,0.0,-46.966817
41R001,1.0,0.117919,1.932855e-24,10.273145,1.190893e-220,0.37917,2.424245e-233,-35.840144
41R002,1.0,0.054077,0.2111897,1.250522,0.0,0.468391,0.0,-44.95518
41R012,0.0,0.467471,0.0,56.073081,1.0,0.056212,0.01898931,2.346787
