In [1]:
import itertools
import logging

import pandas as pd
from scipy import stats

from odapi.connectors import Irceline
from odapi.settings import settings
# Silence the odapi logger for everything below CRITICAL so log messages do not clutter cell output.
# logging.CRITICAL is the named constant for the numeric level 50.
settings.logger.setLevel(logging.CRITICAL)

In [2]:
# Create the Irceline connector; later cells use it to select series and fetch their records.
client = Irceline()

In [3]:
# List the columns of the connector's metadata table; the listing includes
# `measurekey` and `sitekey`, which are used as selection criteria below.
client.meta.columns

Index(['serieid', 'siteid', 'measureid', 'serieunits', 'measurekey',
       'measurename', 'sitekey', 'sitename', 'seriekey', 'molarmass', 'factor',
       'sitelocation', 'sitetype', 'lat', 'lon', 'nuts1id', 'nuts2id',
       'nuts3id', 'nuts1name', 'nuts2name', 'nuts3name', 'lauid', 'launame',
       'started', 'stopped'],
      dtype='object')

In [4]:
# Select the series whose measure key is 'BC' and whose site key matches '41'.
# NOTE(review): the result lists site keys such as 41R012 and 41N043, so `sitekey`
# appears to match by prefix/pattern rather than by equality — confirm against odapi.
sel = client.select(measurekey='BC', sitekey='41')

In [5]:
# Display the metadata rows of the selected series.
sel

Unnamed: 0,serieid,siteid,measureid,serieunits,measurekey,measurename,sitekey,sitename,seriekey,molarmass,...,nuts1id,nuts2id,nuts3id,nuts1name,nuts2name,nuts3name,lauid,launame,started,stopped
14,10607,1122,391,µg/m³,BC,Black Carbon,41R012,Uccle,BC/41R012 (µg/m³),,...,BE1,BE10,BE100,RÉGION DE BRUXELLES-CAPITALE/BRUSSELS HOOFDSTE...,Brussels,de Bruxelles-Capitale/van Brussel-Hoofdstad,BE_21016,Uccle,2012-10-31 01:00:00+00:00,2020-08-22 15:00:00+00:00
17,10693,1117,391,µg/m³,BC,Black Carbon,41N043,Haren,BC/41N043 (µg/m³),,...,BE1,BE10,BE100,RÉGION DE BRUXELLES-CAPITALE/BRUSSELS HOOFDSTE...,Brussels,de Bruxelles-Capitale/van Brussel-Hoofdstad,BE_21004,Brussels,2014-03-15 01:00:00+00:00,2020-08-22 16:00:00+00:00
30,6569,1118,391,µg/m³,BC,Black Carbon,41R001,Molenbeek-Saint-Jean,BC/41R001 (µg/m³),,...,BE1,BE10,BE100,RÉGION DE BRUXELLES-CAPITALE/BRUSSELS HOOFDSTE...,Brussels,de Bruxelles-Capitale/van Brussel-Hoofdstad,BE_21012,Molenbeek-Saint-Jean,2012-06-27 01:00:00+00:00,2020-08-22 16:00:00+00:00
31,6609,1119,391,µg/m³,BC,Black Carbon,41R002,Ixelles,BC/41R002 (µg/m³),,...,BE1,BE10,BE100,RÉGION DE BRUXELLES-CAPITALE/BRUSSELS HOOFDSTE...,Brussels,de Bruxelles-Capitale/van Brussel-Hoofdstad,BE_21009,Ixelles,2012-06-27 01:00:00+00:00,2020-08-22 16:00:00+00:00
32,6633,1124,391,µg/m³,BC,Black Carbon,41WOL1,Wol.St.L.,BC/41WOL1 (µg/m³),,...,BE1,BE10,BE100,RÉGION DE BRUXELLES-CAPITALE/BRUSSELS HOOFDSTE...,Brussels,de Bruxelles-Capitale/van Brussel-Hoofdstad,BE_21018,Woluwe-Saint-Lambert,2012-06-27 01:00:00+00:00,2020-08-22 16:00:00+00:00


In [6]:
# Fetch the records belonging to the selected series.
# NOTE(review): span='30D' presumably requests the most recent 30 days (the output
# below runs from 2020-07-23 to 2020-08-22) — confirm against odapi.
recs = client.get_records(sel, span='30D')

In [7]:
# Display the fetched records (columns: serieid, start, stop, value).
recs

Unnamed: 0,serieid,start,stop,value
0,6633,2020-07-23 17:00:00+00:00,2020-07-23 18:00:00+00:00,0.360
1,6633,2020-07-23 18:00:00+00:00,2020-07-23 19:00:00+00:00,0.355
2,6633,2020-07-23 19:00:00+00:00,2020-07-23 20:00:00+00:00,0.565
3,6633,2020-07-23 20:00:00+00:00,2020-07-23 21:00:00+00:00,0.730
4,6633,2020-07-23 21:00:00+00:00,2020-07-23 22:00:00+00:00,0.965
...,...,...,...,...
714,6569,2020-08-22 11:00:00+00:00,2020-08-22 12:00:00+00:00,0.205
715,6569,2020-08-22 12:00:00+00:00,2020-08-22 13:00:00+00:00,0.330
716,6569,2020-08-22 13:00:00+00:00,2020-08-22 14:00:00+00:00,0.365
717,6569,2020-08-22 14:00:00+00:00,2020-08-22 15:00:00+00:00,0.475


In [8]:
# Attach the series metadata to every record, then reshape to a table with one row
# per interval start and one column per site key.
# The join key is spelled out so the merge cannot silently change meaning if both
# frames ever come to share another column name.
# pivot_table aggregates with its default (mean) when several values share the same
# (start, sitekey) cell.
data = (
    recs
    .merge(sel, on='serieid')
    .pivot_table(index='start', columns='sitekey', values='value')
)

In [9]:
# Display the pivoted table (rows: interval start, columns: site key).
data

sitekey,41N043,41R001,41R002,41R012,41WOL1
start,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-07-23 17:00:00+00:00,1.010,,0.455,0.370,0.360
2020-07-23 18:00:00+00:00,1.300,,0.520,0.315,0.355
2020-07-23 19:00:00+00:00,0.815,,,0.535,0.565
2020-07-23 20:00:00+00:00,0.670,,0.900,0.850,0.730
2020-07-23 21:00:00+00:00,1.410,,1.170,1.045,0.965
...,...,...,...,...,...
2020-08-22 11:00:00+00:00,0.365,0.205,0.260,,0.100
2020-08-22 12:00:00+00:00,0.610,0.330,0.345,,0.115
2020-08-22 13:00:00+00:00,0.415,0.365,0.395,,0.300
2020-08-22 14:00:00+00:00,0.385,0.475,0.610,0.135,0.255


In [10]:
def ttest(ref, exp, **params):
    """Run a two-sample t-test between ``ref`` and ``exp``.

    By default the test does not assume equal variances (``equal_var=False``)
    and drops NaN values (``nan_policy='omit'``). Any extra keyword arguments
    are forwarded to ``scipy.stats.ttest_ind`` and override these defaults.

    Parameters
    ----------
    ref, exp : array-like
        The two samples to compare.
    **params
        Additional options for ``scipy.stats.ttest_ind``
        (for example ``equal_var=True``).

    Returns
    -------
    dict
        One entry per field of the scipy result (``statistic`` and ``pvalue``).
    """
    # Start from the notebook's defaults and let caller-supplied options win.
    options = dict(equal_var=False, nan_policy='omit')
    options.update(params)
    res = stats.ttest_ind(ref, exp, **options)
    return {k: getattr(res, k) for k in res._fields}

In [11]:
def dispatch(ref, exp, callback, mode='product', **params):
    """Apply ``callback`` to pairs of columns from ``ref`` and ``exp``.

    Parameters
    ----------
    ref, exp : mapping of key -> column
        Iterating over each must yield its keys and ``obj[key]`` must return an
        object providing ``count()``, ``mean()`` and ``std()``
        (the notebook passes pandas DataFrames).
    callback : callable
        Called as ``callback(ref[x], exp[y], **params)``; must return a dict,
        which is extended in place with the descriptive fields listed below.
    mode : str
        Pairing strategy. Only ``'product'`` (every ref key against every exp
        key) is implemented.
    **params
        Forwarded unchanged to ``callback``.

    Yields
    ------
    dict
        The callback's result plus ``ref_key``, ``exp_key``, ``ref_count``,
        ``exp_count``, ``ref_mean``, ``exp_mean``, ``ref_std`` and ``exp_std``.

    Raises
    ------
    ValueError
        If ``mode`` is not supported. Because this is a generator, the error
        surfaces when iteration starts, not when ``dispatch`` is called.
    """
    if mode != 'product':
        # Fail loudly: an unrecognised mode would otherwise yield nothing and
        # the caller would end up with an empty result.
        raise ValueError("Unsupported mode %r; only 'product' is implemented" % (mode,))

    # Descriptive statistics depend on one column only, so compute them once
    # per column instead of once per (ref, exp) pair.
    ref_stats = {x: (ref[x].count(), ref[x].mean(), ref[x].std()) for x in ref}
    exp_stats = {y: (exp[y].count(), exp[y].mean(), exp[y].std()) for y in exp}

    for x, y in itertools.product(ref, exp):
        res = callback(ref[x], exp[y], **params)
        ref_count, ref_mean, ref_std = ref_stats[x]
        exp_count, exp_mean, exp_std = exp_stats[y]
        res.update({
            'ref_key': x, 'exp_key': y,
            'ref_count': ref_count, 'exp_count': exp_count,
            'ref_mean': ref_mean, 'exp_mean': exp_mean,
            'ref_std': ref_std, 'exp_std': exp_std
        })
        yield res

In [12]:
def apply_test(ref, exp, test_func=ttest, mode='product', **params):
    """Run ``test_func`` over the column pairs of ``ref`` and ``exp`` and tabulate the results.

    Parameters
    ----------
    ref, exp
        Column collections handed to ``dispatch``.
    test_func : callable
        The test applied to each pair; passed to ``dispatch`` as ``callback``.
    mode : str
        Pairing strategy, passed through to ``dispatch``.
    **params
        Extra keyword arguments, passed through to ``dispatch``.

    Returns
    -------
    pandas.DataFrame
        One row per result dict yielded by ``dispatch``.
    """
    rows = dispatch(ref, exp, callback=test_func, mode=mode, **params)
    return pd.DataFrame(list(rows))

In [13]:
# Bounds of the comparison window: the 100th-from-last and the last index labels of `data`.
t0, t1 = data.index[-100], data.index[-1]

In [14]:
# Reference: all rows of sites 41R002 and 41R012. Compared against: every site,
# restricted to the rows between t0 and t1.
# NOTE(review): the reference rows include the t0..t1 window, so for 41R002 and
# 41R012 the two samples overlap — confirm this is intended for the test.
t = apply_test(
    ref=data.loc[:, ['41R002', '41R012']],
    exp=data.loc[t0:t1],
)

In [15]:
# Display the test results, one row per (ref_key, exp_key) pair.
t

Unnamed: 0,statistic,pvalue,ref_key,exp_key,ref_count,exp_count,ref_mean,exp_mean,ref_std,exp_std
0,-6.296505,8.752872e-09,41R002,41N043,689,94,0.658766,1.222819,0.358999,0.858347
1,-1.631827,0.1056259,41R002,41R001,689,98,0.658766,0.755765,0.358999,0.572658
2,0.983662,0.3272238,41R002,41R002,689,95,0.658766,0.620632,0.358999,0.35357
3,6.61414,3.129906e-09,41R002,41R012,689,59,0.658766,0.44322,0.358999,0.227207
4,11.673347,1.6852070000000002e-23,41R002,41WOL1,689,97,0.658766,0.342423,0.358999,0.230416
5,-7.533078,2.618061e-11,41R012,41N043,543,94,0.549088,1.222819,0.295668,0.858347
6,-3.489846,0.0007042006,41R012,41R001,543,98,0.549088,0.755765,0.295668,0.572658
7,-1.861622,0.06514246,41R012,41R002,543,95,0.549088,0.620632,0.295668,0.35357
8,3.289222,0.00148814,41R012,41R012,543,59,0.549088,0.44322,0.295668,0.227207
9,7.765157,9.544264e-13,41R012,41WOL1,543,97,0.549088,0.342423,0.295668,0.230416


In [16]:
# Cross-tabulate p-values and statistics: one row per reference key, one column per compared key.
t.pivot_table(
    index='ref_key',
    columns='exp_key',
    values=['pvalue', 'statistic'],
)

Unnamed: 0_level_0,pvalue,pvalue,pvalue,pvalue,pvalue,statistic,statistic,statistic,statistic,statistic
exp_key,41N043,41R001,41R002,41R012,41WOL1,41N043,41R001,41R002,41R012,41WOL1
ref_key,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
41R002,8.752872e-09,0.105626,0.327224,3.129906e-09,1.6852070000000002e-23,-6.296505,-1.631827,0.983662,6.61414,11.673347
41R012,2.618061e-11,0.000704,0.065142,0.00148814,9.544264e-13,-7.533078,-3.489846,-1.861622,3.289222,7.765157
