Demonstrates that data sources can be loaded through a simple, declarative interface.

In [1]:
import numpy as np
import pandas as pd

%load_ext autoreload

In [2]:
%autoreload 2

from ldr import Schema, Filter, Selector
from ingresso import Sales0

In [3]:
# Registry of input files: label -> [CSV path, Schema carrying the series description].
sources = {
    label: [csv_path, Schema(desc=description)]
    for label, csv_path, description in (
        ("fx", "tests/media/gbp-usd.csv", "fx"),
        ("fx2", "tests/media/gbp-usd2.csv", "fx-fxcm"),
        ("sales", "tests/media/sales.csv", "sales"),
        ("weather", "tests/media/london.csv", "weather"),
        ("weather2", "tests/media/metoffice.csv", "weather-metoffice"),
    )
}

In [4]:
# One Filter per registered source, built from its [path, schema] pair.
filters = [Filter(csv_path, schema) for csv_path, schema in sources.values()]

In [5]:
def postprocess0(f0):
    """Return the "value" series of a loaded source.

    Two calls are made on ``f0``, in this order:

    1. ``f0.series("datetime", index=True)`` -- result discarded; presumably
       this registers the datetime column as the index (confirm in ``ldr``).
    2. ``f0.series("value")`` -- its result is returned unchanged.

    Parameters
    ----------
    f0 : object exposing ``series(name, index=...)``
        In this notebook, a ``Filter`` instance.

    Returns
    -------
    Whatever ``f0.series("value")`` returns.
    """
    f0.series("datetime", index=True)
    values = f0.series("value")
    return values

In [6]:
# Run the post-processing step on every filter, keeping the source order.
series = [postprocess0(flt) for flt in filters]

Finally, merge the series together; the monthly Met Office series is passed separately.

In [7]:
# Split the series by name: everything except the Met Office series goes into
# the main frame; the Met Office series is handed to Sales0 on its own (as a list).
s0s = [ser for ser in series if ser.name != "weather-metoffice"]
s1 = [ser for ser in series if ser.name == "weather-metoffice"]
# A list of Series builds one row per series; transposing gives one column each.
df = pd.DataFrame(s0s).T
s0 = Sales0(df, metoffice=s1)
print(s0)

'Sales0: ()'


In [8]:
# Run Sales0.constrain(), then summarise the frame it exposes as `_cdf`:
# descriptive statistics per column, followed by the number of index rows.
s0.constrain()
print(s0._cdf.describe())
print("series-length: {0:d}".format(len(s0._cdf.index)))

               fx      fx-fxcm        sales      weather
count  788.000000  1578.000000  1864.000000  1773.000000
mean     1.366371     1.460032  1115.015558    12.029724
std      0.098996     0.143808   927.505853     5.406125
min      1.204800     1.199990     0.000000    -4.100000
25%      1.292975     1.322625   310.000000     7.900000
50%      1.338350     1.472945   852.500000    11.900000
75%      1.437975     1.570043  1805.250000    16.300000
max      1.577600     1.716680  4864.000000    27.900000
series-length: 1864


In [9]:
# Print the row count of the constrained frame, then display Selector.nulls for it.
# The recorded output has columns N and R per series -- presumably the null
# count and its ratio to the row count; confirm in ldr.Selector.
print(len(s0._cdf))
Selector.nulls(s0._cdf)

1864


Unnamed: 0,N,R
fx,1076,0.6
fx-fxcm,286,0.2
weather,91,0.0
sales,0,0.0


In [10]:
# Call Sales0.fx() and display the constrained frame afterwards.
# The recorded output shows an additional `fx0` column in `_cdf` after the call;
# presumably fx() derives a combined FX series -- confirm in ingresso.Sales0.
fx0 = s0.fx()
s0._cdf

Unnamed: 0_level_0,fx,fx-fxcm,sales,weather,fx0
dt0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2013-06-11,,1.56451,0.0,12.3,1.56451
2013-06-12,,1.56765,3.0,5.8,1.56765
2013-06-13,,1.57186,12.0,,1.57186
2013-06-14,,1.56981,17.0,,1.56981
2013-06-15,,,14.0,,
2013-06-16,,1.57283,27.0,,1.57283
2013-06-17,,1.57204,17.0,,1.57204
2013-06-18,,1.56428,28.0,,1.56428
2013-06-19,,1.54844,40.0,,1.54844
2013-06-20,,1.55076,41.0,,1.55076


In [11]:
# Call Sales0.weather() and then display the `weather` attribute.
# NOTE(review): the recorded output is a month-end indexed series, so the call
# appears to rebind `s0.weather` from a method to its result. If so, re-running
# this cell would fail on the call -- confirm in ingresso.Sales0.
s0.weather()
s0.weather

dt0
2013-01-31    15.183333
2013-02-28    15.033333
2013-03-31    14.550000
2013-04-30    14.933333
2013-05-31    14.916667
2013-06-30    14.383333
2013-07-31    20.056000
2013-08-31    17.452000
2013-09-30    14.008333
2013-10-31    12.648000
2013-11-30     7.750000
2013-12-31     9.072000
2014-01-31     8.874194
2014-02-28    10.075000
2014-03-31    10.500000
2014-04-30    11.283333
2014-05-31    13.364516
2014-06-30    15.406667
2014-07-31    18.006452
2014-08-31    14.232258
2014-09-30    14.976667
2014-10-31    13.380645
2014-11-30    10.240000
2014-12-31     8.454839
2015-01-31     7.216129
2015-02-28     8.325000
2015-03-31     9.119355
2015-04-30    11.160000
2015-05-31    12.490323
2015-06-30    15.343333
                ...    
2016-07-31    17.038710
2016-08-31    16.829032
2016-09-30    15.136667
2016-10-31    11.532258
2016-11-30     8.979310
2016-12-31     8.693548
2017-01-31     6.670000
2017-02-28     9.992857
2017-03-31    10.961290
2017-04-30    10.776667
2017-05-31  

In [12]:
# Monthly mean of the `sales` column of the unconstrained frame (`_df`);
# "M" bins by calendar month and labels each bin with its month-end date.
s0._df['sales'].resample("M").mean()

dt0
2013-01-31            NaN
2013-02-28            NaN
2013-03-31            NaN
2013-04-30            NaN
2013-05-31            NaN
2013-06-30      42.250000
2013-07-31     117.161290
2013-08-31     112.258065
2013-09-30      86.166667
2013-10-31     134.258065
2013-11-30     255.066667
2013-12-31     497.612903
2014-01-31     456.645161
2014-02-28     401.785714
2014-03-31     345.129032
2014-04-30     304.233333
2014-05-31     302.774194
2014-06-30     221.000000
2014-07-31     252.548387
2014-08-31     229.806452
2014-09-30     190.966667
2014-10-31     347.935484
2014-11-30     407.200000
2014-12-31     417.483871
2015-01-31     386.225806
2015-02-28     412.071429
2015-03-31     308.451613
2015-04-30     279.100000
2015-05-31     257.161290
2015-06-30     283.000000
                 ...     
2016-07-31    1319.612903
2016-08-31    1604.032258
2016-09-30    1686.300000
2016-10-31    2079.612903
2016-11-30    2582.233333
2016-12-31    2437.709677
2017-01-31    1726.774194
2017-02-

In [13]:
# Monthly mean of the `fx` column of the unconstrained frame (`_df`);
# months with no observations come out as NaN, as the recorded output shows.
s0._df['fx'].resample("M").mean()

dt0
2013-01-31         NaN
2013-02-28         NaN
2013-03-31         NaN
2013-04-30         NaN
2013-05-31         NaN
2013-06-30         NaN
2013-07-31         NaN
2013-08-31         NaN
2013-09-30         NaN
2013-10-31         NaN
2013-11-30         NaN
2013-12-31         NaN
2014-01-31         NaN
2014-02-28         NaN
2014-03-31         NaN
2014-04-30         NaN
2014-05-31         NaN
2014-06-30         NaN
2014-07-31         NaN
2014-08-31         NaN
2014-09-30         NaN
2014-10-31         NaN
2014-11-30         NaN
2014-12-31         NaN
2015-01-31         NaN
2015-02-28         NaN
2015-03-31         NaN
2015-04-30         NaN
2015-05-31         NaN
2015-06-30         NaN
                ...   
2016-07-31    1.314776
2016-08-31    1.310461
2016-09-30    1.314859
2016-10-31    1.233462
2016-11-30    1.244145
2016-12-31    1.247350
2017-01-31    1.235336
2017-02-28    1.248875
2017-03-31    1.234743
2017-04-30    1.264385
2017-05-31    1.292213
2017-06-30    1.281214
2017-07

In [14]:
# Pick the Met Office filter (first match; IndexError if there is none).
fw = [flt for flt in filters if flt._schema.desc == "weather-metoffice"][0]
x0 = fw._data
# x0 refers to the filter's own frame, so this constant day-of-month column is
# written into fw._data itself (visible in the frame displayed below).
x0['day'] = 1
fw._data

Unnamed: 0_level_0,yyyy,mm,tmax,tmin,af,rain,sun,day
dt0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1948-01,1948,1,8.9,3.3,,85.0,,1
1948-02,1948,2,7.9,2.2,,26.0,,1
1948-03,1948,3,14.2,3.8,,14.0,,1
1948-04,1948,4,15.4,5.1,,35.0,,1
1948-05,1948,5,18.1,6.9,,57.0,,1
1948-06,1948,6,19.1,10.3,,67.0,,1
1948-07,1948,7,21.7,12.0,,21.0,,1
1948-08,1948,8,20.8,11.7,,67.0,,1
1948-09,1948,9,19.6,10.2,,35.0,,1
1948-10,1948,10,14.9,6.0,,50.0,,1


In [15]:
# Rename the Met Office date-part columns to `year` / `month`, the names used
# when assembling dates from this frame; `index=str` additionally maps every
# index label through str(). rename returns a new frame, leaving x0 as it was.
# (A stray bare `x0` expression was dropped: it was not the cell's last
# statement, so it displayed nothing.)
x1 = x0.rename(index=str, columns={"yyyy": "year", "mm": "month"})

In [16]:
# Assemble one timestamp per row from the year/month/day columns, then display
# the result at monthly period resolution.
x2 = pd.to_datetime(x1.loc[:, ['year', 'month', 'day']])
x2.dt.to_period('M')

dt0
1948-01   1948-01
1948-02   1948-02
1948-03   1948-03
1948-04   1948-04
1948-05   1948-05
1948-06   1948-06
1948-07   1948-07
1948-08   1948-08
1948-09   1948-09
1948-10   1948-10
1948-11   1948-11
1948-12   1948-12
1949-01   1949-01
1949-02   1949-02
1949-03   1949-03
1949-04   1949-04
1949-05   1949-05
1949-06   1949-06
1949-07   1949-07
1949-08   1949-08
1949-09   1949-09
1949-10   1949-10
1949-11   1949-11
1949-12   1949-12
1950-01   1950-01
1950-02   1950-02
1950-03   1950-03
1950-04   1950-04
1950-05   1950-05
1950-06   1950-06
            ...  
2016-01   2016-01
2016-02   2016-02
2016-03   2016-03
2016-04   2016-04
2016-05   2016-05
2016-06   2016-06
2016-07   2016-07
2016-08   2016-08
2016-09   2016-09
2016-10   2016-10
2016-11   2016-11
2016-12   2016-12
2017-01   2017-01
2017-02   2017-02
2017-03   2017-03
2017-04   2017-04
2017-05   2017-05
2017-06   2017-06
2017-07   2017-07
2017-08   2017-08
2017-09   2017-09
2017-10   2017-10
2017-11   2017-11
2017-12   2017-12
2018-0

In [17]:
# Probe for an `fx_op` attribute on s0; the 'fail' default is returned when it
# is absent, which is what the recorded output shows.
getattr(s0, 'fx_op', 'fail')

'fail'