# Examples of the use of _pandas.resample()_ to calculate sum and average in different periods

## The input file has measurements of air temperature, relative humidity and radiation, as well as a timestamp field for each measurement. It covers one complete day of measurements, taken approximately every 5 minutes; the measurements are _not_ exactly equidistant in time.

## The function _pandas.resample()_ will be used to help aggregate the measurements of temperature and radiation in two different ways:
    * The temperature will be averaged into hourly temperature
    * The radiation will be integrated

# <center>*</center>

In [17]:
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
plt.ioff()

In [19]:
from scipy import integrate
import pandas as pd
import numpy as np

def integrate_method(self, how='trapz', unit='s'):
    '''Numerically integrate the time series over its time index.

    The sample values are integrated against the index timestamps (time
    since the epoch, expressed in ``unit``), so unevenly spaced samples are
    weighted by their actual spacing.

    @param how: the method to use (trapz by default)
    @param unit: time unit of the integration axis, as a pandas Timedelta
        unit string ('s' by default, e.g. 'min' or 'h')
    @return the integral as a scalar; for 'cumtrapz', the array of running
        integrals
    @raise AttributeError: if ``how`` is not one of the available methods
    @raise ValueError: if ``unit`` is not a valid Timedelta unit, or if
        ``how`` is 'romb' and the samples are not evenly spaced

    Available methods:
     * trapz - trapezoidal
     * cumtrapz - cumulative trapezoidal
     * simps - Simpson's rule
     * romb - Romberg's rule (needs 2**k + 1 evenly spaced samples)

    See http://docs.scipy.org/doc/scipy/reference/integrate.html for the method details.
    or the source code
    https://github.com/scipy/scipy/blob/master/scipy/integrate/quadrature.py
    '''
    # Legacy SciPy name -> current SciPy name. Recent SciPy releases removed
    # the legacy aliases, so the current name is tried first and the legacy
    # one is only a fallback for old installations.
    current_names = {
        'trapz': 'trapezoid',
        'cumtrapz': 'cumulative_trapezoid',
        'simps': 'simpson',
        'romb': 'romb',
    }
    if how not in current_names:
        raise AttributeError(
            'Unsupported integration rule: %s. '
            'Expecting one of these sample-based integration rules: %s'
            % (how, sorted(current_names))
        )
    rule = getattr(integrate, current_names[how], None)
    if rule is None:
        rule = getattr(integrate, how)

    index = self.index
    if isinstance(index, pd.DatetimeIndex):
        # Normalise to nanoseconds explicitly: the index may be stored at a
        # coarser resolution, in which case a plain int64 cast would not
        # yield nanoseconds.
        ticks = index.values.astype('datetime64[ns]').astype(np.int64)
    else:
        ticks = np.asarray(index.astype(np.int64))

    # With the default unit 's' the divisor is exactly 1.0, so the time axis
    # is the epoch time in seconds.
    seconds_per_unit = pd.Timedelta(1, unit=unit).total_seconds()
    x = ticks / 10**9 / seconds_per_unit

    if how == 'romb':
        # romb() takes a scalar sample spacing, not the sample positions.
        steps = np.diff(x)
        if steps.size == 0 or not np.allclose(steps, steps[0]):
            raise ValueError(
                'romb requires at least two evenly spaced samples'
            )
        return rule(self.values, dx=steps[0])

    # x is passed by keyword because simpson() accepts it keyword-only.
    return rule(self.values, x=x)

pd.Series.integrate = integrate_method

In [27]:
# Fix the seed so the random sample data is reproducible.
np.random.seed(1234)
# 30 rows x 4 columns (A-D) of random values, one row every 5 minutes starting at 09:00.
df = pd.DataFrame(np.random.rand(30,4), columns=list('ABCD'), index=pd.date_range('2017-01-01 09:00:00', periods=30, freq='5min'))
df

Unnamed: 0,A,B,C,D
2017-01-01 09:00:00,0.191519,0.622109,0.437728,0.785359
2017-01-01 09:05:00,0.779976,0.272593,0.276464,0.801872
2017-01-01 09:10:00,0.958139,0.875933,0.357817,0.500995
2017-01-01 09:15:00,0.683463,0.712702,0.370251,0.561196
2017-01-01 09:20:00,0.503083,0.013768,0.772827,0.882641
2017-01-01 09:25:00,0.364886,0.615396,0.075381,0.368824
2017-01-01 09:30:00,0.93314,0.651378,0.397203,0.78873
2017-01-01 09:35:00,0.316836,0.568099,0.869127,0.436173
2017-01-01 09:40:00,0.802148,0.143767,0.704261,0.704581
2017-01-01 09:45:00,0.218792,0.924868,0.442141,0.909316


In [28]:
df.resample('1h').mean()  # can a similar method be used to find the numerical integral (e.g. with the trapezoidal rule)?

Unnamed: 0,A,B,C,D
2017-01-01 09:00:00,0.533868,0.509851,0.39949,0.664667
2017-01-01 10:00:00,0.472573,0.424557,0.490752,0.650382
2017-01-01 11:00:00,0.53162,0.621379,0.498367,0.49564


In [29]:
df.resample('1h').apply(integrate_method)

Unnamed: 0,A,B,C,D
2017-01-01 09:00:00,1804.003367,1662.149908,1366.005905,2190.781889
2017-01-01 10:00:00,1572.578649,1310.245707,1677.869046,2174.912217
2017-01-01 11:00:00,824.1822,968.214025,753.223981,685.759116


In [30]:
def custom_function(array_like):
    '''Integrate the samples with the trapezoidal rule, assuming unit spacing.

    Only the values are used; no sample positions are passed to the
    integration rule, so the result is in "value x sample count" units rather
    than "value x time".

    @param array_like: the sample values to integrate
    @return the trapezoidal integral of the samples
    '''
    # Imported locally so the function does not depend on an `sp` alias that
    # the notebook never defines.
    from scipy import integrate

    # `trapezoid` is the current SciPy name; `trapz` is the legacy alias kept
    # as a fallback for old installations.
    trapezoid = getattr(integrate, 'trapezoid', None) or integrate.trapz
    return trapezoid(array_like)

In [31]:
df.resample('1h').apply(custom_function)

Unnamed: 0,A,B,C,D
2017-01-01 09:00:00,6.013345,5.5405,4.553353,7.302606
2017-01-01 10:00:00,5.241929,4.367486,5.592897,7.249707
2017-01-01 11:00:00,2.747274,3.22738,2.510747,2.285864


In [35]:
df.resample('1h').apply(sp.integrate.trapz)

Unnamed: 0,A,B,C,D
2017-01-01 09:00:00,6.013345,5.5405,4.553353,7.302606
2017-01-01 10:00:00,5.241929,4.367486,5.592897,7.249707
2017-01-01 11:00:00,2.747274,3.22738,2.510747,2.285864


In [32]:
df.resample('1h').sum()

Unnamed: 0,A,B,C,D
2017-01-01 09:00:00,6.406417,6.118209,4.793879,7.976002
2017-01-01 10:00:00,5.670875,5.094683,5.889024,7.804584
2017-01-01 11:00:00,3.189717,3.728275,2.990202,2.973839


## Read data

In [10]:
df = pd.read_csv( '../../data/sensors_to_resample.csv', sep=';' )

In [11]:
df.head()

Unnamed: 0,Timestamp,Temperature,Rel_Humidity,PAR_Radiation
0,2018-09-16 00:05:00,22.64,34.649,0.032
1,2018-09-16 00:10:00,22.699,35.28,0.024
2,2018-09-16 00:15:00,22.7,35.237,0.051
3,2018-09-16 00:20:00,22.7,34.618,0.013
4,2018-09-16 00:25:00,22.603,34.133,0.0


In [12]:
# Use the Timestamp column, converted to a DatetimeIndex, as the frame's index;
# the resample() calls further down group the rows by this index.
df.index = pd.DatetimeIndex( df['Timestamp'] )
df.index

DatetimeIndex(['2018-09-16 00:05:00', '2018-09-16 00:10:00',
               '2018-09-16 00:15:00', '2018-09-16 00:20:00',
               '2018-09-16 00:25:00', '2018-09-16 00:30:00',
               '2018-09-16 00:35:01', '2018-09-16 00:40:02',
               '2018-09-16 00:45:03', '2018-09-16 00:50:04',
               ...
               '2018-09-16 23:12:23', '2018-09-16 23:17:24',
               '2018-09-16 23:22:25', '2018-09-16 23:27:25',
               '2018-09-16 23:32:25', '2018-09-16 23:37:25',
               '2018-09-16 23:42:25', '2018-09-16 23:47:25',
               '2018-09-16 23:52:26', '2018-09-16 23:57:27'],
              dtype='datetime64[ns]', name='Timestamp', length=287, freq=None)

In [16]:
df['Temperature'].resample( '1H' ).mean()

Timestamp
2018-09-16 00:00:00    22.537909
2018-09-16 01:00:00    22.256083
2018-09-16 02:00:00    22.310167
2018-09-16 03:00:00    22.398333
2018-09-16 04:00:00    22.500833
2018-09-16 05:00:00    22.487500
2018-09-16 06:00:00    22.346250
2018-09-16 07:00:00    22.653083
2018-09-16 08:00:00    24.792167
2018-09-16 09:00:00    27.014000
2018-09-16 10:00:00    27.389000
2018-09-16 11:00:00    27.646667
2018-09-16 12:00:00    27.552833
2018-09-16 13:00:00    27.328333
2018-09-16 14:00:00    27.101250
2018-09-16 15:00:00    27.274917
2018-09-16 16:00:00    27.174500
2018-09-16 17:00:00    27.224167
2018-09-16 18:00:00    26.784750
2018-09-16 19:00:00    24.898917
2018-09-16 20:00:00    22.893500
2018-09-16 21:00:00    22.744917
2018-09-16 22:00:00    23.057083
2018-09-16 23:00:00    22.476333
Freq: H, Name: Temperature, dtype: float64

In [2]:
rad = df.resample('1H').apply(custom_function)

NameError: name 'df' is not defined

# <center>*</center>

## References and further reading

### Documentation and examples:
https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.resample.html

### To check the aggregation functions, and general examples:
http://benalexkeen.com/resampling-time-series-data-with-pandas/

### About the numerical integration of time series, using scipy:
https://nbviewer.jupyter.org/gist/metakermit/5720498