In [6]:
import pandas as pd
import numpy as np

In [7]:
# Build the demo series: 72 consecutive hourly timestamps starting 2011-01-01,
# holding the values 0..71 so each value equals its hour offset from the start.
# The lowercase 'h' alias is used because uppercase 'H' is deprecated in recent
# pandas releases; older releases accept the lowercase form as well.
rng = pd.date_range('1/1/2011', periods=72, freq='h')
ts = pd.Series(range(len(rng)), index=rng)

In [8]:
ts.head()

2011-01-01 00:00:00    0
2011-01-01 01:00:00    1
2011-01-01 02:00:00    2
2011-01-01 03:00:00    3
2011-01-01 04:00:00    4
Freq: H, dtype: int32

In [15]:
# Re-index the hourly series onto a 45-minute grid. Grid points that do not coincide
# with an original timestamp take the most recent earlier value (forward fill).
converted = ts.asfreq('45Min', method = 'ffill')

### What does the above code do to the size and content of your series?

In [25]:
# Report how many rows the 45-minute series has, then show its first ten rows.
print(converted.shape)
converted.iloc[:10]

(95,)


2011-01-01 00:00:00    0
2011-01-01 00:45:00    0
2011-01-01 01:30:00    1
2011-01-01 02:15:00    2
2011-01-01 03:00:00    3
2011-01-01 03:45:00    3
2011-01-01 04:30:00    4
2011-01-01 05:15:00    5
2011-01-01 06:00:00    6
2011-01-01 06:45:00    6
Freq: 45T, dtype: int32

In [26]:
# Same view of the original hourly series, for comparison with the converted one.
print(ts.shape)
ts.iloc[:10]

(72,)


2011-01-01 00:00:00    0
2011-01-01 01:00:00    1
2011-01-01 02:00:00    2
2011-01-01 03:00:00    3
2011-01-01 04:00:00    4
2011-01-01 05:00:00    5
2011-01-01 06:00:00    6
2011-01-01 07:00:00    7
2011-01-01 08:00:00    8
2011-01-01 09:00:00    9
Freq: H, dtype: int32

### Take a look at the specs for .asfreq(). What are your options for filling in missing data?

In [159]:
ts.asfreq('45Min', method = None).head()

2011-01-01 00:00:00    0.0
2011-01-01 00:45:00    NaN
2011-01-01 01:30:00    NaN
2011-01-01 02:15:00    NaN
2011-01-01 03:00:00    3.0
Freq: 45T, dtype: float64

### How can you go to less frequent rather than more frequent?

In [27]:
# Downsample by keeping only the observation at every third hour; asfreq selects
# values, it does not aggregate. ('3h' replaces the deprecated '3H' alias.)
converted = ts.asfreq('3h')

In [29]:
converted[0:10]

2011-01-01 00:00:00     0
2011-01-01 03:00:00     3
2011-01-01 06:00:00     6
2011-01-01 09:00:00     9
2011-01-01 12:00:00    12
2011-01-01 15:00:00    15
2011-01-01 18:00:00    18
2011-01-01 21:00:00    21
2011-01-02 00:00:00    24
2011-01-02 03:00:00    27
Freq: 3H, dtype: int32

In [30]:
ts[0:10]

2011-01-01 00:00:00    0
2011-01-01 01:00:00    1
2011-01-01 02:00:00    2
2011-01-01 03:00:00    3
2011-01-01 04:00:00    4
2011-01-01 05:00:00    5
2011-01-01 06:00:00    6
2011-01-01 07:00:00    7
2011-01-01 08:00:00    8
2011-01-01 09:00:00    9
Freq: H, dtype: int32

In [54]:
# Let's try the more flexible .resample(): group into 2-hour bins and average each bin.
# ('2h' replaces the deprecated '2H' alias; .iloc makes the positional slice explicit.)
ts.resample('2h').mean().iloc[:10]

2011-01-01 00:00:00     0.5
2011-01-01 02:00:00     2.5
2011-01-01 04:00:00     4.5
2011-01-01 06:00:00     6.5
2011-01-01 08:00:00     8.5
2011-01-01 10:00:00    10.5
2011-01-01 12:00:00    12.5
2011-01-01 14:00:00    14.5
2011-01-01 16:00:00    16.5
2011-01-01 18:00:00    18.5
Freq: 2H, dtype: float64

In [229]:
# What's particularly useful is that we can use resample to even out irregular time series.
# Draw 5 distinct row positions at random and select them positionally with .iloc;
# plain ts[...] with integer keys on a DatetimeIndex relies on a deprecated positional fallback.
# NOTE: no random seed is set, so the sample (and the outputs below) differ between runs.
irreg_ts = ts.iloc[np.random.choice(a=len(ts), size=5, replace=False)]

In [230]:
irreg_ts

2011-01-03 17:00:00    65
2011-01-01 02:00:00     2
2011-01-03 16:00:00    64
2011-01-03 04:00:00    52
2011-01-01 18:00:00    18
dtype: int32

In [231]:
irreg_ts.asfreq('D')

Series([], Freq: D, dtype: int32)

### Why didn't that work?

In [239]:
# Sort chronologically first: the sample was drawn in random order, and asfreq on the
# unsorted index returned an empty Series in the preceding cell.
# NOTE(review): presumably asfreq spans from the first to the last index label in this
# pandas version, so a first label later than the last gives an empty range — confirm.
irreg_ts = irreg_ts.sort_index()
irreg_ts.asfreq('D')

2011-01-01 02:00:00    2.0
2011-01-02 02:00:00    NaN
2011-01-03 02:00:00    NaN
Freq: D, dtype: float64

In [233]:
irreg_ts.asfreq('D', method = 'ffill')

2011-01-01 02:00:00     2
2011-01-02 02:00:00    18
2011-01-03 02:00:00    18
Freq: D, dtype: int32

In [234]:
irreg_ts.resample('D').count()

2011-01-01    2
2011-01-02    0
2011-01-03    3
Freq: D, dtype: int64

# Try

(1) What if you want to go to a higher frequency, but you don't want to back fill or forward fill? Why might you want to do that?

(2) What is the difference between .resample() and .asfreq()?

(3) How can I forward-fill only a few days? (hint: .fillna())

(4) What are some helpful functions to use with a Resampler object?

In [235]:
type(irreg_ts.asfreq('D'))

pandas.core.series.Series

In [287]:
# More flexible, can do operations on e.g. count(), var(), min(), max(), ...
type(irreg_ts.resample('D'))

pandas.core.resample.DatetimeIndexResampler

In [259]:
irreg_ts.head()

2011-01-01 02:00:00     2
2011-01-01 18:00:00    18
2011-01-03 04:00:00    52
2011-01-03 16:00:00    64
2011-01-03 17:00:00    65
dtype: int32

In [285]:
# Upsample to a 2-hour grid and forward-fill, carrying each observation across at most
# 5 consecutive missing slots; anything beyond that stays NaN.
# Resampler.fillna(method=...) is deprecated in recent pandas, so .ffill(limit=...) is
# called directly, and '2h' replaces the deprecated '2H' alias.
# Original author's note (unverified): "Do not work if set method='ffill' to an integer
# e.g. 500, probably a bug".
irreg_ts.resample('2h').ffill(limit=5)

2011-01-01 02:00:00     2.0
2011-01-01 04:00:00     2.0
2011-01-01 06:00:00     2.0
2011-01-01 08:00:00     2.0
2011-01-01 10:00:00     2.0
2011-01-01 12:00:00     2.0
2011-01-01 14:00:00     NaN
2011-01-01 16:00:00     NaN
2011-01-01 18:00:00    18.0
2011-01-01 20:00:00    18.0
2011-01-01 22:00:00    18.0
2011-01-02 00:00:00    18.0
2011-01-02 02:00:00    18.0
2011-01-02 04:00:00    18.0
2011-01-02 06:00:00     NaN
2011-01-02 08:00:00     NaN
2011-01-02 10:00:00     NaN
2011-01-02 12:00:00     NaN
2011-01-02 14:00:00     NaN
2011-01-02 16:00:00     NaN
2011-01-02 18:00:00     NaN
2011-01-02 20:00:00     NaN
2011-01-02 22:00:00     NaN
2011-01-03 00:00:00     NaN
2011-01-03 02:00:00     NaN
2011-01-03 04:00:00    52.0
2011-01-03 06:00:00    52.0
2011-01-03 08:00:00    52.0
2011-01-03 10:00:00    52.0
2011-01-03 12:00:00    52.0
2011-01-03 14:00:00    52.0
2011-01-03 16:00:00    64.0
Freq: 2H, dtype: float64

## From the Doc

In [214]:
# Example series from the pandas docs: nine one-minute timestamps holding 0..8.
# ('min' replaces the deprecated 'T' alias for minutes.)
index = pd.date_range('1/1/2000', periods=9, freq='min')
series = pd.Series(range(9), index=index)
series

2000-01-01 00:00:00    0
2000-01-01 00:01:00    1
2000-01-01 00:02:00    2
2000-01-01 00:03:00    3
2000-01-01 00:04:00    4
2000-01-01 00:05:00    5
2000-01-01 00:06:00    6
2000-01-01 00:07:00    7
2000-01-01 00:08:00    8
Freq: T, dtype: int32

In [294]:
# Upsample to a 30-second grid: original timestamps keep their value, the newly created
# half-minute slots come out as NaN and are then replaced with the constant 100.
# ('30s' replaces the deprecated '30S' alias.)
series.resample('30s').asfreq().fillna(100).head()

2000-01-01 00:00:00      0.0
2000-01-01 00:00:30    100.0
2000-01-01 00:01:00      1.0
2000-01-01 00:01:30    100.0
2000-01-01 00:02:00      2.0
Freq: 30S, dtype: float64

In [202]:
# Upsample to a 30-second grid and back-fill: each new slot takes the next known value.
# ('30s' replaces the deprecated '30S' alias.)
series.resample('30s').bfill().head()

2000-01-01 00:00:00    0
2000-01-01 00:00:30    1
2000-01-01 00:01:00    1
2000-01-01 00:01:30    2
2000-01-01 00:02:00    2
Freq: 30S, dtype: int32

In [179]:
# Downsample to a 3-minute grid, keeping only the value found at each grid timestamp
# (no aggregation). ('3min' replaces the deprecated '3T' alias.)
series.resample('3min').asfreq()

2000-01-01 00:00:00    0
2000-01-01 00:03:00    3
2000-01-01 00:06:00    6
Freq: 3T, dtype: int32

In [178]:
# Sum the values falling into each 3-minute bin.
# ('3min' replaces the deprecated '3T' alias.)
series.resample('3min').sum()

2000-01-01 00:00:00     3
2000-01-01 00:03:00    12
2000-01-01 00:06:00    21
Freq: 3T, dtype: int32

In [188]:
# A user-defined aggregation: it receives the values of one bin at a time.
def custom_resampler(array_like):
    """Return the sum of one bin's values, increased by 5.

    Parameters
    ----------
    array_like
        The values belonging to a single resampling bin (anything np.sum accepts).

    Returns
    -------
    The result of np.sum(array_like) + 5.
    """
    bin_total = np.sum(array_like)
    return bin_total + 5

# Apply the custom aggregation to every 3-minute bin.
# ('3min' replaces the deprecated '3T' alias.)
series.resample('3min').apply(custom_resampler)

2000-01-01 00:00:00     8
2000-01-01 00:03:00    17
2000-01-01 00:06:00    26
Freq: 3T, dtype: int32