In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [15]:
rng = pd.date_range('1/1/2011', periods=72, freq='H')
ts = pd.Series(list(range(len(rng))), index=rng)

In [16]:
ts.head(8)

2011-01-01 00:00:00    0
2011-01-01 01:00:00    1
2011-01-01 02:00:00    2
2011-01-01 03:00:00    3
2011-01-01 04:00:00    4
2011-01-01 05:00:00    5
2011-01-01 06:00:00    6
2011-01-01 07:00:00    7
Freq: H, dtype: int64

In [17]:
converted = ts.asfreq('45Min', method='pad')

In [18]:
converted.head(8)

2011-01-01 00:00:00    0
2011-01-01 00:45:00    0
2011-01-01 01:30:00    1
2011-01-01 02:15:00    2
2011-01-01 03:00:00    3
2011-01-01 03:45:00    3
2011-01-01 04:30:00    4
2011-01-01 05:15:00    5
Freq: 45T, dtype: int64

In [19]:
# Does asfreq change the # of rows?

In [20]:
ts.shape
converted.shape

(72,)

(95,)

In [21]:
# What do the different methods do?
# method : {‘backfill’, ‘bfill’, ‘pad’, ‘ffill’, None}

# backfill and bfill are the same
# pad and ffill are the same
# None means fill with NaN

In [22]:
# Might any of these methods have pitfalls from a logical point of view?

In [23]:
# What's the difference between going to a higher frequency and a lower frequency?

In [24]:
converted = ts.asfreq('90Min', method = 'bfill')

In [26]:
converted.head(8)

2011-01-01 00:00:00     0
2011-01-01 01:30:00     2
2011-01-01 03:00:00     3
2011-01-01 04:30:00     5
2011-01-01 06:00:00     6
2011-01-01 07:30:00     8
2011-01-01 09:00:00     9
2011-01-01 10:30:00    11
Freq: 90T, dtype: int64

In [None]:
# What's different logically about going to a higher frequency vs a lower frequency?
# What do you want to do when switching to a lower freqeuncy that is not logical when switching to a higher frequency?

# lower frequency skips rows in the series, values still align -- higher frequency requires filling new rows with values or NAs

In [29]:
ts.resample('2H').mean()[1:10]

2011-01-01 02:00:00     2.5
2011-01-01 04:00:00     4.5
2011-01-01 06:00:00     6.5
2011-01-01 08:00:00     8.5
2011-01-01 10:00:00    10.5
2011-01-01 12:00:00    12.5
2011-01-01 14:00:00    14.5
2011-01-01 16:00:00    16.5
2011-01-01 18:00:00    18.5
Freq: 2H, dtype: float64

In [None]:
# What if you want to downsample and you don't want to ffill or bfill?

# fill with None

In [None]:
# What is the difference between .resample() and .asfreq()?

# asfreq drops data, resample keeps that data and averages the values from the included ranges

In [None]:
# What are some special things you can do with .resample() you can't do with .asfreq()?

# irregular time series

In [35]:
irreg_ts = ts[list(np.random.choice(a = list(range(len(ts))), size = 10, replace=False))]

In [36]:
irreg_ts

2011-01-02 23:00:00    47
2011-01-01 22:00:00    22
2011-01-01 07:00:00     7
2011-01-01 17:00:00    17
2011-01-01 21:00:00    21
2011-01-03 18:00:00    66
2011-01-03 19:00:00    67
2011-01-02 13:00:00    37
2011-01-02 01:00:00    25
2011-01-03 08:00:00    56
dtype: int64

In [39]:
irreg_ts = irreg_ts.asfreq('D')
irreg_ts

2011-01-02 23:00:00    47
Freq: D, dtype: int64

In [42]:
irreg_ts = irreg_ts.asfreq('D').sum()
irreg_ts

47

In [43]:
rng = pd.date_range('1/1/2011', periods=72, freq='H')
ts = pd.Series(list(range(len(rng))), index=rng)

In [44]:
irreg_ts = ts[list(np.random.choice(a = list(range(len(ts))), size = 10, replace=False))]
irreg_ts = irreg_ts.sort_index()

In [45]:
irreg_ts

2011-01-01 01:00:00     1
2011-01-01 05:00:00     5
2011-01-01 15:00:00    15
2011-01-01 22:00:00    22
2011-01-02 16:00:00    40
2011-01-03 01:00:00    49
2011-01-03 08:00:00    56
2011-01-03 14:00:00    62
2011-01-03 17:00:00    65
2011-01-03 21:00:00    69
dtype: int64

In [51]:
irreg_ts.resample('H').fillna(method='pad', limit=3)

2011-01-01 01:00:00     1.0
2011-01-01 02:00:00     1.0
2011-01-01 03:00:00     1.0
2011-01-01 04:00:00     1.0
2011-01-01 05:00:00     5.0
2011-01-01 06:00:00     5.0
2011-01-01 07:00:00     5.0
2011-01-01 08:00:00     5.0
2011-01-01 09:00:00     NaN
2011-01-01 10:00:00     NaN
2011-01-01 11:00:00     NaN
2011-01-01 12:00:00     NaN
2011-01-01 13:00:00     NaN
2011-01-01 14:00:00     NaN
2011-01-01 15:00:00    15.0
2011-01-01 16:00:00    15.0
2011-01-01 17:00:00    15.0
2011-01-01 18:00:00    15.0
2011-01-01 19:00:00     NaN
2011-01-01 20:00:00     NaN
2011-01-01 21:00:00     NaN
2011-01-01 22:00:00    22.0
2011-01-01 23:00:00    22.0
2011-01-02 00:00:00    22.0
2011-01-02 01:00:00    22.0
2011-01-02 02:00:00     NaN
2011-01-02 03:00:00     NaN
2011-01-02 04:00:00     NaN
2011-01-02 05:00:00     NaN
2011-01-02 06:00:00     NaN
                       ... 
2011-01-02 16:00:00    40.0
2011-01-02 17:00:00    40.0
2011-01-02 18:00:00    40.0
2011-01-02 19:00:00    40.0
2011-01-02 20:00:00 

In [53]:
irreg_ts.resample('H')

DatetimeIndexResampler [freq=<Hour>, axis=0, closed=left, label=left, convention=start, base=0]

In [55]:
irreg_ts.resample('H').count()

2011-01-01 01:00:00   NaN
2011-01-01 02:00:00   NaN
2011-01-01 03:00:00   NaN
2011-01-01 04:00:00   NaN
2011-01-01 05:00:00   NaN
2011-01-01 06:00:00   NaN
2011-01-01 07:00:00   NaN
2011-01-01 08:00:00   NaN
2011-01-01 09:00:00   NaN
2011-01-01 10:00:00   NaN
2011-01-01 11:00:00   NaN
2011-01-01 12:00:00   NaN
2011-01-01 13:00:00   NaN
2011-01-01 14:00:00   NaN
2011-01-01 15:00:00   NaN
2011-01-01 16:00:00   NaN
2011-01-01 17:00:00   NaN
2011-01-01 18:00:00   NaN
2011-01-01 19:00:00   NaN
2011-01-01 20:00:00   NaN
2011-01-01 21:00:00   NaN
2011-01-01 22:00:00   NaN
2011-01-01 23:00:00   NaN
2011-01-02 00:00:00   NaN
2011-01-02 01:00:00   NaN
2011-01-02 02:00:00   NaN
2011-01-02 03:00:00   NaN
2011-01-02 04:00:00   NaN
2011-01-02 05:00:00   NaN
2011-01-02 06:00:00   NaN
                       ..
2011-01-02 16:00:00   NaN
2011-01-02 17:00:00   NaN
2011-01-02 18:00:00   NaN
2011-01-02 19:00:00   NaN
2011-01-02 20:00:00   NaN
2011-01-02 21:00:00   NaN
2011-01-02 22:00:00   NaN
2011-01-02 2