In [1]:
import pandas as pd
import numpy as np

In [2]:
# Seed NumPy's global RNG so the random series (and every output derived from
# it) is reproducible under Restart Kernel -> Run All.
np.random.seed(42)

# 72 consecutive hourly timestamps starting at 2011-01-01 00:00 (three days).
# 'h' is the hourly alias; the uppercase 'H' spelling is deprecated since
# pandas 2.2.
rng = pd.date_range('1/1/2011', periods=72, freq='h')

# One standard-normal draw per timestamp, indexed by time.
ts = pd.Series(np.random.randn(len(rng)), index=rng)

In [3]:
# Number of observations in the hourly series.
ts.size

72

In [4]:
# Re-index the hourly series onto a 45-minute grid; 'pad' carries the last
# known observation forward into each new timestamp.
converted = ts.asfreq(freq='45Min', method='pad')
converted.head(5)

2011-01-01 00:00:00   -1.117978
2011-01-01 00:45:00   -1.117978
2011-01-01 01:30:00   -0.980373
2011-01-01 02:15:00   -0.346101
2011-01-01 03:00:00   -2.371941
Freq: 45T, dtype: float64

In [5]:
# Does asfreq change the number of rows?

converted.shape[0]

95

Yes. Converting the hourly series to a 45-minute frequency with `.asfreq()` added 23 rows (72 → 95).

In [6]:
# What do the different methods do?
# method : {'backfill', 'bfill', 'pad', 'ffill', None}
#   - 'backfill' and 'bfill' are two spellings of the same strategy
#   - 'pad' and 'ffill' are two spellings of the same strategy
#   - None applies no fill, so timestamps absent from the original index stay NaN
backfill, bfill, pad, ffill, none = (
    ts.asfreq('45Min', method=fill_method)
    for fill_method in ('backfill', 'bfill', 'pad', 'ffill', None)
)

backfill and bfill: use the next valid observation to fill missing data

pad and ffill: propagate the last valid observation forward to next valid observation

In [7]:
# First rows of the 'backfill' result, followed by its total length.
print(backfill.head(), len(backfill), sep='\n')

2011-01-01 00:00:00   -1.117978
2011-01-01 00:45:00   -0.980373
2011-01-01 01:30:00   -0.346101
2011-01-01 02:15:00   -2.371941
2011-01-01 03:00:00   -2.371941
Freq: 45T, dtype: float64
95


In [8]:
# First rows of the 'bfill' result, followed by its total length.
print(bfill.head(), len(bfill), sep='\n')

2011-01-01 00:00:00   -1.117978
2011-01-01 00:45:00   -0.980373
2011-01-01 01:30:00   -0.346101
2011-01-01 02:15:00   -2.371941
2011-01-01 03:00:00   -2.371941
Freq: 45T, dtype: float64
95


In [9]:
# First rows of the 'pad' result, followed by its total length.
print(pad.head(), len(pad), sep='\n')

2011-01-01 00:00:00   -1.117978
2011-01-01 00:45:00   -1.117978
2011-01-01 01:30:00   -0.980373
2011-01-01 02:15:00   -0.346101
2011-01-01 03:00:00   -2.371941
Freq: 45T, dtype: float64
95


In [10]:
# First rows of the 'ffill' result, followed by its total length.
print(ffill.head(), len(ffill), sep='\n')

2011-01-01 00:00:00   -1.117978
2011-01-01 00:45:00   -1.117978
2011-01-01 01:30:00   -0.980373
2011-01-01 02:15:00   -0.346101
2011-01-01 03:00:00   -2.371941
Freq: 45T, dtype: float64
95


In [11]:
# First rows of the unfilled (method=None) result, followed by its total length.
print(none.head(), len(none), sep='\n')

2011-01-01 00:00:00   -1.117978
2011-01-01 00:45:00         NaN
2011-01-01 01:30:00         NaN
2011-01-01 02:15:00         NaN
2011-01-01 03:00:00   -2.371941
Freq: 45T, dtype: float64
95


In [12]:
# Might any of these methods have pitfalls from a logical point of view?

Yes, depending on the method of resampling (backfill/bfill or pad/ffill) we could affect the data in unforeseen ways that skew the results. Backfills are particularly dangerous because they "look ahead" to fill in missing data.

In [13]:
# What's the difference between going to a higher frequency and a lower frequency?

Higher frequency means more rows and needing more data points. In other words, there is more frequent measurement.

In [14]:
# Move to a coarser 90-minute grid; grid points that fall between hourly
# observations are back-filled from the next observation.
converted = ts.asfreq(freq='90Min', method='bfill')
converted.head(5)

2011-01-01 00:00:00   -1.117978
2011-01-01 01:30:00   -0.346101
2011-01-01 03:00:00   -2.371941
2011-01-01 04:30:00    0.148865
2011-01-01 06:00:00    0.235757
Freq: 90T, dtype: float64

In [15]:
# What's different logically about going to a higher frequency vs a lower frequency?
# What do you want to do when switching to a lower frequency that is not logical when switching to a higher frequency?

Going to a higher frequency means more measurements for a given time period. This means you need more data points, but some may not exist and thus need filling techniques.
When you go to a lower frequency, you want to compress the data points in such a way that the values are still representative of the time period.

In [16]:
# Aggregate the hourly observations into one total per calendar day.
daily_totals = ts.resample('D').sum()
daily_totals

2011-01-01    0.348825
2011-01-02   -6.810682
2011-01-03    1.044406
Freq: D, dtype: float64

In [17]:
# What if you want to downsample and you don't want to ffill or bfill?

One should expect a loss of original data in proportion to the scale of the downsample. If one were to upsample with method=None, one should expect NaN values in the extra rows.

In [18]:
# What is the difference between .resample() and .asfreq()?

Resample helps to move to a lower frequency without losing the data points, because it aggregates them; asfreq simply drops observations that don't match the new frequency.

In [19]:
# What are some special things you can do with .resample() you can't do with .asfreq()?

.resample() functions like a groupby object, from which we can derive aggregate statistics (for example `.sum()` per period).