In [1]:
import pandas as pd
import numpy as np

In [30]:
# Seed NumPy's global RNG so the random series is identical on every run.
np.random.seed(42)

# 72 consecutive hourly timestamps starting at 2011-01-01 00:00. An offset
# object is used instead of the 'H' string alias, which newer pandas releases
# deprecate in favour of 'h'; pd.offsets.Hour() works on old and new versions.
rng = pd.date_range('1/1/2011', periods=72, freq=pd.offsets.Hour())
ts = pd.Series(np.random.randn(len(rng)), index=rng)

# Fill methods used when changing frequency:
#   'pad' / 'ffill'      -> propagate the last valid observation forward
#   'backfill' / 'bfill' -> use the NEXT valid observation to fill

In [31]:
# Preview the first five observations of the hourly series.
ts.head()

2011-01-01 00:00:00    0.940027
2011-01-01 01:00:00   -0.634637
2011-01-01 02:00:00   -0.695604
2011-01-01 03:00:00    0.771074
2011-01-01 04:00:00   -1.217686
Freq: H, dtype: float64

In [33]:
# Preview the last five observations; the series ends at 23:00 on 2011-01-03.
ts.tail()

2011-01-03 19:00:00   -0.237645
2011-01-03 20:00:00   -0.219987
2011-01-03 21:00:00    0.734671
2011-01-03 22:00:00   -0.643190
2011-01-03 23:00:00   -0.462990
Freq: H, dtype: float64

In [4]:
# Upsample the hourly series to a 45-minute grid; method='ffill' fills each new
# timestamp with the last hourly observation at or before it.
# NOTE(review): the recorded output below comes from a different random draw
# than the ts.head() output earlier in the notebook (the 00:00 values differ);
# rerun the notebook top to bottom to make the recorded outputs consistent.
converted = ts.asfreq('45Min', method='ffill')
converted.head()

2011-01-01 00:00:00    0.462412
2011-01-01 00:45:00    0.462412
2011-01-01 01:30:00    0.881426
2011-01-01 02:15:00   -0.215881
2011-01-01 03:00:00   -0.139911
Freq: 45T, dtype: float64

In [32]:
# Show the last five rows of `converted`.
# NOTE(review): the recorded output has Freq: 90T, so it was captured after
# `converted` had been reassigned to the 90-minute series defined further down,
# not from the 45-minute series created in the previous cell.
converted.tail()

2011-01-03 16:30:00   -0.003110
2011-01-03 18:00:00    1.314916
2011-01-03 19:30:00   -0.330621
2011-01-03 21:00:00   -0.984721
2011-01-03 22:30:00    1.552623
Freq: 90T, dtype: float64

In [6]:
# 72 hourly points span 71 hours (4260 minutes); a 45-minute grid over that
# span has 95 points, which matches the recorded shape (95,).
converted.shape

(95,)

In [7]:
# Same 45-minute grid but without a fill method: timestamps that do not exist
# in the hourly index (e.g. 00:45, 01:30, 02:15) come back as NaN.
ts.asfreq('45Min')

2011-01-01 00:00:00    0.462412
2011-01-01 00:45:00         NaN
2011-01-01 01:30:00         NaN
2011-01-01 02:15:00         NaN
2011-01-01 03:00:00   -0.139911
2011-01-01 03:45:00         NaN
2011-01-01 04:30:00         NaN
2011-01-01 05:15:00         NaN
2011-01-01 06:00:00    0.809055
2011-01-01 06:45:00         NaN
2011-01-01 07:30:00         NaN
2011-01-01 08:15:00         NaN
2011-01-01 09:00:00    1.440820
2011-01-01 09:45:00         NaN
2011-01-01 10:30:00         NaN
2011-01-01 11:15:00         NaN
2011-01-01 12:00:00    0.419620
2011-01-01 12:45:00         NaN
2011-01-01 13:30:00         NaN
2011-01-01 14:15:00         NaN
2011-01-01 15:00:00    0.882182
2011-01-01 15:45:00         NaN
2011-01-01 16:30:00         NaN
2011-01-01 17:15:00         NaN
2011-01-01 18:00:00   -0.782196
2011-01-01 18:45:00         NaN
2011-01-01 19:30:00         NaN
2011-01-01 20:15:00         NaN
2011-01-01 21:00:00    1.030360
2011-01-01 21:45:00         NaN
                         ...   
2011-01-

In [8]:
# Downsample to 2-hour bins and take the mean of the hourly values in each bin,
# then show bins 1 through 9 by position.
# pd.offsets.Hour(2) replaces the '2H' string alias, which newer pandas
# releases deprecate in favour of '2h'; the offset object works on both.
# .iloc makes it explicit that the slice is positional, not label-based.
ts.resample(pd.offsets.Hour(2)).mean().iloc[1:10]

2011-01-01 02:00:00   -0.177896
2011-01-01 04:00:00    0.619383
2011-01-01 06:00:00   -0.037244
2011-01-01 08:00:00    0.577294
2011-01-01 10:00:00    0.271346
2011-01-01 12:00:00    0.126603
2011-01-01 14:00:00    1.002084
2011-01-01 16:00:00    1.362973
2011-01-01 18:00:00   -0.439205
Freq: 2H, dtype: float64

In [9]:
# Positional slice: rows 1 through 9 of the hourly series, for comparison with
# the 2-hour means above (e.g. the 02:00 bin is the mean of the 02:00 and
# 03:00 values shown here).
ts[1:10]

2011-01-01 01:00:00    0.881426
2011-01-01 02:00:00   -0.215881
2011-01-01 03:00:00   -0.139911
2011-01-01 04:00:00    0.759615
2011-01-01 05:00:00    0.479150
2011-01-01 06:00:00    0.809055
2011-01-01 07:00:00   -0.883544
2011-01-01 08:00:00   -0.286232
2011-01-01 09:00:00    1.440820
Freq: H, dtype: float64

In [29]:
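# Does asfreq change the # of rows?
# Yes. The hourly series has 72 rows, while the 45-minute version has 95
# (see converted.shape above), so changing the frequency changes the row count.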

In [12]:
# What do the different methods do?
# method : {'backfill', 'bfill', 'pad', 'ffill', None}
# 'backfill' and 'bfill' are synonyms, as are 'pad' and 'ffill'.
# With None no filling is done, so new timestamps are left as NaN.

In [34]:
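# Might any of these methods have pitfalls from a logical point of view?
# Backward fill copies the NEXT valid observation into earlier timestamps, so
# it leaks information from the future into the past; that is not valid
# whenever a value must only reflect what was known at its own timestamp.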

In [38]:
# What's the difference between going to a higher frequency and a lower frequency?
# Print the first and the last observation of the series, one after the other.
for edge in (ts.head(1), ts.tail(1)):
    print(edge)
# These two rows show the spread of the time span that the series covers.

2011-01-01    0.940027
Freq: H, dtype: float64
2011-01-03 23:00:00   -0.46299
Freq: H, dtype: float64


In [39]:
# Move to a 90-minute grid; method='bfill' fills timestamps that are missing
# from the hourly index (01:30, 04:30, ...) with the NEXT hourly observation.
# This rebinds `converted`, replacing the 45-minute series created earlier.
converted = ts.asfreq('90Min', method = 'bfill')

In [40]:
# Display the 90-minute, back-filled series.
converted

2011-01-01 00:00:00    0.940027
2011-01-01 01:30:00   -0.695604
2011-01-01 03:00:00    0.771074
2011-01-01 04:30:00    0.539687
2011-01-01 06:00:00   -1.438643
2011-01-01 07:30:00    0.683905
2011-01-01 09:00:00    0.578059
2011-01-01 10:30:00    0.900110
2011-01-01 12:00:00   -1.356953
2011-01-01 13:30:00    1.567405
2011-01-01 15:00:00   -2.761577
2011-01-01 16:30:00   -0.423825
2011-01-01 18:00:00   -0.336476
2011-01-01 19:30:00   -0.121570
2011-01-01 21:00:00   -1.302715
2011-01-01 22:30:00    1.436952
2011-01-02 00:00:00   -1.047040
2011-01-02 01:30:00    1.203952
2011-01-02 03:00:00   -1.225332
2011-01-02 04:30:00    0.392825
2011-01-02 06:00:00   -1.847596
2011-01-02 07:30:00   -0.451615
2011-01-02 09:00:00    0.109033
2011-01-02 10:30:00    0.003708
2011-01-02 12:00:00   -0.215083
2011-01-02 13:30:00    0.170589
2011-01-02 15:00:00   -0.389149
2011-01-02 16:30:00   -0.324817
2011-01-02 18:00:00   -0.675269
2011-01-02 19:30:00   -0.477415
2011-01-02 21:00:00   -0.061441
2011-01-

In [41]:
# What's different logically about going to a higher frequency vs a lower frequency?
# What do you want to do when switching to a lower frequency that is not logical when switching to a higher frequency?

In [18]:
# Downsample to one row per calendar day by summing that day's hourly values.
ts.resample('D').sum()

2011-01-01    11.358644
2011-01-02     7.530831
2011-01-03    -2.045527
Freq: D, dtype: float64

In [45]:
# What if you want to downsample and you don't want to ffill or bfill?
# Calling asfreq without `method` (i.e. method=None) applies no fill at all.
# NOTE: '45Min' is a HIGHER frequency than the hourly series, so this call
# actually upsamples (72 -> 95 rows); the new timestamps are left as NaN.
converted_none = ts.asfreq('45Min')
converted_none.shape
# With method=None no filling is done. To really downsample, pass a lower
# frequency (a longer interval) than the original hourly spacing.

(95,)

In [20]:
# What is the difference between .resample() and .asfreq()?


In [21]:
# What are some special things you can do with .resample() you can't do with .asfreq()?


In [22]:
# Rebuild the 72-point hourly index and fill it with the ramp 0..71, so every
# value identifies the position (hour offset) it came from.
# pd.offsets.Hour() replaces the 'H' string alias, which newer pandas releases
# deprecate in favour of 'h'; the offset object works on old and new versions.
rng = pd.date_range('1/1/2011', periods=72, freq=pd.offsets.Hour())
ts = pd.Series(np.arange(len(rng)), index=rng)

In [23]:
# Confirm that ts is a pandas Series.
type(ts)

pandas.core.series.Series

In [24]:
# Build an irregular series by drawing 10 distinct row positions at random
# (no replacement), kept in the order drawn.
# Passing the integer len(ts) to np.random.choice samples from range(len(ts)).
positions = np.random.choice(len(ts), size=10, replace=False)
# Select by position with .iloc: plain ts[...] with integer keys on a
# DatetimeIndex relies on a positional fallback that newer pandas deprecates
# (integer keys are to be treated as labels instead).
irreg_ts = ts.iloc[positions]

In [25]:
# Display the 10 sampled observations; note they are not in chronological order.
irreg_ts

2011-01-01 17:00:00    17
2011-01-03 08:00:00    56
2011-01-03 07:00:00    55
2011-01-03 11:00:00    59
2011-01-03 18:00:00    66
2011-01-02 13:00:00    37
2011-01-01 07:00:00     7
2011-01-02 01:00:00    25
2011-01-01 01:00:00     1
2011-01-03 03:00:00    51
dtype: int64

In [26]:
# asfreq on the irregular sample returns only two rows in the recorded output:
# the daily grid starts at the first entry of irreg_ts (17:00 on 2011-01-01),
# and every observation that does not fall exactly on that grid is dropped.
# NOTE(review): the grid anchor presumably depends on the pandas version and on
# irreg_ts being unsorted; confirm before relying on this output.
irreg_ts.asfreq('D')

2011-01-01 17:00:00    17.0
2011-01-02 17:00:00     NaN
Freq: D, dtype: float64

In [27]:
# resample groups the irregular observations by calendar day, so every one of
# the 10 points is counted (3 + 2 + 5 in the recorded output).
irreg_ts.resample('D').count()

2011-01-01    3
2011-01-02    2
2011-01-03    5
Freq: D, dtype: int64

In [46]:
# With resample one can apply aggregation functions such as count, var, mean and std.