* Used extensively in finacial data analysis space
* Generate fixed frequency dates & time spans
* Time series to a particular frequency
* Non-standard time series

In [1]:
import pandas as pd

#### Create a range of dates

In [2]:
rng = pd.date_range('1/1/2011', periods=72, freq='H')

In [5]:
rng[:10]

DatetimeIndex(['2011-01-01 00:00:00', '2011-01-01 01:00:00',
               '2011-01-01 02:00:00', '2011-01-01 03:00:00',
               '2011-01-01 04:00:00', '2011-01-01 05:00:00',
               '2011-01-01 06:00:00', '2011-01-01 07:00:00',
               '2011-01-01 08:00:00', '2011-01-01 09:00:00'],
              dtype='datetime64[ns]', freq='H')

In [7]:
import numpy as np
ts = pd.Series(data=np.arange(72), index=rng)

In [9]:
ts

2011-01-01 00:00:00     0
2011-01-01 01:00:00     1
2011-01-01 02:00:00     2
2011-01-01 03:00:00     3
2011-01-01 04:00:00     4
2011-01-01 05:00:00     5
2011-01-01 06:00:00     6
2011-01-01 07:00:00     7
2011-01-01 08:00:00     8
2011-01-01 09:00:00     9
2011-01-01 10:00:00    10
2011-01-01 11:00:00    11
2011-01-01 12:00:00    12
2011-01-01 13:00:00    13
2011-01-01 14:00:00    14
2011-01-01 15:00:00    15
2011-01-01 16:00:00    16
2011-01-01 17:00:00    17
2011-01-01 18:00:00    18
2011-01-01 19:00:00    19
2011-01-01 20:00:00    20
2011-01-01 21:00:00    21
2011-01-01 22:00:00    22
2011-01-01 23:00:00    23
2011-01-02 00:00:00    24
2011-01-02 01:00:00    25
2011-01-02 02:00:00    26
2011-01-02 03:00:00    27
2011-01-02 04:00:00    28
2011-01-02 05:00:00    29
                       ..
2011-01-02 18:00:00    42
2011-01-02 19:00:00    43
2011-01-02 20:00:00    44
2011-01-02 21:00:00    45
2011-01-02 22:00:00    46
2011-01-02 23:00:00    47
2011-01-03 00:00:00    48
2011-01-03 0

In [8]:
#Change frequency & fill gaps
ts.asfreq('45min', method='pad')

2011-01-01 00:00:00     0
2011-01-01 00:45:00     0
2011-01-01 01:30:00     1
2011-01-01 02:15:00     2
2011-01-01 03:00:00     3
2011-01-01 03:45:00     3
2011-01-01 04:30:00     4
2011-01-01 05:15:00     5
2011-01-01 06:00:00     6
2011-01-01 06:45:00     6
2011-01-01 07:30:00     7
2011-01-01 08:15:00     8
2011-01-01 09:00:00     9
2011-01-01 09:45:00     9
2011-01-01 10:30:00    10
2011-01-01 11:15:00    11
2011-01-01 12:00:00    12
2011-01-01 12:45:00    12
2011-01-01 13:30:00    13
2011-01-01 14:15:00    14
2011-01-01 15:00:00    15
2011-01-01 15:45:00    15
2011-01-01 16:30:00    16
2011-01-01 17:15:00    17
2011-01-01 18:00:00    18
2011-01-01 18:45:00    18
2011-01-01 19:30:00    19
2011-01-01 20:15:00    20
2011-01-01 21:00:00    21
2011-01-01 21:45:00    21
                       ..
2011-01-03 00:45:00    48
2011-01-03 01:30:00    49
2011-01-03 02:15:00    50
2011-01-03 03:00:00    51
2011-01-03 03:45:00    51
2011-01-03 04:30:00    52
2011-01-03 05:15:00    53
2011-01-03 0

In [11]:
d = ts.resample('D')

In [12]:
d.sum()

2011-01-01     276
2011-01-02     852
2011-01-03    1428
Freq: D, dtype: int32

#### Timestamps vs. Time Spans

In [16]:
from datetime import datetime

pd.Timestamp(datetime(2012, 5, 1))

Timestamp('2012-05-01 00:00:00')

In [17]:
pd.Timestamp('2012-05-01')

Timestamp('2012-05-01 00:00:00')

In [18]:
pd.Timestamp(2012, 5, 1)

Timestamp('2012-05-01 00:00:00')

#### Period 
* many time, its more natural to associate things with span rather than time stamp

In [19]:
pd.Period('2011-01')

Period('2011-01', 'M')

In [20]:
pd.Period('2012-05', freq='D')

Period('2012-05-01', 'D')

#### Timestamp as index

In [21]:
dates = [pd.Timestamp('2012-05-01'), pd.Timestamp('2012-05-02'), pd.Timestamp('2012-05-03')]

In [22]:
dates

[Timestamp('2012-05-01 00:00:00'),
 Timestamp('2012-05-02 00:00:00'),
 Timestamp('2012-05-03 00:00:00')]

In [23]:
ts = pd.Series(np.random.randn(3), dates)

In [24]:
ts

2012-05-01    0.071510
2012-05-02   -0.766779
2012-05-03    1.160882
dtype: float64

In [25]:
ts.index

DatetimeIndex(['2012-05-01', '2012-05-02', '2012-05-03'], dtype='datetime64[ns]', freq=None)

#### Period as index

In [26]:
periods = [pd.Period('2012-01'), pd.Period('2012-02'), pd.Period('2012-03')]

In [27]:
ts = pd.Series(np.random.randn(3), periods)

In [28]:
ts

2012-01    1.188225
2012-02   -1.392019
2012-03   -1.028870
Freq: M, dtype: float64

### Converting to Timestamps

In [31]:
pd.to_datetime(pd.Series(['Jul 31, 2009', 'Nov 22 1985', '2005/11/22']))

0   2009-07-31
1   1985-11-22
2   2005-11-22
dtype: datetime64[ns]

In [32]:
pd.to_datetime('2010/11/12', format='%Y/%m/%d')

Timestamp('2010-11-12 00:00:00')

In [33]:
pd.to_datetime('12-11-2010 00:00', format='%d-%m-%Y %H:%M')

Timestamp('2010-11-12 00:00:00')

### Generating Ranges of Timestamps

#### Genrating date between ranges

In [34]:
start = datetime(2011,1,1)
end = datetime(2012,1,1)

In [36]:
pd.date_range(start,end, freq='M')

DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31', '2011-04-30',
               '2011-05-31', '2011-06-30', '2011-07-31', '2011-08-31',
               '2011-09-30', '2011-10-31', '2011-11-30', '2011-12-31'],
              dtype='datetime64[ns]', freq='M')

#### Generating date of periods

In [37]:
pd.date_range(start, periods=1000, freq='M')

DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31', '2011-04-30',
               '2011-05-31', '2011-06-30', '2011-07-31', '2011-08-31',
               '2011-09-30', '2011-10-31',
               ...
               '2093-07-31', '2093-08-31', '2093-09-30', '2093-10-31',
               '2093-11-30', '2093-12-31', '2094-01-31', '2094-02-28',
               '2094-03-31', '2094-04-30'],
              dtype='datetime64[ns]', length=1000, freq='M')

In [38]:
#business days
pd.bdate_range(start,end)

DatetimeIndex(['2011-01-03', '2011-01-04', '2011-01-05', '2011-01-06',
               '2011-01-07', '2011-01-10', '2011-01-11', '2011-01-12',
               '2011-01-13', '2011-01-14',
               ...
               '2011-12-19', '2011-12-20', '2011-12-21', '2011-12-22',
               '2011-12-23', '2011-12-26', '2011-12-27', '2011-12-28',
               '2011-12-29', '2011-12-30'],
              dtype='datetime64[ns]', length=260, freq='B')

### Custom Frequency Dates

In [39]:
weekmask = 'Mon Wed Fri'

In [40]:
holidays = [datetime(2011, 1, 5), datetime(2011, 3, 14)]

In [45]:
pd.bdate_range(start=start, end=end, freq='B', weekmask=weekmask, holidays=holidays)

DatetimeIndex(['2011-01-03', '2011-01-04', '2011-01-05', '2011-01-06',
               '2011-01-07', '2011-01-10', '2011-01-11', '2011-01-12',
               '2011-01-13', '2011-01-14',
               ...
               '2011-12-19', '2011-12-20', '2011-12-21', '2011-12-22',
               '2011-12-23', '2011-12-26', '2011-12-27', '2011-12-28',
               '2011-12-29', '2011-12-30'],
              dtype='datetime64[ns]', length=260, freq='B')

In [44]:
help(pd.bdate_range)

Help on function bdate_range in module pandas.core.indexes.datetimes:

bdate_range(start=None, end=None, periods=None, freq='B', tz=None, normalize=True, name=None, closed=None, **kwargs)
    Return a fixed frequency datetime index, with business day as the default
    frequency
    
    Parameters
    ----------
    start : string or datetime-like, default None
        Left bound for generating dates
    end : string or datetime-like, default None
        Right bound for generating dates
    periods : integer or None, default None
        If None, must specify start and end
    freq : string or DateOffset, default 'B' (business daily)
        Frequency strings can have multiples, e.g. '5H'
    tz : string or None
        Time zone name for returning localized DatetimeIndex, for example
        Asia/Beijing
    normalize : bool, default False
        Normalize start/end dates to midnight before generating date range
    name : str, default None
        Name for the resulting index
    

Alias   Description
B       business day frequency
C       custom business day frequency (experimental)
D       calendar day frequency
W       weekly frequency
M       month end frequency
BM      business month end frequency
CBM     custom business month end frequency
MS      month start frequency
BMS     business month start frequency
CBMS    custom business month start frequency
Q       quarter end frequency
BQ      business quarter endfrequency
QS      quarter start frequency
BQS     business quarter start frequency
A       year end frequency
BA      business year end frequency
AS      year start frequency
BAS     business year start frequency
BH      business hour frequency
H       hourly frequency
T, min  minutely frequency
S       secondly frequency
L, ms   milliseonds
U, us   microseconds
N       nanoseconds

In [46]:
from pandas.tseries.holiday import USFederalHolidayCalendar

In [49]:
s = USFederalHolidayCalendar()

In [50]:
s.holidays()

DatetimeIndex(['1970-01-01', '1970-02-16', '1970-05-25', '1970-07-03',
               '1970-09-07', '1970-10-12', '1970-11-11', '1970-11-26',
               '1970-12-25', '1971-01-01',
               ...
               '2030-01-01', '2030-01-21', '2030-02-18', '2030-05-27',
               '2030-07-04', '2030-09-02', '2030-10-14', '2030-11-11',
               '2030-11-28', '2030-12-25'],
              dtype='datetime64[ns]', length=594, freq=None)

#### Aggregation

In [52]:
df = pd.DataFrame(np.random.randn(1000, 3),
                      index=pd.date_range('1/1/2012', freq='S', periods=1000),
                      columns=['A', 'B', 'C'])

In [54]:
df.head()

Unnamed: 0,A,B,C
2012-01-01 00:00:00,0.802788,-2.005441,0.756607
2012-01-01 00:00:01,0.025394,0.129377,0.072919
2012-01-01 00:00:02,-0.010655,-1.515564,0.76086
2012-01-01 00:00:03,0.823513,0.323116,1.005932
2012-01-01 00:00:04,0.722588,-0.540678,3.690826


In [56]:
df.resample('3T').mean()

Unnamed: 0,A,B,C
2012-01-01 00:00:00,-0.087409,-0.107333,0.101966
2012-01-01 00:03:00,-0.047289,0.06372,0.098552
2012-01-01 00:06:00,-0.021617,0.02934,0.068955
2012-01-01 00:09:00,0.038062,0.05196,-0.061551
2012-01-01 00:12:00,0.166193,0.047709,6.9e-05
2012-01-01 00:15:00,0.060133,0.035105,-0.038638


In [57]:
r = df.resample('3T')

In [58]:
r.agg([ np.sum, np.mean])

Unnamed: 0_level_0,A,A,B,B,C,C
Unnamed: 0_level_1,sum,mean,sum,mean,sum,mean
2012-01-01 00:00:00,-15.733641,-0.087409,-19.319873,-0.107333,18.353943,0.101966
2012-01-01 00:03:00,-8.51196,-0.047289,11.469646,0.06372,17.739302,0.098552
2012-01-01 00:06:00,-3.891025,-0.021617,5.281223,0.02934,12.411859,0.068955
2012-01-01 00:09:00,6.8511,0.038062,9.352731,0.05196,-11.079093,-0.061551
2012-01-01 00:12:00,29.9148,0.166193,8.587597,0.047709,0.012357,6.9e-05
2012-01-01 00:15:00,6.013325,0.060133,3.510453,0.035105,-3.863761,-0.038638
