In [1]:
import pandas as pd
import numpy as np
%config Completer.use_jedi=False

In [2]:
import datetime
dti = pd.to_datetime(
        ["1/1/2018", np.datetime64("2018-01-01"), datetime.datetime(2018, 1, 1)]
    )
dti

DatetimeIndex(['2018-01-01', '2018-01-01', '2018-01-01'], dtype='datetime64[ns]', freq=None)

In [3]:
dti = pd.date_range("2018-01-01", periods=3, freq="H")
dti

DatetimeIndex(['2018-01-01 00:00:00', '2018-01-01 01:00:00',
               '2018-01-01 02:00:00'],
              dtype='datetime64[ns]', freq='H')

In [4]:
dti = dti.tz_localize("UTC")
dti

DatetimeIndex(['2018-01-01 00:00:00+00:00', '2018-01-01 01:00:00+00:00',
               '2018-01-01 02:00:00+00:00'],
              dtype='datetime64[ns, UTC]', freq='H')

In [5]:
dti.tz_convert("US/Pacific")

DatetimeIndex(['2017-12-31 16:00:00-08:00', '2017-12-31 17:00:00-08:00',
               '2017-12-31 18:00:00-08:00'],
              dtype='datetime64[ns, US/Pacific]', freq='H')

In [6]:
idx = pd.date_range("2018-01-01", periods=5, freq="H")
ts = pd.Series(range(len(idx)), index=idx)
ts

2018-01-01 00:00:00    0
2018-01-01 01:00:00    1
2018-01-01 02:00:00    2
2018-01-01 03:00:00    3
2018-01-01 04:00:00    4
Freq: H, dtype: int64

In [7]:
ts.resample("2H").mean()

2018-01-01 00:00:00    0.5
2018-01-01 02:00:00    2.5
2018-01-01 04:00:00    4.0
Freq: 2H, dtype: float64

In [8]:
friday = pd.Timestamp("2018-01-05")
friday.day_name()

'Friday'

In [9]:
saturday = friday + pd.Timedelta("1 day")
saturday.day_name()
monday = friday + pd.offsets.BDay()
monday.day_name()

'Monday'

## Overview
---

In [10]:
pd.Series(range(3), index=pd.date_range('2020', periods=3, freq='D'))

2020-01-01    0
2020-01-02    1
2020-01-03    2
Freq: D, dtype: int64

In [18]:
pd.Series(pd.date_range("2000", freq="D", periods=3))

0   2000-01-01
1   2000-01-02
2   2000-01-03
dtype: datetime64[ns]

In [13]:
pd.Series(pd.period_range("1/1/2011", freq="M", periods=3)) 

0    2011-01
1    2011-02
2    2011-03
dtype: period[M]

In [14]:
pd.Series([pd.DateOffset(1), pd.DateOffset(2)])

0         <DateOffset>
1    <2 * DateOffsets>
dtype: object

In [15]:
pd.Series(pd.date_range("1/1/2011", freq="M", periods=3))

0   2011-01-31
1   2011-02-28
2   2011-03-31
dtype: datetime64[ns]

In [19]:
pd.Series(pd.date_range("1/1/2011", freq="M", periods=3))

0   2011-01-31
1   2011-02-28
2   2011-03-31
dtype: datetime64[ns]

In [20]:
pd.Timestamp(pd.NaT)

NaT

In [21]:
pd.Timedelta(pd.NaT)

NaT

In [22]:
pd.Period(pd.NaT)

NaT

In [23]:
pd.NaT == pd.NaT

False

## Timestamps vs. time spans
---

In [24]:
pd.Timestamp(datetime.datetime(2012, 5, 1))

Timestamp('2012-05-01 00:00:00')

In [25]:
pd.Timestamp("2012-05-01")

Timestamp('2012-05-01 00:00:00')

In [26]:
pd.Timestamp(2012, 5, 1)

Timestamp('2012-05-01 00:00:00')

In [27]:
pd.Period("2011-01")

Period('2011-01', 'M')

In [28]:
pd.Period("2012-05", freq="D")

Period('2012-05-01', 'D')

In [29]:
dates = [
        pd.Timestamp("2012-05-01"),
        pd.Timestamp("2012-05-02"),
        pd.Timestamp("2012-05-03"),
    ]
ts = pd.Series(np.random.randn(3), dates)
type(ts.index)

pandas.core.indexes.datetimes.DatetimeIndex

In [30]:
ts.index

DatetimeIndex(['2012-05-01', '2012-05-02', '2012-05-03'], dtype='datetime64[ns]', freq=None)

In [32]:
periods = [pd.Period("2012-01"), pd.Period("2012-02"), pd.Period("2012-03")]
ts = pd.Series(np.random.randn(3), periods)
type(ts.index)

pandas.core.indexes.period.PeriodIndex

In [33]:
ts.index

PeriodIndex(['2012-01', '2012-02', '2012-03'], dtype='period[M]', freq='M')

In [34]:
ts

2012-01   -1.571402
2012-02    2.024102
2012-03    0.681487
Freq: M, dtype: float64

## Converting to timestamps
---

In [36]:
pd.to_datetime(pd.Series(["Jul 31, 2009", "2010-01-10", None]))

0   2009-07-31
1   2010-01-10
2          NaT
dtype: datetime64[ns]

In [37]:
pd.to_datetime(["2005/11/23", "2010.12.31"])

DatetimeIndex(['2005-11-23', '2010-12-31'], dtype='datetime64[ns]', freq=None)

In [38]:
#If you use dates which start with the day first (i.e. European style), you can pass the dayfirst flag:
pd.to_datetime(["04-01-2012 10:00"], dayfirst=True)

DatetimeIndex(['2012-01-04 10:00:00'], dtype='datetime64[ns]', freq=None)

In [39]:
pd.to_datetime(["14-01-2012", "01-14-2012"], dayfirst=True)

DatetimeIndex(['2012-01-14', '2012-01-14'], dtype='datetime64[ns]', freq=None)

In [40]:
pd.to_datetime("2010/11/12")

Timestamp('2010-11-12 00:00:00')

In [41]:
pd.Timestamp("2010/11/12")

Timestamp('2010-11-12 00:00:00')

In [42]:
pd.DatetimeIndex(["2018-01-01", "2018-01-03", "2018-01-05"])

DatetimeIndex(['2018-01-01', '2018-01-03', '2018-01-05'], dtype='datetime64[ns]', freq=None)

In [43]:
pd.DatetimeIndex(["2018-01-01", "2018-01-03", "2018-01-05"], freq="infer")


DatetimeIndex(['2018-01-01', '2018-01-03', '2018-01-05'], dtype='datetime64[ns]', freq='2D')

## Providing a format argument
---

In [44]:
pd.to_datetime("2010/11/12", format="%Y/%m/%d")

Timestamp('2010-11-12 00:00:00')

In [45]:
pd.to_datetime("12-11-2010 00:00", format="%d-%m-%Y %H:%M")

Timestamp('2010-11-12 00:00:00')

## Assembling datetime from multiple DataFrame columns
---

In [46]:
df = pd.DataFrame(
        {"year": [2015, 2016], "month": [2, 3], "day": [4, 5], "hour": [2, 3]}
    )
df

Unnamed: 0,year,month,day,hour
0,2015,2,4,2
1,2016,3,5,3


In [47]:
pd.to_datetime(df)

0   2015-02-04 02:00:00
1   2016-03-05 03:00:00
dtype: datetime64[ns]

In [48]:
pd.to_datetime(df[["year", "month", "day"]])

0   2015-02-04
1   2016-03-05
dtype: datetime64[ns]

## Invalid data
---

In [52]:
## pd.to_datetime(['2009/07/31', 'asd'], errors='raise')
# ValueError: Unknown string format
pd.to_datetime(['2009/07/31', 'asd'], errors='ignore')

Index(['2009/07/31', 'asd'], dtype='object')

In [53]:
pd.to_datetime(['2009/07/31', 'asd'], errors='coerce')

DatetimeIndex(['2009-07-31', 'NaT'], dtype='datetime64[ns]', freq=None)

## Epoch timestamps
---

In [54]:
 pd.to_datetime(
   ....:     [1349720105, 1349806505, 1349892905, 1349979305, 1350065705], unit="s"
   ....: )

DatetimeIndex(['2012-10-08 18:15:05', '2012-10-09 18:15:05',
               '2012-10-10 18:15:05', '2012-10-11 18:15:05',
               '2012-10-12 18:15:05'],
              dtype='datetime64[ns]', freq=None)

In [55]:
pd.to_datetime(
   ....:     [1349720105100, 1349720105200, 1349720105300, 1349720105400, 1349720105500],
   ....:     unit="ms",
   ....: )
   ....: 

DatetimeIndex(['2012-10-08 18:15:05.100000', '2012-10-08 18:15:05.200000',
               '2012-10-08 18:15:05.300000', '2012-10-08 18:15:05.400000',
               '2012-10-08 18:15:05.500000'],
              dtype='datetime64[ns]', freq=None)

In [56]:
pd.Timestamp(1262347200000000000).tz_localize("US/Pacific")

Timestamp('2010-01-01 12:00:00-0800', tz='US/Pacific')

In [57]:
pd.DatetimeIndex([1262347200000000000]).tz_localize("US/Pacific")

DatetimeIndex(['2010-01-01 12:00:00-08:00'], dtype='datetime64[ns, US/Pacific]', freq=None)

## From timestamps to epoch
---

In [59]:
stamps = pd.date_range("2012-10-08 18:15:05", periods=4, freq="D")
stamps

DatetimeIndex(['2012-10-08 18:15:05', '2012-10-09 18:15:05',
               '2012-10-10 18:15:05', '2012-10-11 18:15:05'],
              dtype='datetime64[ns]', freq='D')

In [60]:
(stamps - pd.Timestamp("1970-01-01")) // pd.Timedelta("1s")

Int64Index([1349720105, 1349806505, 1349892905, 1349979305], dtype='int64')

## Using the origin Parameter
---

In [61]:
pd.to_datetime([1, 2, 3], unit="D", origin=pd.Timestamp("1960-01-01"))

DatetimeIndex(['1960-01-02', '1960-01-03', '1960-01-04'], dtype='datetime64[ns]', freq=None)

In [62]:
pd.to_datetime([1, 2, 3], unit="D")

DatetimeIndex(['1970-01-02', '1970-01-03', '1970-01-04'], dtype='datetime64[ns]', freq=None)

## Generating ranges of timestamps
---

In [66]:
dates = [
   ....:     datetime.datetime(2012, 5, 1),
   ....:     datetime.datetime(2012, 5, 2),
   ....:     datetime.datetime(2012, 5, 3),
   ....: ]
print(dates)
index = pd.DatetimeIndex(dates)
index

[datetime.datetime(2012, 5, 1, 0, 0), datetime.datetime(2012, 5, 2, 0, 0), datetime.datetime(2012, 5, 3, 0, 0)]


DatetimeIndex(['2012-05-01', '2012-05-02', '2012-05-03'], dtype='datetime64[ns]', freq=None)

In [67]:
index = pd.Index(dates)
index

DatetimeIndex(['2012-05-01', '2012-05-02', '2012-05-03'], dtype='datetime64[ns]', freq=None)

In [68]:
start = datetime.datetime(2011, 1, 1)
end = datetime.datetime(2012, 1, 1)
index = pd.date_range(start, end)
index

DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03', '2011-01-04',
               '2011-01-05', '2011-01-06', '2011-01-07', '2011-01-08',
               '2011-01-09', '2011-01-10',
               ...
               '2011-12-23', '2011-12-24', '2011-12-25', '2011-12-26',
               '2011-12-27', '2011-12-28', '2011-12-29', '2011-12-30',
               '2011-12-31', '2012-01-01'],
              dtype='datetime64[ns]', length=366, freq='D')

In [69]:
index = pd.bdate_range(start, end)
index

DatetimeIndex(['2011-01-03', '2011-01-04', '2011-01-05', '2011-01-06',
               '2011-01-07', '2011-01-10', '2011-01-11', '2011-01-12',
               '2011-01-13', '2011-01-14',
               ...
               '2011-12-19', '2011-12-20', '2011-12-21', '2011-12-22',
               '2011-12-23', '2011-12-26', '2011-12-27', '2011-12-28',
               '2011-12-29', '2011-12-30'],
              dtype='datetime64[ns]', length=260, freq='B')

In [70]:
pd.date_range(start, periods=1000, freq="M")

DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31', '2011-04-30',
               '2011-05-31', '2011-06-30', '2011-07-31', '2011-08-31',
               '2011-09-30', '2011-10-31',
               ...
               '2093-07-31', '2093-08-31', '2093-09-30', '2093-10-31',
               '2093-11-30', '2093-12-31', '2094-01-31', '2094-02-28',
               '2094-03-31', '2094-04-30'],
              dtype='datetime64[ns]', length=1000, freq='M')

In [73]:
# https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timeseries-offset-aliases
pd.bdate_range(start, periods=250, freq="BQS")

DatetimeIndex(['2011-01-03', '2011-04-01', '2011-07-01', '2011-10-03',
               '2012-01-02', '2012-04-02', '2012-07-02', '2012-10-01',
               '2013-01-01', '2013-04-01',
               ...
               '2071-01-01', '2071-04-01', '2071-07-01', '2071-10-01',
               '2072-01-01', '2072-04-01', '2072-07-01', '2072-10-03',
               '2073-01-02', '2073-04-03'],
              dtype='datetime64[ns]', length=250, freq='BQS-JAN')

In [72]:
pd.date_range(start, end, freq="BM")

DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31', '2011-04-29',
               '2011-05-31', '2011-06-30', '2011-07-29', '2011-08-31',
               '2011-09-30', '2011-10-31', '2011-11-30', '2011-12-30'],
              dtype='datetime64[ns]', freq='BM')

In [74]:
pd.date_range(start, end, freq="W")

DatetimeIndex(['2011-01-02', '2011-01-09', '2011-01-16', '2011-01-23',
               '2011-01-30', '2011-02-06', '2011-02-13', '2011-02-20',
               '2011-02-27', '2011-03-06', '2011-03-13', '2011-03-20',
               '2011-03-27', '2011-04-03', '2011-04-10', '2011-04-17',
               '2011-04-24', '2011-05-01', '2011-05-08', '2011-05-15',
               '2011-05-22', '2011-05-29', '2011-06-05', '2011-06-12',
               '2011-06-19', '2011-06-26', '2011-07-03', '2011-07-10',
               '2011-07-17', '2011-07-24', '2011-07-31', '2011-08-07',
               '2011-08-14', '2011-08-21', '2011-08-28', '2011-09-04',
               '2011-09-11', '2011-09-18', '2011-09-25', '2011-10-02',
               '2011-10-09', '2011-10-16', '2011-10-23', '2011-10-30',
               '2011-11-06', '2011-11-13', '2011-11-20', '2011-11-27',
               '2011-12-04', '2011-12-11', '2011-12-18', '2011-12-25',
               '2012-01-01'],
              dtype='datetime64[ns]', freq='W-S

In [75]:
pd.bdate_range(end=end, periods=20)

DatetimeIndex(['2011-12-05', '2011-12-06', '2011-12-07', '2011-12-08',
               '2011-12-09', '2011-12-12', '2011-12-13', '2011-12-14',
               '2011-12-15', '2011-12-16', '2011-12-19', '2011-12-20',
               '2011-12-21', '2011-12-22', '2011-12-23', '2011-12-26',
               '2011-12-27', '2011-12-28', '2011-12-29', '2011-12-30'],
              dtype='datetime64[ns]', freq='B')

In [76]:
pd.bdate_range(start=start, periods=20)

DatetimeIndex(['2011-01-03', '2011-01-04', '2011-01-05', '2011-01-06',
               '2011-01-07', '2011-01-10', '2011-01-11', '2011-01-12',
               '2011-01-13', '2011-01-14', '2011-01-17', '2011-01-18',
               '2011-01-19', '2011-01-20', '2011-01-21', '2011-01-24',
               '2011-01-25', '2011-01-26', '2011-01-27', '2011-01-28'],
              dtype='datetime64[ns]', freq='B')

In [77]:
 pd.date_range("2018-01-01", "2018-01-05", periods=5)

DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
               '2018-01-05'],
              dtype='datetime64[ns]', freq=None)

In [78]:
 pd.date_range("2018-01-01", "2018-01-05", periods=10)

DatetimeIndex(['2018-01-01 00:00:00', '2018-01-01 10:40:00',
               '2018-01-01 21:20:00', '2018-01-02 08:00:00',
               '2018-01-02 18:40:00', '2018-01-03 05:20:00',
               '2018-01-03 16:00:00', '2018-01-04 02:40:00',
               '2018-01-04 13:20:00', '2018-01-05 00:00:00'],
              dtype='datetime64[ns]', freq=None)

## Custom frequency ranges
---

In [79]:
weekmask = "Mon Wed Fri"
holidays = [datetime.datetime(2011, 1, 5), datetime.datetime(2011, 3, 14)]
pd.bdate_range(start, end, freq="C", weekmask=weekmask, holidays=holidays)

DatetimeIndex(['2011-01-03', '2011-01-07', '2011-01-10', '2011-01-12',
               '2011-01-14', '2011-01-17', '2011-01-19', '2011-01-21',
               '2011-01-24', '2011-01-26',
               ...
               '2011-12-09', '2011-12-12', '2011-12-14', '2011-12-16',
               '2011-12-19', '2011-12-21', '2011-12-23', '2011-12-26',
               '2011-12-28', '2011-12-30'],
              dtype='datetime64[ns]', length=154, freq='C')

In [80]:
pd.bdate_range(start, end, freq="CBMS", weekmask=weekmask)

DatetimeIndex(['2011-01-03', '2011-02-02', '2011-03-02', '2011-04-01',
               '2011-05-02', '2011-06-01', '2011-07-01', '2011-08-01',
               '2011-09-02', '2011-10-03', '2011-11-02', '2011-12-02'],
              dtype='datetime64[ns]', freq='CBMS')

## Timestamp limitations
---

In [81]:
pd.Timestamp.min

Timestamp('1677-09-21 00:12:43.145225')

In [82]:
pd.Timestamp.max

Timestamp('2262-04-11 23:47:16.854775807')

## Indexing
---

In [83]:
rng = pd.date_range(start, end, freq="BM")
ts = pd.Series(np.random.randn(len(rng)), index=rng)
ts.index

DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31', '2011-04-29',
               '2011-05-31', '2011-06-30', '2011-07-29', '2011-08-31',
               '2011-09-30', '2011-10-31', '2011-11-30', '2011-12-30'],
              dtype='datetime64[ns]', freq='BM')

In [84]:
ts[:5].index

DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31', '2011-04-29',
               '2011-05-31'],
              dtype='datetime64[ns]', freq='BM')

In [85]:
ts[::2].index

DatetimeIndex(['2011-01-31', '2011-03-31', '2011-05-31', '2011-07-29',
               '2011-09-30', '2011-11-30'],
              dtype='datetime64[ns]', freq='2BM')

## Partial string indexing
---

In [89]:
print(ts)
ts["1/31/2011"]

2011-01-31   -1.502357
2011-02-28    1.728041
2011-03-31    0.243407
2011-04-29   -0.131327
2011-05-31   -0.010348
2011-06-30    0.712419
2011-07-29   -1.597573
2011-08-31    0.393782
2011-09-30   -0.641338
2011-10-31   -0.170090
2011-11-30    0.395859
2011-12-30    2.066902
Freq: BM, dtype: float64


-1.5023565378809378

In [87]:
ts[datetime.datetime(2011, 12, 25):]

2011-12-30    2.066902
Freq: BM, dtype: float64

In [88]:
ts["10/31/2011":"12/31/2011"]

2011-10-31   -0.170090
2011-11-30    0.395859
2011-12-30    2.066902
Freq: BM, dtype: float64

In [90]:
ts["2011"]

2011-01-31   -1.502357
2011-02-28    1.728041
2011-03-31    0.243407
2011-04-29   -0.131327
2011-05-31   -0.010348
2011-06-30    0.712419
2011-07-29   -1.597573
2011-08-31    0.393782
2011-09-30   -0.641338
2011-10-31   -0.170090
2011-11-30    0.395859
2011-12-30    2.066902
Freq: BM, dtype: float64

In [91]:
ts["2011-6"]

2011-06-30    0.712419
Freq: BM, dtype: float64

In [92]:
dft = pd.DataFrame(
   .....:     np.random.randn(100000, 1),
   .....:     columns=["A"],
   .....:     index=pd.date_range("20130101", periods=100000, freq="T"),
   .....: )
dft

Unnamed: 0,A
2013-01-01 00:00:00,0.678586
2013-01-01 00:01:00,-0.594238
2013-01-01 00:02:00,1.856867
2013-01-01 00:03:00,-0.691965
2013-01-01 00:04:00,0.597848
...,...
2013-03-11 10:35:00,-1.561986
2013-03-11 10:36:00,-1.362676
2013-03-11 10:37:00,0.983200
2013-03-11 10:38:00,-0.521354


In [93]:
dft.loc['2013']

Unnamed: 0,A
2013-01-01 00:00:00,0.678586
2013-01-01 00:01:00,-0.594238
2013-01-01 00:02:00,1.856867
2013-01-01 00:03:00,-0.691965
2013-01-01 00:04:00,0.597848
...,...
2013-03-11 10:35:00,-1.561986
2013-03-11 10:36:00,-1.362676
2013-03-11 10:37:00,0.983200
2013-03-11 10:38:00,-0.521354


In [94]:
dft["2013-1":"2013-2"]

Unnamed: 0,A
2013-01-01 00:00:00,0.678586
2013-01-01 00:01:00,-0.594238
2013-01-01 00:02:00,1.856867
2013-01-01 00:03:00,-0.691965
2013-01-01 00:04:00,0.597848
...,...
2013-02-28 23:55:00,1.180343
2013-02-28 23:56:00,0.577835
2013-02-28 23:57:00,-0.463504
2013-02-28 23:58:00,-0.135294


In [95]:
dft["2013-1":"2013-2-28"]

Unnamed: 0,A
2013-01-01 00:00:00,0.678586
2013-01-01 00:01:00,-0.594238
2013-01-01 00:02:00,1.856867
2013-01-01 00:03:00,-0.691965
2013-01-01 00:04:00,0.597848
...,...
2013-02-28 23:55:00,1.180343
2013-02-28 23:56:00,0.577835
2013-02-28 23:57:00,-0.463504
2013-02-28 23:58:00,-0.135294


In [96]:
dft["2013-1":"2013-2-28 00:00:00"]

Unnamed: 0,A
2013-01-01 00:00:00,0.678586
2013-01-01 00:01:00,-0.594238
2013-01-01 00:02:00,1.856867
2013-01-01 00:03:00,-0.691965
2013-01-01 00:04:00,0.597848
...,...
2013-02-27 23:56:00,-0.943525
2013-02-27 23:57:00,-1.095366
2013-02-27 23:58:00,-0.600660
2013-02-27 23:59:00,-2.203072


In [97]:
dft2 = pd.DataFrame(
   .....:     np.random.randn(20, 1),
   .....:     columns=["A"],
   .....:     index=pd.MultiIndex.from_product(
   .....:         [pd.date_range("20130101", periods=10, freq="12H"), ["a", "b"]]
   .....:     ),
   .....: )
dft2

Unnamed: 0,Unnamed: 1,A
2013-01-01 00:00:00,a,0.817408
2013-01-01 00:00:00,b,-0.236491
2013-01-01 12:00:00,a,1.385577
2013-01-01 12:00:00,b,0.059385
2013-01-02 00:00:00,a,0.839328
2013-01-02 00:00:00,b,0.408915
2013-01-02 12:00:00,a,-1.995596
2013-01-02 12:00:00,b,-0.273039
2013-01-03 00:00:00,a,0.654115
2013-01-03 00:00:00,b,-0.46945


In [98]:
dft2.loc["2013-01-05"]

Unnamed: 0,Unnamed: 1,A
2013-01-05 00:00:00,a,1.088992
2013-01-05 00:00:00,b,0.51955
2013-01-05 12:00:00,a,2.101527
2013-01-05 12:00:00,b,0.422159


In [99]:
idx = pd.IndexSlice
dft2 = dft2.swaplevel(0, 1).sort_index()
dft2.loc[idx[:, "2013-01-05"], :]

Unnamed: 0,Unnamed: 1,A
a,2013-01-05 00:00:00,1.088992
a,2013-01-05 12:00:00,2.101527
b,2013-01-05 00:00:00,0.51955
b,2013-01-05 12:00:00,0.422159


In [100]:
df = pd.DataFrame([0], index=pd.DatetimeIndex(["2019-01-01"], tz="US/Pacific"))
df

Unnamed: 0,0
2019-01-01 00:00:00-08:00,0


In [101]:
df["2019-01-01 12:00:00+04:00":"2019-01-01 13:00:00+04:00"]

Unnamed: 0,0
2019-01-01 00:00:00-08:00,0


## Slice vs. exact match
---

In [102]:
series_minute = pd.Series(
   .....:     [1, 2, 3],
   .....:     pd.DatetimeIndex(
   .....:         ["2011-12-31 23:59:00", "2012-01-01 00:00:00", "2012-01-01 00:02:00"]
   .....:     ),
   .....: )
series_minute

2011-12-31 23:59:00    1
2012-01-01 00:00:00    2
2012-01-01 00:02:00    3
dtype: int64

In [103]:
series_minute.index.resolution

'minute'

In [104]:
series_minute["2011-12-31 23"]

2011-12-31 23:59:00    1
dtype: int64

In [109]:
series_minute["2011-12-31 23:59"]

1

In [110]:
series_minute["2011-12-31 23:59:00"]

1

In [112]:
series_second = pd.Series(
   .....:     [1, 2, 3],
   .....:     pd.DatetimeIndex(
   .....:         ["2011-12-31 23:59:59", "2012-01-01 00:00:00", "2012-01-01 00:00:01"]
   .....:     ),
   .....: )
series_second.index.resolution

'second'

In [113]:
series_second["2011-12-31 23:59"]

2011-12-31 23:59:59    1
dtype: int64

In [114]:
dft_minute = pd.DataFrame(
   .....:     {"a": [1, 2, 3], "b": [4, 5, 6]}, index=series_minute.index
   .....: )
dft_minute

Unnamed: 0,a,b
2011-12-31 23:59:00,1,4
2012-01-01 00:00:00,2,5
2012-01-01 00:02:00,3,6


In [115]:
dft_minute.loc["2011-12-31 23"]

Unnamed: 0,a,b
2011-12-31 23:59:00,1,4


## Exact indexing
---

In [116]:
dft[datetime.datetime(2013, 1, 1): datetime.datetime(2013, 2, 28)]

Unnamed: 0,A
2013-01-01 00:00:00,0.678586
2013-01-01 00:01:00,-0.594238
2013-01-01 00:02:00,1.856867
2013-01-01 00:03:00,-0.691965
2013-01-01 00:04:00,0.597848
...,...
2013-02-27 23:56:00,-0.943525
2013-02-27 23:57:00,-1.095366
2013-02-27 23:58:00,-0.600660
2013-02-27 23:59:00,-2.203072


In [117]:
dft[
   .....:     datetime.datetime(2013, 1, 1, 10, 12, 0): datetime.datetime(
   .....:         2013, 2, 28, 10, 12, 0
   .....:     )
   .....: ]

Unnamed: 0,A
2013-01-01 10:12:00,2.088715
2013-01-01 10:13:00,0.697456
2013-01-01 10:14:00,0.343280
2013-01-01 10:15:00,1.102254
2013-01-01 10:16:00,-0.268270
...,...
2013-02-28 10:08:00,-2.413666
2013-02-28 10:09:00,-0.222284
2013-02-28 10:10:00,0.297521
2013-02-28 10:11:00,-0.398834


## Truncating & fancy indexing
---

In [118]:
rng2 = pd.date_range("2011-01-01", "2012-01-01", freq="W")
ts2 = pd.Series(np.random.randn(len(rng2)), index=rng2)
ts2.truncate(before="2011-11", after="2011-12")

2011-11-06   -0.585787
2011-11-13    0.981992
2011-11-20   -1.015646
2011-11-27   -0.858979
Freq: W-SUN, dtype: float64

In [119]:
ts2["2011-11":"2011-12"]

2011-11-06   -0.585787
2011-11-13    0.981992
2011-11-20   -1.015646
2011-11-27   -0.858979
2011-12-04   -0.765595
2011-12-11   -1.133976
2011-12-18   -0.207292
2011-12-25    0.776680
Freq: W-SUN, dtype: float64

In [120]:
ts2[[0, 2, 6]].index

DatetimeIndex(['2011-01-02', '2011-01-16', '2011-02-13'], dtype='datetime64[ns]', freq=None)

## DateOffset objects
---

In [123]:
ts = pd.Timestamp("2016-10-30 00:00:00", tz="Europe/Helsinki")
print(ts)
ts + pd.Timedelta(days=1)

2016-10-30 00:00:00+03:00


Timestamp('2016-10-30 23:00:00+0200', tz='Europe/Helsinki')

In [122]:
ts + pd.DateOffset(days=1)

Timestamp('2016-10-31 00:00:00+0200', tz='Europe/Helsinki')

In [125]:
friday = pd.Timestamp("2018-01-05")
friday.day_name()

'Friday'

In [126]:
two_business_days = 2 * pd.offsets.BDay()
two_business_days

<2 * BusinessDays>

In [127]:
two_business_days.apply(friday)

Timestamp('2018-01-09 00:00:00')

In [128]:
friday + two_business_days

Timestamp('2018-01-09 00:00:00')

In [129]:
(friday + two_business_days).day_name()

'Tuesday'

In [130]:
ts = pd.Timestamp("2018-01-06 00:00:00")
ts.day_name()

'Saturday'

In [131]:
offset = pd.offsets.BusinessHour(start="09:00")

In [132]:
offset.rollforward(ts)

Timestamp('2018-01-08 09:00:00')

In [133]:
ts + offset

Timestamp('2018-01-08 10:00:00')

In [135]:
ts = pd.Timestamp("2014-01-01 09:00")
day = pd.offsets.Day()
day.apply(ts)

Timestamp('2014-01-02 09:00:00')

In [136]:
day.apply(ts).normalize()

Timestamp('2014-01-02 00:00:00')

In [138]:
ts = pd.Timestamp("2014-01-01 22:00")
hour = pd.offsets.Hour()
hour.apply(ts)

Timestamp('2014-01-01 23:00:00')

In [139]:
hour.apply(ts).normalize()

Timestamp('2014-01-01 00:00:00')

In [142]:
hour.apply(pd.Timestamp("2014-01-01 23:30")).normalize()

Timestamp('2014-01-02 00:00:00')

## Parametric offsets
---

In [148]:
d = datetime.datetime(2008, 8, 18, 9, 0)
d

datetime.datetime(2008, 8, 18, 9, 0)

In [144]:
d + pd.offsets.Week()

Timestamp('2008-08-25 09:00:00')

In [145]:
d + pd.offsets.Week(weekday=4)

Timestamp('2008-08-22 09:00:00')

In [149]:
d - pd.offsets.Week()

Timestamp('2008-08-11 09:00:00')

In [150]:
d + pd.offsets.Week(normalize=True)

Timestamp('2008-08-25 00:00:00')

In [151]:
d - pd.offsets.Week(normalize=True)

Timestamp('2008-08-11 00:00:00')

In [152]:
d + pd.offsets.YearEnd()

Timestamp('2008-12-31 09:00:00')

In [153]:
d + pd.offsets.YearEnd(month=6)

Timestamp('2009-06-30 09:00:00')

## Using offsets with Series / DatetimeIndex
---

In [154]:
rng = pd.date_range("2012-01-01", "2012-01-03")
s = pd.Series(rng)
rng

DatetimeIndex(['2012-01-01', '2012-01-02', '2012-01-03'], dtype='datetime64[ns]', freq='D')

In [155]:
rng + pd.DateOffset(months=2)

DatetimeIndex(['2012-03-01', '2012-03-02', '2012-03-03'], dtype='datetime64[ns]', freq=None)

In [156]:
s + pd.DateOffset(months=2)

0   2012-03-01
1   2012-03-02
2   2012-03-03
dtype: datetime64[ns]

In [157]:
s - pd.DateOffset(months=2)

0   2011-11-01
1   2011-11-02
2   2011-11-03
dtype: datetime64[ns]

In [158]:
s - pd.offsets.Day(2)

0   2011-12-30
1   2011-12-31
2   2012-01-01
dtype: datetime64[ns]

In [159]:
td = s - pd.Series(pd.date_range('2011-12-29', '2011-12-31'))
td

0   3 days
1   3 days
2   3 days
dtype: timedelta64[ns]

In [160]:
td + pd.offsets.Minute(15)

0   3 days 00:15:00
1   3 days 00:15:00
2   3 days 00:15:00
dtype: timedelta64[ns]

## Custom business days
---

In [167]:
weekmask_egypt = "Sun Mon Tue Wed Thu"
holidays = [
       "2012-05-01",
       datetime.datetime(2013, 5, 1),
       np.datetime64("2014-05-01"),
    ]
bday_egypt = pd.offsets.CustomBusinessDay(
         holidays=holidays,
         weekmask=weekmask_egypt,
     )

In [169]:
dt = datetime.datetime(2013, 4, 30)
dt + 2 * bday_egypt

Timestamp('2013-05-05 00:00:00')

In [170]:
dts = pd.date_range(dt, periods=5, freq=bday_egypt)
pd.Series(dts.weekday, dts).map(pd.Series("Mon Tue Wed Thu Fri Sat Sun".split()))

2013-04-30    Tue
2013-05-02    Thu
2013-05-05    Sun
2013-05-06    Mon
2013-05-07    Tue
Freq: C, dtype: object

In [171]:
from pandas.tseries.holiday import USFederalHolidayCalendar
bday_us = pd.offsets.CustomBusinessDay(calendar=USFederalHolidayCalendar())
dt = datetime.datetime(2014, 1, 17)
dt + bday_us

Timestamp('2014-01-21 00:00:00')

## Business hour
---

In [173]:
bh = pd.offsets.BusinessHour()
bh

<BusinessHour: BH=09:00-17:00>

In [174]:
pd.Timestamp("2014-08-01 10:00").weekday()

4

In [175]:
pd.Timestamp("2014-08-01 10:00") + bh

Timestamp('2014-08-01 11:00:00')

In [176]:
pd.Timestamp("2014-08-01 08:00") + bh

Timestamp('2014-08-01 10:00:00')

In [178]:
pd.Timestamp("2014-08-01 16:00") + bh

Timestamp('2014-08-04 09:00:00')

In [179]:
pd.Timestamp("2014-08-01 16:30") + bh

Timestamp('2014-08-04 09:30:00')

In [180]:
pd.Timestamp("2014-08-01 10:00") + pd.offsets.BusinessHour(2)

Timestamp('2014-08-01 12:00:00')

In [181]:
pd.Timestamp("2014-08-01 10:00") + pd.offsets.BusinessHour(-3)

Timestamp('2014-07-31 15:00:00')

In [182]:
bh = pd.offsets.BusinessHour(start="11:00", end=datetime.time(20, 0))
bh

<BusinessHour: BH=11:00-20:00>

In [183]:
pd.Timestamp("2014-08-01 13:00") + bh

Timestamp('2014-08-01 14:00:00')

In [184]:
pd.Timestamp("2014-08-01 09:00") + bh

Timestamp('2014-08-01 12:00:00')

In [185]:
pd.Timestamp("2014-08-01 18:00") + bh

Timestamp('2014-08-01 19:00:00')

In [186]:
bh = pd.offsets.BusinessHour(start="17:00", end="09:00")
bh

<BusinessHour: BH=17:00-09:00>

In [187]:
pd.Timestamp("2014-08-01 17:00") + bh

Timestamp('2014-08-01 18:00:00')

In [188]:
pd.Timestamp("2014-08-01 23:00") + bh

Timestamp('2014-08-02 00:00:00')

In [189]:
pd.Timestamp("2014-08-02 04:00") + bh

Timestamp('2014-08-02 05:00:00')

In [190]:
pd.Timestamp("2014-08-04 04:00") + bh

Timestamp('2014-08-04 18:00:00')

## Custom business hour
---

In [192]:
from pandas.tseries.holiday import USFederalHolidayCalendar
bhour_us = pd.offsets.CustomBusinessHour(calendar=USFederalHolidayCalendar())
dt = datetime.datetime(2014, 1, 17, 15)
dt + bhour_us

Timestamp('2014-01-17 16:00:00')

In [193]:
dt + bhour_us * 2

Timestamp('2014-01-21 09:00:00')

## Offset aliases
---
A number of string aliases are given to useful common time series frequencies. We will refer to these aliases as offset aliases.

Alias

Description

B

business day frequency

C

custom business day frequency

D

calendar day frequency

W

weekly frequency

M

month end frequency

SM

semi-month end frequency (15th and end of month)

BM

business month end frequency

CBM

custom business month end frequency

MS

month start frequency

SMS

semi-month start frequency (1st and 15th)

BMS

business month start frequency

CBMS

custom business month start frequency

Q

quarter end frequency

BQ

business quarter end frequency

QS

quarter start frequency

BQS

business quarter start frequency

A, Y

year end frequency

BA, BY

business year end frequency

AS, YS

year start frequency

BAS, BYS

business year start frequency

BH

business hour frequency

H

hourly frequency

T, min

minutely frequency

S

secondly frequency

L, ms

milliseconds

U, us

microseconds

N

nanoseconds

## Combining aliases
---

In [194]:
pd.date_range(start, periods=5, freq="B")

DatetimeIndex(['2011-01-03', '2011-01-04', '2011-01-05', '2011-01-06',
               '2011-01-07'],
              dtype='datetime64[ns]', freq='B')

In [195]:
pd.date_range(start, periods=5, freq=pd.offsets.BDay())

DatetimeIndex(['2011-01-03', '2011-01-04', '2011-01-05', '2011-01-06',
               '2011-01-07'],
              dtype='datetime64[ns]', freq='B')

In [196]:
pd.date_range(start, periods=10, freq="2h20min")

DatetimeIndex(['2011-01-01 00:00:00', '2011-01-01 02:20:00',
               '2011-01-01 04:40:00', '2011-01-01 07:00:00',
               '2011-01-01 09:20:00', '2011-01-01 11:40:00',
               '2011-01-01 14:00:00', '2011-01-01 16:20:00',
               '2011-01-01 18:40:00', '2011-01-01 21:00:00'],
              dtype='datetime64[ns]', freq='140T')

In [197]:
pd.date_range(start, periods=10, freq="1D10U")

DatetimeIndex([       '2011-01-01 00:00:00', '2011-01-02 00:00:00.000010',
               '2011-01-03 00:00:00.000020', '2011-01-04 00:00:00.000030',
               '2011-01-05 00:00:00.000040', '2011-01-06 00:00:00.000050',
               '2011-01-07 00:00:00.000060', '2011-01-08 00:00:00.000070',
               '2011-01-09 00:00:00.000080', '2011-01-10 00:00:00.000090'],
              dtype='datetime64[ns]', freq='86400000010U')

## Anchored offset semantics
---

In [198]:
pd.Timestamp("2014-01-02") + pd.offsets.MonthBegin(n=1)

Timestamp('2014-02-01 00:00:00')

In [199]:
pd.Timestamp("2014-01-02") + pd.offsets.MonthEnd(n=1)

Timestamp('2014-01-31 00:00:00')

In [200]:
pd.Timestamp("2014-01-02") - pd.offsets.MonthBegin(n=1)

Timestamp('2014-01-01 00:00:00')

In [201]:
pd.Timestamp("2014-01-02") - pd.offsets.MonthEnd(n=1)

Timestamp('2013-12-31 00:00:00')

In [202]:
pd.Timestamp("2014-01-02") + pd.offsets.MonthBegin(n=4)

Timestamp('2014-05-01 00:00:00')

In [203]:
pd.Timestamp("2014-01-02") - pd.offsets.MonthBegin(n=4)

Timestamp('2013-10-01 00:00:00')

## Holidays / holiday calendars
---

## Time series-related instance methods
---

In [204]:
ts = pd.Series(range(len(rng)), index=rng)
ts = ts[:5]
print(ts)
ts.shift(1)

2012-01-01    0
2012-01-02    1
2012-01-03    2
Freq: D, dtype: int64


2012-01-01    NaN
2012-01-02    0.0
2012-01-03    1.0
Freq: D, dtype: float64

In [205]:
ts.shift(5, freq="D")

2012-01-06    0
2012-01-07    1
2012-01-08    2
Freq: D, dtype: int64

In [206]:
ts.shift(5, freq=pd.offsets.BDay())

2012-01-06    0
2012-01-09    1
2012-01-10    2
dtype: int64

## Frequency conversion
---

In [207]:
dr = pd.date_range("1/1/2010", periods=3, freq=3 * pd.offsets.BDay())
ts = pd.Series(np.random.randn(3), index=dr)
ts

2010-01-01    0.837518
2010-01-06   -0.014319
2010-01-11    0.585742
Freq: 3B, dtype: float64

In [208]:
ts.asfreq(pd.offsets.BDay())

2010-01-01    0.837518
2010-01-04         NaN
2010-01-05         NaN
2010-01-06   -0.014319
2010-01-07         NaN
2010-01-08         NaN
2010-01-11    0.585742
Freq: B, dtype: float64

In [209]:
ts.asfreq(pd.offsets.BDay(), method="pad")

2010-01-01    0.837518
2010-01-04    0.837518
2010-01-05    0.837518
2010-01-06   -0.014319
2010-01-07   -0.014319
2010-01-08   -0.014319
2010-01-11    0.585742
Freq: B, dtype: float64

## Resampling
---

In [210]:
rng = pd.date_range("1/1/2012", periods=100, freq="S")
ts = pd.Series(np.random.randint(0, 500, len(rng)), index=rng)
ts

2012-01-01 00:00:00    223
2012-01-01 00:00:01    228
2012-01-01 00:00:02    358
2012-01-01 00:00:03    444
2012-01-01 00:00:04    363
                      ... 
2012-01-01 00:01:35    329
2012-01-01 00:01:36    499
2012-01-01 00:01:37     94
2012-01-01 00:01:38    425
2012-01-01 00:01:39    346
Freq: S, Length: 100, dtype: int32

In [213]:
ts.resample('1Min').sum()

2012-01-01 00:00:00    15273
2012-01-01 00:01:00     9789
Freq: T, dtype: int32

In [214]:
ts.resample('5Min').sum()

2012-01-01    25062
Freq: 5T, dtype: int32

In [215]:
ts.resample("5Min").mean()

2012-01-01    250.62
Freq: 5T, dtype: float64

In [216]:
ts.resample("5Min").ohlc()

Unnamed: 0,open,high,low,close
2012-01-01,223,499,0,346


In [217]:
ts.resample('5Min').max()

2012-01-01    499
Freq: 5T, dtype: int32

## Upsampling
---

In [218]:
ts[:2].resample("250L").asfreq()

2012-01-01 00:00:00.000    223.0
2012-01-01 00:00:00.250      NaN
2012-01-01 00:00:00.500      NaN
2012-01-01 00:00:00.750      NaN
2012-01-01 00:00:01.000    228.0
Freq: 250L, dtype: float64

In [219]:
ts[:2].resample("250L").ffill()

2012-01-01 00:00:00.000    223
2012-01-01 00:00:00.250    223
2012-01-01 00:00:00.500    223
2012-01-01 00:00:00.750    223
2012-01-01 00:00:01.000    228
Freq: 250L, dtype: int32

In [220]:
ts[:2].resample("250L").ffill(limit=2)

2012-01-01 00:00:00.000    223.0
2012-01-01 00:00:00.250    223.0
2012-01-01 00:00:00.500    223.0
2012-01-01 00:00:00.750      NaN
2012-01-01 00:00:01.000    228.0
Freq: 250L, dtype: float64

## Sparse resampling
---

In [221]:
rng = pd.date_range('2004-1-1', periods=100, freq='D') + pd.Timedelta('1s')
ts = pd.Series(range(100), index=rng)
ts

2004-01-01 00:00:01     0
2004-01-02 00:00:01     1
2004-01-03 00:00:01     2
2004-01-04 00:00:01     3
2004-01-05 00:00:01     4
                       ..
2004-04-05 00:00:01    95
2004-04-06 00:00:01    96
2004-04-07 00:00:01    97
2004-04-08 00:00:01    98
2004-04-09 00:00:01    99
Freq: D, Length: 100, dtype: int64

In [222]:
ts.resample('3T').sum()

2004-01-01 00:00:00     0
2004-01-01 00:03:00     0
2004-01-01 00:06:00     0
2004-01-01 00:09:00     0
2004-01-01 00:12:00     0
                       ..
2004-04-08 23:48:00     0
2004-04-08 23:51:00     0
2004-04-08 23:54:00     0
2004-04-08 23:57:00     0
2004-04-09 00:00:00    99
Freq: 3T, Length: 47521, dtype: int64

In [224]:
from functools import partial
from pandas.tseries.frequencies import to_offset

def round(t, freq):
    freq = to_offset(freq)
    return pd.Timestamp((t.value// freq.delta.value) * freq.delta.value)

ts.groupby(partial(round, freq='3T')).sum()

2004-01-01     0
2004-01-02     1
2004-01-03     2
2004-01-04     3
2004-01-05     4
              ..
2004-04-05    95
2004-04-06    96
2004-04-07    97
2004-04-08    98
2004-04-09    99
Length: 100, dtype: int64

## Aggregation
---

In [234]:
df = pd.DataFrame(np.random.randn(1000, 3),
                 index=pd.date_range('1/1/2012', freq='S', periods=1000),
                 columns=["A", "B", "C"])
r = df.resample('3T')
r.sum()

Unnamed: 0,A,B,C
2012-01-01 00:00:00,-12.226821,-6.877334,17.080757
2012-01-01 00:03:00,-11.02467,-5.531785,-9.560157
2012-01-01 00:06:00,-9.758997,-9.613683,13.172642
2012-01-01 00:09:00,5.115704,-36.225925,-2.272217
2012-01-01 00:12:00,-1.860125,5.44895,14.641121
2012-01-01 00:15:00,2.334707,4.017208,-5.700143


In [235]:
r.mean()

Unnamed: 0,A,B,C
2012-01-01 00:00:00,-0.067927,-0.038207,0.094893
2012-01-01 00:03:00,-0.061248,-0.030732,-0.053112
2012-01-01 00:06:00,-0.054217,-0.053409,0.073181
2012-01-01 00:09:00,0.028421,-0.201255,-0.012623
2012-01-01 00:12:00,-0.010334,0.030272,0.08134
2012-01-01 00:15:00,0.023347,0.040172,-0.057001


In [237]:
r['A'].mean()

2012-01-01 00:00:00   -0.067927
2012-01-01 00:03:00   -0.061248
2012-01-01 00:06:00   -0.054217
2012-01-01 00:09:00    0.028421
2012-01-01 00:12:00   -0.010334
2012-01-01 00:15:00    0.023347
Freq: 3T, Name: A, dtype: float64

In [238]:
r[["A", "B"]].mean()

Unnamed: 0,A,B
2012-01-01 00:00:00,-0.067927,-0.038207
2012-01-01 00:03:00,-0.061248,-0.030732
2012-01-01 00:06:00,-0.054217,-0.053409
2012-01-01 00:09:00,0.028421,-0.201255
2012-01-01 00:12:00,-0.010334,0.030272
2012-01-01 00:15:00,0.023347,0.040172


In [239]:
r["A"].agg([np.sum, np.mean, np.std])

Unnamed: 0,sum,mean,std
2012-01-01 00:00:00,-12.226821,-0.067927,0.917309
2012-01-01 00:03:00,-11.02467,-0.061248,1.001097
2012-01-01 00:06:00,-9.758997,-0.054217,1.017009
2012-01-01 00:09:00,5.115704,0.028421,0.944827
2012-01-01 00:12:00,-1.860125,-0.010334,0.996615
2012-01-01 00:15:00,2.334707,0.023347,0.967718


In [240]:
r.agg([np.sum, np.mean])

Unnamed: 0_level_0,A,A,B,B,C,C
Unnamed: 0_level_1,sum,mean,sum,mean,sum,mean
2012-01-01 00:00:00,-12.226821,-0.067927,-6.877334,-0.038207,17.080757,0.094893
2012-01-01 00:03:00,-11.02467,-0.061248,-5.531785,-0.030732,-9.560157,-0.053112
2012-01-01 00:06:00,-9.758997,-0.054217,-9.613683,-0.053409,13.172642,0.073181
2012-01-01 00:09:00,5.115704,0.028421,-36.225925,-0.201255,-2.272217,-0.012623
2012-01-01 00:12:00,-1.860125,-0.010334,5.44895,0.030272,14.641121,0.08134
2012-01-01 00:15:00,2.334707,0.023347,4.017208,0.040172,-5.700143,-0.057001


## Iterating through groups
---

In [244]:
small = pd.Series(
   .....:     range(6),
   .....:     index=pd.to_datetime(
   .....:         [
   .....:             "2017-01-01T00:00:00",
   .....:             "2017-01-01T00:30:00",
   .....:             "2017-01-01T00:31:00",
   .....:             "2017-01-01T01:00:00",
   .....:             "2017-01-01T03:00:00",
   .....:             "2017-01-01T03:05:00",
   .....:         ]
   .....:     ),
   .....: )
resampled = small.resample('H')
for name, group in resampled:
    print("Grounp: ", name)
    print("-" *27)
    print(group)

Grounp:  2017-01-01 00:00:00
---------------------------
2017-01-01 00:00:00    0
2017-01-01 00:30:00    1
2017-01-01 00:31:00    2
dtype: int64
Grounp:  2017-01-01 01:00:00
---------------------------
2017-01-01 01:00:00    3
dtype: int64
Grounp:  2017-01-01 02:00:00
---------------------------
Series([], dtype: int64)
Grounp:  2017-01-01 03:00:00
---------------------------
2017-01-01 03:00:00    4
2017-01-01 03:05:00    5
dtype: int64


## Use origin or offset to adjust the start of the bins
---

In [245]:
start, end = "2000-10-01 23:30:00", "2000-10-02 00:30:00"
middle = "2020-10-02 00:00:00"

rng = pd.date_range(start, end, freq='7min')
rng

DatetimeIndex(['2000-10-01 23:30:00', '2000-10-01 23:37:00',
               '2000-10-01 23:44:00', '2000-10-01 23:51:00',
               '2000-10-01 23:58:00', '2000-10-02 00:05:00',
               '2000-10-02 00:12:00', '2000-10-02 00:19:00',
               '2000-10-02 00:26:00'],
              dtype='datetime64[ns]', freq='7T')

In [246]:
ts = pd.Series(np.arange(len(rng)) *3, index=rng)
ts

2000-10-01 23:30:00     0
2000-10-01 23:37:00     3
2000-10-01 23:44:00     6
2000-10-01 23:51:00     9
2000-10-01 23:58:00    12
2000-10-02 00:05:00    15
2000-10-02 00:12:00    18
2000-10-02 00:19:00    21
2000-10-02 00:26:00    24
Freq: 7T, dtype: int32

In [247]:
ts.resample("17min", origin="start_day").sum()

2000-10-01 23:14:00     0
2000-10-01 23:31:00     9
2000-10-01 23:48:00    21
2000-10-02 00:05:00    54
2000-10-02 00:22:00    24
Freq: 17T, dtype: int32

In [250]:
ts.resample("17min", origin="start_day").sum()

2000-10-01 23:14:00     0
2000-10-01 23:31:00     9
2000-10-01 23:48:00    21
2000-10-02 00:05:00    54
2000-10-02 00:22:00    24
Freq: 17T, dtype: int32

In [253]:
ts[middle:end].resample("17min", origin="start_day").sum()

Series([], Freq: 17T, dtype: int32)

## Time span representation
---
### Period

In [254]:
pd.Period("2012", freq="A-DEC")

Period('2012', 'A-DEC')

In [255]:
pd.Period("2012-1-1", freq="D")

Period('2012-01-01', 'D')

In [256]:
pd.Period("2012-1-1 19:00", freq="H")

Period('2012-01-01 19:00', 'H')

In [257]:
pd.Period("2012-1-1 19:00", freq="5H")

Period('2012-01-01 19:00', '5H')

In [258]:
p = pd.Period("2012", freq="A-DEC")
p +1 

Period('2013', 'A-DEC')

In [259]:
p = pd.Period("2012-1-1 19:00", freq="H")
p + 2

Period('2012-01-01 21:00', 'H')

In [260]:
p = pd.Period("2014-07-01 09:00", freq="H")
p + pd.offsets.Hour(2)

Period('2014-07-01 11:00', 'H')

In [261]:
p + datetime.timedelta(minutes=120)

Period('2014-07-01 11:00', 'H')

In [262]:
p + np.timedelta64(7200, "s")

Period('2014-07-01 11:00', 'H')

In [263]:
p = pd.Period("2014-07", freq="M")
p + pd.offsets.MonthEnd(3)

Period('2014-10', 'M')

## PeriodIndex and period_range
---

In [264]:
prng = pd.period_range("1/1/2011", "1/1/2012", freq="M")
prng

PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05', '2011-06',
             '2011-07', '2011-08', '2011-09', '2011-10', '2011-11', '2011-12',
             '2012-01'],
            dtype='period[M]', freq='M')

In [265]:
pd.PeriodIndex(["2011-1", "2011-2", "2011-3"], freq="M")

PeriodIndex(['2011-01', '2011-02', '2011-03'], dtype='period[M]', freq='M')

In [266]:
pd.period_range(start='2014-01', freq='3M', periods=4)

PeriodIndex(['2014-01', '2014-04', '2014-07', '2014-10'], dtype='period[3M]', freq='3M')

In [270]:
pd.period_range(
    start=pd.Period('2017Q1', freq='Q'), 
    end=pd.Period('2017Q2', freq='Q'), 
    freq='M')

PeriodIndex(['2017-03', '2017-04', '2017-05', '2017-06'], dtype='period[M]', freq='M')

In [271]:
ps = pd.Series(np.random.randn(len(prng)), prng)
ps

2011-01    0.025014
2011-02   -0.539004
2011-03    0.500357
2011-04    0.483138
2011-05    0.649069
2011-06   -0.459086
2011-07    0.559113
2011-08    0.128031
2011-09    1.617428
2011-10    1.883977
2011-11    0.832241
2011-12   -0.364446
2012-01   -0.300782
Freq: M, dtype: float64

## Period dtypes
---

In [274]:
pi = pd.period_range('2015-1-1', periods=3, freq='M')
pi

PeriodIndex(['2015-01', '2015-02', '2015-03'], dtype='period[M]', freq='M')

In [275]:
pi.dtype

period[M]

In [276]:
pi.astype("period[D]")

PeriodIndex(['2015-01-31', '2015-02-28', '2015-03-31'], dtype='period[D]', freq='D')

In [277]:
pi.astype("datetime64[ns]")

DatetimeIndex(['2015-01-01', '2015-02-01', '2015-03-01'], dtype='datetime64[ns]', freq='MS')

## PeriodIndex partial string indexing
---

In [279]:
ps['2011-01']

0.02501374303778804

In [280]:
ps[datetime.datetime(2011,12,25)]

-0.36444559148633576

In [281]:
ps["10/31/2011":"12/31/2011"]

2011-10    1.883977
2011-11    0.832241
2011-12   -0.364446
Freq: M, dtype: float64

In [282]:
dfp = pd.DataFrame(
   .....:     np.random.randn(600, 1),
   .....:     columns=["A"],
   .....:     index=pd.period_range("2013-01-01 9:00", periods=600, freq="T"),
   .....: )
dfp

Unnamed: 0,A
2013-01-01 09:00,0.596107
2013-01-01 09:01,-2.155222
2013-01-01 09:02,0.016919
2013-01-01 09:03,0.628148
2013-01-01 09:04,2.198930
...,...
2013-01-01 18:55,-1.105035
2013-01-01 18:56,0.050323
2013-01-01 18:57,0.373757
2013-01-01 18:58,1.153487


In [284]:
dfp.loc["2013-01-01 10H"]

Unnamed: 0,A
2013-01-01 10:00,0.755791
2013-01-01 10:01,-1.502977
2013-01-01 10:02,0.516917
2013-01-01 10:03,0.954083
2013-01-01 10:04,-0.680998
2013-01-01 10:05,-1.366026
2013-01-01 10:06,1.639424
2013-01-01 10:07,-0.536162
2013-01-01 10:08,-1.55343
2013-01-01 10:09,-0.111383


## Frequency conversion and resampling with PeriodIndex
---

In [285]:
p = pd.Period("2011", freq="A-DEC")
p

Period('2011', 'A-DEC')

In [286]:
p.asfreq("M", how='start')

Period('2011-01', 'M')

In [288]:
p.asfreq('M', how='end')

Period('2011-12', 'M')

## Converting between representations
---

In [289]:
rng = pd.date_range("1/1/2012", periods=5, freq="M")
ts = pd.Series(np.random.randn(len(rng)), index=rng)
ts

2012-01-31   -0.710184
2012-02-29   -0.915732
2012-03-31   -2.971479
2012-04-30    0.778232
2012-05-31   -0.514317
Freq: M, dtype: float64

In [291]:
ps = ts.to_period()
ps

2012-01   -0.710184
2012-02   -0.915732
2012-03   -2.971479
2012-04    0.778232
2012-05   -0.514317
Freq: M, dtype: float64

In [292]:
ps.to_timestamp()

2012-01-01   -0.710184
2012-02-01   -0.915732
2012-03-01   -2.971479
2012-04-01    0.778232
2012-05-01   -0.514317
Freq: MS, dtype: float64

## Representing out-of-bounds spans
---

In [294]:
span = pd.period_range("1215-01-01", "1381-01-01", freq="D")
span

PeriodIndex(['1215-01-01', '1215-01-02', '1215-01-03', '1215-01-04',
             '1215-01-05', '1215-01-06', '1215-01-07', '1215-01-08',
             '1215-01-09', '1215-01-10',
             ...
             '1380-12-23', '1380-12-24', '1380-12-25', '1380-12-26',
             '1380-12-27', '1380-12-28', '1380-12-29', '1380-12-30',
             '1380-12-31', '1381-01-01'],
            dtype='period[D]', length=60632, freq='D')

In [295]:
s = pd.Series([20121231, 20141130, 99991231])

##  Time zone handling
---

In [297]:
rng = pd.date_range("3/6/2012 00:00", periods=15, freq="D")
rng.tz is None

True

In [298]:
import dateutil

rng_pytz = pd.date_range('3/6/2012 00:00', periods=3, freq='D', tz='Europe/London'
                        )
rng_pytz.tz

<DstTzInfo 'Europe/London' LMT-1 day, 23:59:00 STD>

In [299]:
rng_dateutil = pd.date_range("3/6/2012 00:00", periods=3, freq="D")
rng_dateutil = rng_dateutil.tz_localize("dateutil/Europe/London")
rng_dateutil.tz

tzfile('GB-Eire')

In [300]:
rng_utc = pd.date_range(
   .....:     "3/6/2012 00:00",
   .....:     periods=3,
   .....:     freq="D",
   .....:     tz=datetime.timezone.utc,
   .....: )
rng_utc.tz

datetime.timezone.utc

In [301]:
import pytz
tz_pytz = pytz.timezone("Europe/London")
rng_pytz = pd.date_range("3/6/2012 00:00", periods=3, freq="D")
rng_pytz = rng_pytz.tz_localize(tz_pytz)
rng_pytz.tz == tz_pytz

True

In [302]:
tz_dateutil = dateutil.tz.gettz("Europe/London")

In [303]:
rng_dateutil = pd.date_range("3/6/2012 00:00", periods=3, freq="D", tz=tz_dateutil)
rng_dateutil.tz == tz_dateutil

True

## Fold
---

In [304]:
pd.Timestamp(
   .....:     datetime.datetime(2019, 10, 27, 1, 30, 0, 0),
   .....:     tz="dateutil/Europe/London",
   .....:     fold=0,
   .....: )

Timestamp('2019-10-27 01:30:00+0100', tz='dateutil/GB-Eire')

In [305]:
pd.Timestamp(
   .....:     year=2019,
   .....:     month=10,
   .....:     day=27,
   .....:     hour=1,
   .....:     minute=30,
   .....:     tz="dateutil/Europe/London",
   .....:     fold=1,
   .....: )

Timestamp('2019-10-27 01:30:00+0000', tz='dateutil/GB-Eire')

## Time zone series operations
---

In [307]:
s_naive = pd.Series(pd.date_range("20130101", periods=3))
s_naive

0   2013-01-01
1   2013-01-02
2   2013-01-03
dtype: datetime64[ns]

In [308]:
s_aware = pd.Series(pd.date_range("20130101", periods=3, tz="US/Eastern"))
s_aware

0   2013-01-01 00:00:00-05:00
1   2013-01-02 00:00:00-05:00
2   2013-01-03 00:00:00-05:00
dtype: datetime64[ns, US/Eastern]