# Dates and Times

In [2]:
import pandas as pd
import datetime as dt

## Python's datetime Module

In [4]:
# A calendar date without any time component: 27 June 1989
someday = dt.date(year=1989, month=6, day=27)

In [5]:
someday.year

1989

In [6]:
someday.month

6

In [7]:
someday.day

27

In [8]:
# dt.datetime holds a date plus a time of day; when the time fields are
# omitted they default to 0, i.e. midnight
dt.datetime(2010,1,20)

datetime.datetime(2010, 1, 20, 0, 0)

In [13]:
# Full date and time: 20 January 2010 at 08:13:57
sometime = dt.datetime(year=2010, month=1, day=20, hour=8, minute=13, second=57)

In [10]:
str(dt.datetime(2010,1,20,8,13,57))

'2010-01-20 08:13:57'

In [12]:
str(dt.date(1989,6,27))

'1989-06-27'

In [14]:
sometime.year

2010

In [15]:
sometime.month

1

In [16]:
sometime.day

20

In [17]:
sometime.hour

8

In [18]:
sometime.minute

13

In [19]:
sometime.second

57

## The pandas Timestamp Object

In [20]:
pd.Timestamp('2015-03-31')

Timestamp('2015-03-31 00:00:00')

In [21]:
pd.Timestamp('2015/03/31')

Timestamp('2015-03-31 00:00:00')

In [24]:
pd.Timestamp('2013, 11, 03')

Timestamp('2013-11-03 00:00:00')

In [25]:
pd.Timestamp('1/1/2015')

Timestamp('2015-01-01 00:00:00')

In [26]:
pd.Timestamp('19/12/2015')

Timestamp('2015-12-19 00:00:00')

In [27]:
pd.Timestamp('4/3/2000')

Timestamp('2000-04-03 00:00:00')

In [28]:
pd.Timestamp('2021-03-08 8:35:15')

Timestamp('2021-03-08 08:35:15')

In [29]:
pd.Timestamp('2021-03-08 6:13:15 PM')

Timestamp('2021-03-08 18:13:15')

In [30]:
pd.Timestamp(dt.date(2015,1,15))

Timestamp('2015-01-15 00:00:00')

In [32]:
pd.Timestamp(dt.datetime(2015,1,15,21,35,22))

Timestamp('2015-01-15 21:35:22')

## The pandas DatetimeIndex Object

In [34]:
dates = ['2016-03-12','1989-06-27','2009-07-07']
pd.DatetimeIndex(dates)

DatetimeIndex(['2016-03-12', '1989-06-27', '2009-07-07'], dtype='datetime64[ns]', freq=None)

In [35]:
type(pd.DatetimeIndex(dates))

pandas.core.indexes.datetimes.DatetimeIndex

In [38]:
# Build the index from datetime.date objects instead of date strings
dates = [dt.date(*ymd) for ymd in ((2016,1,31), (2016,4,21), (2013,2,12))]
dt_index = pd.DatetimeIndex(dates)

In [39]:
values = [100,200,300]
pd.Series(data=values,index=dt_index)

2016-01-31    100
2016-04-21    200
2013-02-12    300
dtype: int64

## The pd.to_datetime() Method

In [40]:
pd.to_datetime('2001-04-19')

Timestamp('2001-04-19 00:00:00')

In [41]:
pd.to_datetime(dt.date(2001,4,19))

Timestamp('2001-04-19 00:00:00')

In [42]:
pd.to_datetime(['2015-01-03','2014/02/08','2016','July 4th, 1996'])

DatetimeIndex(['2015-01-03', '2014-02-08', '2016-01-01', '1996-07-04'], dtype='datetime64[ns]', freq=None)

In [43]:
times = pd.Series(['2015-01-03','2014/02/08','2016','July 4th, 1996'])
times

0        2015-01-03
1        2014/02/08
2              2016
3    July 4th, 1996
dtype: object

In [44]:
pd.to_datetime(times)

0   2015-01-03
1   2014-02-08
2   2016-01-01
3   1996-07-04
dtype: datetime64[ns]

In [46]:
dates = pd.Series(['July 4th, 1996','10/04/2016','hello','2015-02-31'])
dates

0    July 4th, 1996
1        10/04/2016
2             hello
3        2015-02-31
dtype: object

In [49]:
# errors='coerce' turns entries that cannot be converted into NaT:
# here the non-date string 'hello' and the impossible date '2015-02-31'
pd.to_datetime(dates,errors='coerce')

0   1996-07-04
1   2016-10-04
2          NaT
3          NaT
dtype: datetime64[ns]

## Create a Range of Dates with the pd.date_range() Method

In [51]:
times = pd.date_range(start='2016-01-01',end='2016-01-10',freq='D')
times

DatetimeIndex(['2016-01-01', '2016-01-02', '2016-01-03', '2016-01-04',
               '2016-01-05', '2016-01-06', '2016-01-07', '2016-01-08',
               '2016-01-09', '2016-01-10'],
              dtype='datetime64[ns]', freq='D')

In [52]:
type(times)

pandas.core.indexes.datetimes.DatetimeIndex

In [53]:
times[0]

Timestamp('2016-01-01 00:00:00', freq='D')

In [55]:
times = pd.date_range(start='2016-01-01',end='2016-01-10',freq='2D')
times

DatetimeIndex(['2016-01-01', '2016-01-03', '2016-01-05', '2016-01-07',
               '2016-01-09'],
              dtype='datetime64[ns]', freq='2D')

In [56]:
times = pd.date_range(start='2016-01-01',end='2016-01-10',freq='B')
times

DatetimeIndex(['2016-01-01', '2016-01-04', '2016-01-05', '2016-01-06',
               '2016-01-07', '2016-01-08'],
              dtype='datetime64[ns]', freq='B')

In [57]:
times = pd.date_range(start='2016-01-01',end='2016-01-15',freq='W')
times

DatetimeIndex(['2016-01-03', '2016-01-10'], dtype='datetime64[ns]', freq='W-SUN')

In [58]:
times = pd.date_range(start='2016-01-01',end='2016-01-15',freq='W-FRI')
times

DatetimeIndex(['2016-01-01', '2016-01-08', '2016-01-15'], dtype='datetime64[ns]', freq='W-FRI')

In [59]:
times = pd.date_range(start='2016-01-01',end='2016-01-15',freq='H')
times

DatetimeIndex(['2016-01-01 00:00:00', '2016-01-01 01:00:00',
               '2016-01-01 02:00:00', '2016-01-01 03:00:00',
               '2016-01-01 04:00:00', '2016-01-01 05:00:00',
               '2016-01-01 06:00:00', '2016-01-01 07:00:00',
               '2016-01-01 08:00:00', '2016-01-01 09:00:00',
               ...
               '2016-01-14 15:00:00', '2016-01-14 16:00:00',
               '2016-01-14 17:00:00', '2016-01-14 18:00:00',
               '2016-01-14 19:00:00', '2016-01-14 20:00:00',
               '2016-01-14 21:00:00', '2016-01-14 22:00:00',
               '2016-01-14 23:00:00', '2016-01-15 00:00:00'],
              dtype='datetime64[ns]', length=337, freq='H')

In [60]:
# 6 hours
times = pd.date_range(start='2016-01-01',end='2016-01-15',freq='6H')
times

DatetimeIndex(['2016-01-01 00:00:00', '2016-01-01 06:00:00',
               '2016-01-01 12:00:00', '2016-01-01 18:00:00',
               '2016-01-02 00:00:00', '2016-01-02 06:00:00',
               '2016-01-02 12:00:00', '2016-01-02 18:00:00',
               '2016-01-03 00:00:00', '2016-01-03 06:00:00',
               '2016-01-03 12:00:00', '2016-01-03 18:00:00',
               '2016-01-04 00:00:00', '2016-01-04 06:00:00',
               '2016-01-04 12:00:00', '2016-01-04 18:00:00',
               '2016-01-05 00:00:00', '2016-01-05 06:00:00',
               '2016-01-05 12:00:00', '2016-01-05 18:00:00',
               '2016-01-06 00:00:00', '2016-01-06 06:00:00',
               '2016-01-06 12:00:00', '2016-01-06 18:00:00',
               '2016-01-07 00:00:00', '2016-01-07 06:00:00',
               '2016-01-07 12:00:00', '2016-01-07 18:00:00',
               '2016-01-08 00:00:00', '2016-01-08 06:00:00',
               '2016-01-08 12:00:00', '2016-01-08 18:00:00',
               '2016-01-

In [61]:
# month end: one timestamp on the last calendar day of every month in 2016
# NOTE(review): newer pandas releases reportedly deprecate the 'M' alias in
# favour of 'ME' - confirm against the installed pandas version
times = pd.date_range(start='2016-01-01',end='2016-12-31',freq='M')
times

DatetimeIndex(['2016-01-31', '2016-02-29', '2016-03-31', '2016-04-30',
               '2016-05-31', '2016-06-30', '2016-07-31', '2016-08-31',
               '2016-09-30', '2016-10-31', '2016-11-30', '2016-12-31'],
              dtype='datetime64[ns]', freq='M')

In [62]:
# month start
times = pd.date_range(start='2016-01-01',end='2016-12-31',freq='MS')
times

DatetimeIndex(['2016-01-01', '2016-02-01', '2016-03-01', '2016-04-01',
               '2016-05-01', '2016-06-01', '2016-07-01', '2016-08-01',
               '2016-09-01', '2016-10-01', '2016-11-01', '2016-12-01'],
              dtype='datetime64[ns]', freq='MS')

In [63]:
# year end
times = pd.date_range(start='2016-01-01',end='2050-01-01',freq='A')
times

DatetimeIndex(['2016-12-31', '2017-12-31', '2018-12-31', '2019-12-31',
               '2020-12-31', '2021-12-31', '2022-12-31', '2023-12-31',
               '2024-12-31', '2025-12-31', '2026-12-31', '2027-12-31',
               '2028-12-31', '2029-12-31', '2030-12-31', '2031-12-31',
               '2032-12-31', '2033-12-31', '2034-12-31', '2035-12-31',
               '2036-12-31', '2037-12-31', '2038-12-31', '2039-12-31',
               '2040-12-31', '2041-12-31', '2042-12-31', '2043-12-31',
               '2044-12-31', '2045-12-31', '2046-12-31', '2047-12-31',
               '2048-12-31', '2049-12-31'],
              dtype='datetime64[ns]', freq='A-DEC')

In [64]:
times = pd.date_range(start='2012-09-09',periods=25,freq='D')
times

DatetimeIndex(['2012-09-09', '2012-09-10', '2012-09-11', '2012-09-12',
               '2012-09-13', '2012-09-14', '2012-09-15', '2012-09-16',
               '2012-09-17', '2012-09-18', '2012-09-19', '2012-09-20',
               '2012-09-21', '2012-09-22', '2012-09-23', '2012-09-24',
               '2012-09-25', '2012-09-26', '2012-09-27', '2012-09-28',
               '2012-09-29', '2012-09-30', '2012-10-01', '2012-10-02',
               '2012-10-03'],
              dtype='datetime64[ns]', freq='D')

In [65]:
len(times)

25

In [66]:
# 50 business days from 2012-09-09
times = pd.date_range(start='2012-09-09',periods=50,freq='B')
times

DatetimeIndex(['2012-09-10', '2012-09-11', '2012-09-12', '2012-09-13',
               '2012-09-14', '2012-09-17', '2012-09-18', '2012-09-19',
               '2012-09-20', '2012-09-21', '2012-09-24', '2012-09-25',
               '2012-09-26', '2012-09-27', '2012-09-28', '2012-10-01',
               '2012-10-02', '2012-10-03', '2012-10-04', '2012-10-05',
               '2012-10-08', '2012-10-09', '2012-10-10', '2012-10-11',
               '2012-10-12', '2012-10-15', '2012-10-16', '2012-10-17',
               '2012-10-18', '2012-10-19', '2012-10-22', '2012-10-23',
               '2012-10-24', '2012-10-25', '2012-10-26', '2012-10-29',
               '2012-10-30', '2012-10-31', '2012-11-01', '2012-11-02',
               '2012-11-05', '2012-11-06', '2012-11-07', '2012-11-08',
               '2012-11-09', '2012-11-12', '2012-11-13', '2012-11-14',
               '2012-11-15', '2012-11-16'],
              dtype='datetime64[ns]', freq='B')

In [67]:
# 50 weeks from 2012-09-09
times = pd.date_range(start='2012-09-09',periods=50,freq='W')
times

DatetimeIndex(['2012-09-09', '2012-09-16', '2012-09-23', '2012-09-30',
               '2012-10-07', '2012-10-14', '2012-10-21', '2012-10-28',
               '2012-11-04', '2012-11-11', '2012-11-18', '2012-11-25',
               '2012-12-02', '2012-12-09', '2012-12-16', '2012-12-23',
               '2012-12-30', '2013-01-06', '2013-01-13', '2013-01-20',
               '2013-01-27', '2013-02-03', '2013-02-10', '2013-02-17',
               '2013-02-24', '2013-03-03', '2013-03-10', '2013-03-17',
               '2013-03-24', '2013-03-31', '2013-04-07', '2013-04-14',
               '2013-04-21', '2013-04-28', '2013-05-05', '2013-05-12',
               '2013-05-19', '2013-05-26', '2013-06-02', '2013-06-09',
               '2013-06-16', '2013-06-23', '2013-06-30', '2013-07-07',
               '2013-07-14', '2013-07-21', '2013-07-28', '2013-08-04',
               '2013-08-11', '2013-08-18'],
              dtype='datetime64[ns]', freq='W-SUN')

In [68]:
# 50 weekly dates anchored on Tuesdays: the first one is the first Tuesday
# on or after 2012-09-09, and every later date is 7 days after the previous one
times = pd.date_range(start='2012-09-09',periods=50,freq='W-TUE')
times

DatetimeIndex(['2012-09-11', '2012-09-18', '2012-09-25', '2012-10-02',
               '2012-10-09', '2012-10-16', '2012-10-23', '2012-10-30',
               '2012-11-06', '2012-11-13', '2012-11-20', '2012-11-27',
               '2012-12-04', '2012-12-11', '2012-12-18', '2012-12-25',
               '2013-01-01', '2013-01-08', '2013-01-15', '2013-01-22',
               '2013-01-29', '2013-02-05', '2013-02-12', '2013-02-19',
               '2013-02-26', '2013-03-05', '2013-03-12', '2013-03-19',
               '2013-03-26', '2013-04-02', '2013-04-09', '2013-04-16',
               '2013-04-23', '2013-04-30', '2013-05-07', '2013-05-14',
               '2013-05-21', '2013-05-28', '2013-06-04', '2013-06-11',
               '2013-06-18', '2013-06-25', '2013-07-02', '2013-07-09',
               '2013-07-16', '2013-07-23', '2013-07-30', '2013-08-06',
               '2013-08-13', '2013-08-20'],
              dtype='datetime64[ns]', freq='W-TUE')

In [69]:
# 50 6-hour increments
times = pd.date_range(start='2012-09-09',periods=50,freq='6H')
times

DatetimeIndex(['2012-09-09 00:00:00', '2012-09-09 06:00:00',
               '2012-09-09 12:00:00', '2012-09-09 18:00:00',
               '2012-09-10 00:00:00', '2012-09-10 06:00:00',
               '2012-09-10 12:00:00', '2012-09-10 18:00:00',
               '2012-09-11 00:00:00', '2012-09-11 06:00:00',
               '2012-09-11 12:00:00', '2012-09-11 18:00:00',
               '2012-09-12 00:00:00', '2012-09-12 06:00:00',
               '2012-09-12 12:00:00', '2012-09-12 18:00:00',
               '2012-09-13 00:00:00', '2012-09-13 06:00:00',
               '2012-09-13 12:00:00', '2012-09-13 18:00:00',
               '2012-09-14 00:00:00', '2012-09-14 06:00:00',
               '2012-09-14 12:00:00', '2012-09-14 18:00:00',
               '2012-09-15 00:00:00', '2012-09-15 06:00:00',
               '2012-09-15 12:00:00', '2012-09-15 18:00:00',
               '2012-09-16 00:00:00', '2012-09-16 06:00:00',
               '2012-09-16 12:00:00', '2012-09-16 18:00:00',
               '2012-09-

In [70]:
# starts at endpoint and moves back in time
times = pd.date_range(end='1999-12-31',periods=20,freq='D')
times

DatetimeIndex(['1999-12-12', '1999-12-13', '1999-12-14', '1999-12-15',
               '1999-12-16', '1999-12-17', '1999-12-18', '1999-12-19',
               '1999-12-20', '1999-12-21', '1999-12-22', '1999-12-23',
               '1999-12-24', '1999-12-25', '1999-12-26', '1999-12-27',
               '1999-12-28', '1999-12-29', '1999-12-30', '1999-12-31'],
              dtype='datetime64[ns]', freq='D')

In [71]:
times = pd.date_range(end='1999-12-31',periods=40,freq='B')
times

DatetimeIndex(['1999-11-08', '1999-11-09', '1999-11-10', '1999-11-11',
               '1999-11-12', '1999-11-15', '1999-11-16', '1999-11-17',
               '1999-11-18', '1999-11-19', '1999-11-22', '1999-11-23',
               '1999-11-24', '1999-11-25', '1999-11-26', '1999-11-29',
               '1999-11-30', '1999-12-01', '1999-12-02', '1999-12-03',
               '1999-12-06', '1999-12-07', '1999-12-08', '1999-12-09',
               '1999-12-10', '1999-12-13', '1999-12-14', '1999-12-15',
               '1999-12-16', '1999-12-17', '1999-12-20', '1999-12-21',
               '1999-12-22', '1999-12-23', '1999-12-24', '1999-12-27',
               '1999-12-28', '1999-12-29', '1999-12-30', '1999-12-31'],
              dtype='datetime64[ns]', freq='B')

In [72]:
times = pd.date_range(end='1999-12-31',periods=20,freq='W-SUN')
times

DatetimeIndex(['1999-08-15', '1999-08-22', '1999-08-29', '1999-09-05',
               '1999-09-12', '1999-09-19', '1999-09-26', '1999-10-03',
               '1999-10-10', '1999-10-17', '1999-10-24', '1999-10-31',
               '1999-11-07', '1999-11-14', '1999-11-21', '1999-11-28',
               '1999-12-05', '1999-12-12', '1999-12-19', '1999-12-26'],
              dtype='datetime64[ns]', freq='W-SUN')

In [73]:
times = pd.date_range(end='1999-12-31',periods=20,freq='M')
times

DatetimeIndex(['1998-05-31', '1998-06-30', '1998-07-31', '1998-08-31',
               '1998-09-30', '1998-10-31', '1998-11-30', '1998-12-31',
               '1999-01-31', '1999-02-28', '1999-03-31', '1999-04-30',
               '1999-05-31', '1999-06-30', '1999-07-31', '1999-08-31',
               '1999-09-30', '1999-10-31', '1999-11-30', '1999-12-31'],
              dtype='datetime64[ns]', freq='M')

In [74]:
times = pd.date_range(end='1999-12-31',periods=53,freq='MS')
times

DatetimeIndex(['1995-08-01', '1995-09-01', '1995-10-01', '1995-11-01',
               '1995-12-01', '1996-01-01', '1996-02-01', '1996-03-01',
               '1996-04-01', '1996-05-01', '1996-06-01', '1996-07-01',
               '1996-08-01', '1996-09-01', '1996-10-01', '1996-11-01',
               '1996-12-01', '1997-01-01', '1997-02-01', '1997-03-01',
               '1997-04-01', '1997-05-01', '1997-06-01', '1997-07-01',
               '1997-08-01', '1997-09-01', '1997-10-01', '1997-11-01',
               '1997-12-01', '1998-01-01', '1998-02-01', '1998-03-01',
               '1998-04-01', '1998-05-01', '1998-06-01', '1998-07-01',
               '1998-08-01', '1998-09-01', '1998-10-01', '1998-11-01',
               '1998-12-01', '1999-01-01', '1999-02-01', '1999-03-01',
               '1999-04-01', '1999-05-01', '1999-06-01', '1999-07-01',
               '1999-08-01', '1999-09-01', '1999-10-01', '1999-11-01',
               '1999-12-01'],
              dtype='datetime64[ns]', freq='MS')

In [75]:
times = pd.date_range(end='1999-12-31',periods=100,freq='7H')
times

DatetimeIndex(['1999-12-02 03:00:00', '1999-12-02 10:00:00',
               '1999-12-02 17:00:00', '1999-12-03 00:00:00',
               '1999-12-03 07:00:00', '1999-12-03 14:00:00',
               '1999-12-03 21:00:00', '1999-12-04 04:00:00',
               '1999-12-04 11:00:00', '1999-12-04 18:00:00',
               '1999-12-05 01:00:00', '1999-12-05 08:00:00',
               '1999-12-05 15:00:00', '1999-12-05 22:00:00',
               '1999-12-06 05:00:00', '1999-12-06 12:00:00',
               '1999-12-06 19:00:00', '1999-12-07 02:00:00',
               '1999-12-07 09:00:00', '1999-12-07 16:00:00',
               '1999-12-07 23:00:00', '1999-12-08 06:00:00',
               '1999-12-08 13:00:00', '1999-12-08 20:00:00',
               '1999-12-09 03:00:00', '1999-12-09 10:00:00',
               '1999-12-09 17:00:00', '1999-12-10 00:00:00',
               '1999-12-10 07:00:00', '1999-12-10 14:00:00',
               '1999-12-10 21:00:00', '1999-12-11 04:00:00',
               '1999-12-

## The .dt Accessor

In [86]:
dates = pd.date_range(start='2000-01-01',end='2010-12-31',freq='24D')
dates

DatetimeIndex(['2000-01-01', '2000-01-25', '2000-02-18', '2000-03-13',
               '2000-04-06', '2000-04-30', '2000-05-24', '2000-06-17',
               '2000-07-11', '2000-08-04',
               ...
               '2010-05-20', '2010-06-13', '2010-07-07', '2010-07-31',
               '2010-08-24', '2010-09-17', '2010-10-11', '2010-11-04',
               '2010-11-28', '2010-12-22'],
              dtype='datetime64[ns]', length=168, freq='24D')

In [87]:
s = pd.Series(dates)
s.head()

0   2000-01-01
1   2000-01-25
2   2000-02-18
3   2000-03-13
4   2000-04-06
dtype: datetime64[ns]

In [88]:
s.dt.day.head()

0     1
1    25
2    18
3    13
4     6
dtype: int64

In [89]:
s.dt.month.head()

0    1
1    1
2    2
3    3
4    4
dtype: int64

In [90]:
# Weekday name of each date. day_name() replaces the weekday_name attribute,
# which was deprecated and later removed from pandas.
s.dt.day_name().head()

0    Saturday
1     Tuesday
2      Friday
3      Monday
4    Thursday
dtype: object

In [92]:
s.dt.is_quarter_start.head()

0     True
1    False
2    False
3    False
4    False
dtype: bool

In [93]:
# Boolean mask: True where the date is the first day of a quarter,
# then use it to keep only those dates
mask = s.dt.is_quarter_start
s[mask]

0     2000-01-01
19    2001-04-01
38    2002-07-01
137   2009-01-01
dtype: datetime64[ns]

## Import Financial Data Set with pandas_datareader Library

In [94]:
import pandas as pd
import datetime as dt
from pandas_datareader import data

In [99]:
# Request MSFT price history for 2014 through 2017 from the 'iex' data source
company = 'MSFT'
start = '2014-01-01'
end = '2017-12-31'

stocks = data.DataReader(name=company,data_source='iex',start=start,end=end)
stocks.head()

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2014-01-02,33.2632,33.3077,33.0406,33.094,30643745
2014-01-03,33.1296,33.1474,32.5953,32.8713,31134795
2014-01-06,32.8179,32.8535,32.1589,32.1767,43615035
2014-01-07,32.3504,32.4973,32.2479,32.4261,35924726
2014-01-08,32.0609,32.1856,31.6869,31.8472,59979542


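**Note**: the `iex` data source returns the dates as plain strings (an `object`-dtype `Index`), not as a `DatetimeIndex`, so the index has to be converted to datetimes manually.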

In [100]:
stocks.values

array([[  3.32632000e+01,   3.33077000e+01,   3.30406000e+01,
          3.30940000e+01,   3.06437450e+07],
       [  3.31296000e+01,   3.31474000e+01,   3.25953000e+01,
          3.28713000e+01,   3.11347950e+07],
       [  3.28179000e+01,   3.28535000e+01,   3.21589000e+01,
          3.21767000e+01,   4.36150350e+07],
       ..., 
       [  8.45562000e+01,   8.48820000e+01,   8.41268000e+01,
          8.46154000e+01,   1.46780250e+07],
       [  8.48030000e+01,   8.48326000e+01,   8.44575000e+01,
          8.46253000e+01,   1.05943440e+07],
       [  8.45365000e+01,   8.49511000e+01,   8.44081000e+01,
          8.44476000e+01,   1.87174060e+07]])

In [101]:
stocks.columns

Index(['open', 'high', 'low', 'close', 'volume'], dtype='object')

In [104]:
stocks.index

Index(['2014-01-02', '2014-01-03', '2014-01-06', '2014-01-07', '2014-01-08',
       '2014-01-09', '2014-01-10', '2014-01-13', '2014-01-14', '2014-01-15',
       ...
       '2017-12-15', '2017-12-18', '2017-12-19', '2017-12-20', '2017-12-21',
       '2017-12-22', '2017-12-26', '2017-12-27', '2017-12-28', '2017-12-29'],
      dtype='object', name='date', length=1007)

In [107]:
# Move the string dates out of the index into an ordinary 'date' column
stocks = stocks.reset_index()

In [110]:
# Parse the string dates in the 'date' column into datetime values
stocks['date'] = pd.to_datetime(stocks['date'])
stocks.head()

Unnamed: 0,date,open,high,low,close,volume
0,2014-01-02,33.2632,33.3077,33.0406,33.094,30643745
1,2014-01-03,33.1296,33.1474,32.5953,32.8713,31134795
2,2014-01-06,32.8179,32.8535,32.1589,32.1767,43615035
3,2014-01-07,32.3504,32.4973,32.2479,32.4261,35924726
4,2014-01-08,32.0609,32.1856,31.6869,31.8472,59979542


In [112]:
type(stocks['date'][0])

pandas._libs.tslib.Timestamp

In [113]:
# Promote the parsed 'date' column back to the index
stocks = stocks.set_index('date')
stocks.head()

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2014-01-02,33.2632,33.3077,33.0406,33.094,30643745
2014-01-03,33.1296,33.1474,32.5953,32.8713,31134795
2014-01-06,32.8179,32.8535,32.1589,32.1767,43615035
2014-01-07,32.3504,32.4973,32.2479,32.4261,35924726
2014-01-08,32.0609,32.1856,31.6869,31.8472,59979542


In [114]:
stocks.index

DatetimeIndex(['2014-01-02', '2014-01-03', '2014-01-06', '2014-01-07',
               '2014-01-08', '2014-01-09', '2014-01-10', '2014-01-13',
               '2014-01-14', '2014-01-15',
               ...
               '2017-12-15', '2017-12-18', '2017-12-19', '2017-12-20',
               '2017-12-21', '2017-12-22', '2017-12-26', '2017-12-27',
               '2017-12-28', '2017-12-29'],
              dtype='datetime64[ns]', name='date', length=1007, freq=None)

## Selecting Rows from a DataFrame with a DatetimeIndex

In [115]:
stocks.head()

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2014-01-02,33.2632,33.3077,33.0406,33.094,30643745
2014-01-03,33.1296,33.1474,32.5953,32.8713,31134795
2014-01-06,32.8179,32.8535,32.1589,32.1767,43615035
2014-01-07,32.3504,32.4973,32.2479,32.4261,35924726
2014-01-08,32.0609,32.1856,31.6869,31.8472,59979542


In [116]:
stocks.loc['2014-01-08']

open      3.206090e+01
high      3.218560e+01
low       3.168690e+01
close     3.184720e+01
volume    5.997954e+07
Name: 2014-01-08 00:00:00, dtype: float64

In [117]:
stocks.iloc[1]

open      3.312960e+01
high      3.314740e+01
low       3.259530e+01
close     3.287130e+01
volume    3.113480e+07
Name: 2014-01-03 00:00:00, dtype: float64

In [118]:
stocks.loc['2014-01-08':'2014-01-31']

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2014-01-08,32.0609,32.1856,31.6869,31.8472,59979542
2014-01-09,31.954,31.9808,31.5266,31.6423,36520448
2014-01-10,31.9719,32.1945,31.8383,32.0965,40558766
2014-01-13,32.052,32.0787,31.0189,31.1525,45923872
2014-01-14,30.9299,31.954,30.8408,31.865,41627325
2014-01-15,31.9719,32.7645,31.9273,32.7378,44909545
2014-01-16,32.6754,32.9515,32.337,32.8535,38035708
2014-01-17,32.8001,32.8001,32.1945,32.3993,46298736
2014-01-21,32.7912,32.7912,32.1144,32.2123,31578979
2014-01-22,32.2925,32.3459,31.8383,31.9986,21965266


In [120]:
# One timestamp per year on June 27, from 1989 through 2018
birthdays = pd.date_range(start='1989-06-27',end='2018-06-27',freq=pd.DateOffset(years=1))

In [122]:
# Keep only the rows whose index date is one of the birthdays
mask = stocks.index.isin(birthdays)
stocks[mask]

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2014-06-27,37.5983,38.2128,37.508,38.1766,74641945
2016-06-27,46.8044,46.8488,45.7892,46.1657,50576699
2017-06-27,68.5038,68.5722,67.5951,67.6244,25215128


## Timestamp Object Attributes

In [123]:
stocks.head()

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2014-01-02,33.2632,33.3077,33.0406,33.094,30643745
2014-01-03,33.1296,33.1474,32.5953,32.8713,31134795
2014-01-06,32.8179,32.8535,32.1589,32.1767,43615035
2014-01-07,32.3504,32.4973,32.2479,32.4261,35924726
2014-01-08,32.0609,32.1856,31.6869,31.8472,59979542


In [124]:
someday = stocks.index[100]
someday

Timestamp('2014-05-28 00:00:00')

In [125]:
someday.day

28

In [126]:
someday.year

2014

In [127]:
# Weekday name of the timestamp. day_name() replaces the weekday_name
# attribute, which was deprecated and later removed from pandas.
someday.day_name()

'Wednesday'

In [128]:
someday.is_month_start

False

In [129]:
# Add the weekday name of each index date as the first column.
# day_name() replaces the weekday_name attribute, which was deprecated and
# later removed from pandas.
stocks.insert(0,'day of week',stocks.index.day_name())
stocks.head()

Unnamed: 0_level_0,day of week,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2014-01-02,Thursday,33.2632,33.3077,33.0406,33.094,30643745
2014-01-03,Friday,33.1296,33.1474,32.5953,32.8713,31134795
2014-01-06,Monday,32.8179,32.8535,32.1589,32.1767,43615035
2014-01-07,Tuesday,32.3504,32.4973,32.2479,32.4261,35924726
2014-01-08,Wednesday,32.0609,32.1856,31.6869,31.8472,59979542


In [130]:
stocks.insert(1,'start of month',stocks.index.is_month_start)
stocks.head()

Unnamed: 0_level_0,day of week,start of month,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2014-01-02,Thursday,False,33.2632,33.3077,33.0406,33.094,30643745
2014-01-03,Friday,False,33.1296,33.1474,32.5953,32.8713,31134795
2014-01-06,Monday,False,32.8179,32.8535,32.1589,32.1767,43615035
2014-01-07,Tuesday,False,32.3504,32.4973,32.2479,32.4261,35924726
2014-01-08,Wednesday,False,32.0609,32.1856,31.6869,31.8472,59979542


## The .truncate() Method

In [131]:
stocks.head()

Unnamed: 0_level_0,day of week,start of month,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2014-01-02,Thursday,False,33.2632,33.3077,33.0406,33.094,30643745
2014-01-03,Friday,False,33.1296,33.1474,32.5953,32.8713,31134795
2014-01-06,Monday,False,32.8179,32.8535,32.1589,32.1767,43615035
2014-01-07,Tuesday,False,32.3504,32.4973,32.2479,32.4261,35924726
2014-01-08,Wednesday,False,32.0609,32.1856,31.6869,31.8472,59979542


In [133]:
# Remove the two helper columns again, rebinding the name to the result
stocks = stocks.drop(['day of week','start of month'],axis=1)
stocks.head()

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2014-01-02,33.2632,33.3077,33.0406,33.094,30643745
2014-01-03,33.1296,33.1474,32.5953,32.8713,31134795
2014-01-06,32.8179,32.8535,32.1589,32.1767,43615035
2014-01-07,32.3504,32.4973,32.2479,32.4261,35924726
2014-01-08,32.0609,32.1856,31.6869,31.8472,59979542


In [134]:
stocks.truncate(before='2015-01-01',after='2015-02-01')

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2015-01-02,42.6924,43.3878,42.5826,42.7839,27913852
2015-01-05,42.4271,42.7564,42.3173,42.3859,39673865
2015-01-06,42.4362,42.7738,41.6676,41.7683,36447854
2015-01-07,42.0702,42.5094,41.6219,42.299,29114061
2015-01-08,42.7747,43.6896,42.7473,43.5433,29645202
2015-01-09,43.5616,43.7538,42.912,43.1773,23944181
2015-01-12,43.3878,43.4976,42.4179,42.6375,23651887
2015-01-13,42.976,43.8361,42.1434,42.4133,35270601
2015-01-14,42.0519,42.3081,41.7408,42.0473,29719580
2015-01-15,42.2898,42.4362,41.5487,41.6127,32750779


## pd.DateOffset Objects

In [148]:
# Request GOOG price history from the start of 2017 from the 'iex' data source.
# The end date is pinned rather than taken from dt.datetime.now(), so that
# re-running the notebook covers the same period as the recorded outputs,
# which stop at 2018-10-05.
company = 'GOOG'
start = dt.date(2017,1,1)
end = dt.date(2018,10,5)

stocks = data.DataReader(name=company,data_source='iex',start=start,end=end)
stocks.head()

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-01-03,778.81,789.63,775.8,786.14,1657268
2017-01-04,788.36,791.34,783.16,786.9,1072958
2017-01-05,786.08,794.48,785.02,794.02,1335167
2017-01-06,795.26,807.9,792.2041,806.15,1640170
2017-01-09,806.4,809.9664,802.83,806.65,1274645


In [149]:
type(stocks.index)

pandas.core.indexes.base.Index

In [150]:
# Move the string dates out of the index into an ordinary 'date' column
stocks = stocks.reset_index()

In [153]:
# Parse the string dates in the 'date' column into datetime values
stocks['date'] = pd.to_datetime(stocks['date'])

In [154]:
# Promote the parsed 'date' column back to the index
stocks = stocks.set_index('date')
stocks.head()

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-01-03,778.81,789.63,775.8,786.14,1657268
2017-01-04,788.36,791.34,783.16,786.9,1072958
2017-01-05,786.08,794.48,785.02,794.02,1335167
2017-01-06,795.26,807.9,792.2041,806.15,1640170
2017-01-09,806.4,809.9664,802.83,806.65,1274645


In [155]:
stocks.index

DatetimeIndex(['2017-01-03', '2017-01-04', '2017-01-05', '2017-01-06',
               '2017-01-09', '2017-01-10', '2017-01-11', '2017-01-12',
               '2017-01-13', '2017-01-17',
               ...
               '2018-09-24', '2018-09-25', '2018-09-26', '2018-09-27',
               '2018-09-28', '2018-10-01', '2018-10-02', '2018-10-03',
               '2018-10-04', '2018-10-05'],
              dtype='datetime64[ns]', name='date', length=444, freq=None)

In [156]:
# add 5 days to every date
stocks.index + pd.DateOffset(days=5)

DatetimeIndex(['2017-01-08', '2017-01-09', '2017-01-10', '2017-01-11',
               '2017-01-14', '2017-01-15', '2017-01-16', '2017-01-17',
               '2017-01-18', '2017-01-22',
               ...
               '2018-09-29', '2018-09-30', '2018-10-01', '2018-10-02',
               '2018-10-03', '2018-10-06', '2018-10-07', '2018-10-08',
               '2018-10-09', '2018-10-10'],
              dtype='datetime64[ns]', name='date', length=444, freq=None)

In [157]:
# add 2 weeks to each date
stocks.index + pd.DateOffset(weeks=2)

DatetimeIndex(['2017-01-17', '2017-01-18', '2017-01-19', '2017-01-20',
               '2017-01-23', '2017-01-24', '2017-01-25', '2017-01-26',
               '2017-01-27', '2017-01-31',
               ...
               '2018-10-08', '2018-10-09', '2018-10-10', '2018-10-11',
               '2018-10-12', '2018-10-15', '2018-10-16', '2018-10-17',
               '2018-10-18', '2018-10-19'],
              dtype='datetime64[ns]', name='date', length=444, freq=None)

In [158]:
# subtract 5 days
stocks.index - pd.DateOffset(days=5)

DatetimeIndex(['2016-12-29', '2016-12-30', '2016-12-31', '2017-01-01',
               '2017-01-04', '2017-01-05', '2017-01-06', '2017-01-07',
               '2017-01-08', '2017-01-12',
               ...
               '2018-09-19', '2018-09-20', '2018-09-21', '2018-09-22',
               '2018-09-23', '2018-09-26', '2018-09-27', '2018-09-28',
               '2018-09-29', '2018-09-30'],
              dtype='datetime64[ns]', name='date', length=444, freq=None)

In [159]:
# add 1 year to each date
stocks.index + pd.DateOffset(years=1)

DatetimeIndex(['2018-01-03', '2018-01-04', '2018-01-05', '2018-01-06',
               '2018-01-09', '2018-01-10', '2018-01-11', '2018-01-12',
               '2018-01-13', '2018-01-17',
               ...
               '2019-09-24', '2019-09-25', '2019-09-26', '2019-09-27',
               '2019-09-28', '2019-10-01', '2019-10-02', '2019-10-03',
               '2019-10-04', '2019-10-05'],
              dtype='datetime64[ns]', name='date', length=444, freq=None)

In [160]:
# add 6 hours to each date
stocks.index + pd.DateOffset(hours=6)

DatetimeIndex(['2017-01-03 06:00:00', '2017-01-04 06:00:00',
               '2017-01-05 06:00:00', '2017-01-06 06:00:00',
               '2017-01-09 06:00:00', '2017-01-10 06:00:00',
               '2017-01-11 06:00:00', '2017-01-12 06:00:00',
               '2017-01-13 06:00:00', '2017-01-17 06:00:00',
               ...
               '2018-09-24 06:00:00', '2018-09-25 06:00:00',
               '2018-09-26 06:00:00', '2018-09-27 06:00:00',
               '2018-09-28 06:00:00', '2018-10-01 06:00:00',
               '2018-10-02 06:00:00', '2018-10-03 06:00:00',
               '2018-10-04 06:00:00', '2018-10-05 06:00:00'],
              dtype='datetime64[ns]', name='date', length=444, freq=None)

In [161]:
# a single DateOffset can combine units: add 5 days and 6 hours to each date
stocks.index + pd.DateOffset(days=5,hours=6)

DatetimeIndex(['2017-01-08 06:00:00', '2017-01-09 06:00:00',
               '2017-01-10 06:00:00', '2017-01-11 06:00:00',
               '2017-01-14 06:00:00', '2017-01-15 06:00:00',
               '2017-01-16 06:00:00', '2017-01-17 06:00:00',
               '2017-01-18 06:00:00', '2017-01-22 06:00:00',
               ...
               '2018-09-29 06:00:00', '2018-09-30 06:00:00',
               '2018-10-01 06:00:00', '2018-10-02 06:00:00',
               '2018-10-03 06:00:00', '2018-10-06 06:00:00',
               '2018-10-07 06:00:00', '2018-10-08 06:00:00',
               '2018-10-09 06:00:00', '2018-10-10 06:00:00'],
              dtype='datetime64[ns]', name='date', length=444, freq=None)

In [162]:
# add 5 hours, subtract a single day
stocks.index + pd.DateOffset(hours=5) - pd.DateOffset(days=1)

DatetimeIndex(['2017-01-02 05:00:00', '2017-01-03 05:00:00',
               '2017-01-04 05:00:00', '2017-01-05 05:00:00',
               '2017-01-08 05:00:00', '2017-01-09 05:00:00',
               '2017-01-10 05:00:00', '2017-01-11 05:00:00',
               '2017-01-12 05:00:00', '2017-01-16 05:00:00',
               ...
               '2018-09-23 05:00:00', '2018-09-24 05:00:00',
               '2018-09-25 05:00:00', '2018-09-26 05:00:00',
               '2018-09-27 05:00:00', '2018-09-30 05:00:00',
               '2018-10-01 05:00:00', '2018-10-02 05:00:00',
               '2018-10-03 05:00:00', '2018-10-04 05:00:00'],
              dtype='datetime64[ns]', name='date', length=444, freq=None)

In [163]:
stocks.head()

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-01-03,778.81,789.63,775.8,786.14,1657268
2017-01-04,788.36,791.34,783.16,786.9,1072958
2017-01-05,786.08,794.48,785.02,794.02,1335167
2017-01-06,795.26,807.9,792.2041,806.15,1640170
2017-01-09,806.4,809.9664,802.83,806.65,1274645


In [165]:
# find the next month end
stocks.index + pd.tseries.offsets.MonthEnd()

DatetimeIndex(['2017-01-31', '2017-01-31', '2017-01-31', '2017-01-31',
               '2017-01-31', '2017-01-31', '2017-01-31', '2017-01-31',
               '2017-01-31', '2017-01-31',
               ...
               '2018-09-30', '2018-09-30', '2018-09-30', '2018-09-30',
               '2018-09-30', '2018-10-31', '2018-10-31', '2018-10-31',
               '2018-10-31', '2018-10-31'],
              dtype='datetime64[ns]', name='date', length=444, freq=None)

In [166]:
from pandas.tseries.offsets import *

In [167]:
# roll each date back to the previous month start (not the "nearest" one);
# a date already on the 1st moves back a full month, e.g. 2018-10-01 -> 2018-09-01
stocks.index - MonthBegin()

DatetimeIndex(['2017-01-01', '2017-01-01', '2017-01-01', '2017-01-01',
               '2017-01-01', '2017-01-01', '2017-01-01', '2017-01-01',
               '2017-01-01', '2017-01-01',
               ...
               '2018-09-01', '2018-09-01', '2018-09-01', '2018-09-01',
               '2018-09-01', '2018-09-01', '2018-10-01', '2018-10-01',
               '2018-10-01', '2018-10-01'],
              dtype='datetime64[ns]', name='date', length=444, freq=None)

In [168]:
# roll each date forward to the next business month end (last weekday of the month);
# a date already on one, e.g. 2018-09-28, jumps to the following one (2018-10-31)
stocks.index + BMonthEnd()



DatetimeIndex(['2017-01-31', '2017-01-31', '2017-01-31', '2017-01-31',
               '2017-01-31', '2017-01-31', '2017-01-31', '2017-01-31',
               '2017-01-31', '2017-01-31',
               ...
               '2018-09-28', '2018-09-28', '2018-09-28', '2018-09-28',
               '2018-10-31', '2018-10-31', '2018-10-31', '2018-10-31',
               '2018-10-31', '2018-10-31'],
              dtype='datetime64[ns]', name='date', length=444, freq=None)

In [169]:
# roll each date forward to the next calendar quarter end (Mar 31, Jun 30, Sep 30 or Dec 31)
stocks.index + QuarterEnd()

DatetimeIndex(['2017-03-31', '2017-03-31', '2017-03-31', '2017-03-31',
               '2017-03-31', '2017-03-31', '2017-03-31', '2017-03-31',
               '2017-03-31', '2017-03-31',
               ...
               '2018-09-30', '2018-09-30', '2018-09-30', '2018-09-30',
               '2018-09-30', '2018-12-31', '2018-12-31', '2018-12-31',
               '2018-12-31', '2018-12-31'],
              dtype='datetime64[ns]', name='date', length=444, freq=None)

## The Timedelta Object

In [173]:
# two Timestamps parsed from 12-hour clock strings (PM -> 16:35:16, AM -> 02:16:53)
time_a = pd.Timestamp('2016-03-31 04:35:16 PM')
time_b = pd.Timestamp('2016-03-20 02:16:53 AM')

In [174]:
# subtracting one Timestamp from another yields a Timedelta (the elapsed time between them)
time_a - time_b

Timedelta('11 days 14:18:23')

In [175]:
# reversing the operands gives the negative duration; pandas displays it as
# "-12 days +09:41:37", which equals -(11 days 14:18:23)
time_b - time_a

Timedelta('-12 days +09:41:37')

In [176]:
# keyword order does not matter; the components are summed
# (8 weeks = 56 days, + 3 days = 59 days, plus 12 h 45 min)
pd.Timedelta(weeks=8, days=3, hours=12, minutes=45)

Timedelta('59 days 12:45:00')

In [177]:
# a Timedelta can also be built from a descriptive string
pd.Timedelta('5 minutes')

Timedelta('0 days 00:05:00')

In [178]:
# several units can be combined in a single string
pd.Timedelta('14 days 6 hours 5 minutes')

Timedelta('14 days 06:05:00')

## Timedeltas in a Dataset

In [180]:
# load the e-commerce orders keyed by ID, asking pandas to parse both date columns
df = pd.read_csv(
    'data/pandas/ecommerce.csv',
    index_col='ID',
    parse_dates=['order_date', 'delivery_date'],
)
df.head()

Unnamed: 0_level_0,order_date,delivery_date
ID,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1998-05-24,1999-02-05
2,1992-04-22,1998-03-06
4,1991-02-10,1992-08-26
5,1992-07-21,1997-11-20
7,1993-09-02,1998-06-10


In [183]:
# subtracting one datetime column from another gives a timedelta column:
# the time elapsed between placing the order and its delivery
df['delivery_time'] = df['delivery_date'] - df['order_date']
df.head()

Unnamed: 0_level_0,order_date,delivery_date,delivery_time
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1998-05-24,1999-02-05,257 days
2,1992-04-22,1998-03-06,2144 days
4,1991-02-10,1992-08-26,563 days
5,1992-07-21,1997-11-20,1948 days
7,1993-09-02,1998-06-10,1742 days


In [186]:
# Flag orders that took longer than a year to arrive. The threshold is an
# explicit Timedelta instead of the bare string "365 days", so the comparison
# does not depend on pandas coercing a string on the fly.
mask = df['delivery_time'] > pd.Timedelta(days=365)
# number of rows where the mask is True
len(df[mask])

394