## Review of Python's datetime Module

In [37]:
import pandas as pd
import datetime as dt # python built-in library. 
import matplotlib.pyplot as plt
import pandas_datareader as pdr
import fix_yahoo_finance as yf  

In [38]:
someday = dt.date(2018, 7, 9) # data store Year, Month and Day
someday.year # extract year component

2018

In [43]:
someday.month # extract month from someday

7

In [44]:
someday.day # extract day from someday

9

In [45]:
str(someday)

'2018-07-09'

In [46]:
dt.datetime(2018, 7, 9) # datatime need time, default time is midnight

datetime.datetime(2018, 7, 9, 0, 0)

In [47]:
sometime = dt.datetime(2018, 7, 9, 15, 30, 55) # Year, Month, Day, Hour, Minute, Second
sometime

datetime.datetime(2018, 7, 9, 15, 30, 55)

In [48]:
str(sometime)

'2018-07-09 15:30:55'

In [49]:
sometime.year
sometime.month
sometime.day
sometime.hour
sometime.minute
sometime.second

55

## The pandas Timestamp Object
* pandas version of date and datetime
* felxible to interpret

In [50]:
import pandas as pd

In [51]:
pd.Timestamp("2015-03-31")

Timestamp('2015-03-31 00:00:00')

In [52]:
pd.Timestamp("2015-mar-31")

Timestamp('2015-03-31 00:00:00')

In [53]:
pd.Timestamp("2015/3/31")

Timestamp('2015-03-31 00:00:00')

In [54]:
pd.Timestamp("1/1/2015")

Timestamp('2015-01-01 00:00:00')

In [55]:
pd.Timestamp("2015, 7, 9")

Timestamp('2015-07-09 00:00:00')

In [56]:
pd.Timestamp("July/9/2015")

Timestamp('2015-07-09 00:00:00')

In [57]:
pd.Timestamp("2015/3/31 08:35:30")

Timestamp('2015-03-31 08:35:30')

In [58]:
pd.Timestamp("2015/3/31 08:35:30 PM")

Timestamp('2015-03-31 20:35:30')

In [59]:
pd.Timestamp(someday)

Timestamp('2018-07-09 00:00:00')

In [60]:
pd.Timestamp(sometime)

Timestamp('2018-07-09 15:30:55')

## The Pandas DateTimeIndex Object

In [3]:
import pandas as pd
import datetime as dt

In [62]:
dates = ["2018/2/4","2018/3/4","2018/4/6"]
pd.DatetimeIndex(dates) # This is a container

DatetimeIndex(['2018-02-04', '2018-03-04', '2018-04-06'], dtype='datetime64[ns]', freq=None)

In [63]:
dates = [dt.date(2016, 1, 10), dt.date(1999, 4, 3), dt.date(1995, 12,11)]

In [64]:
type(dates)

list

In [65]:
dtIndex = pd.DatetimeIndex(dates)
dtIndex

DatetimeIndex(['2016-01-10', '1999-04-03', '1995-12-11'], dtype='datetime64[ns]', freq=None)

In [66]:
values = [100,200,300]
pd.Series(data = values, index = dtIndex)  # DateTimeIndex serve as Index

2016-01-10    100
1999-04-03    200
1995-12-11    300
dtype: int64

## The pd.to_datetime() Method

In [67]:
pd.to_datetime("2010-1-24")

Timestamp('2010-01-24 00:00:00')

In [68]:
pd.to_datetime(dt.date(2015, 1 ,1 ))

Timestamp('2015-01-01 00:00:00')

In [69]:
dt.date(2015 ,1, 1)

datetime.date(2015, 1, 1)

In [70]:
pd.to_datetime(dt.datetime(2015,1 ,1 ,14, 24, 30 ))

Timestamp('2015-01-01 14:24:30')

In [71]:
pd.Timestamp(dt.datetime(2015,1 ,1 ,14, 24, 30 ))

Timestamp('2015-01-01 14:24:30')

In [72]:
pd.to_datetime(["2016/2/3", "2011-12-11", "2017", "2010/3"])  # pandas can normalize the look

DatetimeIndex(['2016-02-03', '2011-12-11', '2017-01-01', '2010-03-01'], dtype='datetime64[ns]', freq=None)

In [73]:
times = pd.Series(["2016/2/3", "2011-12-11", "2017", "2010/3"])
times

0      2016/2/3
1    2011-12-11
2          2017
3        2010/3
dtype: object

In [74]:
pd.to_datetime(times)

0   2016-02-03
1   2011-12-11
2   2017-01-01
3   2010-03-01
dtype: datetime64[ns]

In [75]:
dates = pd.Series(["july 4th 1996", "10/04/2001", "Hello", "2015/2/30"])
dates

0    july 4th 1996
1       10/04/2001
2            Hello
3        2015/2/30
dtype: object

In [76]:
# pd.to_datetime(dates) 
# We will see an error message. 
# It cannot convert "Hello" to date.ValueError: ('Unknown string format:', 'Hello')


In [77]:
pd.to_datetime(dates, errors = "coerce") # "coerce" converts those that can convert to dates and put NAT to others

0   1996-07-04
1   2001-10-04
2          NaT
3          NaT
dtype: datetime64[ns]

In [78]:
pd.to_datetime([1536276710, 1493651475], unit = "s") #This is the Unix time. "s" stands for second.

DatetimeIndex(['2018-09-06 23:31:50', '2017-05-01 15:11:15'], dtype='datetime64[ns]', freq=None)

## Create Range of Dates with the pd.date_range() Method, Part1
* start and end are inclusive

In [79]:
times = pd.date_range(start = "2016/01/01",end = "2016/1/10", freq = "1D") 
# At least two of those - start, end, and period -  parameters are required
# freq parameter is frequency

In [80]:
type(times)

pandas.core.indexes.datetimes.DatetimeIndex

In [81]:
times[0]

Timestamp('2016-01-01 00:00:00', freq='D')

In [82]:
times = pd.date_range(start = "2016/01/01",end = "2016/1/10", freq = "2D")
times

DatetimeIndex(['2016-01-01', '2016-01-03', '2016-01-05', '2016-01-07',
               '2016-01-09'],
              dtype='datetime64[ns]', freq='2D')

In [83]:
times = pd.date_range(start = "2016/01/01",end = "2016/1/10", freq = "B") # "B" is business days
times

DatetimeIndex(['2016-01-01', '2016-01-04', '2016-01-05', '2016-01-06',
               '2016-01-07', '2016-01-08'],
              dtype='datetime64[ns]', freq='B')

In [84]:
times = pd.date_range(start = "2018/09/01",end = "2018/9/30", freq = "B") # "B" is business days
times

DatetimeIndex(['2018-09-03', '2018-09-04', '2018-09-05', '2018-09-06',
               '2018-09-07', '2018-09-10', '2018-09-11', '2018-09-12',
               '2018-09-13', '2018-09-14', '2018-09-17', '2018-09-18',
               '2018-09-19', '2018-09-20', '2018-09-21', '2018-09-24',
               '2018-09-25', '2018-09-26', '2018-09-27', '2018-09-28'],
              dtype='datetime64[ns]', freq='B')

In [85]:
times = pd.date_range(start = "2018/09/01",end = "2018/9/30", freq = "W") # "W" means week
times # default begins the week at Sunday

DatetimeIndex(['2018-09-02', '2018-09-09', '2018-09-16', '2018-09-23',
               '2018-09-30'],
              dtype='datetime64[ns]', freq='W-SUN')

In [86]:
times = pd.date_range(start = "2018/09/01",end = "2018/9/30", freq = "W-MON") # "W" means week
times # begins the week at Monday

DatetimeIndex(['2018-09-03', '2018-09-10', '2018-09-17', '2018-09-24'], dtype='datetime64[ns]', freq='W-MON')

In [87]:
times = pd.date_range(start = "2018/09/01",end = "2018/9/30", freq = "H") # "H" means hours
times 

DatetimeIndex(['2018-09-01 00:00:00', '2018-09-01 01:00:00',
               '2018-09-01 02:00:00', '2018-09-01 03:00:00',
               '2018-09-01 04:00:00', '2018-09-01 05:00:00',
               '2018-09-01 06:00:00', '2018-09-01 07:00:00',
               '2018-09-01 08:00:00', '2018-09-01 09:00:00',
               ...
               '2018-09-29 15:00:00', '2018-09-29 16:00:00',
               '2018-09-29 17:00:00', '2018-09-29 18:00:00',
               '2018-09-29 19:00:00', '2018-09-29 20:00:00',
               '2018-09-29 21:00:00', '2018-09-29 22:00:00',
               '2018-09-29 23:00:00', '2018-09-30 00:00:00'],
              dtype='datetime64[ns]', length=697, freq='H')

In [88]:
times = pd.date_range(start = "2018/09/01",end = "2018/9/30", freq = "6H") # "H" means hours
times 

DatetimeIndex(['2018-09-01 00:00:00', '2018-09-01 06:00:00',
               '2018-09-01 12:00:00', '2018-09-01 18:00:00',
               '2018-09-02 00:00:00', '2018-09-02 06:00:00',
               '2018-09-02 12:00:00', '2018-09-02 18:00:00',
               '2018-09-03 00:00:00', '2018-09-03 06:00:00',
               ...
               '2018-09-27 18:00:00', '2018-09-28 00:00:00',
               '2018-09-28 06:00:00', '2018-09-28 12:00:00',
               '2018-09-28 18:00:00', '2018-09-29 00:00:00',
               '2018-09-29 06:00:00', '2018-09-29 12:00:00',
               '2018-09-29 18:00:00', '2018-09-30 00:00:00'],
              dtype='datetime64[ns]', length=117, freq='6H')

In [89]:
times = pd.date_range(start = "2018/01/01",end = "2018/12/31", freq = "M") # "M" means end of month
times 

DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31', '2018-04-30',
               '2018-05-31', '2018-06-30', '2018-07-31', '2018-08-31',
               '2018-09-30', '2018-10-31', '2018-11-30', '2018-12-31'],
              dtype='datetime64[ns]', freq='M')

In [90]:
times = pd.date_range(start = "2018/01/01",end = "2018/12/31", freq = "BM") # "BM" means end of month's business day
times 

DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-30', '2018-04-30',
               '2018-05-31', '2018-06-29', '2018-07-31', '2018-08-31',
               '2018-09-28', '2018-10-31', '2018-11-30', '2018-12-31'],
              dtype='datetime64[ns]', freq='BM')

In [4]:
times = pd.date_range(start = "2018/01/01",end = "2018/12/31", freq = "MS") # "MS" stands for Month Start 
times

DatetimeIndex(['2018-01-01', '2018-02-01', '2018-03-01', '2018-04-01',
               '2018-05-01', '2018-06-01', '2018-07-01', '2018-08-01',
               '2018-09-01', '2018-10-01', '2018-11-01', '2018-12-01'],
              dtype='datetime64[ns]', freq='MS')

In [8]:
times = pd.date_range(start = "2018/01/01",end = "2025/12/31", freq = "A") # "A" stands for Year End. last day of the year 
times

DatetimeIndex(['2018-12-31', '2019-12-31', '2020-12-31', '2021-12-31',
               '2022-12-31', '2023-12-31', '2024-12-31', '2025-12-31'],
              dtype='datetime64[ns]', freq='A-DEC')

## Create Range of Dates with the pd.date_range() Method, Part2

In [12]:
pd.date_range(start = "2012/1/1", periods = 25, freq = "D")

DatetimeIndex(['2012-01-01', '2012-01-02', '2012-01-03', '2012-01-04',
               '2012-01-05', '2012-01-06', '2012-01-07', '2012-01-08',
               '2012-01-09', '2012-01-10', '2012-01-11', '2012-01-12',
               '2012-01-13', '2012-01-14', '2012-01-15', '2012-01-16',
               '2012-01-17', '2012-01-18', '2012-01-19', '2012-01-20',
               '2012-01-21', '2012-01-22', '2012-01-23', '2012-01-24',
               '2012-01-25'],
              dtype='datetime64[ns]', freq='D')

In [13]:
len(pd.date_range(start = "2012/1/1", periods = 25, freq = "D"))

25

In [15]:
pd.date_range(start = "2012/1/1", periods = 50, freq = "D") # Days

DatetimeIndex(['2012-01-01', '2012-01-02', '2012-01-03', '2012-01-04',
               '2012-01-05', '2012-01-06', '2012-01-07', '2012-01-08',
               '2012-01-09', '2012-01-10', '2012-01-11', '2012-01-12',
               '2012-01-13', '2012-01-14', '2012-01-15', '2012-01-16',
               '2012-01-17', '2012-01-18', '2012-01-19', '2012-01-20',
               '2012-01-21', '2012-01-22', '2012-01-23', '2012-01-24',
               '2012-01-25', '2012-01-26', '2012-01-27', '2012-01-28',
               '2012-01-29', '2012-01-30', '2012-01-31', '2012-02-01',
               '2012-02-02', '2012-02-03', '2012-02-04', '2012-02-05',
               '2012-02-06', '2012-02-07', '2012-02-08', '2012-02-09',
               '2012-02-10', '2012-02-11', '2012-02-12', '2012-02-13',
               '2012-02-14', '2012-02-15', '2012-02-16', '2012-02-17',
               '2012-02-18', '2012-02-19'],
              dtype='datetime64[ns]', freq='D')

In [16]:
pd.date_range(start = "2012/1/1", periods = 50, freq = "B")  # Business Days, skipping weekend

DatetimeIndex(['2012-01-02', '2012-01-03', '2012-01-04', '2012-01-05',
               '2012-01-06', '2012-01-09', '2012-01-10', '2012-01-11',
               '2012-01-12', '2012-01-13', '2012-01-16', '2012-01-17',
               '2012-01-18', '2012-01-19', '2012-01-20', '2012-01-23',
               '2012-01-24', '2012-01-25', '2012-01-26', '2012-01-27',
               '2012-01-30', '2012-01-31', '2012-02-01', '2012-02-02',
               '2012-02-03', '2012-02-06', '2012-02-07', '2012-02-08',
               '2012-02-09', '2012-02-10', '2012-02-13', '2012-02-14',
               '2012-02-15', '2012-02-16', '2012-02-17', '2012-02-20',
               '2012-02-21', '2012-02-22', '2012-02-23', '2012-02-24',
               '2012-02-27', '2012-02-28', '2012-02-29', '2012-03-01',
               '2012-03-02', '2012-03-05', '2012-03-06', '2012-03-07',
               '2012-03-08', '2012-03-09'],
              dtype='datetime64[ns]', freq='B')

In [18]:
pd.date_range(start = "2018/1/1", periods = 50, freq = "W")  # weeks start from Sunday

DatetimeIndex(['2018-01-07', '2018-01-14', '2018-01-21', '2018-01-28',
               '2018-02-04', '2018-02-11', '2018-02-18', '2018-02-25',
               '2018-03-04', '2018-03-11', '2018-03-18', '2018-03-25',
               '2018-04-01', '2018-04-08', '2018-04-15', '2018-04-22',
               '2018-04-29', '2018-05-06', '2018-05-13', '2018-05-20',
               '2018-05-27', '2018-06-03', '2018-06-10', '2018-06-17',
               '2018-06-24', '2018-07-01', '2018-07-08', '2018-07-15',
               '2018-07-22', '2018-07-29', '2018-08-05', '2018-08-12',
               '2018-08-19', '2018-08-26', '2018-09-02', '2018-09-09',
               '2018-09-16', '2018-09-23', '2018-09-30', '2018-10-07',
               '2018-10-14', '2018-10-21', '2018-10-28', '2018-11-04',
               '2018-11-11', '2018-11-18', '2018-11-25', '2018-12-02',
               '2018-12-09', '2018-12-16'],
              dtype='datetime64[ns]', freq='W-SUN')

In [19]:
pd.date_range(start = "2018/1/1", periods = 50, freq = "W-SUN")  # weeks start from Sunday

DatetimeIndex(['2018-01-07', '2018-01-14', '2018-01-21', '2018-01-28',
               '2018-02-04', '2018-02-11', '2018-02-18', '2018-02-25',
               '2018-03-04', '2018-03-11', '2018-03-18', '2018-03-25',
               '2018-04-01', '2018-04-08', '2018-04-15', '2018-04-22',
               '2018-04-29', '2018-05-06', '2018-05-13', '2018-05-20',
               '2018-05-27', '2018-06-03', '2018-06-10', '2018-06-17',
               '2018-06-24', '2018-07-01', '2018-07-08', '2018-07-15',
               '2018-07-22', '2018-07-29', '2018-08-05', '2018-08-12',
               '2018-08-19', '2018-08-26', '2018-09-02', '2018-09-09',
               '2018-09-16', '2018-09-23', '2018-09-30', '2018-10-07',
               '2018-10-14', '2018-10-21', '2018-10-28', '2018-11-04',
               '2018-11-11', '2018-11-18', '2018-11-25', '2018-12-02',
               '2018-12-09', '2018-12-16'],
              dtype='datetime64[ns]', freq='W-SUN')

In [20]:
pd.date_range(start = "2018/1/1", periods = 50, freq = "W-MON")  # weeks start from Sunday

DatetimeIndex(['2018-01-01', '2018-01-08', '2018-01-15', '2018-01-22',
               '2018-01-29', '2018-02-05', '2018-02-12', '2018-02-19',
               '2018-02-26', '2018-03-05', '2018-03-12', '2018-03-19',
               '2018-03-26', '2018-04-02', '2018-04-09', '2018-04-16',
               '2018-04-23', '2018-04-30', '2018-05-07', '2018-05-14',
               '2018-05-21', '2018-05-28', '2018-06-04', '2018-06-11',
               '2018-06-18', '2018-06-25', '2018-07-02', '2018-07-09',
               '2018-07-16', '2018-07-23', '2018-07-30', '2018-08-06',
               '2018-08-13', '2018-08-20', '2018-08-27', '2018-09-03',
               '2018-09-10', '2018-09-17', '2018-09-24', '2018-10-01',
               '2018-10-08', '2018-10-15', '2018-10-22', '2018-10-29',
               '2018-11-05', '2018-11-12', '2018-11-19', '2018-11-26',
               '2018-12-03', '2018-12-10'],
              dtype='datetime64[ns]', freq='W-MON')

In [21]:
pd.date_range(start = "2018/1/1", periods = 50, freq = "M")  # months

DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31', '2018-04-30',
               '2018-05-31', '2018-06-30', '2018-07-31', '2018-08-31',
               '2018-09-30', '2018-10-31', '2018-11-30', '2018-12-31',
               '2019-01-31', '2019-02-28', '2019-03-31', '2019-04-30',
               '2019-05-31', '2019-06-30', '2019-07-31', '2019-08-31',
               '2019-09-30', '2019-10-31', '2019-11-30', '2019-12-31',
               '2020-01-31', '2020-02-29', '2020-03-31', '2020-04-30',
               '2020-05-31', '2020-06-30', '2020-07-31', '2020-08-31',
               '2020-09-30', '2020-10-31', '2020-11-30', '2020-12-31',
               '2021-01-31', '2021-02-28', '2021-03-31', '2021-04-30',
               '2021-05-31', '2021-06-30', '2021-07-31', '2021-08-31',
               '2021-09-30', '2021-10-31', '2021-11-30', '2021-12-31',
               '2022-01-31', '2022-02-28'],
              dtype='datetime64[ns]', freq='M')

In [22]:
pd.date_range(start = "2018/1/1", periods = 50, freq = "A")  # years

DatetimeIndex(['2018-12-31', '2019-12-31', '2020-12-31', '2021-12-31',
               '2022-12-31', '2023-12-31', '2024-12-31', '2025-12-31',
               '2026-12-31', '2027-12-31', '2028-12-31', '2029-12-31',
               '2030-12-31', '2031-12-31', '2032-12-31', '2033-12-31',
               '2034-12-31', '2035-12-31', '2036-12-31', '2037-12-31',
               '2038-12-31', '2039-12-31', '2040-12-31', '2041-12-31',
               '2042-12-31', '2043-12-31', '2044-12-31', '2045-12-31',
               '2046-12-31', '2047-12-31', '2048-12-31', '2049-12-31',
               '2050-12-31', '2051-12-31', '2052-12-31', '2053-12-31',
               '2054-12-31', '2055-12-31', '2056-12-31', '2057-12-31',
               '2058-12-31', '2059-12-31', '2060-12-31', '2061-12-31',
               '2062-12-31', '2063-12-31', '2064-12-31', '2065-12-31',
               '2066-12-31', '2067-12-31'],
              dtype='datetime64[ns]', freq='A-DEC')

In [23]:
pd.date_range(start = "2018/1/1", periods = 50, freq = "H")  # hours

DatetimeIndex(['2018-01-01 00:00:00', '2018-01-01 01:00:00',
               '2018-01-01 02:00:00', '2018-01-01 03:00:00',
               '2018-01-01 04:00:00', '2018-01-01 05:00:00',
               '2018-01-01 06:00:00', '2018-01-01 07:00:00',
               '2018-01-01 08:00:00', '2018-01-01 09:00:00',
               '2018-01-01 10:00:00', '2018-01-01 11:00:00',
               '2018-01-01 12:00:00', '2018-01-01 13:00:00',
               '2018-01-01 14:00:00', '2018-01-01 15:00:00',
               '2018-01-01 16:00:00', '2018-01-01 17:00:00',
               '2018-01-01 18:00:00', '2018-01-01 19:00:00',
               '2018-01-01 20:00:00', '2018-01-01 21:00:00',
               '2018-01-01 22:00:00', '2018-01-01 23:00:00',
               '2018-01-02 00:00:00', '2018-01-02 01:00:00',
               '2018-01-02 02:00:00', '2018-01-02 03:00:00',
               '2018-01-02 04:00:00', '2018-01-02 05:00:00',
               '2018-01-02 06:00:00', '2018-01-02 07:00:00',
               '2018-01-

In [24]:
pd.date_range(start = "2018/1/1", periods = 50, freq = "6H")  # 6hours increments

DatetimeIndex(['2018-01-01 00:00:00', '2018-01-01 06:00:00',
               '2018-01-01 12:00:00', '2018-01-01 18:00:00',
               '2018-01-02 00:00:00', '2018-01-02 06:00:00',
               '2018-01-02 12:00:00', '2018-01-02 18:00:00',
               '2018-01-03 00:00:00', '2018-01-03 06:00:00',
               '2018-01-03 12:00:00', '2018-01-03 18:00:00',
               '2018-01-04 00:00:00', '2018-01-04 06:00:00',
               '2018-01-04 12:00:00', '2018-01-04 18:00:00',
               '2018-01-05 00:00:00', '2018-01-05 06:00:00',
               '2018-01-05 12:00:00', '2018-01-05 18:00:00',
               '2018-01-06 00:00:00', '2018-01-06 06:00:00',
               '2018-01-06 12:00:00', '2018-01-06 18:00:00',
               '2018-01-07 00:00:00', '2018-01-07 06:00:00',
               '2018-01-07 12:00:00', '2018-01-07 18:00:00',
               '2018-01-08 00:00:00', '2018-01-08 06:00:00',
               '2018-01-08 12:00:00', '2018-01-08 18:00:00',
               '2018-01-

In [30]:
pd.date_range(start = "2018/1/1 12:30:00", periods = 9, freq = "20min")  # 20 minutes increments

DatetimeIndex(['2018-01-01 12:30:00', '2018-01-01 12:50:00',
               '2018-01-01 13:10:00', '2018-01-01 13:30:00',
               '2018-01-01 13:50:00', '2018-01-01 14:10:00',
               '2018-01-01 14:30:00', '2018-01-01 14:50:00',
               '2018-01-01 15:10:00'],
              dtype='datetime64[ns]', freq='20T')

## Create Range of Dates with the pd.date_range()  Method, Part3

In [32]:
pd.date_range(end = "1999/12/31", periods = 20, freq = "D")

DatetimeIndex(['1999-12-12', '1999-12-13', '1999-12-14', '1999-12-15',
               '1999-12-16', '1999-12-17', '1999-12-18', '1999-12-19',
               '1999-12-20', '1999-12-21', '1999-12-22', '1999-12-23',
               '1999-12-24', '1999-12-25', '1999-12-26', '1999-12-27',
               '1999-12-28', '1999-12-29', '1999-12-30', '1999-12-31'],
              dtype='datetime64[ns]', freq='D')

In [33]:
pd.date_range(end = "1999/12/31", periods = 20, freq = "B")

DatetimeIndex(['1999-12-06', '1999-12-07', '1999-12-08', '1999-12-09',
               '1999-12-10', '1999-12-13', '1999-12-14', '1999-12-15',
               '1999-12-16', '1999-12-17', '1999-12-20', '1999-12-21',
               '1999-12-22', '1999-12-23', '1999-12-24', '1999-12-27',
               '1999-12-28', '1999-12-29', '1999-12-30', '1999-12-31'],
              dtype='datetime64[ns]', freq='B')

In [38]:
pd.date_range(end = "1999/12/31", periods = 20, freq = "W-SUN") # 20 Sundays before the end date

DatetimeIndex(['1999-08-15', '1999-08-22', '1999-08-29', '1999-09-05',
               '1999-09-12', '1999-09-19', '1999-09-26', '1999-10-03',
               '1999-10-10', '1999-10-17', '1999-10-24', '1999-10-31',
               '1999-11-07', '1999-11-14', '1999-11-21', '1999-11-28',
               '1999-12-05', '1999-12-12', '1999-12-19', '1999-12-26'],
              dtype='datetime64[ns]', freq='W-SUN')

In [39]:
pd.date_range(end = "1999/12/31", periods = 20, freq = "M") # 20 Month end before the end date

DatetimeIndex(['1998-05-31', '1998-06-30', '1998-07-31', '1998-08-31',
               '1998-09-30', '1998-10-31', '1998-11-30', '1998-12-31',
               '1999-01-31', '1999-02-28', '1999-03-31', '1999-04-30',
               '1999-05-31', '1999-06-30', '1999-07-31', '1999-08-31',
               '1999-09-30', '1999-10-31', '1999-11-30', '1999-12-31'],
              dtype='datetime64[ns]', freq='M')

In [44]:
pd.date_range(end = "1999/12/31", periods = 20, freq = "3H") # 20 Month end before the end date

DatetimeIndex(['1999-12-28 15:00:00', '1999-12-28 18:00:00',
               '1999-12-28 21:00:00', '1999-12-29 00:00:00',
               '1999-12-29 03:00:00', '1999-12-29 06:00:00',
               '1999-12-29 09:00:00', '1999-12-29 12:00:00',
               '1999-12-29 15:00:00', '1999-12-29 18:00:00',
               '1999-12-29 21:00:00', '1999-12-30 00:00:00',
               '1999-12-30 03:00:00', '1999-12-30 06:00:00',
               '1999-12-30 09:00:00', '1999-12-30 12:00:00',
               '1999-12-30 15:00:00', '1999-12-30 18:00:00',
               '1999-12-30 21:00:00', '1999-12-31 00:00:00'],
              dtype='datetime64[ns]', freq='3H')

## The .dt Accessor

In [4]:
import pandas as pd
bdates = pd.date_range(start = "2000-01-01", end = "2010-12-31", freq = "24D")
bdates

DatetimeIndex(['2000-01-01', '2000-01-25', '2000-02-18', '2000-03-13',
               '2000-04-06', '2000-04-30', '2000-05-24', '2000-06-17',
               '2000-07-11', '2000-08-04',
               ...
               '2010-05-20', '2010-06-13', '2010-07-07', '2010-07-31',
               '2010-08-24', '2010-09-17', '2010-10-11', '2010-11-04',
               '2010-11-28', '2010-12-22'],
              dtype='datetime64[ns]', length=168, freq='24D')

In [5]:
s = pd.Series(bdates)
s.head()

0   2000-01-01
1   2000-01-25
2   2000-02-18
3   2000-03-13
4   2000-04-06
dtype: datetime64[ns]

In [6]:
s.dt.day.head() #create new series with days

0     1
1    25
2    18
3    13
4     6
dtype: int64

In [7]:
s.dt.month.head() #create new series with months

0    1
1    1
2    2
3    3
4    4
dtype: int64

In [8]:
s.dt.weekday_name.head(10) #create new series with months

0     Saturday
1      Tuesday
2       Friday
3       Monday
4     Thursday
5       Sunday
6    Wednesday
7     Saturday
8      Tuesday
9       Friday
dtype: object

In [12]:
mask = s.dt.is_quarter_start
mask.head()

0     True
1    False
2    False
3    False
4    False
dtype: bool

In [13]:
s[mask]

0     2000-01-01
19    2001-04-01
38    2002-07-01
137   2009-01-01
dtype: datetime64[ns]

In [15]:
mask2 = s.dt.is_month_start
s[mask2]

0     2000-01-01
19    2001-04-01
38    2002-07-01
104   2006-11-01
109   2007-03-01
137   2009-01-01
142   2009-05-01
dtype: datetime64[ns]

In [16]:
mask3 = s.dt.is_month_end
s[mask3]

5     2000-04-30
57    2003-09-30
71    2004-08-31
90    2005-11-30
123   2008-01-31
161   2010-07-31
dtype: datetime64[ns]

## Import Financial Data Set with pandas_datareader Library

In [1]:
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
import pandas_datareader as pdr
import fix_yahoo_finance as yf  

In [2]:
#data = yf.download('AMZN','2018-01-01','2018-09-26')
#data.Close.plot()
#plt.show()

In [3]:
company = "AMZN"
start = "2018-01-01"
end = "2018-09-27"

# download dataframe
stocks = pdr.get_data_yahoo(company, start = start, end = end)

# download Panel
# data = pdr.get_data_yahoo(["SPY", "IWM"], start = start, end = end)

stocks.head(10)

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-01-02,1190.0,1170.51001,1172.0,1189.01001,2694500,1189.01001
2018-01-03,1205.48999,1188.300049,1188.300049,1204.199951,3108800,1204.199951
2018-01-04,1215.869995,1204.660034,1205.0,1209.589966,3022100,1209.589966
2018-01-05,1229.140015,1210.0,1217.51001,1229.140015,3544700,1229.140015
2018-01-08,1253.079956,1232.030029,1236.0,1246.869995,4279500,1246.869995
2018-01-09,1259.329956,1241.76001,1256.900024,1252.699951,3661300,1252.699951
2018-01-10,1254.329956,1237.22998,1245.150024,1254.329956,2686000,1254.329956
2018-01-11,1276.77002,1256.459961,1259.73999,1276.680054,3125000,1276.680054
2018-01-12,1305.76001,1273.390015,1273.390015,1305.199951,5443700,1305.199951
2018-01-16,1339.939941,1292.300049,1323.0,1304.859985,7220700,1304.859985


In [4]:
stocks.values

array([[1.19000000e+03, 1.17051001e+03, 1.17200000e+03, 1.18901001e+03,
        2.69450000e+06, 1.18901001e+03],
       [1.20548999e+03, 1.18830005e+03, 1.18830005e+03, 1.20419995e+03,
        3.10880000e+06, 1.20419995e+03],
       [1.21587000e+03, 1.20466003e+03, 1.20500000e+03, 1.20958997e+03,
        3.02210000e+06, 1.20958997e+03],
       ...,
       [1.93688000e+03, 1.86500000e+03, 1.90379004e+03, 1.93435999e+03,
        4.21370000e+06, 1.93435999e+03],
       [1.97591003e+03, 1.93884998e+03, 1.94290002e+03, 1.97455005e+03,
        4.53840000e+06, 1.97455005e+03],
       [1.99525000e+03, 1.96152002e+03, 1.96850000e+03, 1.97484998e+03,
        4.26450000e+06, 1.97484998e+03]])

In [5]:
stocks.columns

Index(['High', 'Low', 'Open', 'Close', 'Volume', 'Adj Close'], dtype='object')

In [6]:
stocks.index

DatetimeIndex(['2018-01-02', '2018-01-03', '2018-01-04', '2018-01-05',
               '2018-01-08', '2018-01-09', '2018-01-10', '2018-01-11',
               '2018-01-12', '2018-01-16',
               ...
               '2018-09-13', '2018-09-14', '2018-09-17', '2018-09-18',
               '2018-09-19', '2018-09-20', '2018-09-21', '2018-09-24',
               '2018-09-25', '2018-09-26'],
              dtype='datetime64[ns]', name='Date', length=186, freq=None)

In [7]:
stocks.axes

[DatetimeIndex(['2018-01-02', '2018-01-03', '2018-01-04', '2018-01-05',
                '2018-01-08', '2018-01-09', '2018-01-10', '2018-01-11',
                '2018-01-12', '2018-01-16',
                ...
                '2018-09-13', '2018-09-14', '2018-09-17', '2018-09-18',
                '2018-09-19', '2018-09-20', '2018-09-21', '2018-09-24',
                '2018-09-25', '2018-09-26'],
               dtype='datetime64[ns]', name='Date', length=186, freq=None),
 Index(['High', 'Low', 'Open', 'Close', 'Volume', 'Adj Close'], dtype='object')]

## Selecting from a DataFrame with a DateimeIndex

In [112]:
company = "AMZN"
start = "2010-01-01"
end = "2018-09-27"
stocks =  pdr.get_data_yahoo(company, start = start, end = dt.date.today())
stocks.head()

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,136.610001,133.139999,136.25,133.899994,7599900,133.899994
2010-01-05,135.479996,131.809998,133.429993,134.690002,8851900,134.690002
2010-01-06,134.729996,131.649994,134.600006,132.25,7178800,132.25
2010-01-07,132.320007,128.800003,132.009995,130.0,11030200,130.0
2010-01-08,133.679993,129.029999,130.559998,133.520004,9830500,133.520004


In [113]:
dt.date.today()

datetime.date(2018, 9, 27)

In [114]:
stocks.loc["2018-09-04"]

High         2.050500e+03
Low          2.013000e+03
Open         2.026500e+03
Close        2.039510e+03
Volume       5.721100e+06
Adj Close    2.039510e+03
Name: 2018-09-04 00:00:00, dtype: float64

In [115]:
stocks.iloc[100]

High         1.268500e+02
Low          1.206000e+02
Open         1.249800e+02
Close        1.267000e+02
Volume       4.747700e+06
Adj Close    1.267000e+02
Name: 2010-05-27 00:00:00, dtype: float64

In [116]:
stocks.loc["2018-01-01":"2018-01-31"] # extract a apecific date range

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-01-02,1190.0,1170.51001,1172.0,1189.01001,2694500,1189.01001
2018-01-03,1205.48999,1188.300049,1188.300049,1204.199951,3108800,1204.199951
2018-01-04,1215.869995,1204.660034,1205.0,1209.589966,3022100,1209.589966
2018-01-05,1229.140015,1210.0,1217.51001,1229.140015,3544700,1229.140015
2018-01-08,1253.079956,1232.030029,1236.0,1246.869995,4279500,1246.869995
2018-01-09,1259.329956,1241.76001,1256.900024,1252.699951,3661300,1252.699951
2018-01-10,1254.329956,1237.22998,1245.150024,1254.329956,2686000,1254.329956
2018-01-11,1276.77002,1256.459961,1259.73999,1276.680054,3125000,1276.680054
2018-01-12,1305.76001,1273.390015,1273.390015,1305.199951,5443700,1305.199951
2018-01-16,1339.939941,1292.300049,1323.0,1304.859985,7220700,1304.859985


In [117]:
birthdays = pd.date_range(start = "2010-03-13", end = end, freq = pd.DateOffset(years = 1))

In [118]:
mask = stocks.index.isin(birthdays) #use isin method to extrat specific dates
stocks[mask]

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2012-03-13,184.869995,180.770004,183.919998,184.589996,4405800,184.589996
2013-03-13,276.5,272.640015,275.23999,275.100006,1884200,275.100006
2014-03-13,383.109985,368.079987,376.619995,371.51001,6829000,371.51001
2015-03-13,373.98999,366.679993,371.519989,370.579987,2611700,370.579987
2017-03-13,855.690002,851.710022,851.77002,854.590027,1909700,854.590027
2018-03-13,1617.540039,1578.01001,1615.959961,1588.180054,6531900,1588.180054


## Timestamp Object Agtributes

In [4]:
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
import pandas_datareader as pdr
import fix_yahoo_finance as yf  

In [132]:
company = "AMZN"
start = "2010-01-01"
end = "2018-09-27"
company = "AMZN"
start = "2010-01-01"
end = "2018-09-27"
stocks =  pdr.get_data_yahoo(company, start = start, end = dt.date.today())
stocks.head()

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,136.610001,133.139999,136.25,133.899994,7599900,133.899994
2010-01-05,135.479996,131.809998,133.429993,134.690002,8851900,134.690002
2010-01-06,134.729996,131.649994,134.600006,132.25,7178800,132.25
2010-01-07,132.320007,128.800003,132.009995,130.0,11030200,130.0
2010-01-08,133.679993,129.029999,130.559998,133.520004,9830500,133.520004


In [133]:
someday = stocks.index[100]
someday

Timestamp('2010-05-27 00:00:00')

In [134]:
someday.year

2010

In [135]:
someday.month

5

In [136]:
someday.day_name()

'Thursday'

In [137]:
someday.is_month_end

False

In [138]:
someday.is_month_start

False

In [139]:
stocks.insert(0, "Day of Week", stocks.index.day_name())  
# Add a new series by .insert method
# first argument "0" is the position in the columns
# second argument is the name of the new series

In [140]:
stocks.head()

Unnamed: 0_level_0,Day of Week,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2010-01-04,Monday,136.610001,133.139999,136.25,133.899994,7599900,133.899994
2010-01-05,Tuesday,135.479996,131.809998,133.429993,134.690002,8851900,134.690002
2010-01-06,Wednesday,134.729996,131.649994,134.600006,132.25,7178800,132.25
2010-01-07,Thursday,132.320007,128.800003,132.009995,130.0,11030200,130.0
2010-01-08,Friday,133.679993,129.029999,130.559998,133.520004,9830500,133.520004


In [141]:
stocks.insert(1, "Is start of Month", stocks.index.is_month_start)
stocks.head(20)

Unnamed: 0_level_0,Day of Week,Is start of Month,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2010-01-04,Monday,False,136.610001,133.139999,136.25,133.899994,7599900,133.899994
2010-01-05,Tuesday,False,135.479996,131.809998,133.429993,134.690002,8851900,134.690002
2010-01-06,Wednesday,False,134.729996,131.649994,134.600006,132.25,7178800,132.25
2010-01-07,Thursday,False,132.320007,128.800003,132.009995,130.0,11030200,130.0
2010-01-08,Friday,False,133.679993,129.029999,130.559998,133.520004,9830500,133.520004
2010-01-11,Monday,False,132.800003,129.210007,132.619995,130.309998,8779400,130.309998
2010-01-12,Tuesday,False,129.820007,126.550003,128.990005,127.349998,9096300,127.349998
2010-01-13,Wednesday,False,129.710007,125.75,127.900002,129.110001,10723200,129.110001
2010-01-14,Thursday,False,130.380005,126.400002,129.139999,127.349998,9774900,127.349998
2010-01-15,Friday,False,129.649994,127.059998,129.179993,127.139999,15376500,127.139999


In [148]:
stocks[stocks["Is start of Month"]].head(20)

Unnamed: 0_level_0,Day of Week,Is start of Month,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2010-02-01,Monday,True,124.860001,113.82,123.18,118.870003,37774400,118.870003
2010-03-01,Monday,True,124.660004,117.529999,118.699997,124.540001,13296500,124.540001
2010-04-01,Thursday,True,136.509995,131.179993,135.800003,131.809998,8785800,131.809998
2010-06-01,Tuesday,True,126.57,123.019997,124.970001,123.239998,3659500,123.239998
2010-07-01,Thursday,True,111.690002,106.699997,108.900002,110.959999,8529800,110.959999
2010-09-01,Wednesday,True,132.600006,126.169998,126.360001,132.490005,7138300,132.490005
2010-10-01,Friday,True,157.440002,152.199997,157.080002,153.710007,8683400,153.710007
2010-11-01,Monday,True,164.580002,161.520004,164.449997,162.580002,5239900,162.580002
2010-12-01,Wednesday,True,179.320007,176.0,179.160004,176.550003,5770100,176.550003
2011-02-01,Tuesday,True,173.100006,169.509995,170.520004,172.110001,5085900,172.110001


In [143]:
times = pd.date_range(start = start ,end = end, freq = "BM") # "BM" means end of month's business day
times 

DatetimeIndex(['2010-01-29', '2010-02-26', '2010-03-31', '2010-04-30',
               '2010-05-31', '2010-06-30', '2010-07-30', '2010-08-31',
               '2010-09-30', '2010-10-29',
               ...
               '2017-11-30', '2017-12-29', '2018-01-31', '2018-02-28',
               '2018-03-30', '2018-04-30', '2018-05-31', '2018-06-29',
               '2018-07-31', '2018-08-31'],
              dtype='datetime64[ns]', length=104, freq='BM')

In [146]:
mask = stocks.index.isin(times) #use isin method to extrat specific dates
stocks[mask].head(20)

Unnamed: 0_level_0,Day of Week,Is start of Month,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2010-01-29,Friday,False,131.850006,124.139999,129.770004,125.410004,29471300,125.410004
2010-02-26,Friday,False,119.43,117.0,117.879997,118.400002,5721600,118.400002
2010-03-31,Wednesday,False,136.800003,134.479996,136.0,135.770004,4600600,135.770004
2010-04-30,Friday,False,141.399994,136.910004,141.399994,137.100006,6113500,137.100006
2010-06-30,Wednesday,False,112.68,108.110001,108.580002,109.260002,9740700,109.260002
2010-07-30,Friday,False,118.739998,114.510002,115.529999,117.889999,7077500,117.889999
2010-08-31,Tuesday,False,125.900002,122.5,122.849998,124.830002,4225600,124.830002
2010-09-30,Thursday,False,160.929993,155.600006,160.009995,157.059998,7606000,157.059998
2010-10-29,Friday,False,168.5,164.809998,165.800003,165.229996,4998500,165.229996
2010-11-30,Tuesday,False,177.699997,174.899994,176.949997,175.399994,6762200,175.399994


In [147]:
stocks.loc["2010-05-25":"2010-06-05"] 
# extract a apecific date range
# 2010-5-31 was the end of the month and end of the business day, but it was a Federal holiday.
# You ca use holidays library to solve this problem

Unnamed: 0_level_0,Day of Week,Is start of Month,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2010-05-25,Tuesday,False,125.190002,118.5,118.540001,124.860001,6913400,124.860001
2010-05-26,Wednesday,False,125.790001,122.300003,125.050003,123.209999,6964600,123.209999
2010-05-27,Thursday,False,126.849998,120.599998,124.980003,126.699997,4747700,126.699997
2010-05-28,Friday,False,127.599998,124.010002,126.07,125.459999,3954800,125.459999
2010-06-01,Tuesday,True,126.57,123.019997,124.970001,123.239998,3659500,123.239998
2010-06-02,Wednesday,False,126.43,121.650002,124.019997,126.309998,4764700,126.309998
2010-06-03,Thursday,False,129.149994,124.849998,126.25,128.759995,5276900,128.759995
2010-06-04,Friday,False,128.199997,122.18,126.330002,122.769997,5497700,122.769997


## The .truncate() Method

In [153]:
company = "AMZN"
start = "2010-01-01"
end = "2018-09-27"
stocks =  pdr.get_data_yahoo(company, start = start, end = dt.date.today())
stocks.head()

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,136.610001,133.139999,136.25,133.899994,7599900,133.899994
2010-01-05,135.479996,131.809998,133.429993,134.690002,8851900,134.690002
2010-01-06,134.729996,131.649994,134.600006,132.25,7178800,132.25
2010-01-07,132.320007,128.800003,132.009995,130.0,11030200,130.0
2010-01-08,133.679993,129.029999,130.559998,133.520004,9830500,133.520004


In [154]:
stocks.truncate(before ="2018-08-01", after="2018-08-31") #slicing method called directly on a dataframe

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-08-01,1798.439941,1776.02002,1784.0,1797.170044,4153100,1797.170044
2018-08-02,1836.560059,1786.0,1788.77002,1834.329956,4354700,1834.329956
2018-08-03,1841.0,1821.5,1837.73999,1823.290039,3460500,1823.290039
2018-08-06,1847.77002,1818.920044,1825.810059,1847.75,3391800,1847.75
2018-08-07,1869.719971,1846.27002,1854.530029,1862.47998,3377500,1862.47998
2018-08-08,1891.51001,1854.5,1861.0,1886.52002,3963000,1886.52002
2018-08-09,1914.569946,1877.47998,1882.0,1898.52002,4849900,1898.52002
2018-08-10,1899.5,1878.209961,1888.51001,1886.300049,3639900,1886.300049
2018-08-13,1925.0,1893.670044,1898.5,1896.199951,5512600,1896.199951
2018-08-14,1921.01001,1900.0,1919.390015,1919.650024,3986100,1919.650024


## pd.DateOffset Objects

In [157]:
company = "GOOG"
start = "2010-01-01"
end = "2018-09-27"
stocks =  pdr.get_data_yahoo(company, start = dt.date(2000, 1, 1), end = dt.datetime.now()) # get the current time
stocks.head()

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2004-08-19,51.693783,47.669952,49.676899,49.845802,44994500.0,49.845802
2004-08-20,54.187561,49.925285,50.178635,53.80505,23005800.0,53.80505
2004-08-23,56.373344,54.172661,55.017166,54.346527,18393200.0,54.346527
2004-08-24,55.439419,51.450363,55.260582,52.096165,15361800.0,52.096165
2004-08-25,53.651051,51.604362,52.140873,52.657513,9257400.0,52.657513


In [159]:
stocks.tail()

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-09-20,1189.890015,1173.359985,1179.98999,1186.869995,1210000.0,1186.869995
2018-09-21,1192.209961,1166.040039,1192.0,1166.089966,4405600.0,1166.089966
2018-09-24,1178.0,1146.910034,1157.170044,1173.369995,1271000.0,1173.369995
2018-09-25,1186.880005,1168.0,1176.150024,1184.650024,977700.0,1184.650024
2018-09-26,1194.22998,1174.765015,1185.150024,1180.48999,1346800.0,1180.48999


In [162]:
dt.datetime.now()

datetime.datetime(2018, 9, 27, 11, 16, 18, 669708)

In [161]:
dt.date.today()

datetime.date(2018, 9, 27)

In [163]:
stocks.index

DatetimeIndex(['2004-08-19', '2004-08-20', '2004-08-23', '2004-08-24',
               '2004-08-25', '2004-08-26', '2004-08-27', '2004-08-30',
               '2004-08-31', '2004-09-01',
               ...
               '2018-09-13', '2018-09-14', '2018-09-17', '2018-09-18',
               '2018-09-19', '2018-09-20', '2018-09-21', '2018-09-24',
               '2018-09-25', '2018-09-26'],
              dtype='datetime64[ns]', name='Date', length=3552, freq=None)

In [164]:
stocks.index + pd.DateOffset(days = 5) #add 5 days to every day

DatetimeIndex(['2004-08-24', '2004-08-25', '2004-08-28', '2004-08-29',
               '2004-08-30', '2004-08-31', '2004-09-01', '2004-09-04',
               '2004-09-05', '2004-09-06',
               ...
               '2018-09-18', '2018-09-19', '2018-09-22', '2018-09-23',
               '2018-09-24', '2018-09-25', '2018-09-26', '2018-09-29',
               '2018-09-30', '2018-10-01'],
              dtype='datetime64[ns]', name='Date', length=3552, freq=None)

In [165]:
stocks.index + pd.DateOffset(weeks = 2) #add 2 weeks to every day

DatetimeIndex(['2004-09-02', '2004-09-03', '2004-09-06', '2004-09-07',
               '2004-09-08', '2004-09-09', '2004-09-10', '2004-09-13',
               '2004-09-14', '2004-09-15',
               ...
               '2018-09-27', '2018-09-28', '2018-10-01', '2018-10-02',
               '2018-10-03', '2018-10-04', '2018-10-05', '2018-10-08',
               '2018-10-09', '2018-10-10'],
              dtype='datetime64[ns]', name='Date', length=3552, freq=None)

In [166]:
stocks.index - pd.DateOffset(months = 3) #subtract 3 months from every day

DatetimeIndex(['2004-05-19', '2004-05-20', '2004-05-23', '2004-05-24',
               '2004-05-25', '2004-05-26', '2004-05-27', '2004-05-30',
               '2004-05-31', '2004-06-01',
               ...
               '2018-06-13', '2018-06-14', '2018-06-17', '2018-06-18',
               '2018-06-19', '2018-06-20', '2018-06-21', '2018-06-24',
               '2018-06-25', '2018-06-26'],
              dtype='datetime64[ns]', name='Date', length=3552, freq=None)

In [168]:
stocks.index - pd.DateOffset(years = 1) #subtract 1 year from every day

DatetimeIndex(['2003-08-19', '2003-08-20', '2003-08-23', '2003-08-24',
               '2003-08-25', '2003-08-26', '2003-08-27', '2003-08-30',
               '2003-08-31', '2003-09-01',
               ...
               '2017-09-13', '2017-09-14', '2017-09-17', '2017-09-18',
               '2017-09-19', '2017-09-20', '2017-09-21', '2017-09-24',
               '2017-09-25', '2017-09-26'],
              dtype='datetime64[ns]', name='Date', length=3552, freq=None)

In [169]:
stocks.index + pd.DateOffset(hours = 6) #add 6 hours to every day

DatetimeIndex(['2004-08-19 06:00:00', '2004-08-20 06:00:00',
               '2004-08-23 06:00:00', '2004-08-24 06:00:00',
               '2004-08-25 06:00:00', '2004-08-26 06:00:00',
               '2004-08-27 06:00:00', '2004-08-30 06:00:00',
               '2004-08-31 06:00:00', '2004-09-01 06:00:00',
               ...
               '2018-09-13 06:00:00', '2018-09-14 06:00:00',
               '2018-09-17 06:00:00', '2018-09-18 06:00:00',
               '2018-09-19 06:00:00', '2018-09-20 06:00:00',
               '2018-09-21 06:00:00', '2018-09-24 06:00:00',
               '2018-09-25 06:00:00', '2018-09-26 06:00:00'],
              dtype='datetime64[ns]', name='Date', length=3552, freq=None)

In [170]:
stocks.index - pd.DateOffset(years = 1, months = 6) #subtract 1.5 year from every day

DatetimeIndex(['2003-05-19', '2003-05-20', '2003-05-23', '2003-05-24',
               '2003-05-25', '2003-05-26', '2003-05-27', '2003-05-30',
               '2003-05-31', '2003-06-01',
               ...
               '2017-06-13', '2017-06-14', '2017-06-17', '2017-06-18',
               '2017-06-19', '2017-06-20', '2017-06-21', '2017-06-24',
               '2017-06-25', '2017-06-26'],
              dtype='datetime64[ns]', name='Date', length=3552, freq=None)

## More Fun wiht pd.DataOffset Objects

In [6]:
company = "GOOG"
start = "2010-01-01"
end = "2018-09-27"
stocks =  pdr.get_data_yahoo(company, start = dt.date(2000, 1, 1), end = dt.datetime.now()) # get the current time
stocks.head()

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2004-08-19,51.693783,47.669952,49.676899,49.845802,44994500.0,49.845802
2004-08-20,54.187561,49.925285,50.178635,53.80505,23005800.0,53.80505
2004-08-23,56.373344,54.172661,55.017166,54.346527,18393200.0,54.346527
2004-08-24,55.439419,51.450363,55.260582,52.096165,15361800.0,52.096165
2004-08-25,53.651051,51.604362,52.140873,52.657513,9257400.0,52.657513


In [7]:
stocks.index + pd.tseries.offsets.MonthEnd() 
# Retrun next available month end 
# end of Aug., 2004-08-31 is rounded to next available end of month, 2004-09-30.

DatetimeIndex(['2004-08-31', '2004-08-31', '2004-08-31', '2004-08-31',
               '2004-08-31', '2004-08-31', '2004-08-31', '2004-08-31',
               '2004-09-30', '2004-09-30',
               ...
               '2018-09-30', '2018-09-30', '2018-09-30', '2018-09-30',
               '2018-09-30', '2018-09-30', '2018-09-30', '2018-09-30',
               '2018-09-30', '2018-09-30'],
              dtype='datetime64[ns]', name='Date', length=3553, freq=None)

In [8]:
stocks.index -pd.tseries.offsets.MonthEnd() 
# Retrun previous available month end 
# end of Aug., 2004-08-31 is rounded to 2004-07-31

DatetimeIndex(['2004-07-31', '2004-07-31', '2004-07-31', '2004-07-31',
               '2004-07-31', '2004-07-31', '2004-07-31', '2004-07-31',
               '2004-07-31', '2004-08-31',
               ...
               '2018-08-31', '2018-08-31', '2018-08-31', '2018-08-31',
               '2018-08-31', '2018-08-31', '2018-08-31', '2018-08-31',
               '2018-08-31', '2018-08-31'],
              dtype='datetime64[ns]', name='Date', length=3553, freq=None)

In [9]:
stocks.index + pd.tseries.offsets.MonthBegin() 

DatetimeIndex(['2004-09-01', '2004-09-01', '2004-09-01', '2004-09-01',
               '2004-09-01', '2004-09-01', '2004-09-01', '2004-09-01',
               '2004-09-01', '2004-10-01',
               ...
               '2018-10-01', '2018-10-01', '2018-10-01', '2018-10-01',
               '2018-10-01', '2018-10-01', '2018-10-01', '2018-10-01',
               '2018-10-01', '2018-10-01'],
              dtype='datetime64[ns]', name='Date', length=3553, freq=None)

## import pandas.tseries.offsets 

In [10]:
from pandas.tseries.offsets import *   # import everything within this layer

In [11]:
stocks.index - MonthEnd() 
# Retrun previous available month end 
# end of Aug., 2004-08-31 is rounded to 2004-07-31

DatetimeIndex(['2004-07-31', '2004-07-31', '2004-07-31', '2004-07-31',
               '2004-07-31', '2004-07-31', '2004-07-31', '2004-07-31',
               '2004-07-31', '2004-08-31',
               ...
               '2018-08-31', '2018-08-31', '2018-08-31', '2018-08-31',
               '2018-08-31', '2018-08-31', '2018-08-31', '2018-08-31',
               '2018-08-31', '2018-08-31'],
              dtype='datetime64[ns]', name='Date', length=3553, freq=None)

In [12]:
stocks.index - BMonthEnd()  #Business Month End

DatetimeIndex(['2004-07-30', '2004-07-30', '2004-07-30', '2004-07-30',
               '2004-07-30', '2004-07-30', '2004-07-30', '2004-07-30',
               '2004-07-30', '2004-08-31',
               ...
               '2018-08-31', '2018-08-31', '2018-08-31', '2018-08-31',
               '2018-08-31', '2018-08-31', '2018-08-31', '2018-08-31',
               '2018-08-31', '2018-08-31'],
              dtype='datetime64[ns]', name='Date', length=3553, freq=None)

In [13]:
stocks.index + QuarterEnd() #Next quarter end

DatetimeIndex(['2004-09-30', '2004-09-30', '2004-09-30', '2004-09-30',
               '2004-09-30', '2004-09-30', '2004-09-30', '2004-09-30',
               '2004-09-30', '2004-09-30',
               ...
               '2018-09-30', '2018-09-30', '2018-09-30', '2018-09-30',
               '2018-09-30', '2018-09-30', '2018-09-30', '2018-09-30',
               '2018-09-30', '2018-09-30'],
              dtype='datetime64[ns]', name='Date', length=3553, freq=None)

In [15]:
stocks.index - QuarterBegin()

DatetimeIndex(['2004-06-01', '2004-06-01', '2004-06-01', '2004-06-01',
               '2004-06-01', '2004-06-01', '2004-06-01', '2004-06-01',
               '2004-06-01', '2004-06-01',
               ...
               '2018-09-01', '2018-09-01', '2018-09-01', '2018-09-01',
               '2018-09-01', '2018-09-01', '2018-09-01', '2018-09-01',
               '2018-09-01', '2018-09-01'],
              dtype='datetime64[ns]', name='Date', length=3553, freq=None)

In [16]:
stocks.index - YearBegin()

DatetimeIndex(['2004-01-01', '2004-01-01', '2004-01-01', '2004-01-01',
               '2004-01-01', '2004-01-01', '2004-01-01', '2004-01-01',
               '2004-01-01', '2004-01-01',
               ...
               '2018-01-01', '2018-01-01', '2018-01-01', '2018-01-01',
               '2018-01-01', '2018-01-01', '2018-01-01', '2018-01-01',
               '2018-01-01', '2018-01-01'],
              dtype='datetime64[ns]', name='Date', length=3553, freq=None)

## The Timedelta Object
represents time duration

In [17]:
pd.Timestamp("2016-03-31") # represnts specific moment in time

Timestamp('2016-03-31 00:00:00')

In [29]:
timeA = pd.Timestamp("2016-03-31")

In [30]:
timeB = pd.Timestamp("2016-03-20")

In [31]:
timeA - timeB # duration in time

Timedelta('11 days 00:00:00')

In [32]:
timeB - timeA

Timedelta('-11 days +00:00:00')

In [33]:
type(timeA - timeB)

pandas._libs.tslibs.timedeltas.Timedelta

In [34]:
type(timeA)

pandas._libs.tslibs.timestamps.Timestamp

In [36]:
pd.Timedelta(days = 3, hours = 12, minutes = 45, weeks = 1) 
# specific time span
# years does not work

Timedelta('10 days 12:45:00')

In [39]:
pd.Timedelta("10 days")

Timedelta('10 days 00:00:00')

In [43]:
pd.Timedelta("14 days 6 hours 12 minutes 30 seconds")
# weeks doesn not work

Timedelta('14 days 06:12:30')

## Timedeltas in a Dataset

In [49]:
shipping = pd.read_csv("ecommerce.csv", index_col = "ID", parse_dates = ["order_date", "delivery_date"]) 
shipping.head()
# "index_col" sets ID as an Index
# pandas is default to string. "parse_dates" explicitly tell that we want to interprete days as datetimes 

Unnamed: 0_level_0,order_date,delivery_date
ID,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1998-05-24,1999-02-05
2,1992-04-22,1998-03-06
4,1991-02-10,1992-08-26
5,1992-07-21,1997-11-20
7,1993-09-02,1998-06-10


In [51]:
shipping["delivery_time"] = shipping["delivery_date"] - shipping["order_date"]
shipping.head()

Unnamed: 0_level_0,order_date,delivery_date,delivery_time
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1998-05-24,1999-02-05,257 days
2,1992-04-22,1998-03-06,2144 days
4,1991-02-10,1992-08-26,563 days
5,1992-07-21,1997-11-20,1948 days
7,1993-09-02,1998-06-10,1742 days


In [54]:
shipping["twice_as_long"] = shipping["delivery_date"] + shipping["delivery_time"]
shipping.head()

Unnamed: 0_level_0,order_date,delivery_date,delivery_time,twice_as_long
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1998-05-24,1999-02-05,257 days,1999-10-20
2,1992-04-22,1998-03-06,2144 days,2004-01-18
4,1991-02-10,1992-08-26,563 days,1994-03-12
5,1992-07-21,1997-11-20,1948 days,2003-03-22
7,1993-09-02,1998-06-10,1742 days,2003-03-18


In [55]:
shipping.dtypes

order_date        datetime64[ns]
delivery_date     datetime64[ns]
delivery_time    timedelta64[ns]
twice_as_long     datetime64[ns]
dtype: object

In [59]:
mask = shipping["delivery_time"] > "365 days" # pandas can figure out string value
shipping[mask].head()

Unnamed: 0_level_0,order_date,delivery_date,delivery_time,twice_as_long
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2,1992-04-22,1998-03-06,2144 days,2004-01-18
4,1991-02-10,1992-08-26,563 days,1994-03-12
5,1992-07-21,1997-11-20,1948 days,2003-03-22
7,1993-09-02,1998-06-10,1742 days,2003-03-18
9,1990-01-25,1994-10-02,1711 days,1999-06-09


In [60]:
shipping["delivery_time"].min() 

Timedelta('8 days 00:00:00')

In [61]:
shipping["delivery_time"].mean() 

Timedelta('1217 days 22:53:53.532934')

In [62]:
shipping["delivery_time"].median() 

Timedelta('998 days 00:00:00')

### appendix

In [17]:
! pip3 install pandas-datareader

Collecting pandas-datareader
  Using cached https://files.pythonhosted.org/packages/be/76/639c60ede26458dadf76bacaa9cbcc76f8cc5082fb2b2d90d0a90c699d36/pandas_datareader-0.6.0-py2.py3-none-any.whl
Collecting requests-file (from pandas-datareader)
  Using cached https://files.pythonhosted.org/packages/23/9c/6e63c23c39e53d3df41c77a3d05a49a42c4e1383a6d2a5e3233161b89dbf/requests_file-1.4.3-py2.py3-none-any.whl
Collecting requests-ftp (from pandas-datareader)
  Using cached https://files.pythonhosted.org/packages/3d/ca/14b2ad1e93b5195eeaf56b86b7ecfd5ea2d5754a68d17aeb1e5b9f95b3cf/requests-ftp-0.3.1.tar.gz
Building wheels for collected packages: requests-ftp
  Running setup.py bdist_wheel for requests-ftp ... [?25ldone
[?25h  Stored in directory: /home/nbuser/.cache/pip/wheels/2a/98/32/37195e45a3392a73d9f65c488cbea30fe5bad76aaef4d6b020
Successfully built requests-ftp
Installing collected packages: requests-file, requests-ftp, pandas-datareader
Successfully installed pandas-datareader-0.6.0 r

In [6]:
! pip3 install git+https://github.com/pydata/pandas-datareader.git

Collecting git+https://github.com/pydata/pandas-datareader.git
  Cloning https://github.com/pydata/pandas-datareader.git to /tmp/pip-req-build-6p0lhltw
Building wheels for collected packages: pandas-datareader
  Running setup.py bdist_wheel for pandas-datareader ... [?25ldone
[?25h  Stored in directory: /tmp/pip-ephem-wheel-cache-2vzkkcqq/wheels/5f/4c/e3/f9e4e92cb4498744a91d53d37b840fb1959c286bf660740040
Successfully built pandas-datareader


In [4]:
! pip install pandas-datareader --upgrade

Requirement already up-to-date: pandas-datareader in /home/nbuser/.local/lib/python2.7/site-packages (0.7.0)


In [9]:
! pip3 install tiingo

Collecting tiingo
  Downloading https://files.pythonhosted.org/packages/bd/49/5c8f99477a1642079f0f411604b6359bdc8b6b657ff1e0104b2eca4c9f30/tiingo-0.8.0-py2.py3-none-any.whl
Installing collected packages: tiingo
Successfully installed tiingo-0.8.0


In [15]:
!pip3 install fix_yahoo_finance

Collecting fix_yahoo_finance
  Using cached https://files.pythonhosted.org/packages/0a/96/d44330e427f5368cb8abd25997b72956a31b52073d285c4d5cd56e5fdc17/fix-yahoo-finance-0.0.22.tar.gz
Collecting multitasking (from fix_yahoo_finance)
  Using cached https://files.pythonhosted.org/packages/ac/1a/0750416c5e3683d170757e423f097fdf78ceb9ccdc65658b24341664e53e/multitasking-0.0.7.tar.gz
Building wheels for collected packages: fix-yahoo-finance, multitasking
  Running setup.py bdist_wheel for fix-yahoo-finance ... [?25ldone
[?25h  Stored in directory: /home/nbuser/.cache/pip/wheels/2c/ca/ce/218a19aaecf63fd74c75d6a6772b1a799fa05826d8762bfd83
  Running setup.py bdist_wheel for multitasking ... [?25ldone
[?25h  Stored in directory: /home/nbuser/.cache/pip/wheels/41/e4/48/af808a1c57f43f104042abdaf80fa623ab213ca0268ba4189c
Successfully built fix-yahoo-finance multitasking
Installing collected packages: multitasking, fix-yahoo-finance
[33m  The script sample is installed in '/home/nbuser/.local/bi

In [8]:
! pip3 install quandl

Collecting quandl
  Downloading https://files.pythonhosted.org/packages/d3/70/c05e132321ae2375f26428e7097230b3b735a31d6b78077d5f15b3fb0e6b/Quandl-3.4.2-py2.py3-none-any.whl
Collecting inflection>=0.3.1 (from quandl)
  Downloading https://files.pythonhosted.org/packages/d5/35/a6eb45b4e2356fe688b21570864d4aa0d0a880ce387defe9c589112077f8/inflection-0.3.1.tar.gz
Collecting more-itertools (from quandl)
[?25l  Downloading https://files.pythonhosted.org/packages/79/b1/eace304ef66bd7d3d8b2f78cc374b73ca03bc53664d78151e9df3b3996cc/more_itertools-4.3.0-py3-none-any.whl (48kB)
[K    100% |████████████████████████████████| 51kB 273kB/s eta 0:00:01
Building wheels for collected packages: inflection
  Running setup.py bdist_wheel for inflection ... [?25ldone
[?25h  Stored in directory: /home/nbuser/.cache/pip/wheels/9f/5a/d3/6fc3bf6516d2a3eb7e18f9f28b472110b59325f3f258fe9211
Successfully built inflection
Installing collected packages: inflection, more-itertools, quandl
Successfully installed infl

In [172]:
import matplotlib.pyplot as plt
import quandl
data = quandl.get("SPY", start_date="2018-01-01", end_date="2018-01-01", api_key= "your key")
data.Close.plot()
plt.show()

SyntaxError: Invalid format used for Quandl database code.         The correct format is: `DATABASE_CODE/DATASET_CODE` (<string>)