In [1]:
import pandas as pd
import datetime as dt

# Review of Python's datetime Module:

In [5]:
# Build a calendar date (no time-of-day component) with the standard library.
someday = dt.date(year = 2022, month = 5, day = 23)

In [10]:
# Read the individual components of the date. A notebook cell only echoes the
# value of its final expression, so just the day is shown below.
someday.year
someday.month
someday.day

23

In [13]:
dt.datetime(2022, 5, 23, 18, 0, 0)

datetime.datetime(2022, 5, 23, 18, 0)

In [14]:
str(dt.datetime(2022, 5, 23, 18, 0, 0))

'2022-05-23 18:00:00'

In [15]:
# A datetime carries a time of day in addition to the calendar date.
sometime = dt.datetime(year = 2022, month = 5, day = 23, hour = 18, minute = 0, second = 0)

In [17]:
sometime.year
sometime.month
sometime.day 

23

#  The pandas Timestamp Object:

In [21]:
pd.Timestamp("2015-03-31")
pd.Timestamp("2015/03/31")
pd.Timestamp("2015.03.31")

Timestamp('2015-03-31 00:00:00')

In [7]:
pd.Timestamp("1/1/2015")
#pd.Timestamp("19/12/2020")
pd.Timestamp("12/19/2020")
pd.Timestamp("2021-08-08 08:35:15")
pd.Timestamp("2021-03-08 6:13:19 PM")

Timestamp('2021-03-08 18:13:19')

In [8]:
pd.Timestamp(dt.date(2015, 1, 1))

Timestamp('2015-01-01 00:00:00')

In [10]:
pd.Timestamp(dt.datetime(2000, 2, 3, 21, 35, 22))

Timestamp('2000-02-03 21:35:22')

# The pandas DatetimeIndex Object:

In [11]:
date = ["2016-01-01", "2016-04-12", "2009-09-07"]
pd.DatetimeIndex(date)

DatetimeIndex(['2016-01-01', '2016-04-12', '2009-09-07'], dtype='datetime64[ns]', freq=None)

In [12]:
date = ["2016/01/01", "2016/04/12", "2009/09/07"]
pd.DatetimeIndex(date)

DatetimeIndex(['2016-01-01', '2016-04-12', '2009-09-07'], dtype='datetime64[ns]', freq=None)

In [15]:
# A DatetimeIndex can also be built from Python date objects instead of strings.
dates = [dt.date(*ymd) for ymd in ((2016, 1, 10), (1994, 6, 13), (2003, 12, 29))]
dtIndex = pd.DatetimeIndex(data = dates)

In [17]:
values = [100, 200, 300]
pd.Series(data = values, index = dtIndex)

2016-01-10    100
1994-06-13    200
2003-12-29    300
dtype: int64

#  The pd.to_datetime() Method:

In [None]:
# The to_datetime method is called directly on the pandas library (pd.to_datetime).
# It is a convenience method that converts an existing object into a pandas time-related object.

In [24]:
# to_datetime accepts a single string, a date, a datetime, or a list of strings.
# Only the result of the last call is echoed below the cell.
pd.to_datetime("2001-04-19")
pd.to_datetime(dt.date(2015, 1, 1))
pd.to_datetime(dt.datetime(2015, 1, 1, 14, 35, 20))
# NOTE(review): this list mixes several date formats. The output below shows them
# all parsed; newer pandas releases (2.0+) are stricter about mixed formats and may
# require format="mixed" here — confirm against the pandas version in use.
pd.to_datetime(["2015-01-03", "2014/02/08", "2016", "July 4th, 1996"])

DatetimeIndex(['2015-01-03', '2014-02-08', '2016-01-01', '1996-07-04'], dtype='datetime64[ns]', freq=None)

In [28]:
# Hold differently formatted date strings in a Series, then echo it.
times = pd.Series(
    data = ["2015-01-03", "2014/02/08", "2016", "July 4th, 1996"],
)
times

0        2015-01-03
1        2014/02/08
2              2016
3    July 4th, 1996
dtype: object

In [29]:
pd.to_datetime(times)

0   2015-01-03
1   2014-02-08
2   2016-01-01
3   1996-07-04
dtype: datetime64[ns]

In [37]:
dates = pd.Series(["July 4th, 1996", "10/04/1991", "Hello", "2015-02-31"])
dates

0    July 4th, 1996
1        10/04/1991
2             Hello
3        2015-02-31
dtype: object

In [38]:
pd.to_datetime(dates, errors = "coerce")

0   1996-07-04
1   1991-10-04
2          NaT
3          NaT
dtype: datetime64[ns]

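### errors = "coerce": values that cannot be parsed as a date (e.g. "Hello", or the impossible date "2015-02-31") become NaT instead of raising an error.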

### UNIX time: a way to store a moment in time as the number of seconds elapsed since 1970-01-01 00:00:00 UTC.

In [40]:
# The list passed in holds UNIX times (integers).
# unit = "s" tells to_datetime to interpret those integers as seconds.
pd.to_datetime([1349720105, 1349806505, 1349892905, 1349979305, 1350065705], unit = "s")

DatetimeIndex(['2012-10-08 18:15:05', '2012-10-09 18:15:05',
               '2012-10-10 18:15:05', '2012-10-11 18:15:05',
               '2012-10-12 18:15:05'],
              dtype='datetime64[ns]', freq=None)

# Create Range of Dates with the pd.date_range() Method, Part 1

In [43]:
# date_range is driven by the start, end and periods parameters; at least two of
# them are required for this function to work (here: start and end).
# freq = "D" sets the frequency (the step between entries) to one day.

times = pd.date_range(start ="2016-01-01", end = "2016-01-10", freq = "D")
times[0]

Timestamp('2016-01-01 00:00:00', freq='D')

In [48]:
# B = Business Days: 

pd.date_range(start ="2016-01-01", end = "2016-01-10", freq = "B")

DatetimeIndex(['2016-01-01', '2016-01-04', '2016-01-05', '2016-01-06',
               '2016-01-07', '2016-01-08'],
              dtype='datetime64[ns]', freq='B')

In [50]:
# W = Week
pd.date_range(start ="2016-01-01", end = "2016-01-15", freq = "W")

# Only Sundays are returned here: the output reports the frequency as 'W-SUN'.

DatetimeIndex(['2016-01-03', '2016-01-10'], dtype='datetime64[ns]', freq='W-SUN')

In [52]:
pd.date_range(start ="2016-01-01", end = "2016-01-15", freq = "W-FRI")


DatetimeIndex(['2016-01-01', '2016-01-08', '2016-01-15'], dtype='datetime64[ns]', freq='W-FRI')

In [53]:
pd.date_range(start ="2016-01-01", end = "2016-01-15", freq = "H")

DatetimeIndex(['2016-01-01 00:00:00', '2016-01-01 01:00:00',
               '2016-01-01 02:00:00', '2016-01-01 03:00:00',
               '2016-01-01 04:00:00', '2016-01-01 05:00:00',
               '2016-01-01 06:00:00', '2016-01-01 07:00:00',
               '2016-01-01 08:00:00', '2016-01-01 09:00:00',
               ...
               '2016-01-14 15:00:00', '2016-01-14 16:00:00',
               '2016-01-14 17:00:00', '2016-01-14 18:00:00',
               '2016-01-14 19:00:00', '2016-01-14 20:00:00',
               '2016-01-14 21:00:00', '2016-01-14 22:00:00',
               '2016-01-14 23:00:00', '2016-01-15 00:00:00'],
              dtype='datetime64[ns]', length=337, freq='H')

In [55]:
pd.date_range(start ="2016-01-01", end = "2016-01-15", freq = "6H")

DatetimeIndex(['2016-01-01 00:00:00', '2016-01-01 06:00:00',
               '2016-01-01 12:00:00', '2016-01-01 18:00:00',
               '2016-01-02 00:00:00', '2016-01-02 06:00:00',
               '2016-01-02 12:00:00', '2016-01-02 18:00:00',
               '2016-01-03 00:00:00', '2016-01-03 06:00:00',
               '2016-01-03 12:00:00', '2016-01-03 18:00:00',
               '2016-01-04 00:00:00', '2016-01-04 06:00:00',
               '2016-01-04 12:00:00', '2016-01-04 18:00:00',
               '2016-01-05 00:00:00', '2016-01-05 06:00:00',
               '2016-01-05 12:00:00', '2016-01-05 18:00:00',
               '2016-01-06 00:00:00', '2016-01-06 06:00:00',
               '2016-01-06 12:00:00', '2016-01-06 18:00:00',
               '2016-01-07 00:00:00', '2016-01-07 06:00:00',
               '2016-01-07 12:00:00', '2016-01-07 18:00:00',
               '2016-01-08 00:00:00', '2016-01-08 06:00:00',
               '2016-01-08 12:00:00', '2016-01-08 18:00:00',
               '2016-01-

In [57]:
pd.date_range(start ="2016-01-01", end = "2016-01-15", freq = "M")

DatetimeIndex([], dtype='datetime64[ns]', freq='M')

In [58]:
pd.date_range(start ="2016-01-01", end = "2016-12-15", freq = "M")

DatetimeIndex(['2016-01-31', '2016-02-29', '2016-03-31', '2016-04-30',
               '2016-05-31', '2016-06-30', '2016-07-31', '2016-08-31',
               '2016-09-30', '2016-10-31', '2016-11-30'],
              dtype='datetime64[ns]', freq='M')

In [60]:
pd.date_range(start ="2016-01-01", end = "2016-12-15", freq = "MS") # MS = Month Start

DatetimeIndex(['2016-01-01', '2016-02-01', '2016-03-01', '2016-04-01',
               '2016-05-01', '2016-06-01', '2016-07-01', '2016-08-01',
               '2016-09-01', '2016-10-01', '2016-11-01', '2016-12-01'],
              dtype='datetime64[ns]', freq='MS')

In [62]:
pd.date_range(start ="2016-01-01", end = "2050-01-01", freq = "A") # A = Annual

DatetimeIndex(['2016-12-31', '2017-12-31', '2018-12-31', '2019-12-31',
               '2020-12-31', '2021-12-31', '2022-12-31', '2023-12-31',
               '2024-12-31', '2025-12-31', '2026-12-31', '2027-12-31',
               '2028-12-31', '2029-12-31', '2030-12-31', '2031-12-31',
               '2032-12-31', '2033-12-31', '2034-12-31', '2035-12-31',
               '2036-12-31', '2037-12-31', '2038-12-31', '2039-12-31',
               '2040-12-31', '2041-12-31', '2042-12-31', '2043-12-31',
               '2044-12-31', '2045-12-31', '2046-12-31', '2047-12-31',
               '2048-12-31', '2049-12-31'],
              dtype='datetime64[ns]', freq='A-DEC')

# Create Range of Dates with the pd.date_range() Method, Part 2

### Here we will look at the "start" and "periods" parameters

In [63]:
pd.date_range(start = "2012-09-09", periods = 25, freq = "D") 
# periods = the number of timestamps we want to generate.

DatetimeIndex(['2012-09-09', '2012-09-10', '2012-09-11', '2012-09-12',
               '2012-09-13', '2012-09-14', '2012-09-15', '2012-09-16',
               '2012-09-17', '2012-09-18', '2012-09-19', '2012-09-20',
               '2012-09-21', '2012-09-22', '2012-09-23', '2012-09-24',
               '2012-09-25', '2012-09-26', '2012-09-27', '2012-09-28',
               '2012-09-29', '2012-09-30', '2012-10-01', '2012-10-02',
               '2012-10-03'],
              dtype='datetime64[ns]', freq='D')

In [64]:
pd.date_range(start = "2012-09-09", periods = 50, freq = "D")

DatetimeIndex(['2012-09-09', '2012-09-10', '2012-09-11', '2012-09-12',
               '2012-09-13', '2012-09-14', '2012-09-15', '2012-09-16',
               '2012-09-17', '2012-09-18', '2012-09-19', '2012-09-20',
               '2012-09-21', '2012-09-22', '2012-09-23', '2012-09-24',
               '2012-09-25', '2012-09-26', '2012-09-27', '2012-09-28',
               '2012-09-29', '2012-09-30', '2012-10-01', '2012-10-02',
               '2012-10-03', '2012-10-04', '2012-10-05', '2012-10-06',
               '2012-10-07', '2012-10-08', '2012-10-09', '2012-10-10',
               '2012-10-11', '2012-10-12', '2012-10-13', '2012-10-14',
               '2012-10-15', '2012-10-16', '2012-10-17', '2012-10-18',
               '2012-10-19', '2012-10-20', '2012-10-21', '2012-10-22',
               '2012-10-23', '2012-10-24', '2012-10-25', '2012-10-26',
               '2012-10-27', '2012-10-28'],
              dtype='datetime64[ns]', freq='D')

In [65]:
pd.date_range(start = "2012-09-09", periods = 50, freq = "B")

DatetimeIndex(['2012-09-10', '2012-09-11', '2012-09-12', '2012-09-13',
               '2012-09-14', '2012-09-17', '2012-09-18', '2012-09-19',
               '2012-09-20', '2012-09-21', '2012-09-24', '2012-09-25',
               '2012-09-26', '2012-09-27', '2012-09-28', '2012-10-01',
               '2012-10-02', '2012-10-03', '2012-10-04', '2012-10-05',
               '2012-10-08', '2012-10-09', '2012-10-10', '2012-10-11',
               '2012-10-12', '2012-10-15', '2012-10-16', '2012-10-17',
               '2012-10-18', '2012-10-19', '2012-10-22', '2012-10-23',
               '2012-10-24', '2012-10-25', '2012-10-26', '2012-10-29',
               '2012-10-30', '2012-10-31', '2012-11-01', '2012-11-02',
               '2012-11-05', '2012-11-06', '2012-11-07', '2012-11-08',
               '2012-11-09', '2012-11-12', '2012-11-13', '2012-11-14',
               '2012-11-15', '2012-11-16'],
              dtype='datetime64[ns]', freq='B')

In [66]:
pd.date_range(start = "2012-09-09", periods = 50, freq = "W")

DatetimeIndex(['2012-09-09', '2012-09-16', '2012-09-23', '2012-09-30',
               '2012-10-07', '2012-10-14', '2012-10-21', '2012-10-28',
               '2012-11-04', '2012-11-11', '2012-11-18', '2012-11-25',
               '2012-12-02', '2012-12-09', '2012-12-16', '2012-12-23',
               '2012-12-30', '2013-01-06', '2013-01-13', '2013-01-20',
               '2013-01-27', '2013-02-03', '2013-02-10', '2013-02-17',
               '2013-02-24', '2013-03-03', '2013-03-10', '2013-03-17',
               '2013-03-24', '2013-03-31', '2013-04-07', '2013-04-14',
               '2013-04-21', '2013-04-28', '2013-05-05', '2013-05-12',
               '2013-05-19', '2013-05-26', '2013-06-02', '2013-06-09',
               '2013-06-16', '2013-06-23', '2013-06-30', '2013-07-07',
               '2013-07-14', '2013-07-21', '2013-07-28', '2013-08-04',
               '2013-08-11', '2013-08-18'],
              dtype='datetime64[ns]', freq='W-SUN')

In [68]:
# Weekly frequency anchored on Tuesdays. The alias is written in its canonical
# upper-case form ("W-TUE"), which is how pandas reports it in the output and how
# the notebook spells "W-FRI" elsewhere; mixed-case spellings are not documented.
# The start date is a Sunday, so the first entry rolls forward to 2012-09-11.
pd.date_range(start = "2012-09-09", periods = 50, freq = "W-TUE")

DatetimeIndex(['2012-09-11', '2012-09-18', '2012-09-25', '2012-10-02',
               '2012-10-09', '2012-10-16', '2012-10-23', '2012-10-30',
               '2012-11-06', '2012-11-13', '2012-11-20', '2012-11-27',
               '2012-12-04', '2012-12-11', '2012-12-18', '2012-12-25',
               '2013-01-01', '2013-01-08', '2013-01-15', '2013-01-22',
               '2013-01-29', '2013-02-05', '2013-02-12', '2013-02-19',
               '2013-02-26', '2013-03-05', '2013-03-12', '2013-03-19',
               '2013-03-26', '2013-04-02', '2013-04-09', '2013-04-16',
               '2013-04-23', '2013-04-30', '2013-05-07', '2013-05-14',
               '2013-05-21', '2013-05-28', '2013-06-04', '2013-06-11',
               '2013-06-18', '2013-06-25', '2013-07-02', '2013-07-09',
               '2013-07-16', '2013-07-23', '2013-07-30', '2013-08-06',
               '2013-08-13', '2013-08-20'],
              dtype='datetime64[ns]', freq='W-TUE')

In [69]:
pd.date_range(start = "2012-09-09", periods = 50, freq = "MS")

DatetimeIndex(['2012-10-01', '2012-11-01', '2012-12-01', '2013-01-01',
               '2013-02-01', '2013-03-01', '2013-04-01', '2013-05-01',
               '2013-06-01', '2013-07-01', '2013-08-01', '2013-09-01',
               '2013-10-01', '2013-11-01', '2013-12-01', '2014-01-01',
               '2014-02-01', '2014-03-01', '2014-04-01', '2014-05-01',
               '2014-06-01', '2014-07-01', '2014-08-01', '2014-09-01',
               '2014-10-01', '2014-11-01', '2014-12-01', '2015-01-01',
               '2015-02-01', '2015-03-01', '2015-04-01', '2015-05-01',
               '2015-06-01', '2015-07-01', '2015-08-01', '2015-09-01',
               '2015-10-01', '2015-11-01', '2015-12-01', '2016-01-01',
               '2016-02-01', '2016-03-01', '2016-04-01', '2016-05-01',
               '2016-06-01', '2016-07-01', '2016-08-01', '2016-09-01',
               '2016-10-01', '2016-11-01'],
              dtype='datetime64[ns]', freq='MS')

In [70]:
pd.date_range(start = "2012-09-09", periods = 50, freq = "A")

DatetimeIndex(['2012-12-31', '2013-12-31', '2014-12-31', '2015-12-31',
               '2016-12-31', '2017-12-31', '2018-12-31', '2019-12-31',
               '2020-12-31', '2021-12-31', '2022-12-31', '2023-12-31',
               '2024-12-31', '2025-12-31', '2026-12-31', '2027-12-31',
               '2028-12-31', '2029-12-31', '2030-12-31', '2031-12-31',
               '2032-12-31', '2033-12-31', '2034-12-31', '2035-12-31',
               '2036-12-31', '2037-12-31', '2038-12-31', '2039-12-31',
               '2040-12-31', '2041-12-31', '2042-12-31', '2043-12-31',
               '2044-12-31', '2045-12-31', '2046-12-31', '2047-12-31',
               '2048-12-31', '2049-12-31', '2050-12-31', '2051-12-31',
               '2052-12-31', '2053-12-31', '2054-12-31', '2055-12-31',
               '2056-12-31', '2057-12-31', '2058-12-31', '2059-12-31',
               '2060-12-31', '2061-12-31'],
              dtype='datetime64[ns]', freq='A-DEC')

In [72]:
pd.date_range(start = "2012-09-09", periods = 50, freq = "6H")

DatetimeIndex(['2012-09-09 00:00:00', '2012-09-09 06:00:00',
               '2012-09-09 12:00:00', '2012-09-09 18:00:00',
               '2012-09-10 00:00:00', '2012-09-10 06:00:00',
               '2012-09-10 12:00:00', '2012-09-10 18:00:00',
               '2012-09-11 00:00:00', '2012-09-11 06:00:00',
               '2012-09-11 12:00:00', '2012-09-11 18:00:00',
               '2012-09-12 00:00:00', '2012-09-12 06:00:00',
               '2012-09-12 12:00:00', '2012-09-12 18:00:00',
               '2012-09-13 00:00:00', '2012-09-13 06:00:00',
               '2012-09-13 12:00:00', '2012-09-13 18:00:00',
               '2012-09-14 00:00:00', '2012-09-14 06:00:00',
               '2012-09-14 12:00:00', '2012-09-14 18:00:00',
               '2012-09-15 00:00:00', '2012-09-15 06:00:00',
               '2012-09-15 12:00:00', '2012-09-15 18:00:00',
               '2012-09-16 00:00:00', '2012-09-16 06:00:00',
               '2012-09-16 12:00:00', '2012-09-16 18:00:00',
               '2012-09-

# Create Range of Dates with the pd.date_range() Method, Part 3

## The "end" and "periods" parameters:

In [77]:
pd.date_range(end = "1999-12-31", periods = 20, freq = "D")

DatetimeIndex(['1999-12-12', '1999-12-13', '1999-12-14', '1999-12-15',
               '1999-12-16', '1999-12-17', '1999-12-18', '1999-12-19',
               '1999-12-20', '1999-12-21', '1999-12-22', '1999-12-23',
               '1999-12-24', '1999-12-25', '1999-12-26', '1999-12-27',
               '1999-12-28', '1999-12-29', '1999-12-30', '1999-12-31'],
              dtype='datetime64[ns]', freq='D')

In [79]:
pd.date_range(end = "1999-12-31", periods = 10, freq = "B")

DatetimeIndex(['1999-12-20', '1999-12-21', '1999-12-22', '1999-12-23',
               '1999-12-24', '1999-12-27', '1999-12-28', '1999-12-29',
               '1999-12-30', '1999-12-31'],
              dtype='datetime64[ns]', freq='B')

In [80]:
# Ten Sundays counted backwards from the end date. The alias uses the canonical
# upper-case spelling ("W-SUN") that pandas itself prints in the output.
# The end date is not a Sunday, so the range stops at the last Sunday before it.
pd.date_range(end = "1999-12-31", periods = 10, freq = "W-SUN")

DatetimeIndex(['1999-10-24', '1999-10-31', '1999-11-07', '1999-11-14',
               '1999-11-21', '1999-11-28', '1999-12-05', '1999-12-12',
               '1999-12-19', '1999-12-26'],
              dtype='datetime64[ns]', freq='W-SUN')

In [82]:
# Ten Fridays counted backwards from the end date, which is itself a Friday and
# therefore included. The alias uses the canonical upper-case spelling ("W-FRI").
pd.date_range(end = "1999-12-31", periods = 10, freq = "W-FRI")

DatetimeIndex(['1999-10-29', '1999-11-05', '1999-11-12', '1999-11-19',
               '1999-11-26', '1999-12-03', '1999-12-10', '1999-12-17',
               '1999-12-24', '1999-12-31'],
              dtype='datetime64[ns]', freq='W-FRI')

In [83]:
# Ten month-end dates counted backwards from the end date.
# The frequency is given as an explicit MonthEnd offset object rather than the
# lower-case string "m", which is easy to misread as minutes and is not a
# documented alias; the generated dates are the same month ends.
pd.date_range(end = "1999-12-31", periods = 10, freq = pd.tseries.offsets.MonthEnd())

DatetimeIndex(['1999-03-31', '1999-04-30', '1999-05-31', '1999-06-30',
               '1999-07-31', '1999-08-31', '1999-09-30', '1999-10-31',
               '1999-11-30', '1999-12-31'],
              dtype='datetime64[ns]', freq='M')

In [84]:
# Twenty-three Sundays counted backwards from the end date, using the canonical
# upper-case alias ("W-SUN") that pandas prints in the output.
pd.date_range(end = "1999-12-31", periods = 23, freq = "W-SUN")

DatetimeIndex(['1999-07-25', '1999-08-01', '1999-08-08', '1999-08-15',
               '1999-08-22', '1999-08-29', '1999-09-05', '1999-09-12',
               '1999-09-19', '1999-09-26', '1999-10-03', '1999-10-10',
               '1999-10-17', '1999-10-24', '1999-10-31', '1999-11-07',
               '1999-11-14', '1999-11-21', '1999-11-28', '1999-12-05',
               '1999-12-12', '1999-12-19', '1999-12-26'],
              dtype='datetime64[ns]', freq='W-SUN')

In [85]:
pd.date_range(end = "1999-12-31", periods = 10, freq = "7H")

DatetimeIndex(['1999-12-28 09:00:00', '1999-12-28 16:00:00',
               '1999-12-28 23:00:00', '1999-12-29 06:00:00',
               '1999-12-29 13:00:00', '1999-12-29 20:00:00',
               '1999-12-30 03:00:00', '1999-12-30 10:00:00',
               '1999-12-30 17:00:00', '1999-12-31 00:00:00'],
              dtype='datetime64[ns]', freq='7H')

# The .dt Accessor

In [2]:
import pandas as pd
import datetime as dt

In [1]:
# similar to .str accessor.

In [4]:
# One timestamp every 24 days from the start of 2000 to the end of 2010.
bunch_of_dates = pd.date_range(
    freq = "24D",
    start = "2000-01-01",
    end = "2010-12-31",
)

In [7]:
s = pd.Series(bunch_of_dates)
s.head(3)

0   2000-01-01
1   2000-01-25
2   2000-02-18
dtype: datetime64[ns]

In [9]:
s.dt.day.head(3)

0     1
1    25
2    18
dtype: int64

In [17]:
s.dt.weekday.head(7)

0    5
1    1
2    4
3    0
4    3
5    6
6    2
dtype: int64

In [18]:
s.dt.day_name()

0       Saturday
1        Tuesday
2         Friday
3         Monday
4       Thursday
         ...    
163       Friday
164       Monday
165     Thursday
166       Sunday
167    Wednesday
Length: 168, dtype: object

In [19]:
s.dt.is_leap_year

0       True
1       True
2       True
3       True
4       True
       ...  
163    False
164    False
165    False
166    False
167    False
Length: 168, dtype: bool

In [20]:
# Boolean flags marking the dates that fall on the first day of a quarter,
# then the matching rows of the Series.
mask = s.dt.is_quarter_start
s.loc[mask]

0     2000-01-01
19    2001-04-01
38    2002-07-01
137   2009-01-01
dtype: datetime64[ns]

# Importing Financial Dataset with pandas_datareader Library

In [2]:
import pandas as pd
import datetime as dt
from pandas_datareader import data

In [6]:
# Fetch the MSFT price history between the start and end dates through
# pandas_datareader and preview the first three rows.
stocks = data.DataReader(name = "MSFT", data_source= "yahoo", start="2020-01-01", end = "2022-01-01")
# name is the stock symbol (ticker) of the company to look up
# data_source is the provider to query (Yahoo here)
stocks.head(3)

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-01-02,160.729996,158.330002,158.779999,160.619995,22622100.0,157.289856
2020-01-03,159.949997,158.059998,158.320007,158.619995,21116200.0,155.331345
2020-01-06,159.100006,156.509995,157.080002,159.029999,20813700.0,155.732819


In [11]:
stocks.values
stocks.columns
stocks.index
stocks.axes

[DatetimeIndex(['2020-01-02', '2020-01-03', '2020-01-06', '2020-01-07',
                '2020-01-08', '2020-01-09', '2020-01-10', '2020-01-13',
                '2020-01-14', '2020-01-15',
                ...
                '2021-12-17', '2021-12-20', '2021-12-21', '2021-12-22',
                '2021-12-23', '2021-12-27', '2021-12-28', '2021-12-29',
                '2021-12-30', '2021-12-31'],
               dtype='datetime64[ns]', name='Date', length=505, freq=None),
 Index(['High', 'Low', 'Open', 'Close', 'Volume', 'Adj Close'], dtype='object')]

# Selecting rows from a DataFrame with a DatetimeIndex

In [16]:
stocks = data.DataReader(name = "MSFT", data_source= "yahoo", start="2010-01-01", end = "2021-12-31")
stocks.head(2)

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,31.1,30.59,30.620001,30.950001,38409100.0,23.855654
2010-01-05,31.1,30.639999,30.85,30.959999,49749600.0,23.863365


In [20]:
stocks.loc["2010-01-04"]
stocks.loc[pd.Timestamp("2010-01-04")]

stocks.iloc[0]
stocks.iloc[500]
stocks.iloc[-2]

High         3.431300e+02
Low          3.388200e+02
Open         3.419100e+02
Close        3.393200e+02
Volume       1.599450e+07
Adj Close    3.386198e+02
Name: 2021-12-30 00:00:00, dtype: float64

In [21]:
# Pulling out multiple rows at a time:

In [23]:
stocks.loc[[pd.Timestamp("2010-01-04"), pd.Timestamp("2010-01-05")]]

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,31.1,30.59,30.620001,30.950001,38409100.0,23.855654
2010-01-05,31.1,30.639999,30.85,30.959999,49749600.0,23.863365


In [25]:
stocks.iloc[[10,15,30,1000]]

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-19,31.24,30.68,30.75,31.1,46575700.0,23.971272
2010-01-26,29.85,29.09,29.200001,29.5,66639900.0,22.738024
2010-02-17,28.65,28.360001,28.530001,28.59,45882900.0,22.139666
2013-12-23,36.889999,36.549999,36.810001,36.619999,25128700.0,31.336239


In [29]:
stocks.loc["2013-10-01":"2013-10-07"]
stocks.truncate(before = "2013-10-01", after = "2013-10-07")


stocks.iloc[1000:1005]

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2013-12-23,36.889999,36.549999,36.810001,36.619999,25128700.0,31.336239
2013-12-24,37.169998,36.639999,36.720001,37.080002,14243000.0,31.729874
2013-12-26,37.490002,37.169998,37.200001,37.439999,17612800.0,32.037922
2013-12-27,37.619999,37.169998,37.580002,37.290001,14563000.0,31.909567
2013-12-30,37.380001,36.900002,37.220001,37.290001,16290500.0,31.909567


### .loc[] is inclusive ---- includes the last row
### .iloc[] is exclusive ---- does not include the last row

In [31]:
# Find Microsoft stock on every birthday since 2010:

In [33]:
# One timestamp per year on May 23rd, stepping from 1995 up to the end of 2021.
birthdays = pd.date_range(
    freq = pd.DateOffset(years = 1),
    start = "1995-05-23",
    end = "2021-12-31",
)

In [36]:
birthday_stocks = stocks.index.isin(birthdays)

In [39]:
stocks[birthday_stocks]
stocks.loc[birthday_stocks]

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2011-05-23,24.25,24.030001,24.209999,24.17,52692500.0,19.255568
2012-05-23,29.4,28.639999,29.35,29.110001,65171000.0,23.823339
2013-05-23,34.549999,33.900002,34.23,34.150002,51102700.0,28.799728
2014-05-23,40.369999,40.0,40.369999,40.119999,18020000.0,34.832687
2016-05-23,50.68,49.98,50.599998,50.029999,26118700.0,45.823727
2017-05-23,68.75,68.379997,68.720001,68.68,15425800.0,64.479218
2018-05-23,98.730003,96.32,96.709999,98.660004,21251200.0,94.43383
2019-05-23,126.290001,124.739998,126.199997,126.18,23603800.0,122.736183


# Timestamp Object Attributes and Methods:

In [40]:
# Re-download the MSFT history for 2010-01-01 through 2021-12-31.
stocks = data.DataReader(name = "MSFT", data_source= "yahoo", start="2010-01-01", end = "2021-12-31")
# head(2) is not the cell's last statement, so its result is not echoed.
stocks.head(2)
# NOTE(review): `output` is not read by any cell visible here — presumably a
# leftover meant to keep the cell silent; confirm before removing.
output=None

In [42]:
# The Datetime Index simply consists of pandas Timestamp objects.
someday = stocks.index[500]

In [43]:
someday.month

12

In [45]:
someday.week

52

In [48]:
someday.is_month_start
someday.is_quarter_end
someday.is_quarter_start

False

In [49]:
someday.month_name()

'December'

In [50]:
stocks.index.day_name()

Index(['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Monday',
       'Tuesday', 'Wednesday', 'Thursday', 'Friday',
       ...
       'Friday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Monday',
       'Tuesday', 'Wednesday', 'Thursday', 'Friday'],
      dtype='object', name='Date', length=3021)

In [51]:
# Add the weekday name of every index date as the first column.
# DataFrame.insert modifies `stocks` in place and raises ValueError when the
# column already exists, so the guard keeps this cell safe to run more than once.
if "Day of Week" not in stocks.columns:
    stocks.insert(0, "Day of Week", stocks.index.day_name())

In [53]:
stocks.head(2)

Unnamed: 0_level_0,Day of Week,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2010-01-04,Monday,31.1,30.59,30.620001,30.950001,38409100.0,23.855659
2010-01-05,Tuesday,31.1,30.639999,30.85,30.959999,49749600.0,23.863358


In [57]:
# Store, as the second column, a boolean flag telling whether each index date is
# the first calendar day of its month, then preview the first two rows.
# DataFrame.insert modifies `stocks` in place and raises ValueError when the
# column already exists, so the guard keeps this cell safe to run more than once.
# (The bare `stocks.index.is_month_start` expression that used to open the cell
# was dropped: it was not the last statement, so its value was never shown.)
if "Is start of Month?" not in stocks.columns:
    stocks.insert(1, "Is start of Month?", stocks.index.is_month_start)
stocks.head(2)

Unnamed: 0_level_0,Day of Week,Is start of Month?,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2010-01-04,Monday,False,31.1,30.59,30.620001,30.950001,38409100.0,23.855659
2010-01-05,Tuesday,False,31.1,30.639999,30.85,30.959999,49749600.0,23.863358


In [61]:
stocks[stocks["Is start of Month?"]].head(2)

Unnamed: 0_level_0,Day of Week,Is start of Month?,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2010-02-01,Monday,True,28.48,27.92,28.389999,28.41,85931100.0,21.897875
2010-03-01,Monday,True,29.049999,28.530001,28.77,29.02,43805400.0,22.472649


# The pd.DateOffset Object

In [3]:
import pandas as pd
import datetime as dt
from pandas_datareader import data

# Re-download the MSFT history for 2010-01-01 through 2021-12-31.
stocks = data.DataReader(name = "MSFT", data_source= "yahoo", start="2010-01-01", end = "2021-12-31")
# head(2) is not the cell's last statement, so its result is not echoed.
stocks.head(2)
# NOTE(review): `output` is not read by any cell visible here — presumably a
# leftover meant to keep the cell silent; confirm before removing.
output=None

### pd.DateOffset lets us add or subtract a set amount of time from every date in our DatetimeIndex.

In [4]:
# Add 5 days to every date:

In [7]:
stocks["High"]+5

Date
2010-01-04     36.100000
2010-01-05     36.100000
2010-01-06     36.080000
2010-01-07     35.700001
2010-01-08     35.879999
                 ...    
2021-12-27    347.480011
2021-12-28    348.809998
2021-12-29    349.299988
2021-12-30    348.130005
2021-12-31    344.359985
Name: High, Length: 3021, dtype: float64

In [11]:
stocks.index + pd.DateOffset(days = 5)
stocks.index - pd.DateOffset(days = 5)

DatetimeIndex(['2009-12-30', '2009-12-31', '2010-01-01', '2010-01-02',
               '2010-01-03', '2010-01-06', '2010-01-07', '2010-01-08',
               '2010-01-09', '2010-01-10',
               ...
               '2021-12-12', '2021-12-15', '2021-12-16', '2021-12-17',
               '2021-12-18', '2021-12-22', '2021-12-23', '2021-12-24',
               '2021-12-25', '2021-12-26'],
              dtype='datetime64[ns]', name='Date', length=3021, freq=None)

In [21]:
stocks.index + pd.DateOffset(weeks = 2)
stocks.index - pd.DateOffset(weeks = 3)

stocks.index + pd.DateOffset(months = 3)
stocks.index - pd.DateOffset(months = 3)

stocks.index + pd.DateOffset(years = 1)
stocks.index - pd.DateOffset(years = 8)

stocks.index + pd.DateOffset(years = 1, months = 3, days =10, hours = 6, minutes =2)
stocks.index - pd.DateOffset(years = 1, months = 3, days =10, hours = 6, minutes =2)


DatetimeIndex(['2008-09-23 17:58:00', '2008-09-24 17:58:00',
               '2008-09-25 17:58:00', '2008-09-26 17:58:00',
               '2008-09-27 17:58:00', '2008-09-30 17:58:00',
               '2008-10-01 17:58:00', '2008-10-02 17:58:00',
               '2008-10-03 17:58:00', '2008-10-04 17:58:00',
               ...
               '2020-09-06 17:58:00', '2020-09-09 17:58:00',
               '2020-09-10 17:58:00', '2020-09-11 17:58:00',
               '2020-09-12 17:58:00', '2020-09-16 17:58:00',
               '2020-09-17 17:58:00', '2020-09-18 17:58:00',
               '2020-09-19 17:58:00', '2020-09-19 17:58:00'],
              dtype='datetime64[ns]', name='Date', length=3021, freq=None)

# Timeseries Offsets

In [22]:
 # Timeseries offsets are used when we want to add a dynamic amount of time to each date/time.

In [23]:
# e.g. we want to round each date to the end of the current month:

In [28]:
stocks = data.DataReader(name = "MSFT", data_source = "yahoo", start = "2010-01-01", end = "2020-12-31")
stocks.head(3)

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,31.1,30.59,30.620001,30.950001,38409100.0,23.855656
2010-01-05,31.1,30.639999,30.85,30.959999,49749600.0,23.863363
2010-01-06,31.08,30.52,30.879999,30.77,58182400.0,23.716917


In [32]:
stocks.index + pd.tseries.offsets.MonthEnd()
stocks.index - pd.tseries.offsets.MonthEnd()

DatetimeIndex(['2009-12-31', '2009-12-31', '2009-12-31', '2009-12-31',
               '2009-12-31', '2009-12-31', '2009-12-31', '2009-12-31',
               '2009-12-31', '2009-12-31',
               ...
               '2020-11-30', '2020-11-30', '2020-11-30', '2020-11-30',
               '2020-11-30', '2020-11-30', '2020-11-30', '2020-11-30',
               '2020-11-30', '2020-11-30'],
              dtype='datetime64[ns]', name='Date', length=2769, freq=None)

In [33]:
stocks.tail(3)

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-12-29,227.179993,223.580002,226.309998,224.149994,17403200.0,221.828049
2020-12-30,225.630005,221.470001,225.229996,221.679993,20272300.0,219.383621
2020-12-31,223.0,219.679993,221.699997,222.419998,20942100.0,220.115967


In [35]:
stocks.index + pd.tseries.offsets.MonthBegin()
stocks.index - pd.tseries.offsets.MonthBegin()

DatetimeIndex(['2010-01-01', '2010-01-01', '2010-01-01', '2010-01-01',
               '2010-01-01', '2010-01-01', '2010-01-01', '2010-01-01',
               '2010-01-01', '2010-01-01',
               ...
               '2020-12-01', '2020-12-01', '2020-12-01', '2020-12-01',
               '2020-12-01', '2020-12-01', '2020-12-01', '2020-12-01',
               '2020-12-01', '2020-12-01'],
              dtype='datetime64[ns]', name='Date', length=2769, freq=None)

In [36]:
from pandas.tseries import offsets

In [37]:
stocks.index + offsets.MonthEnd()

DatetimeIndex(['2010-01-31', '2010-01-31', '2010-01-31', '2010-01-31',
               '2010-01-31', '2010-01-31', '2010-01-31', '2010-01-31',
               '2010-01-31', '2010-01-31',
               ...
               '2020-12-31', '2020-12-31', '2020-12-31', '2020-12-31',
               '2020-12-31', '2020-12-31', '2020-12-31', '2020-12-31',
               '2020-12-31', '2021-01-31'],
              dtype='datetime64[ns]', name='Date', length=2769, freq=None)

In [38]:
# looking for Business Month End:

In [39]:
stocks.index + offsets.BMonthEnd()

DatetimeIndex(['2010-01-29', '2010-01-29', '2010-01-29', '2010-01-29',
               '2010-01-29', '2010-01-29', '2010-01-29', '2010-01-29',
               '2010-01-29', '2010-01-29',
               ...
               '2020-12-31', '2020-12-31', '2020-12-31', '2020-12-31',
               '2020-12-31', '2020-12-31', '2020-12-31', '2020-12-31',
               '2020-12-31', '2021-01-29'],
              dtype='datetime64[ns]', name='Date', length=2769, freq=None)

In [43]:
stocks.index + offsets.YearEnd()
stocks.index - offsets.YearEnd()

stocks.index + offsets.YearBegin()
stocks.index - offsets.YearBegin()

DatetimeIndex(['2010-01-01', '2010-01-01', '2010-01-01', '2010-01-01',
               '2010-01-01', '2010-01-01', '2010-01-01', '2010-01-01',
               '2010-01-01', '2010-01-01',
               ...
               '2020-01-01', '2020-01-01', '2020-01-01', '2020-01-01',
               '2020-01-01', '2020-01-01', '2020-01-01', '2020-01-01',
               '2020-01-01', '2020-01-01'],
              dtype='datetime64[ns]', name='Date', length=2769, freq=None)

# The Timedelta Object:

In [45]:
# A Timestamp object represents a given moment in time,
# something that we can plot on a calendar.
# A Timedelta object represents a duration: the distance between two such moments.

In [48]:
# Parse two moments written with a 12-hour clock, then measure the gap between
# them; subtracting one Timestamp from another yields a Timedelta.
time_a, time_b = (
    pd.Timestamp(text)
    for text in ("2020-03-31 04:35:16PM", "2020-03-20 02:15:49AM")
)

time_a - time_b

Timedelta('11 days 14:19:27')

In [49]:
time_b - time_a

Timedelta('-12 days +09:40:33')

In [50]:
pd.Timedelta(days = 3)

Timedelta('3 days 00:00:00')

In [54]:
time_a + pd.Timedelta(days = 3)

Timestamp('2020-04-03 16:35:16')

In [58]:
pd.Timedelta(days = 3, minutes = 45)
pd.Timedelta(days = 3, hours = 12, minutes = 45)
pd.Timedelta(days = 3, hours = 12, minutes = 45, seconds = 20)
pd.Timedelta(weeks = 8, days = 3, hours = 12, minutes = 45)

Timedelta('59 days 12:45:00')

In [59]:
# pd.Timedelta does not accept a years argument; weeks is the largest unit it takes.

# The Timedeltas in a Dataset:

In [60]:
pd.read_csv("ecommerce.csv")

Unnamed: 0,ID,order_date,delivery_date
0,1,5/24/98,2/5/99
1,2,4/22/92,3/6/98
2,4,2/10/91,8/26/92
3,5,7/21/92,11/20/97
4,7,9/2/93,6/10/98
...,...,...,...
496,990,6/24/91,2/2/96
497,991,9/9/91,3/30/98
498,993,11/16/90,4/27/98
499,994,6/3/93,6/13/93


In [63]:
shipping = pd.read_csv("ecommerce.csv", index_col="ID", parse_dates=["order_date", "delivery_date"])
shipping.head(3)

Unnamed: 0_level_0,order_date,delivery_date
ID,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1998-05-24,1999-02-05
2,1992-04-22,1998-03-06
4,1991-02-10,1992-08-26


In [67]:
shipping["Delivery Time"] = shipping["delivery_date"] - shipping["order_date"]

In [68]:
shipping.head(3)

Unnamed: 0_level_0,order_date,delivery_date,Delivery Time
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1998-05-24,1999-02-05,257 days
2,1992-04-22,1998-03-06,2144 days
4,1991-02-10,1992-08-26,563 days


In [71]:
shipping["Twice As Long"] = shipping["delivery_date"] + shipping["Delivery Time"]
shipping.head(3)

Unnamed: 0_level_0,order_date,delivery_date,Delivery Time,Twice As Long
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1998-05-24,1999-02-05,257 days,1999-10-20
2,1992-04-22,1998-03-06,2144 days,2004-01-18
4,1991-02-10,1992-08-26,563 days,1994-03-12


In [72]:
shipping.dtypes

order_date        datetime64[ns]
delivery_date     datetime64[ns]
Delivery Time    timedelta64[ns]
Twice As Long     datetime64[ns]
dtype: object

In [76]:
# Boolean filters on the timedelta column; the duration to compare against is
# written as a string such as "365 days".
# The one-year filter used to be assigned to `mask` and immediately overwritten
# by the next line, so it never had any effect; it now has its own name.
longer_than_a_year = shipping["Delivery Time"] > "365 days"
# `mask` is the filter applied in the next cell: deliveries that took over 2000 days.
mask = shipping["Delivery Time"] > "2000 days"

In [77]:
shipping[mask]

Unnamed: 0_level_0,order_date,delivery_date,Delivery Time,Twice As Long
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2,1992-04-22,1998-03-06,2144 days,2004-01-18
10,1992-02-23,1998-12-30,2502 days,2005-11-05
20,1992-10-17,1998-10-06,2180 days,2004-09-24
23,1992-05-30,1999-08-15,2633 days,2006-10-30
32,1990-01-20,1998-07-24,3107 days,2007-01-25
...,...,...,...,...
958,1990-04-26,1997-06-29,2621 days,2004-09-01
972,1990-02-07,1995-11-05,2097 days,2001-08-02
984,1991-07-25,1999-02-09,2756 days,2006-08-27
991,1991-09-09,1998-03-30,2394 days,2004-10-18
