## Intro to the Working with Dates and Times Module

In [143]:
import pandas as pd
import datetime as dt

## Review of Python's datetime Module

In [144]:
#dt.date(yyyy, MM, dd)
dt.date(2016, 4, 12) # April 12th 2016
someday = dt.date(2010, 1, 20) # January 1st 2010

In [145]:
someday.year
someday.month
someday.day

20

Datetime must have a time, or will be set to midnight

In [146]:
dt.datetime(2010, 1, 20)

datetime.datetime(2010, 1, 20, 0, 0)

In [147]:
dt.datetime(2010, 1, 10, 8, 13, 57)

datetime.datetime(2010, 1, 10, 8, 13, 57)

In the afternoon, we will need the military time

In [148]:
str(dt.datetime(2010, 1, 10, 17, 13, 57))

'2010-01-10 17:13:57'

In [149]:
sometime = dt.datetime(2010, 1, 10, 17, 13, 57)

In [150]:
sometime.year
sometime.month
sometime.day
sometime.hour
sometime.minute
sometime.second

57

## The pandas Timestamp Object

In [151]:
pd.Timestamp('2010-01-10')
pd.Timestamp('2015/01/31')
pd.Timestamp('01/01/2015')
pd.Timestamp('2014, 11, 04')
pd.Timestamp('19/12/2015') # Becareful with these two formats
pd.Timestamp('12/19/2015') #
pd.Timestamp('4/3/2000') # or this could happen

Timestamp('2000-04-03 00:00:00')

In [152]:
pd.Timestamp('2010-01-10 08:35:15')
pd.Timestamp('2010-01-10 6:13:15 PM')

Timestamp('2010-01-10 18:13:15')

In [153]:
pd.Timestamp(dt.date(2015,1,1))

Timestamp('2015-01-01 00:00:00')

In [154]:
pd.Timestamp(dt.datetime(2020,3,14, 21, 34, 33))

Timestamp('2020-03-14 21:34:33')

## The pandas DateTimeIndex Object

In [155]:
dates =["2016-01-02", "2016-03-12", "2009-09-07"]
pd.DatetimeIndex(dates)

DatetimeIndex(['2016-01-02', '2016-03-12', '2009-09-07'], dtype='datetime64[ns]', freq=None)

In [156]:
dates =["2016/01/02", "2016/03/12", "2009/09/07"]
pd.DatetimeIndex(dates)

DatetimeIndex(['2016-01-02', '2016-03-12', '2009-09-07'], dtype='datetime64[ns]', freq=None)

In [157]:
dates = [dt.date(2016, 1, 10), dt.date(1994, 6, 14), dt.date(2003, 12, 29)]
dt_index = pd.DatetimeIndex(dates)

In [158]:
values = [100, 200, 300]
pd.Series(data = values, index = dt_index)

2016-01-10    100
1994-06-14    200
2003-12-29    300
dtype: int64

## The pd.to_datetime() Method

In [159]:
pd.to_datetime("2001-04-19")
pd.to_datetime(dt.date(2015,1, 1))
pd.to_datetime(dt.datetime(2015, 1, 1, 14, 25, 20))
pd.to_datetime(("2015-01-03", "2014/02/08", "2016", "May 4th, 1996"))

DatetimeIndex(['2015-01-03', '2014-02-08', '2016-01-01', '1996-05-04'], dtype='datetime64[ns]', freq=None)

In [160]:
times = pd.Series(["2015-01-03", "2014/02/08", "2016", "May 4th, 1996"])
times

0       2015-01-03
1       2014/02/08
2             2016
3    May 4th, 1996
dtype: object

Convert dates to datetime, maintaning the same format (yyyy-MM-dd)

In [161]:
pd.to_datetime(times)

0   2015-01-03
1   2014-02-08
2   2016-01-01
3   1996-05-04
dtype: datetime64[ns]

In [162]:
dates = pd.Series(["July 5th, 1996", "10/04/1992", "Hello", "2015-02-31"])
dates

0    July 5th, 1996
1        10/04/1992
2             Hello
3        2015-02-31
dtype: object

Parameter that raise errors if there is a incompatible data

So we use coerce to convert to datetime, and everything that is not datetime will be NaT (Not a Time)

In [163]:
pd.to_datetime(dates, errors= "coerce")

0   1996-07-05
1   1992-10-04
2          NaT
3          NaT
dtype: datetime64[ns]

UNIX TIME IN DATETIME

In [164]:
pd.to_datetime([1577836800, 1798761600,1830297600, 1861920000], unit="s")

DatetimeIndex(['2020-01-01', '2027-01-01', '2028-01-01', '2029-01-01'], dtype='datetime64[ns]', freq=None)

## Create Range of Dates with the pd.date_range Method - Part I

Freq = "d" = DAY

In [165]:
times =  pd.date_range(start = "2016-01-01", end = "2016-01-10", freq = "D")

In [166]:
type(times)

pandas.core.indexes.datetimes.DatetimeIndex

In [167]:
type(times[0])

pandas._libs.tslibs.timestamps.Timestamp

In [168]:
pd.date_range(start = "2016-01-01", end = "2016-01-10", freq = "2D") #increment of 2days

DatetimeIndex(['2016-01-01', '2016-01-03', '2016-01-05', '2016-01-07',
               '2016-01-09'],
              dtype='datetime64[ns]', freq='2D')

In [169]:
pd.date_range(start = "2016-01-01", end = "2016-01-10", freq = "B") # B will be just business days

DatetimeIndex(['2016-01-01', '2016-01-04', '2016-01-05', '2016-01-06',
               '2016-01-07', '2016-01-08'],
              dtype='datetime64[ns]', freq='B')

In [170]:
pd.date_range(start = "2016-01-01", end = "2016-01-10", freq = "W-FRI") # Just friday of each week

DatetimeIndex(['2016-01-01', '2016-01-08'], dtype='datetime64[ns]', freq='W-FRI')

In [171]:
pd.date_range(start = "2016-01-01", end = "2016-01-10", freq = "H") # H -> Every single hour

DatetimeIndex(['2016-01-01 00:00:00', '2016-01-01 01:00:00',
               '2016-01-01 02:00:00', '2016-01-01 03:00:00',
               '2016-01-01 04:00:00', '2016-01-01 05:00:00',
               '2016-01-01 06:00:00', '2016-01-01 07:00:00',
               '2016-01-01 08:00:00', '2016-01-01 09:00:00',
               ...
               '2016-01-09 15:00:00', '2016-01-09 16:00:00',
               '2016-01-09 17:00:00', '2016-01-09 18:00:00',
               '2016-01-09 19:00:00', '2016-01-09 20:00:00',
               '2016-01-09 21:00:00', '2016-01-09 22:00:00',
               '2016-01-09 23:00:00', '2016-01-10 00:00:00'],
              dtype='datetime64[ns]', length=217, freq='H')

In [172]:
pd.date_range(start = "2016-01-01", end = "2016-01-10", freq = "12h") # 12h frequency

DatetimeIndex(['2016-01-01 00:00:00', '2016-01-01 12:00:00',
               '2016-01-02 00:00:00', '2016-01-02 12:00:00',
               '2016-01-03 00:00:00', '2016-01-03 12:00:00',
               '2016-01-04 00:00:00', '2016-01-04 12:00:00',
               '2016-01-05 00:00:00', '2016-01-05 12:00:00',
               '2016-01-06 00:00:00', '2016-01-06 12:00:00',
               '2016-01-07 00:00:00', '2016-01-07 12:00:00',
               '2016-01-08 00:00:00', '2016-01-08 12:00:00',
               '2016-01-09 00:00:00', '2016-01-09 12:00:00',
               '2016-01-10 00:00:00'],
              dtype='datetime64[ns]', freq='12H')

In [173]:
pd.date_range(start = "2016-01-01", end = "2017-01-01", freq = "M") # Last day of each month

DatetimeIndex(['2016-01-31', '2016-02-29', '2016-03-31', '2016-04-30',
               '2016-05-31', '2016-06-30', '2016-07-31', '2016-08-31',
               '2016-09-30', '2016-10-31', '2016-11-30', '2016-12-31'],
              dtype='datetime64[ns]', freq='M')

In [174]:
pd.date_range(start = "2016-01-01", end = "2017-01-01", freq = "MS") # First day of each month

DatetimeIndex(['2016-01-01', '2016-02-01', '2016-03-01', '2016-04-01',
               '2016-05-01', '2016-06-01', '2016-07-01', '2016-08-01',
               '2016-09-01', '2016-10-01', '2016-11-01', '2016-12-01',
               '2017-01-01'],
              dtype='datetime64[ns]', freq='MS')

## Create Range of Dates with the pd.date_range Method - Part II

Periods will be the number of timestamps we want to generate

In [175]:
pd.date_range(start= "2012-09-09", periods= 25, freq="D")

DatetimeIndex(['2012-09-09', '2012-09-10', '2012-09-11', '2012-09-12',
               '2012-09-13', '2012-09-14', '2012-09-15', '2012-09-16',
               '2012-09-17', '2012-09-18', '2012-09-19', '2012-09-20',
               '2012-09-21', '2012-09-22', '2012-09-23', '2012-09-24',
               '2012-09-25', '2012-09-26', '2012-09-27', '2012-09-28',
               '2012-09-29', '2012-09-30', '2012-10-01', '2012-10-02',
               '2012-10-03'],
              dtype='datetime64[ns]', freq='D')

In [176]:
len(pd.date_range(start= "2012-09-09", periods= 25, freq="D"))

25

In [177]:
pd.date_range(start= "2012-09-09", periods= 50, freq="B")

DatetimeIndex(['2012-09-10', '2012-09-11', '2012-09-12', '2012-09-13',
               '2012-09-14', '2012-09-17', '2012-09-18', '2012-09-19',
               '2012-09-20', '2012-09-21', '2012-09-24', '2012-09-25',
               '2012-09-26', '2012-09-27', '2012-09-28', '2012-10-01',
               '2012-10-02', '2012-10-03', '2012-10-04', '2012-10-05',
               '2012-10-08', '2012-10-09', '2012-10-10', '2012-10-11',
               '2012-10-12', '2012-10-15', '2012-10-16', '2012-10-17',
               '2012-10-18', '2012-10-19', '2012-10-22', '2012-10-23',
               '2012-10-24', '2012-10-25', '2012-10-26', '2012-10-29',
               '2012-10-30', '2012-10-31', '2012-11-01', '2012-11-02',
               '2012-11-05', '2012-11-06', '2012-11-07', '2012-11-08',
               '2012-11-09', '2012-11-12', '2012-11-13', '2012-11-14',
               '2012-11-15', '2012-11-16'],
              dtype='datetime64[ns]', freq='B')

In [178]:
pd.date_range(start= "2012-09-09", periods= 50, freq="W")

DatetimeIndex(['2012-09-09', '2012-09-16', '2012-09-23', '2012-09-30',
               '2012-10-07', '2012-10-14', '2012-10-21', '2012-10-28',
               '2012-11-04', '2012-11-11', '2012-11-18', '2012-11-25',
               '2012-12-02', '2012-12-09', '2012-12-16', '2012-12-23',
               '2012-12-30', '2013-01-06', '2013-01-13', '2013-01-20',
               '2013-01-27', '2013-02-03', '2013-02-10', '2013-02-17',
               '2013-02-24', '2013-03-03', '2013-03-10', '2013-03-17',
               '2013-03-24', '2013-03-31', '2013-04-07', '2013-04-14',
               '2013-04-21', '2013-04-28', '2013-05-05', '2013-05-12',
               '2013-05-19', '2013-05-26', '2013-06-02', '2013-06-09',
               '2013-06-16', '2013-06-23', '2013-06-30', '2013-07-07',
               '2013-07-14', '2013-07-21', '2013-07-28', '2013-08-04',
               '2013-08-11', '2013-08-18'],
              dtype='datetime64[ns]', freq='W-SUN')

In [179]:
pd.date_range(start= "2012-09-09", periods= 50, freq="W-TUE")

DatetimeIndex(['2012-09-11', '2012-09-18', '2012-09-25', '2012-10-02',
               '2012-10-09', '2012-10-16', '2012-10-23', '2012-10-30',
               '2012-11-06', '2012-11-13', '2012-11-20', '2012-11-27',
               '2012-12-04', '2012-12-11', '2012-12-18', '2012-12-25',
               '2013-01-01', '2013-01-08', '2013-01-15', '2013-01-22',
               '2013-01-29', '2013-02-05', '2013-02-12', '2013-02-19',
               '2013-02-26', '2013-03-05', '2013-03-12', '2013-03-19',
               '2013-03-26', '2013-04-02', '2013-04-09', '2013-04-16',
               '2013-04-23', '2013-04-30', '2013-05-07', '2013-05-14',
               '2013-05-21', '2013-05-28', '2013-06-04', '2013-06-11',
               '2013-06-18', '2013-06-25', '2013-07-02', '2013-07-09',
               '2013-07-16', '2013-07-23', '2013-07-30', '2013-08-06',
               '2013-08-13', '2013-08-20'],
              dtype='datetime64[ns]', freq='W-TUE')

In [180]:
pd.date_range(start= "2012-09-09", periods= 50, freq="6h")

DatetimeIndex(['2012-09-09 00:00:00', '2012-09-09 06:00:00',
               '2012-09-09 12:00:00', '2012-09-09 18:00:00',
               '2012-09-10 00:00:00', '2012-09-10 06:00:00',
               '2012-09-10 12:00:00', '2012-09-10 18:00:00',
               '2012-09-11 00:00:00', '2012-09-11 06:00:00',
               '2012-09-11 12:00:00', '2012-09-11 18:00:00',
               '2012-09-12 00:00:00', '2012-09-12 06:00:00',
               '2012-09-12 12:00:00', '2012-09-12 18:00:00',
               '2012-09-13 00:00:00', '2012-09-13 06:00:00',
               '2012-09-13 12:00:00', '2012-09-13 18:00:00',
               '2012-09-14 00:00:00', '2012-09-14 06:00:00',
               '2012-09-14 12:00:00', '2012-09-14 18:00:00',
               '2012-09-15 00:00:00', '2012-09-15 06:00:00',
               '2012-09-15 12:00:00', '2012-09-15 18:00:00',
               '2012-09-16 00:00:00', '2012-09-16 06:00:00',
               '2012-09-16 12:00:00', '2012-09-16 18:00:00',
               '2012-09-

## Create Range of Dates with the pd.date_range Method - Part III

Start at the end date and, will move until the beginning that will be the number of periods + our parameters

In [181]:
pd.date_range(end = "1999-12-31", periods= 20, freq= "D")

DatetimeIndex(['1999-12-12', '1999-12-13', '1999-12-14', '1999-12-15',
               '1999-12-16', '1999-12-17', '1999-12-18', '1999-12-19',
               '1999-12-20', '1999-12-21', '1999-12-22', '1999-12-23',
               '1999-12-24', '1999-12-25', '1999-12-26', '1999-12-27',
               '1999-12-28', '1999-12-29', '1999-12-30', '1999-12-31'],
              dtype='datetime64[ns]', freq='D')

In [182]:
pd.date_range(end = "1999-12-31", periods= 40, freq= "B")

DatetimeIndex(['1999-11-08', '1999-11-09', '1999-11-10', '1999-11-11',
               '1999-11-12', '1999-11-15', '1999-11-16', '1999-11-17',
               '1999-11-18', '1999-11-19', '1999-11-22', '1999-11-23',
               '1999-11-24', '1999-11-25', '1999-11-26', '1999-11-29',
               '1999-11-30', '1999-12-01', '1999-12-02', '1999-12-03',
               '1999-12-06', '1999-12-07', '1999-12-08', '1999-12-09',
               '1999-12-10', '1999-12-13', '1999-12-14', '1999-12-15',
               '1999-12-16', '1999-12-17', '1999-12-20', '1999-12-21',
               '1999-12-22', '1999-12-23', '1999-12-24', '1999-12-27',
               '1999-12-28', '1999-12-29', '1999-12-30', '1999-12-31'],
              dtype='datetime64[ns]', freq='B')

In [183]:
pd.date_range(end = "1999-12-31", periods= 40, freq= "W-SUN")

DatetimeIndex(['1999-03-28', '1999-04-04', '1999-04-11', '1999-04-18',
               '1999-04-25', '1999-05-02', '1999-05-09', '1999-05-16',
               '1999-05-23', '1999-05-30', '1999-06-06', '1999-06-13',
               '1999-06-20', '1999-06-27', '1999-07-04', '1999-07-11',
               '1999-07-18', '1999-07-25', '1999-08-01', '1999-08-08',
               '1999-08-15', '1999-08-22', '1999-08-29', '1999-09-05',
               '1999-09-12', '1999-09-19', '1999-09-26', '1999-10-03',
               '1999-10-10', '1999-10-17', '1999-10-24', '1999-10-31',
               '1999-11-07', '1999-11-14', '1999-11-21', '1999-11-28',
               '1999-12-05', '1999-12-12', '1999-12-19', '1999-12-26'],
              dtype='datetime64[ns]', freq='W-SUN')

In [184]:
pd.date_range(end = "1999-12-31", periods= 53, freq= "MS") # MS -> Month Start

DatetimeIndex(['1995-08-01', '1995-09-01', '1995-10-01', '1995-11-01',
               '1995-12-01', '1996-01-01', '1996-02-01', '1996-03-01',
               '1996-04-01', '1996-05-01', '1996-06-01', '1996-07-01',
               '1996-08-01', '1996-09-01', '1996-10-01', '1996-11-01',
               '1996-12-01', '1997-01-01', '1997-02-01', '1997-03-01',
               '1997-04-01', '1997-05-01', '1997-06-01', '1997-07-01',
               '1997-08-01', '1997-09-01', '1997-10-01', '1997-11-01',
               '1997-12-01', '1998-01-01', '1998-02-01', '1998-03-01',
               '1998-04-01', '1998-05-01', '1998-06-01', '1998-07-01',
               '1998-08-01', '1998-09-01', '1998-10-01', '1998-11-01',
               '1998-12-01', '1999-01-01', '1999-02-01', '1999-03-01',
               '1999-04-01', '1999-05-01', '1999-06-01', '1999-07-01',
               '1999-08-01', '1999-09-01', '1999-10-01', '1999-11-01',
               '1999-12-01'],
              dtype='datetime64[ns]', freq='MS'

In [185]:
pd.date_range(end = "1999-12-31", periods= 53, freq= "7h") # MS -> Month Start

DatetimeIndex(['1999-12-15 20:00:00', '1999-12-16 03:00:00',
               '1999-12-16 10:00:00', '1999-12-16 17:00:00',
               '1999-12-17 00:00:00', '1999-12-17 07:00:00',
               '1999-12-17 14:00:00', '1999-12-17 21:00:00',
               '1999-12-18 04:00:00', '1999-12-18 11:00:00',
               '1999-12-18 18:00:00', '1999-12-19 01:00:00',
               '1999-12-19 08:00:00', '1999-12-19 15:00:00',
               '1999-12-19 22:00:00', '1999-12-20 05:00:00',
               '1999-12-20 12:00:00', '1999-12-20 19:00:00',
               '1999-12-21 02:00:00', '1999-12-21 09:00:00',
               '1999-12-21 16:00:00', '1999-12-21 23:00:00',
               '1999-12-22 06:00:00', '1999-12-22 13:00:00',
               '1999-12-22 20:00:00', '1999-12-23 03:00:00',
               '1999-12-23 10:00:00', '1999-12-23 17:00:00',
               '1999-12-24 00:00:00', '1999-12-24 07:00:00',
               '1999-12-24 14:00:00', '1999-12-24 21:00:00',
               '1999-12-

## The .dt Acessor

In [186]:
bunch_of_dates = pd.date_range(start= "2000-01-01", end= "2010-12-31", freq= "24D")

In [187]:
s = pd.Series(bunch_of_dates)
s.head(3)

0   2000-01-01
1   2000-01-25
2   2000-02-18
dtype: datetime64[ns]

In [188]:
s.dt.day
s.dt.month
s.dt.year
s.dt.weekday

0      5
1      1
2      4
3      0
4      3
      ..
163    4
164    0
165    3
166    6
167    2
Length: 168, dtype: int64

In [189]:
mask = s.dt.is_quarter_end
s[mask]

57   2003-09-30
dtype: datetime64[ns]

In [190]:
mask = s.dt.is_month_start
s[mask]

0     2000-01-01
19    2001-04-01
38    2002-07-01
104   2006-11-01
109   2007-03-01
137   2009-01-01
142   2009-05-01
dtype: datetime64[ns]

## Import Financial Dataset with pandas_datareader Library

In [191]:
import pandas as pd
import datetime as dt
from pandas_datareader import data

In [192]:
stocks = data.DataReader(name= "MSFT", data_source= "yahoo", start="2010-01-01", end= "2020-12-31") # Microsoft stock history - Getting data from Yahoo Finances
stocks.head(3)

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,31.1,30.59,30.620001,30.950001,38409100.0,23.855659
2010-01-05,31.1,30.639999,30.85,30.959999,49749600.0,23.863365
2010-01-06,31.08,30.52,30.879999,30.77,58182400.0,23.716921


In [193]:
stocks.values

array([[3.11000004e+01, 3.05900002e+01, 3.06200008e+01, 3.09500008e+01,
        3.84091000e+07, 2.38556595e+01],
       [3.11000004e+01, 3.06399994e+01, 3.08500004e+01, 3.09599991e+01,
        4.97496000e+07, 2.38633652e+01],
       [3.10799999e+01, 3.05200005e+01, 3.08799992e+01, 3.07700005e+01,
        5.81824000e+07, 2.37169209e+01],
       ...,
       [2.27179993e+02, 2.23580002e+02, 2.26309998e+02, 2.24149994e+02,
        1.74032000e+07, 2.21828018e+02],
       [2.25630005e+02, 2.21470001e+02, 2.25229996e+02, 2.21679993e+02,
        2.02723000e+07, 2.19383621e+02],
       [2.23000000e+02, 2.19679993e+02, 2.21699997e+02, 2.22419998e+02,
        2.09421000e+07, 2.20115967e+02]])

In [194]:
stocks.columns

Index(['High', 'Low', 'Open', 'Close', 'Volume', 'Adj Close'], dtype='object')

In [195]:
stocks.index

DatetimeIndex(['2010-01-04', '2010-01-05', '2010-01-06', '2010-01-07',
               '2010-01-08', '2010-01-11', '2010-01-12', '2010-01-13',
               '2010-01-14', '2010-01-15',
               ...
               '2020-12-17', '2020-12-18', '2020-12-21', '2020-12-22',
               '2020-12-23', '2020-12-24', '2020-12-28', '2020-12-29',
               '2020-12-30', '2020-12-31'],
              dtype='datetime64[ns]', name='Date', length=2769, freq=None)

## Selecting Rows from a DataFrame with a DatetimeIndex

In [196]:
stocks.loc["2010-01-04"]

High         3.110000e+01
Low          3.059000e+01
Open         3.062000e+01
Close        3.095000e+01
Volume       3.840910e+07
Adj Close    2.385566e+01
Name: 2010-01-04 00:00:00, dtype: float64

In [197]:
stocks.loc[pd.Timestamp("2010-01-04")]

High         3.110000e+01
Low          3.059000e+01
Open         3.062000e+01
Close        3.095000e+01
Volume       3.840910e+07
Adj Close    2.385566e+01
Name: 2010-01-04 00:00:00, dtype: float64

In [198]:
stocks.iloc[0]
stocks.iloc[500]
stocks.iloc[-1]

High         2.230000e+02
Low          2.196800e+02
Open         2.217000e+02
Close        2.224200e+02
Volume       2.094210e+07
Adj Close    2.201160e+02
Name: 2020-12-31 00:00:00, dtype: float64

In [199]:
stocks.loc[[pd.Timestamp("2010-01-04"), pd.Timestamp("2010-01-05")]]

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,31.1,30.59,30.620001,30.950001,38409100.0,23.855659
2010-01-05,31.1,30.639999,30.85,30.959999,49749600.0,23.863365


The two lines below do the same thing

In [200]:
stocks.loc["2013-10-01":"2013-10-07"]
stocks.truncate(before= "2013-10-01", after="2013-10-07")

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2013-10-01,33.610001,33.299999,33.349998,33.580002,36718700.0,28.518585
2013-10-02,34.029999,33.290001,33.360001,33.919998,46946800.0,28.807343
2013-10-03,34.0,33.419998,33.880001,33.860001,38703800.0,28.75638
2013-10-04,33.990002,33.619999,33.689999,33.880001,33008100.0,28.773371
2013-10-07,33.709999,33.200001,33.599998,33.299999,35069300.0,28.280783


In [201]:
stocks.iloc[1000:1005] # iloc has a exclusive "after" parameter

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2013-12-23,36.889999,36.549999,36.810001,36.619999,25128700.0,31.336239
2013-12-24,37.169998,36.639999,36.720001,37.080002,14243000.0,31.729858
2013-12-26,37.490002,37.169998,37.200001,37.439999,17612800.0,32.037922
2013-12-27,37.619999,37.169998,37.580002,37.290001,14563000.0,31.909571
2013-12-30,37.380001,36.900002,37.220001,37.290001,16290500.0,31.909571


In [202]:
pd.date_range(start= "1991-04-12", end= "2020-12-31", freq= pd.DateOffset(years = 1))

DatetimeIndex(['1991-04-12', '1992-04-12', '1993-04-12', '1994-04-12',
               '1995-04-12', '1996-04-12', '1997-04-12', '1998-04-12',
               '1999-04-12', '2000-04-12', '2001-04-12', '2002-04-12',
               '2003-04-12', '2004-04-12', '2005-04-12', '2006-04-12',
               '2007-04-12', '2008-04-12', '2009-04-12', '2010-04-12',
               '2011-04-12', '2012-04-12', '2013-04-12', '2014-04-12',
               '2015-04-12', '2016-04-12', '2017-04-12', '2018-04-12',
               '2019-04-12', '2020-04-12'],
              dtype='datetime64[ns]', freq='<DateOffset: years=1>')

In [203]:
birthdays = stocks.index

In [204]:
birthdays_stocks = stocks.index.isin(birthdays)

In [205]:
stocks[birthdays_stocks]
stocks.loc[birthdays_stocks] # prefered way to aim a subset of stocks

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,31.100000,30.590000,30.620001,30.950001,38409100.0,23.855659
2010-01-05,31.100000,30.639999,30.850000,30.959999,49749600.0,23.863365
2010-01-06,31.080000,30.520000,30.879999,30.770000,58182400.0,23.716921
2010-01-07,30.700001,30.190001,30.629999,30.450001,50559700.0,23.470266
2010-01-08,30.879999,30.240000,30.280001,30.660000,51197400.0,23.632133
...,...,...,...,...,...,...
2020-12-24,223.610001,221.199997,221.419998,222.750000,10550600.0,220.442551
2020-12-28,226.029999,223.020004,224.449997,224.960007,17933500.0,222.629639
2020-12-29,227.179993,223.580002,226.309998,224.149994,17403200.0,221.828018
2020-12-30,225.630005,221.470001,225.229996,221.679993,20272300.0,219.383621


## Timestamp Object Attributes and Methods

In [206]:
stocks = data.DataReader(name= "MSFT", data_source= "yahoo", start="2010-01-01", end= "2020-12-31") # Microsoft stock history - Getting data from Yahoo Finances
stocks.head(3)

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,31.1,30.59,30.620001,30.950001,38409100.0,23.855658
2010-01-05,31.1,30.639999,30.85,30.959999,49749600.0,23.863358
2010-01-06,31.08,30.52,30.879999,30.77,58182400.0,23.716925


In [207]:
someday = stocks.index[500]

In [208]:
someday.astimezone

<bound method Timestamp.tz_convert of Timestamp('2011-12-27 00:00:00')>

In [209]:
someday.month
someday.day
someday.year
someday.is_month_start
someday.is_quarter_end

False

In [210]:
someday.month_name()
someday.day_name()

'Tuesday'

In [211]:
stocks.index.day_name()

Index(['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Monday',
       'Tuesday', 'Wednesday', 'Thursday', 'Friday',
       ...
       'Thursday', 'Friday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday',
       'Monday', 'Tuesday', 'Wednesday', 'Thursday'],
      dtype='object', name='Date', length=2769)

Adding a column with the days of the week names corresponding to each date.

In [212]:
stocks.insert(0, "Day of the week",stocks.index.day_name())

In [213]:
stocks

Unnamed: 0_level_0,Day of the week,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2010-01-04,Monday,31.100000,30.590000,30.620001,30.950001,38409100.0,23.855658
2010-01-05,Tuesday,31.100000,30.639999,30.850000,30.959999,49749600.0,23.863358
2010-01-06,Wednesday,31.080000,30.520000,30.879999,30.770000,58182400.0,23.716925
2010-01-07,Thursday,30.700001,30.190001,30.629999,30.450001,50559700.0,23.470263
2010-01-08,Friday,30.879999,30.240000,30.280001,30.660000,51197400.0,23.632133
...,...,...,...,...,...,...,...
2020-12-24,Thursday,223.610001,221.199997,221.419998,222.750000,10550600.0,220.442566
2020-12-28,Monday,226.029999,223.020004,224.449997,224.960007,17933500.0,222.629654
2020-12-29,Tuesday,227.179993,223.580002,226.309998,224.149994,17403200.0,221.828033
2020-12-30,Wednesday,225.630005,221.470001,225.229996,221.679993,20272300.0,219.383621


Adding a Column showing if is the day is at the start of the month or not

In [214]:
stocks.insert(1, "Is Start of Month", stocks.index.is_month_start)

In [215]:
stocks

Unnamed: 0_level_0,Day of the week,Is Start of Month,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2010-01-04,Monday,False,31.100000,30.590000,30.620001,30.950001,38409100.0,23.855658
2010-01-05,Tuesday,False,31.100000,30.639999,30.850000,30.959999,49749600.0,23.863358
2010-01-06,Wednesday,False,31.080000,30.520000,30.879999,30.770000,58182400.0,23.716925
2010-01-07,Thursday,False,30.700001,30.190001,30.629999,30.450001,50559700.0,23.470263
2010-01-08,Friday,False,30.879999,30.240000,30.280001,30.660000,51197400.0,23.632133
...,...,...,...,...,...,...,...,...
2020-12-24,Thursday,False,223.610001,221.199997,221.419998,222.750000,10550600.0,220.442566
2020-12-28,Monday,False,226.029999,223.020004,224.449997,224.960007,17933500.0,222.629654
2020-12-29,Tuesday,False,227.179993,223.580002,226.309998,224.149994,17403200.0,221.828033
2020-12-30,Wednesday,False,225.630005,221.470001,225.229996,221.679993,20272300.0,219.383621


In [216]:
stocks[stocks["Is Start of Month"]]

Unnamed: 0_level_0,Day of the week,Is Start of Month,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2010-02-01,Monday,True,28.480000,27.920000,28.389999,28.410000,85931100.0,21.897877
2010-03-01,Monday,True,29.049999,28.530001,28.770000,29.020000,43805400.0,22.472652
2010-04-01,Thursday,True,29.540001,28.620001,29.350000,29.160000,74768100.0,22.581062
2010-06-01,Tuesday,True,26.309999,25.520000,25.530001,25.889999,76152400.0,20.139292
2010-07-01,Thursday,True,23.320000,22.730000,23.090000,23.160000,92239400.0,18.015690
...,...,...,...,...,...,...,...,...
2020-06-01,Monday,True,183.000000,181.460007,182.539993,182.830002,22622400.0,180.028412
2020-07-01,Wednesday,True,206.350006,201.770004,203.139999,204.699997,32061200.0,201.563278
2020-09-01,Tuesday,True,227.449997,224.429993,225.509995,227.270004,25725500.0,224.328445
2020-10-01,Thursday,True,213.990005,211.320007,213.490005,212.460007,27158400.0,209.710114


## The pd.dateOffset Object

In [217]:
stocks = data.DataReader(name= "MSFT", data_source= "yahoo", start="2010-01-01", end= "2020-12-31") # Microsoft stock history - Getting data from Yahoo Finances
stocks.head(3)

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,31.1,30.59,30.620001,30.950001,38409100.0,23.855659
2010-01-05,31.1,30.639999,30.85,30.959999,49749600.0,23.863363
2010-01-06,31.08,30.52,30.879999,30.77,58182400.0,23.716919


Adding a certain time of days to the values of the DataFrame

In [218]:
stocks["High"] + 5 # Can be done
#stocks.index + 5 # Do we mean 5 years/months/dates/minutes? Pandas doesn't know how to respond

# The two options below will work normally
stocks.index + pd.DateOffset(days = 5)
stocks.index - pd.DateOffset(days = 5)

#stocks.index = stocks.index - pd.DateOffset(days = 5)

DatetimeIndex(['2009-12-30', '2009-12-31', '2010-01-01', '2010-01-02',
               '2010-01-03', '2010-01-06', '2010-01-07', '2010-01-08',
               '2010-01-09', '2010-01-10',
               ...
               '2020-12-12', '2020-12-13', '2020-12-16', '2020-12-17',
               '2020-12-18', '2020-12-19', '2020-12-23', '2020-12-24',
               '2020-12-25', '2020-12-26'],
              dtype='datetime64[ns]', name='Date', length=2769, freq=None)

In [219]:
stocks.index + pd.DateOffset(weeks = 2)
stocks.index - pd.DateOffset(weeks = 3)

stocks.index + pd.DateOffset(months = 2)
stocks.index - pd.DateOffset(months = 3)

stocks.index + pd.DateOffset(years = 3)
stocks.index - pd.DateOffset(years = 3)

# We can mix the parameters
stocks.index + pd.DateOffset(years = 1, months = 3, days = 10, hours = 6, minutes = 2, seconds = 40)

DatetimeIndex(['2011-04-14 06:02:40', '2011-04-15 06:02:40',
               '2011-04-16 06:02:40', '2011-04-17 06:02:40',
               '2011-04-18 06:02:40', '2011-04-21 06:02:40',
               '2011-04-22 06:02:40', '2011-04-23 06:02:40',
               '2011-04-24 06:02:40', '2011-04-25 06:02:40',
               ...
               '2022-03-27 06:02:40', '2022-03-28 06:02:40',
               '2022-03-31 06:02:40', '2022-04-01 06:02:40',
               '2022-04-02 06:02:40', '2022-04-03 06:02:40',
               '2022-04-07 06:02:40', '2022-04-08 06:02:40',
               '2022-04-09 06:02:40', '2022-04-10 06:02:40'],
              dtype='datetime64[ns]', name='Date', length=2769, freq=None)

## Timeseries Offsets

In [220]:
stocks = data.DataReader(name= "MSFT", data_source= "yahoo", start="2010-01-01", end= "2020-12-31")
stocks.head(3)

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,31.1,30.59,30.620001,30.950001,38409100.0,23.855659
2010-01-05,31.1,30.639999,30.85,30.959999,49749600.0,23.863361
2010-01-06,31.08,30.52,30.879999,30.77,58182400.0,23.716915


In [221]:
stocks.index + pd.tseries.offsets.MonthEnd()
stocks.index - pd.tseries.offsets.MonthEnd()

stocks.index + pd.tseries.offsets.MonthBegin()
stocks.index - pd.tseries.offsets.MonthBegin()

DatetimeIndex(['2010-01-01', '2010-01-01', '2010-01-01', '2010-01-01',
               '2010-01-01', '2010-01-01', '2010-01-01', '2010-01-01',
               '2010-01-01', '2010-01-01',
               ...
               '2020-12-01', '2020-12-01', '2020-12-01', '2020-12-01',
               '2020-12-01', '2020-12-01', '2020-12-01', '2020-12-01',
               '2020-12-01', '2020-12-01'],
              dtype='datetime64[ns]', name='Date', length=2769, freq=None)

In [222]:
stocks.tail(3)

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-12-29,227.179993,223.580002,226.309998,224.149994,17403200.0,221.828033
2020-12-30,225.630005,221.470001,225.229996,221.679993,20272300.0,219.383636
2020-12-31,223.0,219.679993,221.699997,222.419998,20942100.0,220.115967


Implicit importing pandas.tseries offsets

In [223]:
from pandas.tseries import offsets

In [224]:
stocks.index + offsets.MonthEnd()
stocks.index - offsets.MonthEnd()

stocks.index + offsets.MonthBegin()
stocks.index - offsets.MonthBegin()

DatetimeIndex(['2010-01-01', '2010-01-01', '2010-01-01', '2010-01-01',
               '2010-01-01', '2010-01-01', '2010-01-01', '2010-01-01',
               '2010-01-01', '2010-01-01',
               ...
               '2020-12-01', '2020-12-01', '2020-12-01', '2020-12-01',
               '2020-12-01', '2020-12-01', '2020-12-01', '2020-12-01',
               '2020-12-01', '2020-12-01'],
              dtype='datetime64[ns]', name='Date', length=2769, freq=None)

In [225]:
stocks.tail(3)

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-12-29,227.179993,223.580002,226.309998,224.149994,17403200.0,221.828033
2020-12-30,225.630005,221.470001,225.229996,221.679993,20272300.0,219.383636
2020-12-31,223.0,219.679993,221.699997,222.419998,20942100.0,220.115967


Business Month end

In [226]:
stocks.index + offsets.BMonthEnd()
stocks.index - offsets.BMonthEnd()

DatetimeIndex(['2009-12-31', '2009-12-31', '2009-12-31', '2009-12-31',
               '2009-12-31', '2009-12-31', '2009-12-31', '2009-12-31',
               '2009-12-31', '2009-12-31',
               ...
               '2020-11-30', '2020-11-30', '2020-11-30', '2020-11-30',
               '2020-11-30', '2020-11-30', '2020-11-30', '2020-11-30',
               '2020-11-30', '2020-11-30'],
              dtype='datetime64[ns]', name='Date', length=2769, freq=None)

In [227]:
stocks.index + offsets.YearBegin()

DatetimeIndex(['2011-01-01', '2011-01-01', '2011-01-01', '2011-01-01',
               '2011-01-01', '2011-01-01', '2011-01-01', '2011-01-01',
               '2011-01-01', '2011-01-01',
               ...
               '2021-01-01', '2021-01-01', '2021-01-01', '2021-01-01',
               '2021-01-01', '2021-01-01', '2021-01-01', '2021-01-01',
               '2021-01-01', '2021-01-01'],
              dtype='datetime64[ns]', name='Date', length=2769, freq=None)

## The Timedelta Object

In [228]:
time_a = pd.Timestamp("2020-03-31")
time_b = pd.Timestamp("2020-03-20")

time_a - time_b

Timedelta('11 days 00:00:00')

In [229]:
time_a = pd.Timestamp("2020-03-31 04:35:16PM")
time_b = pd.Timestamp("2020-03-20 02:13:49PM")

time_a - time_b

Timedelta('11 days 02:21:27')

In [230]:
time_b - time_a

Timedelta('-12 days +21:38:33')

Time in passing days

In [231]:
pd.Timedelta(days = 3)

Timedelta('3 days 00:00:00')

In [232]:
time_a + pd.Timedelta(days = 3)

Timestamp('2020-04-03 16:35:16')

In [233]:
pd.Timedelta(days = 3, hours = 12, minutes = 45)
pd.Timedelta(weeks = 3, hours = 12, minutes = 45, days = 8)
# Years will not work here
#pd.Timedelta(days = 3, hours = 12, minutes = 45, year=1)

Timedelta('29 days 12:45:00')

In [234]:
pd.Timedelta("5 minutes")
pd.Timedelta("6 hours 12 minutes")
pd.Timedelta("14 days 6 hours 12 minutes 39 seconds")

Timedelta('14 days 06:12:39')

## Timedeltas in a Dataset

In [235]:
shipping = pd.read_csv("ecommerce.csv", index_col= "ID", parse_dates= ["order_date", "delivery_date"])
shipping.head(3)

Unnamed: 0_level_0,order_date,delivery_date
ID,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1998-05-24,1999-02-05
2,1992-04-22,1998-03-06
4,1991-02-10,1992-08-26


In [236]:
shipping["delivery_time"] = shipping.delivery_date - shipping.order_date

In [237]:
shipping.head(3)

Unnamed: 0_level_0,order_date,delivery_date,delivery_time
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1998-05-24,1999-02-05,257 days
2,1992-04-22,1998-03-06,2144 days
4,1991-02-10,1992-08-26,563 days


In [238]:
shipping["twice_as_long"] = shipping.delivery_date - shipping.delivery_time

In [239]:
shipping.head(3)

Unnamed: 0_level_0,order_date,delivery_date,delivery_time,twice_as_long
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1998-05-24,1999-02-05,257 days,1998-05-24
2,1992-04-22,1998-03-06,2144 days,1992-04-22
4,1991-02-10,1992-08-26,563 days,1991-02-10


In [240]:
shipping.dtypes

order_date        datetime64[ns]
delivery_date     datetime64[ns]
delivery_time    timedelta64[ns]
twice_as_long     datetime64[ns]
dtype: object

In [241]:
mask = shipping.delivery_time > "365 days"
shipping[mask]

Unnamed: 0_level_0,order_date,delivery_date,delivery_time,twice_as_long
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2,1992-04-22,1998-03-06,2144 days,1992-04-22
4,1991-02-10,1992-08-26,563 days,1991-02-10
5,1992-07-21,1997-11-20,1948 days,1992-07-21
7,1993-09-02,1998-06-10,1742 days,1993-09-02
9,1990-01-25,1994-10-02,1711 days,1990-01-25
...,...,...,...,...
986,1990-12-10,1992-12-16,737 days,1990-12-10
990,1991-06-24,1996-02-02,1684 days,1991-06-24
991,1991-09-09,1998-03-30,2394 days,1991-09-09
993,1990-11-16,1998-04-27,2719 days,1990-11-16


In [242]:
mask = shipping.delivery_time < "365 days"
shipping[mask]

Unnamed: 0_level_0,order_date,delivery_date,delivery_time,twice_as_long
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1998-05-24,1999-02-05,257 days,1998-05-24
8,1993-06-10,1993-11-11,154 days,1993-06-10
19,1998-05-10,1998-05-19,9 days,1998-05-10
30,1998-10-22,1999-01-11,81 days,1998-10-22
46,1995-09-05,1996-07-19,318 days,1995-09-05
...,...,...,...,...
954,1993-08-08,1994-01-23,168 days,1993-08-08
969,1996-09-24,1996-11-16,53 days,1996-09-24
975,1997-06-18,1997-11-02,137 days,1997-06-18
985,1995-07-26,1996-06-18,328 days,1995-07-26


In [243]:
mask = shipping.delivery_time == "3423 days"
shipping[mask]

Unnamed: 0_level_0,order_date,delivery_date,delivery_time,twice_as_long
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
130,1990-04-02,1999-08-16,3423 days,1990-04-02


In [244]:
shipping.delivery_time.max()

Timedelta('3583 days 00:00:00')

In [245]:
shipping.delivery_time.min()

Timedelta('8 days 00:00:00')