In [1]:
import pandas as pd 

# this will import the date time module 
import datetime as dt

## Review of Python's datetime Module

In [4]:
someday = dt.date(2020, 11, 3)

In [5]:
someday.year

2020

In [6]:
someday.month

11

In [7]:
someday.day

3

In [8]:
# we did not specify the time so it will default to the midnight 

dt.datetime(2010, 1, 10)

datetime.datetime(2010, 1, 10, 0, 0)

In [11]:
# this will show that it is 8AM
# minutes is the fifth argument 
# sixth argument is the seconds 

sometime = dt.datetime(2010, 1, 10, 8, 10, 17)

In [14]:
sometime.day
sometime.month
sometime.year
sometime.hour
sometime.minute
sometime.second

17

## The Pandas Timestamp Object

In [15]:
pd.Timestamp("2015-03-31")

Timestamp('2015-03-31 00:00:00')

In [16]:
pd.Timestamp("2015/03/31")

Timestamp('2015-03-31 00:00:00')

In [19]:
pd.Timestamp("2015, 3, 31")

Timestamp('2015-03-31 00:00:00')

In [20]:
pd.Timestamp("2021, 3, 8 6:13:29 PM")

Timestamp('2021-03-08 18:13:29')

## The pandas DateTimeIndex Object

#### this is a collection pandas datetime timestamps

In [21]:
# first it is going to date 

dates = ["2016-01-02", "2016-04-12", "2009-09-07"]
pd.DatetimeIndex(dates)

DatetimeIndex(['2016-01-02', '2016-04-12', '2009-09-07'], dtype='datetime64[ns]', freq=None)

In [2]:
# it is going to convert those date objects to pandas timestamp objects 

dates = [dt.date(2016, 1, 10), dt.date(1994, 6, 13), dt.date(2003, 12, 29)]
dtIndex = pd.DatetimeIndex(dates)

In [3]:
# setting the date time index as the index

values = [100, 200, 300]
pd.Series(data = values, index = dtIndex)

2016-01-10    100
1994-06-13    200
2003-12-29    300
dtype: int64

## The pd.to_datetime() Method

In [6]:
pd.to_datetime('2001-04-19')
pd.to_datetime(dt.date(2015, 1,1))
pd.to_datetime(dt.datetime(2015, 1, 1, 14, 35, 20))

Timestamp('2015-01-01 14:35:20')

In [7]:
# converts all of them into timestamps

pd.to_datetime(["2015-01-03", "2014/02/08", "2016", "July 4th, 1996"])

DatetimeIndex(['2015-01-03', '2014-02-08', '2016-01-01', '1996-07-04'], dtype='datetime64[ns]', freq=None)

In [10]:
# will default to a string type 

times = pd.Series(["2015-01-03", "2014/02/08", "2016", "July 4th, 1996"])
times

0        2015-01-03
1        2014/02/08
2              2016
3    July 4th, 1996
dtype: object

In [11]:
# the list will be of date type which is what we want 

pd.to_datetime(times)

0   2015-01-03
1   2014-02-08
2   2016-01-01
3   1996-07-04
dtype: datetime64[ns]

In [13]:
dates = pd.Series(["July 4th, 1996", "10/01/1991", "Hello", "2015-02-31"])
dates

0    July 4th, 1996
1        10/01/1991
2             Hello
3        2015-02-31
dtype: object

In [15]:
# NaT (not a time)

pd.to_datetime(dates, errors = "coerce")

0   1996-07-04
1   1991-10-01
2          NaT
3          NaT
dtype: datetime64[ns]

## Create Range of Dates with the pd.date_range() Method, Part 1 

In [17]:
# two of the other methods are needed in order for the method to work 
# freq = D means that the frequency is set to daily 

times = pd.date_range(start = "2016-01-01", end = '2016-01-10', freq = 'D')

In [18]:
type(times)

pandas.core.indexes.datetimes.DatetimeIndex

In [19]:
# first item in the DateTimeIndex 

times[0]

Timestamp('2016-01-01 00:00:00', freq='D')

In [21]:
# freq = '1D' means separate by one day 

pd.date_range(start = "2016-01-01", end = '2016-01-10', freq = '1D')

DatetimeIndex(['2016-01-01', '2016-01-02', '2016-01-03', '2016-01-04',
               '2016-01-05', '2016-01-06', '2016-01-07', '2016-01-08',
               '2016-01-09', '2016-01-10'],
              dtype='datetime64[ns]', freq='D')

In [22]:
pd.date_range(start = "2016-01-01", end = '2016-01-10', freq = '2D')

DatetimeIndex(['2016-01-01', '2016-01-03', '2016-01-05', '2016-01-07',
               '2016-01-09'],
              dtype='datetime64[ns]', freq='2D')

In [23]:
# 'B' meand business days so it will exclude the weekends 

pd.date_range(start = "2016-01-01", end = '2016-01-10', freq = 'B')

DatetimeIndex(['2016-01-01', '2016-01-04', '2016-01-05', '2016-01-06',
               '2016-01-07', '2016-01-08'],
              dtype='datetime64[ns]', freq='B')

In [24]:
pd.date_range(start = "2016-01-01", end = '2016-01-15', freq = 'W')

DatetimeIndex(['2016-01-03', '2016-01-10'], dtype='datetime64[ns]', freq='W-SUN')

In [25]:
# counts the fridays per week 

pd.date_range(start = "2016-01-01", end = '2016-01-15', freq = 'W-FRI')

DatetimeIndex(['2016-01-01', '2016-01-08', '2016-01-15'], dtype='datetime64[ns]', freq='W-FRI')

In [27]:
pd.date_range(start = "2016-01-01", end = '2016-01-15', freq = 'H')
pd.date_range(start = "2016-01-01", end = '2016-01-15', freq = '6H')

DatetimeIndex(['2016-01-01 00:00:00', '2016-01-01 06:00:00',
               '2016-01-01 12:00:00', '2016-01-01 18:00:00',
               '2016-01-02 00:00:00', '2016-01-02 06:00:00',
               '2016-01-02 12:00:00', '2016-01-02 18:00:00',
               '2016-01-03 00:00:00', '2016-01-03 06:00:00',
               '2016-01-03 12:00:00', '2016-01-03 18:00:00',
               '2016-01-04 00:00:00', '2016-01-04 06:00:00',
               '2016-01-04 12:00:00', '2016-01-04 18:00:00',
               '2016-01-05 00:00:00', '2016-01-05 06:00:00',
               '2016-01-05 12:00:00', '2016-01-05 18:00:00',
               '2016-01-06 00:00:00', '2016-01-06 06:00:00',
               '2016-01-06 12:00:00', '2016-01-06 18:00:00',
               '2016-01-07 00:00:00', '2016-01-07 06:00:00',
               '2016-01-07 12:00:00', '2016-01-07 18:00:00',
               '2016-01-08 00:00:00', '2016-01-08 06:00:00',
               '2016-01-08 12:00:00', '2016-01-08 18:00:00',
               '2016-01-

In [28]:
# this will give me the last day of each of the moneths 

pd.date_range(start = "2016-01-01", end = '2016-12-31', freq = 'M')

DatetimeIndex(['2016-01-31', '2016-02-29', '2016-03-31', '2016-04-30',
               '2016-05-31', '2016-06-30', '2016-07-31', '2016-08-31',
               '2016-09-30', '2016-10-31', '2016-11-30', '2016-12-31'],
              dtype='datetime64[ns]', freq='M')

In [32]:
# first day of each month 

pd.date_range(start = "2016-01-01", end = '2017-01-01', freq = 'MS')

DatetimeIndex(['2016-01-01', '2016-02-01', '2016-03-01', '2016-04-01',
               '2016-05-01', '2016-06-01', '2016-07-01', '2016-08-01',
               '2016-09-01', '2016-10-01', '2016-11-01', '2016-12-01',
               '2017-01-01'],
              dtype='datetime64[ns]', freq='MS')

In [34]:
# the last day of each year 

pd.date_range(start = "2016-01-01", end = '2040-01-01', freq = 'A')

DatetimeIndex(['2016-12-31', '2017-12-31', '2018-12-31', '2019-12-31',
               '2020-12-31', '2021-12-31', '2022-12-31', '2023-12-31',
               '2024-12-31', '2025-12-31', '2026-12-31', '2027-12-31',
               '2028-12-31', '2029-12-31', '2030-12-31', '2031-12-31',
               '2032-12-31', '2033-12-31', '2034-12-31', '2035-12-31',
               '2036-12-31', '2037-12-31', '2038-12-31', '2039-12-31'],
              dtype='datetime64[ns]', freq='A-DEC')

## Create Range of Dates with the pd.date_range() Method, Part 2

In [37]:
# period refers to the number of results that we want to get 
# will show us exactly 25 results (25 days)

pd.date_range("2012-09-09", periods = 25, freq = 'D')

DatetimeIndex(['2012-09-09', '2012-09-10', '2012-09-11', '2012-09-12',
               '2012-09-13', '2012-09-14', '2012-09-15', '2012-09-16',
               '2012-09-17', '2012-09-18', '2012-09-19', '2012-09-20',
               '2012-09-21', '2012-09-22', '2012-09-23', '2012-09-24',
               '2012-09-25', '2012-09-26', '2012-09-27', '2012-09-28',
               '2012-09-29', '2012-09-30', '2012-10-01', '2012-10-02',
               '2012-10-03'],
              dtype='datetime64[ns]', freq='D')

In [38]:
# will show us 50 business days (excludes business days)

pd.date_range("2012-09-09", periods = 50, freq = 'B')

DatetimeIndex(['2012-09-10', '2012-09-11', '2012-09-12', '2012-09-13',
               '2012-09-14', '2012-09-17', '2012-09-18', '2012-09-19',
               '2012-09-20', '2012-09-21', '2012-09-24', '2012-09-25',
               '2012-09-26', '2012-09-27', '2012-09-28', '2012-10-01',
               '2012-10-02', '2012-10-03', '2012-10-04', '2012-10-05',
               '2012-10-08', '2012-10-09', '2012-10-10', '2012-10-11',
               '2012-10-12', '2012-10-15', '2012-10-16', '2012-10-17',
               '2012-10-18', '2012-10-19', '2012-10-22', '2012-10-23',
               '2012-10-24', '2012-10-25', '2012-10-26', '2012-10-29',
               '2012-10-30', '2012-10-31', '2012-11-01', '2012-11-02',
               '2012-11-05', '2012-11-06', '2012-11-07', '2012-11-08',
               '2012-11-09', '2012-11-12', '2012-11-13', '2012-11-14',
               '2012-11-15', '2012-11-16'],
              dtype='datetime64[ns]', freq='B')

In [41]:
# 50 dates separated by the weekly frequency

pd.date_range("2012-09-09", periods = 50, freq = 'W-TUE')

DatetimeIndex(['2012-09-11', '2012-09-18', '2012-09-25', '2012-10-02',
               '2012-10-09', '2012-10-16', '2012-10-23', '2012-10-30',
               '2012-11-06', '2012-11-13', '2012-11-20', '2012-11-27',
               '2012-12-04', '2012-12-11', '2012-12-18', '2012-12-25',
               '2013-01-01', '2013-01-08', '2013-01-15', '2013-01-22',
               '2013-01-29', '2013-02-05', '2013-02-12', '2013-02-19',
               '2013-02-26', '2013-03-05', '2013-03-12', '2013-03-19',
               '2013-03-26', '2013-04-02', '2013-04-09', '2013-04-16',
               '2013-04-23', '2013-04-30', '2013-05-07', '2013-05-14',
               '2013-05-21', '2013-05-28', '2013-06-04', '2013-06-11',
               '2013-06-18', '2013-06-25', '2013-07-02', '2013-07-09',
               '2013-07-16', '2013-07-23', '2013-07-30', '2013-08-06',
               '2013-08-13', '2013-08-20'],
              dtype='datetime64[ns]', freq='W-TUE')

## Create Range of Dates with the pd.date_range() Method, Part 3

In [44]:
# will start at the end point and move upwards on a daily frequency 

pd.date_range(end = "1999-12-31", periods = 20, freq = 'D')

DatetimeIndex(['1999-12-12', '1999-12-13', '1999-12-14', '1999-12-15',
               '1999-12-16', '1999-12-17', '1999-12-18', '1999-12-19',
               '1999-12-20', '1999-12-21', '1999-12-22', '1999-12-23',
               '1999-12-24', '1999-12-25', '1999-12-26', '1999-12-27',
               '1999-12-28', '1999-12-29', '1999-12-30', '1999-12-31'],
              dtype='datetime64[ns]', freq='D')

In [46]:
pd.date_range(end = "1999-12-31", periods = 40, freq = 'B')

DatetimeIndex(['1999-11-08', '1999-11-09', '1999-11-10', '1999-11-11',
               '1999-11-12', '1999-11-15', '1999-11-16', '1999-11-17',
               '1999-11-18', '1999-11-19', '1999-11-22', '1999-11-23',
               '1999-11-24', '1999-11-25', '1999-11-26', '1999-11-29',
               '1999-11-30', '1999-12-01', '1999-12-02', '1999-12-03',
               '1999-12-06', '1999-12-07', '1999-12-08', '1999-12-09',
               '1999-12-10', '1999-12-13', '1999-12-14', '1999-12-15',
               '1999-12-16', '1999-12-17', '1999-12-20', '1999-12-21',
               '1999-12-22', '1999-12-23', '1999-12-24', '1999-12-27',
               '1999-12-28', '1999-12-29', '1999-12-30', '1999-12-31'],
              dtype='datetime64[ns]', freq='B')

In [47]:
pd.date_range(end = "1999-12-31", periods = 40, freq = 'W-SUN')

DatetimeIndex(['1999-03-28', '1999-04-04', '1999-04-11', '1999-04-18',
               '1999-04-25', '1999-05-02', '1999-05-09', '1999-05-16',
               '1999-05-23', '1999-05-30', '1999-06-06', '1999-06-13',
               '1999-06-20', '1999-06-27', '1999-07-04', '1999-07-11',
               '1999-07-18', '1999-07-25', '1999-08-01', '1999-08-08',
               '1999-08-15', '1999-08-22', '1999-08-29', '1999-09-05',
               '1999-09-12', '1999-09-19', '1999-09-26', '1999-10-03',
               '1999-10-10', '1999-10-17', '1999-10-24', '1999-10-31',
               '1999-11-07', '1999-11-14', '1999-11-21', '1999-11-28',
               '1999-12-05', '1999-12-12', '1999-12-19', '1999-12-26'],
              dtype='datetime64[ns]', freq='W-SUN')

## The .dt Accessor

In [49]:
bunch_of_dates = pd.date_range(start = '2000-01-01', end = '2010-12-31', freq = "24D")

In [51]:
s = pd.Series(bunch_of_dates)
s.head(3)

0   2000-01-01
1   2000-01-25
2   2000-02-18
dtype: datetime64[ns]

In [52]:
s.dt.day

0       1
1      25
2      18
3      13
4       6
       ..
163    17
164    11
165     4
166    28
167    22
Length: 168, dtype: int64

In [53]:
s.dt.month

0       1
1       1
2       2
3       3
4       4
       ..
163     9
164    10
165    11
166    11
167    12
Length: 168, dtype: int64

In [60]:
s.dt.day_name

<bound method PandasDelegate._add_delegate_accessors.<locals>._create_delegator_method.<locals>.f of <pandas.core.indexes.accessors.DatetimeProperties object at 0x00000151BBC5CB80>>

In [62]:
# which of the dates fall in the start of a quarter 
# will return a boolean series and will show True for those dates that start in a quarter 

mask = s.dt.is_quarter_start
s[mask]

0     2000-01-01
19    2001-04-01
38    2002-07-01
137   2009-01-01
dtype: datetime64[ns]

## Import Financial Dataset with pandas_datareader Library

In [2]:
import pandas as pd 
import datetime as dt

# we want to import data 
from pandas_datareader import data

In [4]:
stocks = data.DataReader(name = 'MSFT', data_source = 'yahoo', start = '2010-01-01', end = '2020-12-31')

In [7]:
stocks.head(3)

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,31.1,30.59,30.620001,30.950001,38409100.0,24.226894
2010-01-05,31.1,30.639999,30.85,30.959999,49749600.0,24.23472
2010-01-06,31.08,30.52,30.879999,30.77,58182400.0,24.085989


In [13]:
stocks.values
stocks.columns

# datetime index 
stocks.index
stocks.axes

[DatetimeIndex(['2010-01-04', '2010-01-05', '2010-01-06', '2010-01-07',
                '2010-01-08', '2010-01-11', '2010-01-12', '2010-01-13',
                '2010-01-14', '2010-01-15',
                ...
                '2020-07-15', '2020-07-16', '2020-07-17', '2020-07-20',
                '2020-07-21', '2020-07-22', '2020-07-23', '2020-07-24',
                '2020-07-27', '2020-07-28'],
               dtype='datetime64[ns]', name='Date', length=2660, freq=None),
 Index(['High', 'Low', 'Open', 'Close', 'Volume', 'Adj Close'], dtype='object')]

## Selecting Rows from a DataFrame with a DateTimeIndex

#### extract one or more rows either by index label or by index position 

#### The loc accessor is based on index label 

In [5]:
stocks = data.DataReader(name = 'MSFT', data_source = 'yahoo', start = '2010-01-01', end = '2020-12-31')
stocks.head(3)

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,31.1,30.59,30.620001,30.950001,38409100.0,24.226894
2010-01-05,31.1,30.639999,30.85,30.959999,49749600.0,24.23472
2010-01-06,31.08,30.52,30.879999,30.77,58182400.0,24.085989


In [6]:
# it is a good idea to wrap the loc accessor with a timestamp object 

stocks.loc[pd.Timestamp("2010-01-04")]

High         3.110000e+01
Low          3.059000e+01
Open         3.062000e+01
Close        3.095000e+01
Volume       3.840910e+07
Adj Close    2.422689e+01
Name: 2010-01-04 00:00:00, dtype: float64

In [22]:
# iloc is for index position 

stocks.iloc[0]
stocks.iloc[500]

High         2.614000e+01
Low          2.593000e+01
Open         2.596000e+01
Close        2.604000e+01
Volume       2.128720e+07
Adj Close    2.136081e+01
Name: 2011-12-27 00:00:00, dtype: float64

In [8]:
# pulling multiple values at the same time 

stocks.loc[[pd.Timestamp("2010-01-04"), pd.Timestamp("2010-01-05")]]

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,31.1,30.59,30.620001,30.950001,38409100.0,24.226894
2010-01-05,31.1,30.639999,30.85,30.959999,49749600.0,24.23472


In [9]:
# slicing, we can pull from a given label to a given label 
# the end date is INCLUSIVE 

stocks.loc["2013-10-01" : "2013-10-07"]

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2013-10-01,33.610001,33.299999,33.349998,33.580002,36718700.0,28.962389
2013-10-02,34.029999,33.290001,33.360001,33.919998,46946800.0,29.255632
2013-10-03,34.0,33.419998,33.880001,33.860001,38703800.0,29.203878
2013-10-04,33.990002,33.619999,33.689999,33.880001,33008100.0,29.22113
2013-10-07,33.709999,33.200001,33.599998,33.299999,35069300.0,28.720888


In [10]:
# truncate does the same thing, it is just the method way of doing it 

stocks.truncate(before = "2013-10-01", after = "2013-10-07")

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2013-10-01,33.610001,33.299999,33.349998,33.580002,36718700.0,28.962389
2013-10-02,34.029999,33.290001,33.360001,33.919998,46946800.0,29.255632
2013-10-03,34.0,33.419998,33.880001,33.860001,38703800.0,29.203878
2013-10-04,33.990002,33.619999,33.689999,33.880001,33008100.0,29.22113
2013-10-07,33.709999,33.200001,33.599998,33.299999,35069300.0,28.720888


In [11]:
# with iloc the ending number is going to be EXCLUSIVE 

stocks.iloc[1000:1005]

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2013-12-23,36.889999,36.549999,36.810001,36.619999,25128700.0,31.823889
2013-12-24,37.169998,36.639999,36.720001,37.080002,14243000.0,32.223644
2013-12-26,37.490002,37.169998,37.200001,37.439999,17612800.0,32.536484
2013-12-27,37.619999,37.169998,37.580002,37.290001,14563000.0,32.406143
2013-12-30,37.380001,36.900002,37.220001,37.290001,16290500.0,32.406143


#### Find out the stock price of microsoft on all of my birthdays 

In [16]:
# freq = pd.DateOffset means that it will look for that same date in all the years leading up to the last one 

bday = pd.date_range(start = "1999-11-03", end = "2020-11-03", freq = pd.DateOffset(years = 1))

In [19]:
# retunrs a boolean list where False indicates that it does not fall on your bday 


bday_stock = stocks.index.isin(bday)

In [20]:
# will return the stocks on your birthdays but if it doesn't show up it means that it fell on a weekend

stocks[bday_stock]

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-11-03,27.49,26.959999,27.459999,27.030001,110255300.0,21.467186
2011-11-03,26.59,25.98,26.24,26.530001,65836100.0,21.600107
2014-11-03,47.459999,46.73,46.889999,47.439999,23130400.0,42.09021
2015-11-03,54.389999,52.900002,52.93,54.150002,36596900.0,49.330231
2016-11-03,59.639999,59.110001,59.529999,59.209999,21600400.0,55.419125
2017-11-03,84.540001,83.400002,84.080002,84.139999,17633500.0,80.650314


## Timestamp Object Attributes and Methods

In [21]:
stocks = data.DataReader(name = 'MSFT', data_source = 'yahoo', start = '2010-01-01', end = '2020-12-31')
stocks.head(3)

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,31.1,30.59,30.620001,30.950001,38409100.0,24.226894
2010-01-05,31.1,30.639999,30.85,30.959999,49749600.0,24.23472
2010-01-06,31.08,30.52,30.879999,30.77,58182400.0,24.085989


In [22]:
stocks.index

DatetimeIndex(['2010-01-04', '2010-01-05', '2010-01-06', '2010-01-07',
               '2010-01-08', '2010-01-11', '2010-01-12', '2010-01-13',
               '2010-01-14', '2010-01-15',
               ...
               '2020-07-21', '2020-07-22', '2020-07-23', '2020-07-24',
               '2020-07-27', '2020-07-28', '2020-07-29', '2020-07-30',
               '2020-07-31', '2020-08-03'],
              dtype='datetime64[ns]', name='Date', length=2664, freq=None)

In [24]:
someday = stocks.index[500]
someday

Timestamp('2011-12-27 00:00:00')

In [31]:
someday.month
someday.week
# tells you whether that date falls at the start of the month 
someday.is_month_start
# tells you whether that date falls at the end of the month 
someday.is_month_end

someday.is_quarter_start
someday.is_quarter_end
someday.day

27

In [33]:
someday.month_name()
someday.day_name()

'Tuesday'

In [36]:
stocks.index.day_name()

Index(['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Monday',
       'Tuesday', 'Wednesday', 'Thursday', 'Friday',
       ...
       'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Monday', 'Tuesday',
       'Wednesday', 'Thursday', 'Friday', 'Monday'],
      dtype='object', name='Date', length=2664)

In [38]:
# will insert a new column with the name of the day 

stocks.insert(loc = 0, column = 'Day of Week', value = stocks.index.day_name())

In [39]:
stocks.head()

Unnamed: 0_level_0,Day of Week,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2010-01-04,Monday,31.1,30.59,30.620001,30.950001,38409100.0,24.226894
2010-01-05,Tuesday,31.1,30.639999,30.85,30.959999,49749600.0,24.23472
2010-01-06,Wednesday,31.08,30.52,30.879999,30.77,58182400.0,24.085989
2010-01-07,Thursday,30.700001,30.190001,30.629999,30.450001,50559700.0,23.835503
2010-01-08,Friday,30.879999,30.24,30.280001,30.66,51197400.0,23.999893


In [41]:
stocks.insert(1, "Is start of month", stocks.index.is_month_start)

In [42]:
stocks.head()

Unnamed: 0_level_0,Day of Week,Is start of month,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2010-01-04,Monday,False,31.1,30.59,30.620001,30.950001,38409100.0,24.226894
2010-01-05,Tuesday,False,31.1,30.639999,30.85,30.959999,49749600.0,24.23472
2010-01-06,Wednesday,False,31.08,30.52,30.879999,30.77,58182400.0,24.085989
2010-01-07,Thursday,False,30.700001,30.190001,30.629999,30.450001,50559700.0,23.835503
2010-01-08,Friday,False,30.879999,30.24,30.280001,30.66,51197400.0,23.999893


In [43]:
# want the rows for the first of every given month 

stocks[stocks["Is start of month"]]

Unnamed: 0_level_0,Day of Week,Is start of month,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2010-02-01,Monday,True,28.480000,27.920000,28.389999,28.410000,85931100.0,22.238642
2010-03-01,Monday,True,29.049999,28.530001,28.770000,29.020000,43805400.0,22.822367
2010-04-01,Thursday,True,29.540001,28.620001,29.350000,29.160000,74768100.0,22.932465
2010-06-01,Tuesday,True,26.309999,25.520000,25.530001,25.889999,76152400.0,20.452698
2010-07-01,Thursday,True,23.320000,22.730000,23.090000,23.160000,92239400.0,18.296041
...,...,...,...,...,...,...,...,...
2019-11-01,Friday,True,144.419998,142.970001,144.259995,143.720001,33128400.0,142.445740
2020-04-01,Wednesday,True,157.750000,150.820007,153.000000,152.110001,57969900.0,151.687546
2020-05-01,Friday,True,178.639999,174.009995,175.800003,174.570007,39370500.0,174.085175
2020-06-01,Monday,True,183.000000,181.460007,182.539993,182.830002,22622400.0,182.830002


## The pd.DateOffset Object

#### allows us to add or subtract a set amount of time for every date in our date time index

In [4]:
stocks = data.DataReader(name = 'MSFT', data_source = 'yahoo', start = '2010-01-01', end = '2020-12-31')
stocks.head(3)

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,31.1,30.59,30.620001,30.950001,38409100.0,24.226894
2010-01-05,31.1,30.639999,30.85,30.959999,49749600.0,24.23472
2010-01-06,31.08,30.52,30.879999,30.77,58182400.0,24.085989


#### want to add 5 days to every date 

In [7]:
# this takes the data and adds/subtracts 5 days 

stocks.index + pd.DateOffset(days = 5)
stocks.index - pd.DateOffset(days = 5)


DatetimeIndex(['2009-12-30', '2009-12-31', '2010-01-01', '2010-01-02',
               '2010-01-03', '2010-01-06', '2010-01-07', '2010-01-08',
               '2010-01-09', '2010-01-10',
               ...
               '2020-07-17', '2020-07-18', '2020-07-19', '2020-07-22',
               '2020-07-23', '2020-07-24', '2020-07-25', '2020-07-26',
               '2020-07-29', '2020-07-30'],
              dtype='datetime64[ns]', name='Date', length=2665, freq=None)

In [10]:
# adds two weeks from every date 
stocks.index + pd.DateOffset(weeks = 2)

# adds 3 months to each date 
stocks.index + pd.DateOffset(months = 3)


DatetimeIndex(['2010-04-04', '2010-04-05', '2010-04-06', '2010-04-07',
               '2010-04-08', '2010-04-11', '2010-04-12', '2010-04-13',
               '2010-04-14', '2010-04-15',
               ...
               '2020-10-22', '2020-10-23', '2020-10-24', '2020-10-27',
               '2020-10-28', '2020-10-29', '2020-10-30', '2020-10-31',
               '2020-11-03', '2020-11-04'],
              dtype='datetime64[ns]', name='Date', length=2665, freq=None)

In [12]:
# you can add anything to this and combine stuff as well 

stocks.index + pd.DateOffset(years = 1, months = 3, days = 10, hours = 6, minutes = 2, seconds = 26)


DatetimeIndex(['2011-04-14 06:02:26', '2011-04-15 06:02:26',
               '2011-04-16 06:02:26', '2011-04-17 06:02:26',
               '2011-04-18 06:02:26', '2011-04-21 06:02:26',
               '2011-04-22 06:02:26', '2011-04-23 06:02:26',
               '2011-04-24 06:02:26', '2011-04-25 06:02:26',
               ...
               '2021-11-01 06:02:26', '2021-11-02 06:02:26',
               '2021-11-03 06:02:26', '2021-11-06 06:02:26',
               '2021-11-07 06:02:26', '2021-11-08 06:02:26',
               '2021-11-09 06:02:26', '2021-11-10 06:02:26',
               '2021-11-13 06:02:26', '2021-11-14 06:02:26'],
              dtype='datetime64[ns]', name='Date', length=2665, freq=None)

## Timeseries Offsets

In [13]:
stocks = data.DataReader(name = 'MSFT', data_source = 'yahoo', start = '2010-01-01', end = '2020-12-31')
stocks.head(3)

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,31.1,30.59,30.620001,30.950001,38409100.0,24.226894
2010-01-05,31.1,30.639999,30.85,30.959999,49749600.0,24.23472
2010-01-06,31.08,30.52,30.879999,30.77,58182400.0,24.085989


#### what if you want to add a dynamic amount of time to each datetime, what if we wanted to round each date to the end of the current month. For ex, i would round January 4th to January 31st

In [15]:
# we are navigating through these folders to find the objects that we want to work with. We have MonthEnd which 
# will help us with rounding all the dates to the end of the month 

stocks.index + pd.tseries.offsets.MonthEnd()

DatetimeIndex(['2010-01-31', '2010-01-31', '2010-01-31', '2010-01-31',
               '2010-01-31', '2010-01-31', '2010-01-31', '2010-01-31',
               '2010-01-31', '2010-01-31',
               ...
               '2020-07-31', '2020-07-31', '2020-07-31', '2020-07-31',
               '2020-07-31', '2020-07-31', '2020-07-31', '2020-08-31',
               '2020-08-31', '2020-08-31'],
              dtype='datetime64[ns]', name='Date', length=2665, freq=None)

In [18]:
# this is the shorter way to go through the folders 

from pandas.tseries import offsets

In [19]:
# we will only need the offesets to access it 

stocks.index + offsets.MonthEnd()

DatetimeIndex(['2010-01-31', '2010-01-31', '2010-01-31', '2010-01-31',
               '2010-01-31', '2010-01-31', '2010-01-31', '2010-01-31',
               '2010-01-31', '2010-01-31',
               ...
               '2020-07-31', '2020-07-31', '2020-07-31', '2020-07-31',
               '2020-07-31', '2020-07-31', '2020-07-31', '2020-08-31',
               '2020-08-31', '2020-08-31'],
              dtype='datetime64[ns]', name='Date', length=2665, freq=None)

In [16]:
# for 2020-07-31 it will not read it as the final date of the month instead it will return 8-31-2020 

stocks.tail(3)

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-07-31,205.100006,199.009995,204.399994,205.009995,51248000.0,205.009995
2020-08-03,217.639999,210.440002,211.520004,216.539993,78877000.0,216.539993
2020-08-04,214.770004,210.309998,214.169998,213.289993,47737794.0,213.289993


In [17]:
# will round to the NEXT beginning of the month so for January 1st then it will be Feb 1st 

stocks.index + pd.tseries.offsets.MonthBegin()

DatetimeIndex(['2010-02-01', '2010-02-01', '2010-02-01', '2010-02-01',
               '2010-02-01', '2010-02-01', '2010-02-01', '2010-02-01',
               '2010-02-01', '2010-02-01',
               ...
               '2020-08-01', '2020-08-01', '2020-08-01', '2020-08-01',
               '2020-08-01', '2020-08-01', '2020-08-01', '2020-08-01',
               '2020-09-01', '2020-09-01'],
              dtype='datetime64[ns]', name='Date', length=2665, freq=None)

In [25]:
# business end of the month 
# if a month ends on the 31st and it lands on a saturday [that is not a business day]
# so the way it will work is that the friday before the 31st is considered the business end of the month 

stocks.index + offsets.BMonthEnd()

stocks.index - offsets.BMonthEnd()

DatetimeIndex(['2009-12-31', '2009-12-31', '2009-12-31', '2009-12-31',
               '2009-12-31', '2009-12-31', '2009-12-31', '2009-12-31',
               '2009-12-31', '2009-12-31',
               ...
               '2020-06-30', '2020-06-30', '2020-06-30', '2020-06-30',
               '2020-06-30', '2020-06-30', '2020-06-30', '2020-06-30',
               '2020-07-31', '2020-07-31'],
              dtype='datetime64[ns]', name='Date', length=2665, freq=None)

In [26]:
stocks.index + offsets.BYearEnd()

DatetimeIndex(['2010-12-31', '2010-12-31', '2010-12-31', '2010-12-31',
               '2010-12-31', '2010-12-31', '2010-12-31', '2010-12-31',
               '2010-12-31', '2010-12-31',
               ...
               '2020-12-31', '2020-12-31', '2020-12-31', '2020-12-31',
               '2020-12-31', '2020-12-31', '2020-12-31', '2020-12-31',
               '2020-12-31', '2020-12-31'],
              dtype='datetime64[ns]', name='Date', length=2665, freq=None)

## The Timedelta Object

#### Timedelta represents a time span or a duration or a passage of time. It is simply a measurement of time 


In [28]:
# subtracting two timestamps from one another 
time_a = pd.Timestamp("2020-03-31 04:35:16PM")
time_b = pd.Timestamp("2020-03-20 02:15:49PM" )

# we will get a timedelta object 
time_a - time_b

Timedelta('11 days 02:19:27')

In [29]:
time_b - time_a

Timedelta('-12 days +21:40:33')

In [31]:
# will add 3 days to time_a 

time_a + pd.Timedelta(days = 3)

Timestamp('2020-04-03 16:35:16')

In [33]:
time_a + pd.Timedelta(days = 3, hours = 12, minutes = 45, seconds = 20)

Timestamp('2020-04-04 05:20:36')

In [35]:
pd.Timedelta(weeks = 8, days = 3, hours = 12, minutes = 45, seconds = 20)

Timedelta('59 days 12:45:20')

In [38]:
pd.Timedelta("5 minutes")
pd.Timedelta("6 hours 12 minutes")
pd.Timedelta("14 days 6 hours 12 minutes 49 seconds")

Timedelta('14 days 06:12:49')

## Timedeltas in a Dataset

In [45]:
shipping = pd.read_csv('ecommerce.csv', index_col = 'ID', parse_dates = ["order_date", "delivery_date"])
shipping.head(3)

Unnamed: 0_level_0,order_date,delivery_date
ID,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1998-05-24,1999-02-05
2,1992-04-22,1998-03-06
4,1991-02-10,1992-08-26


In [48]:
# let's calculate the duration it took for the customer to get their item delivered 

# we will get a timedelta series and it shows us the duration 
shipping["Delivery Time"] = shipping['delivery_date'] - shipping['order_date']

In [49]:
shipping.head(3)

Unnamed: 0_level_0,order_date,delivery_date,Delivery Time
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1998-05-24,1999-02-05,257 days
2,1992-04-22,1998-03-06,2144 days
4,1991-02-10,1992-08-26,563 days


In [51]:
# find out what the day would be in the future if the delivery time took twice as long 
# we can add the timedeltas in delivery time to our delivery_date 
# for example the first date will represent 257 days after the delivery date

shipping["Future Delivery Date"] = shipping['delivery_date'] + shipping['Delivery Time']

In [52]:
shipping.head()

Unnamed: 0_level_0,order_date,delivery_date,Delivery Time,Future Delivery Date
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1998-05-24,1999-02-05,257 days,1999-10-20
2,1992-04-22,1998-03-06,2144 days,2004-01-18
4,1991-02-10,1992-08-26,563 days,1994-03-12
5,1992-07-21,1997-11-20,1948 days,2003-03-22
7,1993-09-02,1998-06-10,1742 days,2003-03-18


In [54]:
mask = shipping['Delivery Time'] > "365 Days"

In [55]:
# orders that took more than a year 

shipping[mask]

Unnamed: 0_level_0,order_date,delivery_date,Delivery Time,Future Delivery Date
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2,1992-04-22,1998-03-06,2144 days,2004-01-18
4,1991-02-10,1992-08-26,563 days,1994-03-12
5,1992-07-21,1997-11-20,1948 days,2003-03-22
7,1993-09-02,1998-06-10,1742 days,2003-03-18
9,1990-01-25,1994-10-02,1711 days,1999-06-09
...,...,...,...,...
986,1990-12-10,1992-12-16,737 days,1994-12-23
990,1991-06-24,1996-02-02,1684 days,2000-09-12
991,1991-09-09,1998-03-30,2394 days,2004-10-18
993,1990-11-16,1998-04-27,2719 days,2005-10-06


In [56]:
shipping["Delivery Time"].max()

Timedelta('3583 days 00:00:00')

In [57]:
shipping["Delivery Time"].min()

Timedelta('8 days 00:00:00')