## Intro to the Working with Dates and Times Module

In [None]:
import pandas as pd
import datetime as dt

## Review of Python's datetime Module

In [None]:
# Signature: dt.date(year, month, day)
dt.date(2016, 4, 12) # April 12th 2016
someday = dt.date(2010, 1, 20) # January 20th 2010

In [None]:
# Individual components of the date; the notebook only echoes the last expression (the day).
someday.year
someday.month
someday.day

A `datetime` always carries a time component; when no time is given, it defaults to midnight (00:00:00).

In [None]:
# No time arguments supplied, so hour, minute and second default to 0 (midnight).
dt.datetime(2010, 1, 20)

In [None]:
# Positional order: year, month, day, hour, minute, second -> 08:13:57 on January 10th 2010.
dt.datetime(2010, 1, 10, 8, 13, 57)

For times in the afternoon, use the 24-hour clock (for example, 17 for 5 PM).

In [None]:
# str() renders the datetime as text: '2010-01-10 17:13:57' (hour 17 = 5 PM).
str(dt.datetime(2010, 1, 10, 17, 13, 57))

In [None]:
# Keep a datetime around so its attributes can be inspected in the next cell.
sometime = dt.datetime(2010, 1, 10, 17, 13, 57)

In [None]:
# Date and time components of the datetime; only the last expression (the second) is echoed.
sometime.year
sometime.month
sometime.day
sometime.hour
sometime.minute
sometime.second

## The pandas Timestamp Object

In [None]:
# pd.Timestamp accepts many textual date layouts.
pd.Timestamp('2010-01-10')
pd.Timestamp('2015/01/31')
pd.Timestamp('01/01/2015')
pd.Timestamp('2014, 11, 04')
pd.Timestamp('19/12/2015') # Be careful with these two formats: day/month/year works here only because 19 cannot be a month
pd.Timestamp('12/19/2015') # month/day/year -> the same date, December 19th 2015
pd.Timestamp('4/3/2000') # Ambiguous: read month-first as April 3rd 2000, not March 4th

In [None]:
# A time can follow the date, in either 24-hour or AM/PM notation.
pd.Timestamp('2010-01-10 08:35:15')
pd.Timestamp('2010-01-10 6:13:15 PM')

In [None]:
# A Timestamp can also be built from a standard-library date object.
pd.Timestamp(dt.date(2015,1,1))

In [None]:
# ...or from a standard-library datetime, keeping its time of day (21:34:33).
pd.Timestamp(dt.datetime(2020,3,14, 21, 34, 33))

## The pandas DatetimeIndex Object

In [None]:
# Dash-separated date strings (yyyy-MM-dd) turned into a DatetimeIndex.
dates = [
    "2016-01-02",
    "2016-03-12",
    "2009-09-07",
]
pd.DatetimeIndex(dates)

In [None]:
# The same dates written with slashes are parsed just as well.
dates = [
    "2016/01/02",
    "2016/03/12",
    "2009/09/07",
]
pd.DatetimeIndex(dates)

In [None]:
# Build the index from standard-library date objects instead of strings.
dates = [
    dt.date(*ymd)
    for ymd in ((2016, 1, 10), (1994, 6, 14), (2003, 12, 29))
]
dt_index = pd.DatetimeIndex(dates)

In [None]:
# One value per date: the DatetimeIndex built above labels the Series rows.
values = list(range(100, 400, 100))
pd.Series(index=dt_index, data=values)

## The pd.to_datetime() Method

In [None]:
# pd.to_datetime converts a single value to a Timestamp...
pd.to_datetime("2001-04-19")
pd.to_datetime(dt.date(2015, 1, 1))
pd.to_datetime(dt.datetime(2015, 1, 1, 14, 25, 20))
# ...and a collection to a DatetimeIndex. The strings below use four different
# layouts; pandas >= 2.0 infers one format from the first element and raises
# ValueError on the rest unless format="mixed" asks for per-element parsing.
pd.to_datetime(("2015-01-03", "2014/02/08", "2016", "May 4th, 1996"), format="mixed")

In [None]:
# Date strings in four different layouts, still stored as plain text at this point.
times = pd.Series(
    [
        "2015-01-03",
        "2014/02/08",
        "2016",
        "May 4th, 1996",
    ]
)
times

Convert the strings to datetimes; every value is then displayed in the same normalized format (yyyy-MM-dd).

In [None]:
# The Series mixes several date layouts, so parse each element on its own:
# without format="mixed", pandas >= 2.0 infers a single format from the first
# value and raises ValueError on the others.
pd.to_datetime(times, format="mixed")

In [None]:
# Two parseable dates, one non-date word and one impossible date (February 31st).
dates = pd.Series(
    [
        "July 5th, 1996",
        "10/04/1992",
        "Hello",
        "2015-02-31",
    ]
)
dates

The `errors` parameter controls what happens with incompatible data: by default, `pd.to_datetime` raises an error when it meets a value it cannot parse.

So we pass `errors="coerce"`: every value that cannot be converted to a datetime becomes NaT (Not a Time).

In [None]:
# Unparseable entries are replaced with NaT instead of raising.
pd.to_datetime(dates, errors="coerce")

Converting Unix time (seconds elapsed since 1970-01-01) to datetimes

In [None]:
# unit="s" tells pandas the integers are seconds since the Unix epoch.
pd.to_datetime(
    [
        1577836800,
        1798761600,
        1830297600,
        1861920000,
    ],
    unit="s",
)

## Create Range of Dates with the pd.date_range Method - Part I

`freq="D"` stands for a daily frequency: one timestamp per calendar day.

In [None]:
# Every calendar day from January 1st to January 10th 2016, both ends included.
times = pd.date_range(
    start="2016-01-01",
    end="2016-01-10",
    freq="D",
)

In [None]:
# The container returned by pd.date_range is a DatetimeIndex.
type(times)

In [None]:
# Each individual element of that index is a pandas Timestamp.
type(times[0])

In [None]:
# A number in front of the alias is a multiplier: "2D" steps two days at a time.
pd.date_range(start="2016-01-01", end="2016-01-10", freq="2D")

In [None]:
# "B" keeps business days only (Monday to Friday), skipping weekends.
pd.date_range(start="2016-01-01", end="2016-01-10", freq="B")

In [None]:
# "W-FRI" is a weekly frequency anchored on Fridays: one timestamp per Friday.
pd.date_range(start="2016-01-01", end="2016-01-10", freq="W-FRI")

In [None]:
# One timestamp per hour. The lowercase "h" alias matches the "12h"/"6h"/"7h"
# spellings used elsewhere in this notebook; the uppercase "H" alias is
# deprecated in recent pandas releases.
pd.date_range(start="2016-01-01", end="2016-01-10", freq="h")

In [None]:
# Two timestamps per day: midnight and noon.
pd.date_range(start="2016-01-01", end="2016-01-10", freq="12h")

In [None]:
# NOTE(review): recent pandas releases rename the month-end alias "M" to "ME" — confirm against the installed version before changing it.
pd.date_range(start = "2016-01-01", end = "2017-01-01", freq = "M") # Last day of each month

In [None]:
# "MS" (month start) yields the first day of each month in the interval.
pd.date_range(start="2016-01-01", end="2017-01-01", freq="MS")

## Create Range of Dates with the pd.date_range Method - Part II

`periods` is the number of timestamps we want to generate; it replaces `end` when only the starting point is known.

In [None]:
# 25 consecutive days, beginning on the start date.
pd.date_range(start="2012-09-09", periods=25, freq="D")

In [None]:
# The length of the result always equals the requested number of periods.
len(pd.date_range(start="2012-09-09", periods=25, freq="D"))

In [None]:
# 50 business days; the start date is a Sunday, so the range opens on the following Monday.
pd.date_range(start="2012-09-09", periods=50, freq="B")

In [None]:
# 50 weekly timestamps; plain "W" is anchored on Sundays.
pd.date_range(start="2012-09-09", periods=50, freq="W")

In [None]:
# 50 consecutive Tuesdays, starting with the first Tuesday on or after the start date.
pd.date_range(start="2012-09-09", periods=50, freq="W-TUE")

In [None]:
# 50 timestamps spaced six hours apart.
pd.date_range(start="2012-09-09", periods=50, freq="6h")

## Create Range of Dates with the pd.date_range Method - Part III

When only `end` is given, pandas anchors the range at the end date and works backwards, generating `periods` timestamps spaced by `freq`.

In [None]:
# The last 20 days of 1999, counted backwards from December 31st.
pd.date_range(end="1999-12-31", periods=20, freq="D")

In [None]:
# The 40 business days leading up to (and including) the end date.
pd.date_range(end="1999-12-31", periods=40, freq="B")

In [None]:
# The 40 Sundays that fall on or before the end date.
pd.date_range(end="1999-12-31", periods=40, freq="W-SUN")

In [None]:
# MS -> month start: the 53 first-of-month dates on or before the end date.
pd.date_range(end="1999-12-31", periods=53, freq="MS")

In [None]:
# 7h -> 53 timestamps spaced seven hours apart, the last one landing on the end date.
pd.date_range(end="1999-12-31", periods=53, freq="7h")

## The .dt Accessor

In [None]:
# One date every 24 days across 2000-2010, used as sample data for the .dt accessor.
bunch_of_dates = pd.date_range(
    start="2000-01-01",
    end="2010-12-31",
    freq="24D",
)

In [None]:
# Wrap the dates in a Series: the .dt accessor used below is reached through a Series.
s = pd.Series(bunch_of_dates)
s.head(3)

In [None]:
# .dt exposes datetime components element-wise; only the last expression is echoed.
s.dt.day
s.dt.month
s.dt.year
s.dt.weekday  # Monday is 0, Sunday is 6

In [None]:
# Boolean Series: True where the date is the last day of a quarter.
mask = s.dt.is_quarter_end
# Keep only the rows where the mask is True.
s[mask]

In [None]:
# Boolean Series: True where the date is the first day of a month (this reuses the name `mask`).
mask = s.dt.is_month_start
# Keep only the rows where the mask is True.
s[mask]

## Import Financial Dataset with pandas_datareader Library

In [None]:
import pandas as pd
import datetime as dt
from pandas_datareader import data

In [None]:
# NOTE(review): this cell needs network access, and the same download is repeated in later sections — confirm that the "yahoo" source still responds with the installed pandas_datareader version.
stocks = data.DataReader(name= "MSFT", data_source= "yahoo", start="2010-01-01", end= "2020-12-31") # Microsoft (MSFT) stock history for 2010-2020, fetched from Yahoo Finance
stocks.head(3)

In [None]:
# The underlying values of the DataFrame, without the index or column labels.
stocks.values

In [None]:
# The column labels of the downloaded price table.
stocks.columns

In [None]:
# The row labels: the dates of the price table.
stocks.index

## Selecting Rows from a DataFrame with a DatetimeIndex

In [None]:
# A date string is accepted as a row label on a DatetimeIndex.
stocks.loc["2010-01-04"]

In [None]:
# The same lookup with an explicit Timestamp label.
stocks.loc[pd.Timestamp("2010-01-04")]

In [None]:
# Positional access: first row, row at position 500, and last row (only the last expression is echoed).
stocks.iloc[0]
stocks.iloc[500]
stocks.iloc[-1]

In [None]:
# A list of labels selects several rows at once and returns a DataFrame.
stocks.loc[[pd.Timestamp("2010-01-04"), pd.Timestamp("2010-01-05")]]

The two lines below do the same thing

In [None]:
# Two spellings of the same selection: the rows dated 2013-10-01 through 2013-10-07.
stocks.loc["2013-10-01":"2013-10-07"]
stocks.truncate(before="2013-10-01", after="2013-10-07")

In [None]:
stocks.iloc[1000:1005] # iloc slices exclude the end position: rows 1000 through 1004 are returned

In [None]:
# A DateOffset object can serve as the frequency: here, the same calendar date once a year.
pd.date_range(
    start="1991-04-12",
    end="2020-12-31",
    freq=pd.DateOffset(years=1),
)

In [None]:
# One birthday per year since 1991-04-12. The original assigned stocks.index here,
# which made the later isin() check True for every row, so nothing was filtered.
birthdays = pd.date_range(start="1991-04-12", end="2020-12-31", freq=pd.DateOffset(years=1))

In [None]:
# Boolean array: True where a date in the stocks index also appears in `birthdays`.
birthdays_stocks = stocks.index.isin(birthdays)

In [None]:
# Both forms keep the rows where the boolean mask is True.
stocks[birthdays_stocks]
stocks.loc[birthdays_stocks] # preferred, more explicit way to select a subset of rows

## Timestamp Object Attributes and Methods

In [82]:
# Fresh download so this section starts from the unmodified price table.
# NOTE(review): needs network access and repeats an earlier download — confirm the "yahoo" source still responds.
stocks = data.DataReader(name= "MSFT", data_source= "yahoo", start="2010-01-01", end= "2020-12-31") # Microsoft (MSFT) stock history for 2010-2020, fetched from Yahoo Finance
stocks.head(3)

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,31.1,30.59,30.620001,30.950001,38409100.0,23.855656
2010-01-05,31.1,30.639999,30.85,30.959999,49749600.0,23.863363
2010-01-06,31.08,30.52,30.879999,30.77,58182400.0,23.716919


In [85]:
# Pick the Timestamp at position 500 of the index (this reuses the name `someday`).
someday = stocks.index[500]

In [86]:
# NOTE(review): there are no call parentheses, so this only displays the bound method (shown as Timestamp.tz_convert in the output) — confirm whether a call was intended.
someday.astimezone

<bound method Timestamp.tz_convert of Timestamp('2011-12-27 00:00:00')>

In [89]:
# Timestamp attributes; only the last expression (is_quarter_end) is echoed.
someday.month
someday.day
someday.year
someday.is_month_start
someday.is_quarter_end

False

In [92]:
# These are methods, so they need call parentheses; they return the month and weekday as text.
someday.month_name()
someday.day_name()

'Tuesday'

In [94]:
# The same method on the whole DatetimeIndex returns one weekday name per row.
stocks.index.day_name()

Index(['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Monday',
       'Tuesday', 'Wednesday', 'Thursday', 'Friday',
       ...
       'Thursday', 'Friday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday',
       'Monday', 'Tuesday', 'Wednesday', 'Thursday'],
      dtype='object', name='Date', length=2769)

Adding a column with the days of the week names corresponding to each date.

In [95]:
# Put the weekday name of every trading date in the first column.
# DataFrame.insert mutates the frame in place and raises ValueError when the
# column already exists, so the guard keeps this cell safe to re-run.
if "Day of the week" not in stocks.columns:
    stocks.insert(0, "Day of the week", stocks.index.day_name())

In [96]:
# Display the frame to check the newly inserted first column.
stocks

Unnamed: 0_level_0,Day of the week,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2010-01-04,Monday,31.100000,30.590000,30.620001,30.950001,38409100.0,23.855656
2010-01-05,Tuesday,31.100000,30.639999,30.850000,30.959999,49749600.0,23.863363
2010-01-06,Wednesday,31.080000,30.520000,30.879999,30.770000,58182400.0,23.716919
2010-01-07,Thursday,30.700001,30.190001,30.629999,30.450001,50559700.0,23.470272
2010-01-08,Friday,30.879999,30.240000,30.280001,30.660000,51197400.0,23.632130
...,...,...,...,...,...,...,...
2020-12-24,Thursday,223.610001,221.199997,221.419998,222.750000,10550600.0,220.442551
2020-12-28,Monday,226.029999,223.020004,224.449997,224.960007,17933500.0,222.629654
2020-12-29,Tuesday,227.179993,223.580002,226.309998,224.149994,17403200.0,221.828049
2020-12-30,Wednesday,225.630005,221.470001,225.229996,221.679993,20272300.0,219.383621


Adding a column showing whether each date falls on the first day of its month.

In [100]:
# Flag the rows whose date is the first day of a month, as the second column.
# DataFrame.insert mutates the frame in place and raises ValueError when the
# column already exists, so the guard keeps this cell safe to re-run.
if "Is Start of Month" not in stocks.columns:
    stocks.insert(1, "Is Start of Month", stocks.index.is_month_start)

In [101]:
# Display the frame to check the newly inserted boolean column.
stocks

Unnamed: 0_level_0,Day of the week,Is Start of Month,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2010-01-04,Monday,False,31.100000,30.590000,30.620001,30.950001,38409100.0,23.855656
2010-01-05,Tuesday,False,31.100000,30.639999,30.850000,30.959999,49749600.0,23.863363
2010-01-06,Wednesday,False,31.080000,30.520000,30.879999,30.770000,58182400.0,23.716919
2010-01-07,Thursday,False,30.700001,30.190001,30.629999,30.450001,50559700.0,23.470272
2010-01-08,Friday,False,30.879999,30.240000,30.280001,30.660000,51197400.0,23.632130
...,...,...,...,...,...,...,...,...
2020-12-24,Thursday,False,223.610001,221.199997,221.419998,222.750000,10550600.0,220.442551
2020-12-28,Monday,False,226.029999,223.020004,224.449997,224.960007,17933500.0,222.629654
2020-12-29,Tuesday,False,227.179993,223.580002,226.309998,224.149994,17403200.0,221.828049
2020-12-30,Wednesday,False,225.630005,221.470001,225.229996,221.679993,20272300.0,219.383621


In [102]:
# Use the boolean column as a row mask: keep only trading days that open a month.
stocks.loc[stocks["Is Start of Month"]]

Unnamed: 0_level_0,Day of the week,Is Start of Month,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2010-02-01,Monday,True,28.480000,27.920000,28.389999,28.410000,85931100.0,21.897869
2010-03-01,Monday,True,29.049999,28.530001,28.770000,29.020000,43805400.0,22.472652
2010-04-01,Thursday,True,29.540001,28.620001,29.350000,29.160000,74768100.0,22.581066
2010-06-01,Tuesday,True,26.309999,25.520000,25.530001,25.889999,76152400.0,20.139290
2010-07-01,Thursday,True,23.320000,22.730000,23.090000,23.160000,92239400.0,18.015680
...,...,...,...,...,...,...,...,...
2020-06-01,Monday,True,183.000000,181.460007,182.539993,182.830002,22622400.0,180.028442
2020-07-01,Wednesday,True,206.350006,201.770004,203.139999,204.699997,32061200.0,201.563309
2020-09-01,Tuesday,True,227.449997,224.429993,225.509995,227.270004,25725500.0,224.328430
2020-10-01,Thursday,True,213.990005,211.320007,213.490005,212.460007,27158400.0,209.710129


## The pd.DateOffset Object

In [103]:
# Fresh download so this section starts without the columns inserted in the previous one.
# NOTE(review): needs network access and repeats an earlier download — confirm the "yahoo" source still responds.
stocks = data.DataReader(name= "MSFT", data_source= "yahoo", start="2010-01-01", end= "2020-12-31") # Microsoft (MSFT) stock history for 2010-2020, fetched from Yahoo Finance
stocks.head(3)

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,31.1,30.59,30.620001,30.950001,38409100.0,23.855654
2010-01-05,31.1,30.639999,30.85,30.959999,49749600.0,23.863363
2010-01-06,31.08,30.52,30.879999,30.77,58182400.0,23.716911


Adding or subtracting a span of time (days, weeks, months, ...) to the dates in the DataFrame's index.

In [107]:
stocks["High"] + 5 # Can be done: a numeric column accepts a plain number
#stocks.index + 5 # Do we mean 5 years/months/days/minutes? pandas doesn't know how to respond

# The two options below work because the DateOffset names the unit explicitly
stocks.index + pd.DateOffset(days = 5)
stocks.index - pd.DateOffset(days = 5)

# Uncommenting the next line would overwrite the index with the shifted dates
#stocks.index = stocks.index - pd.DateOffset(days = 5)

DatetimeIndex(['2009-12-30', '2009-12-31', '2010-01-01', '2010-01-02',
               '2010-01-03', '2010-01-06', '2010-01-07', '2010-01-08',
               '2010-01-09', '2010-01-10',
               ...
               '2020-12-12', '2020-12-13', '2020-12-16', '2020-12-17',
               '2020-12-18', '2020-12-19', '2020-12-23', '2020-12-24',
               '2020-12-25', '2020-12-26'],
              dtype='datetime64[ns]', name='Date', length=2769, freq=None)

In [118]:
# Shifting by weeks (none of these expressions modifies stocks; only the last one is echoed)
stocks.index + pd.DateOffset(weeks = 2)
stocks.index - pd.DateOffset(weeks = 3)

# Shifting by calendar months
stocks.index + pd.DateOffset(months = 2)
stocks.index - pd.DateOffset(months = 3)

# Shifting by calendar years
stocks.index + pd.DateOffset(years = 3)
stocks.index - pd.DateOffset(years = 3)

# We can mix the parameters: several units combine into a single offset
stocks.index + pd.DateOffset(years = 1, months = 3, days = 10, hours = 6, minutes = 2, seconds = 40)

DatetimeIndex(['2011-04-14 06:02:40', '2011-04-15 06:02:40',
               '2011-04-16 06:02:40', '2011-04-17 06:02:40',
               '2011-04-18 06:02:40', '2011-04-21 06:02:40',
               '2011-04-22 06:02:40', '2011-04-23 06:02:40',
               '2011-04-24 06:02:40', '2011-04-25 06:02:40',
               ...
               '2022-03-27 06:02:40', '2022-03-28 06:02:40',
               '2022-03-31 06:02:40', '2022-04-01 06:02:40',
               '2022-04-02 06:02:40', '2022-04-03 06:02:40',
               '2022-04-07 06:02:40', '2022-04-08 06:02:40',
               '2022-04-09 06:02:40', '2022-04-10 06:02:40'],
              dtype='datetime64[ns]', name='Date', length=2769, freq=None)