## Intro to the Working with Dates and Times Module

In [None]:
import pandas as pd
import datetime as dt

## Review of Python's datetime Module

In [None]:
#dt.date(yyyy, MM, dd)
dt.date(2016, 4, 12) # April 12th 2016
someday = dt.date(2010, 1, 20) # January 1st 2010

In [None]:
someday.year
someday.month
someday.day

Datetime must have a time, or will be set to midnight

In [None]:
dt.datetime(2010, 1, 20)

In [None]:
dt.datetime(2010, 1, 10, 8, 13, 57)

In the afternoon, we will need the military time

In [None]:
str(dt.datetime(2010, 1, 10, 17, 13, 57))

In [None]:
sometime = dt.datetime(2010, 1, 10, 17, 13, 57)

In [None]:
sometime.year
sometime.month
sometime.day
sometime.hour
sometime.minute
sometime.second

## The pandas Timestamp Object

In [None]:
pd.Timestamp('2010-01-10')
pd.Timestamp('2015/01/31')
pd.Timestamp('01/01/2015')
pd.Timestamp('2014, 11, 04')
pd.Timestamp('19/12/2015') # Becareful with these two formats
pd.Timestamp('12/19/2015') #
pd.Timestamp('4/3/2000') # or this could happen

In [None]:
pd.Timestamp('2010-01-10 08:35:15')
pd.Timestamp('2010-01-10 6:13:15 PM')

In [None]:
pd.Timestamp(dt.date(2015,1,1))

In [None]:
pd.Timestamp(dt.datetime(2020,3,14, 21, 34, 33))

## The pandas DateTimeIndex Object

In [None]:
dates =["2016-01-02", "2016-03-12", "2009-09-07"]
pd.DatetimeIndex(dates)

In [None]:
dates =["2016/01/02", "2016/03/12", "2009/09/07"]
pd.DatetimeIndex(dates)

In [None]:
dates = [dt.date(2016, 1, 10), dt.date(1994, 6, 14), dt.date(2003, 12, 29)]
dt_index = pd.DatetimeIndex(dates)

In [None]:
values = [100, 200, 300]
pd.Series(data = values, index = dt_index)

## The pd.to_datetime() Method

In [None]:
pd.to_datetime("2001-04-19")
pd.to_datetime(dt.date(2015,1, 1))
pd.to_datetime(dt.datetime(2015, 1, 1, 14, 25, 20))
pd.to_datetime(("2015-01-03", "2014/02/08", "2016", "May 4th, 1996"))

In [None]:
times = pd.Series(["2015-01-03", "2014/02/08", "2016", "May 4th, 1996"])
times

Convert dates to datetime, maintaning the same format (yyyy-MM-dd)

In [None]:
pd.to_datetime(times)

In [None]:
dates = pd.Series(["July 5th, 1996", "10/04/1992", "Hello", "2015-02-31"])
dates

Parameter that raise errors if there is a incompatible data

So we use coerce to convert to datetime, and everything that is not datetime will be NaT (Not a Time)

In [None]:
pd.to_datetime(dates, errors= "coerce")

UNIX TIME IN DATETIME

In [None]:
pd.to_datetime([1577836800, 1798761600,1830297600, 1861920000], unit="s")

## Create Range of Dates with the pd.date_range Method - Part I

Freq = "d" = DAY

In [None]:
times =  pd.date_range(start = "2016-01-01", end = "2016-01-10", freq = "D")

In [None]:
type(times)

In [None]:
type(times[0])

In [None]:
pd.date_range(start = "2016-01-01", end = "2016-01-10", freq = "2D") #increment of 2days

In [None]:
pd.date_range(start = "2016-01-01", end = "2016-01-10", freq = "B") # B will be just business days

In [None]:
pd.date_range(start = "2016-01-01", end = "2016-01-10", freq = "W-FRI") # Just friday of each week

In [None]:
pd.date_range(start = "2016-01-01", end = "2016-01-10", freq = "H") # H -> Every single hour

In [None]:
pd.date_range(start = "2016-01-01", end = "2016-01-10", freq = "12h") # 12h frequency

In [None]:
pd.date_range(start = "2016-01-01", end = "2017-01-01", freq = "M") # Last day of each month

In [None]:
pd.date_range(start = "2016-01-01", end = "2017-01-01", freq = "MS") # First day of each month

## Create Range of Dates with the pd.date_range Method - Part II

Periods will be the number of timestamps we want to generate

In [None]:
pd.date_range(start= "2012-09-09", periods= 25, freq="D")

In [None]:
len(pd.date_range(start= "2012-09-09", periods= 25, freq="D"))

In [None]:
pd.date_range(start= "2012-09-09", periods= 50, freq="B")

In [None]:
pd.date_range(start= "2012-09-09", periods= 50, freq="W")

In [None]:
pd.date_range(start= "2012-09-09", periods= 50, freq="W-TUE")

In [None]:
pd.date_range(start= "2012-09-09", periods= 50, freq="6h")

## Create Range of Dates with the pd.date_range Method - Part III

Start at the end date and, will move until the beginning that will be the number of periods + our parameters

In [None]:
pd.date_range(end = "1999-12-31", periods= 20, freq= "D")

In [None]:
pd.date_range(end = "1999-12-31", periods= 40, freq= "B")

In [None]:
pd.date_range(end = "1999-12-31", periods= 40, freq= "W-SUN")

In [None]:
pd.date_range(end = "1999-12-31", periods= 53, freq= "MS") # MS -> Month Start

In [None]:
pd.date_range(end = "1999-12-31", periods= 53, freq= "7h") # MS -> Month Start

## The .dt Acessor

In [None]:
bunch_of_dates = pd.date_range(start= "2000-01-01", end= "2010-12-31", freq= "24D")

In [None]:
s = pd.Series(bunch_of_dates)
s.head(3)

In [None]:
s.dt.day
s.dt.month
s.dt.year
s.dt.weekday

In [None]:
mask = s.dt.is_quarter_end
s[mask]

In [None]:
mask = s.dt.is_month_start
s[mask]

## Import Financial Dataset with pandas_datareader Library

In [None]:
import pandas as pd
import datetime as dt
from pandas_datareader import data

In [None]:
stocks = data.DataReader(name= "MSFT", data_source= "yahoo", start="2010-01-01", end= "2020-12-31") # Microsoft stock history - Getting data from Yahoo Finances
stocks.head(3)

In [None]:
stocks.values

In [None]:
stocks.columns

In [None]:
stocks.index

## Selecting Rows from a DataFrame with a DatetimeIndex

In [None]:
stocks.loc["2010-01-04"]

In [None]:
stocks.loc[pd.Timestamp("2010-01-04")]

In [None]:
stocks.iloc[0]
stocks.iloc[500]
stocks.iloc[-1]

In [None]:
stocks.loc[[pd.Timestamp("2010-01-04"), pd.Timestamp("2010-01-05")]]

The two lines below do the same thing

In [None]:
stocks.loc["2013-10-01":"2013-10-07"]
stocks.truncate(before= "2013-10-01", after="2013-10-07")

In [None]:
stocks.iloc[1000:1005] # iloc has a exclusive "after" parameter

In [None]:
pd.date_range(start= "1991-04-12", end= "2020-12-31", freq= pd.DateOffset(years = 1))

In [None]:
birthdays = stocks.index

In [None]:
birthdays_stocks = stocks.index.isin(birthdays)

In [None]:
stocks[birthdays_stocks]
stocks.loc[birthdays_stocks] # prefered way to aim a subset of stocks

## Timestamp Object Attributes and Methods

In [None]:
stocks = data.DataReader(name= "MSFT", data_source= "yahoo", start="2010-01-01", end= "2020-12-31") # Microsoft stock history - Getting data from Yahoo Finances
stocks.head(3)

In [None]:
someday = stocks.index[500]

In [None]:
someday.astimezone

In [None]:
someday.month
someday.day
someday.year
someday.is_month_start
someday.is_quarter_end

In [None]:
someday.month_name()
someday.day_name()

In [None]:
stocks.index.day_name()

Adding a column with the days of the week names corresponding to each date.

In [None]:
stocks.insert(0, "Day of the week",stocks.index.day_name())

In [None]:
stocks

Adding a Column showing if is the day is at the start of the month or not

In [None]:
stocks.insert(1, "Is Start of Month", stocks.index.is_month_start)

In [None]:
stocks

In [None]:
stocks[stocks["Is Start of Month"]]

## The pd.dateOffset Object

In [None]:
stocks = data.DataReader(name= "MSFT", data_source= "yahoo", start="2010-01-01", end= "2020-12-31") # Microsoft stock history - Getting data from Yahoo Finances
stocks.head(3)

Adding a certain time of days to the values of the DataFrame

In [None]:
stocks["High"] + 5 # Can be done
#stocks.index + 5 # Do we mean 5 years/months/dates/minutes? Pandas doesn't know how to respond

# The two options below will work normally
stocks.index + pd.DateOffset(days = 5)
stocks.index - pd.DateOffset(days = 5)

#stocks.index = stocks.index - pd.DateOffset(days = 5)

In [None]:
stocks.index + pd.DateOffset(weeks = 2)
stocks.index - pd.DateOffset(weeks = 3)

stocks.index + pd.DateOffset(months = 2)
stocks.index - pd.DateOffset(months = 3)

stocks.index + pd.DateOffset(years = 3)
stocks.index - pd.DateOffset(years = 3)

# We can mix the parameters
stocks.index + pd.DateOffset(years = 1, months = 3, days = 10, hours = 6, minutes = 2, seconds = 40)

## Timeseries Offsets

In [79]:
stocks = data.DataReader(name= "MSFT", data_source= "yahoo", start="2010-01-01", end= "2020-12-31")
stocks.head(3)

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,31.1,30.59,30.620001,30.950001,38409100.0,23.855658
2010-01-05,31.1,30.639999,30.85,30.959999,49749600.0,23.863367
2010-01-06,31.08,30.52,30.879999,30.77,58182400.0,23.716913


In [92]:
stocks.index + pd.tseries.offsets.MonthEnd()
stocks.index - pd.tseries.offsets.MonthEnd()

stocks.index + pd.tseries.offsets.MonthBegin()
stocks.index - pd.tseries.offsets.MonthBegin()

DatetimeIndex(['2010-01-01', '2010-01-01', '2010-01-01', '2010-01-01',
               '2010-01-01', '2010-01-01', '2010-01-01', '2010-01-01',
               '2010-01-01', '2010-01-01',
               ...
               '2020-12-01', '2020-12-01', '2020-12-01', '2020-12-01',
               '2020-12-01', '2020-12-01', '2020-12-01', '2020-12-01',
               '2020-12-01', '2020-12-01'],
              dtype='datetime64[ns]', name='Date', length=2769, freq=None)

In [90]:
stocks.tail(3)

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-12-29,227.179993,223.580002,226.309998,224.149994,17403200.0,221.828049
2020-12-30,225.630005,221.470001,225.229996,221.679993,20272300.0,219.383621
2020-12-31,223.0,219.679993,221.699997,222.419998,20942100.0,220.115967


Implicit importing pandas.tseries offsets

In [94]:
from pandas.tseries import offsets

In [95]:
stocks.index + offsets.MonthEnd()
stocks.index - offsets.MonthEnd()

stocks.index + offsets.MonthBegin()
stocks.index - offsets.MonthBegin()

DatetimeIndex(['2010-01-01', '2010-01-01', '2010-01-01', '2010-01-01',
               '2010-01-01', '2010-01-01', '2010-01-01', '2010-01-01',
               '2010-01-01', '2010-01-01',
               ...
               '2020-12-01', '2020-12-01', '2020-12-01', '2020-12-01',
               '2020-12-01', '2020-12-01', '2020-12-01', '2020-12-01',
               '2020-12-01', '2020-12-01'],
              dtype='datetime64[ns]', name='Date', length=2769, freq=None)

In [None]:
stocks.tail(3)

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-12-29,227.179993,223.580002,226.309998,224.149994,17403200.0,221.828049
2020-12-30,225.630005,221.470001,225.229996,221.679993,20272300.0,219.383621
2020-12-31,223.0,219.679993,221.699997,222.419998,20942100.0,220.115967


Business Month end

In [97]:
stocks.index + offsets.BMonthEnd()
stocks.index - offsets.BMonthEnd()

DatetimeIndex(['2009-12-31', '2009-12-31', '2009-12-31', '2009-12-31',
               '2009-12-31', '2009-12-31', '2009-12-31', '2009-12-31',
               '2009-12-31', '2009-12-31',
               ...
               '2020-11-30', '2020-11-30', '2020-11-30', '2020-11-30',
               '2020-11-30', '2020-11-30', '2020-11-30', '2020-11-30',
               '2020-11-30', '2020-11-30'],
              dtype='datetime64[ns]', name='Date', length=2769, freq=None)

In [98]:
stocks.index + offsets.YearBegin()

DatetimeIndex(['2011-01-01', '2011-01-01', '2011-01-01', '2011-01-01',
               '2011-01-01', '2011-01-01', '2011-01-01', '2011-01-01',
               '2011-01-01', '2011-01-01',
               ...
               '2021-01-01', '2021-01-01', '2021-01-01', '2021-01-01',
               '2021-01-01', '2021-01-01', '2021-01-01', '2021-01-01',
               '2021-01-01', '2021-01-01'],
              dtype='datetime64[ns]', name='Date', length=2769, freq=None)

## The Timedelta Object

In [100]:
time_a = pd.Timestamp("2020-03-31")
time_b = pd.Timestamp("2020-03-20")

time_a - time_b

Timedelta('11 days 00:00:00')

In [101]:
time_a = pd.Timestamp("2020-03-31 04:35:16PM")
time_b = pd.Timestamp("2020-03-20 02:13:49PM")

time_a - time_b

Timedelta('11 days 02:21:27')

In [103]:
time_b - time_a

Timedelta('-12 days +21:38:33')

Time in passing days

In [105]:
pd.Timedelta(days = 3)

Timedelta('3 days 00:00:00')

In [106]:
time_a + pd.Timedelta(days = 3)

Timestamp('2020-04-03 16:35:16')

In [115]:
pd.Timedelta(days = 3, hours = 12, minutes = 45)
pd.Timedelta(weeks = 3, hours = 12, minutes = 45, days = 8)
# Years will not work here
#pd.Timedelta(days = 3, hours = 12, minutes = 45, year=1)

Timedelta('29 days 12:45:00')

In [119]:
pd.Timedelta("5 minutes")
pd.Timedelta("6 hours 12 minutes")
pd.Timedelta("14 days 6 hours 12 minutes 39 seconds")

Timedelta('14 days 06:12:39')

## Timedeltas in a Dataset

In [125]:
shipping = pd.read_csv("ecommerce.csv", index_col= "ID", parse_dates= ["order_date", "delivery_date"])
shipping.head(3)

Unnamed: 0_level_0,order_date,delivery_date
ID,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1998-05-24,1999-02-05
2,1992-04-22,1998-03-06
4,1991-02-10,1992-08-26


In [128]:
shipping["delivery_time"] = shipping.delivery_date - shipping.order_date

In [130]:
shipping.head(3)

Unnamed: 0_level_0,order_date,delivery_date,delivery_time
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1998-05-24,1999-02-05,257 days
2,1992-04-22,1998-03-06,2144 days
4,1991-02-10,1992-08-26,563 days


In [133]:
shipping["twice_as_long"] = shipping.delivery_date - shipping.delivery_time

In [134]:
shipping.head(3)

Unnamed: 0_level_0,order_date,delivery_date,delivery_time,twice_as_long
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1998-05-24,1999-02-05,257 days,1998-05-24
2,1992-04-22,1998-03-06,2144 days,1992-04-22
4,1991-02-10,1992-08-26,563 days,1991-02-10


In [135]:
shipping.dtypes

order_date        datetime64[ns]
delivery_date     datetime64[ns]
delivery_time    timedelta64[ns]
twice_as_long     datetime64[ns]
dtype: object

In [138]:
mask = shipping.delivery_time > "365 days"
shipping[mask]

Unnamed: 0_level_0,order_date,delivery_date,delivery_time,twice_as_long
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2,1992-04-22,1998-03-06,2144 days,1992-04-22
4,1991-02-10,1992-08-26,563 days,1991-02-10
5,1992-07-21,1997-11-20,1948 days,1992-07-21
7,1993-09-02,1998-06-10,1742 days,1993-09-02
9,1990-01-25,1994-10-02,1711 days,1990-01-25
...,...,...,...,...
986,1990-12-10,1992-12-16,737 days,1990-12-10
990,1991-06-24,1996-02-02,1684 days,1991-06-24
991,1991-09-09,1998-03-30,2394 days,1991-09-09
993,1990-11-16,1998-04-27,2719 days,1990-11-16


In [139]:
mask = shipping.delivery_time < "365 days"
shipping[mask]

Unnamed: 0_level_0,order_date,delivery_date,delivery_time,twice_as_long
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1998-05-24,1999-02-05,257 days,1998-05-24
8,1993-06-10,1993-11-11,154 days,1993-06-10
19,1998-05-10,1998-05-19,9 days,1998-05-10
30,1998-10-22,1999-01-11,81 days,1998-10-22
46,1995-09-05,1996-07-19,318 days,1995-09-05
...,...,...,...,...
954,1993-08-08,1994-01-23,168 days,1993-08-08
969,1996-09-24,1996-11-16,53 days,1996-09-24
975,1997-06-18,1997-11-02,137 days,1997-06-18
985,1995-07-26,1996-06-18,328 days,1995-07-26


In [140]:
mask = shipping.delivery_time == "3423 days"
shipping[mask]

Unnamed: 0_level_0,order_date,delivery_date,delivery_time,twice_as_long
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
130,1990-04-02,1999-08-16,3423 days,1990-04-02


In [141]:
shipping.delivery_time.max()

Timedelta('3583 days 00:00:00')

In [142]:
shipping.delivery_time.min()

Timedelta('8 days 00:00:00')