In [1]:
import pandas as pd
import numpy as np

# Timestamp

In [2]:
# Timestamp is pandas' basic time type: it represents one single moment.
# We can create one from a string with the top-level pd.Timestamp constructor.
# An ISO 8601 string (year-month-day) is used so the date cannot be misread:
# a day-first string such as "15/9/2021" is only parsed as intended because 15
# is not a valid month, whereas "5/9/2021" would be read month-first (May 9).
pd.Timestamp("2021-09-15 10:15")

Timestamp('2021-09-15 10:15:00')

In [4]:
# A Timestamp can also be built from its individual components;
# keyword arguments make it explicit which number is which.
pd.Timestamp(year=2021, month=9, day=15, hour=10, minute=15)

Timestamp('2021-09-15 10:15:00')

In [6]:
# Timestamps expose many handy attributes and methods, for example isoweekday(),
# which returns the day of the week from 1 (Monday) to 7 (Sunday).
sample_ts = pd.Timestamp(year=2021, month=9, day=15, hour=10, minute=15)
sample_ts.isoweekday()

3

In [7]:
# Each component is also available as an attribute; here, the seconds field.
pd.Timestamp(year=2021, month=9, day=15, hour=10, minute=15, second=30).second

30

# Period

In [8]:
# A Period represents a SPAN of time instead of one specific moment.
# Given only a month and a year, the span is that whole month.
pd.Period(value="02/2021")

Period('2021-02', 'M')

In [9]:
# The Period class automatically detects the appropriate span from its input.
# Here a full date is given, so the period becomes a single day.
# Note that the string is read month-first: "05/02/2021" is May 2, 2021.
pd.Period(value="05/02/2021")

Period('2021-05-02', 'D')

In [10]:
# A Period covers its complete span and supports arithmetic:
# adding an integer shifts it by that many units of its own frequency
# (here, four months forward).
feb_2021 = pd.Period("02/2021")
feb_2021 + 4

Period('2021-06', 'M')

# DatetimeIndex and PeriodIndex

In [14]:
# A Series whose index labels are Timestamps gets a DatetimeIndex.
stamps = [pd.Timestamp(day) for day in ("2021/02/15", "2021/02/15", "2021/02/16")]
series1 = pd.Series(["a", "b", "c"], index=stamps)
series1

2021-02-15    a
2021-02-15    b
2021-02-16    c
dtype: object

In [15]:
# Inspect the index of the Series to see which index type pandas built for it.
series1.index

DatetimeIndex(['2021-02-15', '2021-02-15', '2021-02-16'], dtype='datetime64[ns]', freq=None)

In [16]:
# Index labels can also be Periods, in which case the Series gets a PeriodIndex.
months = [pd.Period("2021/02") for _ in range(3)]
series1 = pd.Series(["a", "b", "c"], index=months)
series1

2021-02    a
2021-02    b
2021-02    c
Freq: M, dtype: object

In [17]:
# Inspect the index of the Series to see which index type pandas built for it.
series1.index

PeriodIndex(['2021-02', '2021-02', '2021-02'], dtype='period[M]', freq='M')

# Convert to DateTime

In [26]:
# pandas can also convert messy date formats into one unified format.
# Build a sample DataFrame whose index labels are date strings written in
# four different styles.
indexs = ["2 June 2020", "2020-06-06", "June 4, 2020", "05/06/2020"]
# A seeded generator keeps the random values identical on every run.
rng = np.random.default_rng(42)
# One row per index label and one column per name, so the shape of the random
# block always matches the DataFrame.
df = pd.DataFrame(rng.integers(low=10, high=100, size=(len(indexs), 2)), index=indexs, columns=["a","b"])
df

Unnamed: 0,a,b
2 June 2020,19,25
2020-06-06,37,13
"June 4, 2020",35,58
05/06/2020,39,69


In [27]:
# Convert the string labels of the index into a proper DatetimeIndex.
# The labels use several different formats, so each one is parsed on its own:
# a single pd.to_datetime(df.index) call infers one format for the whole index
# and, on pandas 2.0 and later, raises ValueError at the first label that does
# not match it.
# Note that "05/06/2020" is read month-first (May 6, 2020); pass dayfirst=True
# to pd.to_datetime if the data is written day-first.
df.index = pd.DatetimeIndex([pd.to_datetime(label) for label in df.index])
df

Unnamed: 0,a,b
2020-06-02,19,25
2020-06-06,37,13
2020-06-04,35,58
2020-05-06,39,69


# Timedeltas

In [29]:
# Timedeltas are differences in time and allow some interesting date manipulation.
# The difference between two Timestamps is a Timedelta; it is negative here
# because the later date is subtracted from the earlier one.
# ISO 8601 strings are used so both dates are read the same way: slash-separated
# dates are read month-first unless the first number cannot be a month, so
# "02/02/2021" and "15/02/2021" would be interpreted under different rules.
pd.Timestamp("2021-02-02") - pd.Timestamp("2021-02-15")

Timedelta('-13 days +00:00:00')

In [31]:
# Timedeltas can be combined with Timestamps:
# adding a ten-day Timedelta moves the Timestamp ten days forward.
start_day = pd.Timestamp("02/02/2021")
ten_days = pd.Timedelta("10D")
start_day + ten_days

Timestamp('2021-02-12 00:00:00')

# Offsets

In [33]:
# Offsets are similar to Timedeltas but more flexible and calendar-aware
# (weeks, month ends, business days, ...).
# First, check which weekday the reference date falls on (0 = Monday ... 6 = Sunday).
reference_day = pd.Timestamp("02/02/2021")
reference_day.day_of_week

1

In [34]:
# For example, let's add one week to the date.
pd.Timestamp("02/02/2021") + pd.offsets.Week(n=1)

Timestamp('2021-02-09 00:00:00')

In [35]:
# Or roll the date forward to the end of its month.
pd.Timestamp("02/02/2021") + pd.offsets.MonthEnd(n=1)

Timestamp('2021-02-28 00:00:00')

In [36]:
# Or roll the date forward to the end of its quarter.
pd.Timestamp("02/02/2021") + pd.offsets.QuarterEnd(n=1)

Timestamp('2021-03-31 00:00:00')

# Working with dates

In [46]:
# We can also create an index with date_range, which takes:
#   start   - the first candidate date,
#   periods - how many entries to generate (12 here),
#   freq    - the step between entries (here "2W": every two weeks).
# A fixed start is used instead of pd.Timestamp.now() so that re-running the
# notebook always produces the same index.
indexs = pd.date_range(start=pd.Timestamp("2021-02-07 21:36:51.797916"), periods=12, freq="2W")
indexs

DatetimeIndex(['2021-02-07 21:36:51.797916', '2021-02-21 21:36:51.797916',
               '2021-03-07 21:36:51.797916', '2021-03-21 21:36:51.797916',
               '2021-04-04 21:36:51.797916', '2021-04-18 21:36:51.797916',
               '2021-05-02 21:36:51.797916', '2021-05-16 21:36:51.797916',
               '2021-05-30 21:36:51.797916', '2021-06-13 21:36:51.797916',
               '2021-06-27 21:36:51.797916', '2021-07-11 21:36:51.797916'],
              dtype='datetime64[ns]', freq='2W-SUN')

In [47]:
# The frequency can also be business days ("B"), which skips weekends.
# A fixed start is used instead of pd.Timestamp.now() so that re-running the
# notebook always produces the same index.
indexs = pd.date_range(start=pd.Timestamp("2021-02-07 21:38:20.030250"), periods=12, freq="B")
indexs

DatetimeIndex(['2021-02-08 21:38:20.030250', '2021-02-09 21:38:20.030250',
               '2021-02-10 21:38:20.030250', '2021-02-11 21:38:20.030250',
               '2021-02-12 21:38:20.030250', '2021-02-15 21:38:20.030250',
               '2021-02-16 21:38:20.030250', '2021-02-17 21:38:20.030250',
               '2021-02-18 21:38:20.030250', '2021-02-19 21:38:20.030250',
               '2021-02-22 21:38:20.030250', '2021-02-23 21:38:20.030250'],
              dtype='datetime64[ns]', freq='B')

In [50]:
# The frequency can also be quarterly, and the quarters can even be anchored so
# that they start in February ("QS-FEB": quarter starts in Feb, May, Aug, Nov).
# A fixed start is used instead of pd.Timestamp.now() so that re-running the
# notebook always produces the same index.
indexs = pd.date_range(start=pd.Timestamp("2021-02-07 21:39:16.852331"), periods=12, freq="QS-FEB")
indexs

DatetimeIndex(['2021-05-01 21:39:16.852331', '2021-08-01 21:39:16.852331',
               '2021-11-01 21:39:16.852331', '2022-02-01 21:39:16.852331',
               '2022-05-01 21:39:16.852331', '2022-08-01 21:39:16.852331',
               '2022-11-01 21:39:16.852331', '2023-02-01 21:39:16.852331',
               '2023-05-01 21:39:16.852331', '2023-08-01 21:39:16.852331',
               '2023-11-01 21:39:16.852331', '2024-02-01 21:39:16.852331'],
              dtype='datetime64[ns]', freq='QS-FEB')

In [54]:
# Now let's explore working with a DatetimeIndex in a DataFrame.
# The index starts at a fixed timestamp rather than pd.Timestamp.now(): the
# cells below select rows by the literal dates 2021-02-07 and 2021-02-08, which
# only exist in the index when it starts on that day.
indexs = pd.date_range(start=pd.Timestamp("2021-02-07 21:44:53.824169"), periods=3, freq="D")
# A seeded generator keeps the random values identical on every run.
rng = np.random.default_rng(0)
# One row of random integers in [1, 10) per date, in columns "a" and "b".
df = pd.DataFrame(rng.integers(1, 10, size=(len(indexs), 2)), index=indexs, columns=["a", "b"])
df

Unnamed: 0,a,b
2021-02-07 21:44:53.824169,8,7
2021-02-08 21:44:53.824169,1,1
2021-02-09 21:44:53.824169,1,3


In [60]:
# Inspect the weekday of every entry in the index (0 = Monday ... 6 = Sunday).
df.index.day_of_week

Int64Index([6, 0, 1], dtype='int64')

In [65]:
# Data can be summarized by date with resampling, which works much like grouping:
# rows are collected into time bins and then aggregated.
# This example sums each column per week ("W").
df.resample("W").sum()

Unnamed: 0,a,b
2021-02-07,8,7
2021-02-14,2,4


In [66]:
# Rows can also be selected with a date string: a date-only key passed to .loc
# selects the rows whose timestamp falls on that day, whatever their time of day.
df.loc["2021-02-07"]

Unnamed: 0,a,b
2021-02-07 21:44:53.824169,8,7


In [68]:
# Here we want every row up to a given date, INCLUSIVE:
# the slice keeps the rows that fall on the end date itself.
df.loc[:"2021-02-08"]

Unnamed: 0,a,b
2021-02-07 21:44:53.824169,8,7
2021-02-08 21:44:53.824169,1,1
