In [1]:
import numpy as np
import pandas as pd
import datetime

# Time/date Overview

Converting strings, `np.datetime64` values, and standard-library `datetime` objects to pandas datetimes

In [2]:
# Three different representations of the same day (a date string, a NumPy
# datetime64, and a standard-library datetime) are converted by pd.to_datetime
# into a single DatetimeIndex.
dti = pd.to_datetime([
    '1/1/2018',
    np.datetime64('2018-01-01'),
    datetime.datetime(2018, 1, 1),
])

dti

DatetimeIndex(['2018-01-01', '2018-01-01', '2018-01-01'], dtype='datetime64[ns]', freq=None)

Generating a sequence

In [3]:
# Five consecutive month-end dates, beginning with the first month-end on or
# after 2020-01-01.
# NOTE(review): newer pandas releases deprecate the 'M' alias in favour of
# 'ME' -- confirm the target pandas version before changing it.
idx = pd.date_range(start='2020-1-1', periods=5, freq='M')
idx

DatetimeIndex(['2020-01-31', '2020-02-29', '2020-03-31', '2020-04-30',
               '2020-05-31'],
              dtype='datetime64[ns]', freq='M')

In [4]:
# One-column frame holding a running counter, indexed by the month-end dates
# in `idx`.
ts = pd.DataFrame(
    data=range(len(idx)),
    index=idx,
    columns=['number'],
)
ts

Unnamed: 0,number
2020-01-31,0
2020-02-29,1
2020-03-31,2
2020-04-30,3
2020-05-31,4


In [5]:
# Upsample the month-end rows to daily frequency; days that have no source row
# come out as NaN (empty cells in the output below).
ts.resample('D').mean()

Unnamed: 0,number
2020-01-31,0.0
2020-02-01,
2020-02-02,
2020-02-03,
2020-02-04,
...,...
2020-05-27,
2020-05-28,
2020-05-29,
2020-05-30,


Various built-in functionality

In [6]:
# Label every row with the weekday name of its index date.
weekday_names = list(ts.index.day_name())
ts['Day of the Week'] = weekday_names
ts

Unnamed: 0,number,Day of the Week
2020-01-31,0,Friday
2020-02-29,1,Saturday
2020-03-31,2,Tuesday
2020-04-30,3,Thursday
2020-05-31,4,Sunday


In [7]:
# The first index entry (2020-01-31) is a Friday; show the timestamp and then
# its weekday name.
friday = ts.index[0]
for shown in (friday, friday.day_name()):
    print(shown)

2020-01-31 00:00:00
Friday


Adding a day to a date

In [8]:
# A Timedelta is a fixed duration, so adding one day to the Friday lands on
# the Saturday.
one_day = pd.Timedelta(days=1)
saturday = friday + one_day
print(saturday)
print(saturday.day_name())

2020-02-01 00:00:00
Saturday


Offsetting by business days

In [9]:
# A business-day offset skips the weekend: two business days after the Friday
# is the following Tuesday.
two_business_days = pd.offsets.BDay(n=2)
tuesday = friday + two_business_days
print(tuesday)
print(tuesday.day_name())

2020-02-04 00:00:00
Tuesday


# Timestamps vs Time Spans

In [14]:
# A Timestamp is a single point in time; here it is parsed from a date string.
pd.Timestamp('2020/1/5')

Timestamp('2020-01-05 00:00:00')

In [17]:
# A Timestamp can also be built from positional year, month and day integers.
pd.Timestamp(2012,12,25)

Timestamp('2012-12-25 00:00:00')

In [18]:
# A Period is a span of time. With no explicit freq, the frequency is inferred
# from the string -- monthly here, as the output shows.
pd.Period('2011-1')

Period('2011-01', 'M')

In [19]:
# An explicit daily freq turns the year-month string into a one-day period,
# which the output shows as the first day of that month.
pd.Period('2015-05',freq='D')

Period('2015-05-01', 'D')

In [24]:
# Three consecutive daily timestamps used as the index of a small Series.
dates = [
    pd.Timestamp('2012-05-01'),
    pd.Timestamp('2012-05-02'),
    pd.Timestamp('2012-05-03'),
]

# Values are unseeded random integers in [0, 100), so they differ on every run.
# The size is tied to len(dates) so the values and the index cannot drift apart.
ts = pd.Series(np.random.randint(100, size=len(dates)), index=dates)
ts

2012-05-01    55
2012-05-02    80
2012-05-03    21
dtype: int32

In [25]:
# A Series indexed by Timestamp objects ends up with a DatetimeIndex.
type(ts.index)

pandas.core.indexes.datetimes.DatetimeIndex

In [26]:
# Same idea as the Timestamp-indexed Series, but indexed by monthly Period
# objects. Values are unseeded random floats.
month_labels = ('2012-01', '2012-02', '2012-03')
periods = [pd.Period(label) for label in month_labels]
ts = pd.Series(data=np.random.rand(3), index=periods)
ts

2012-01    0.668325
2012-02    0.009027
2012-03    0.493089
Freq: M, dtype: float64

In [28]:
# Indexing by Period objects yields a PeriodIndex with monthly frequency.
ts.index

PeriodIndex(['2012-01', '2012-02', '2012-03'], dtype='period[M]', freq='M')

In [30]:
# Parse a Series whose elements use two different date layouts.
# NOTE(review): newer pandas releases infer a single format from the first
# element and may reject the second string unless format='mixed' is passed --
# confirm against the pandas version in use.
pd.to_datetime(pd.Series(['2009-3/1','1.15.20']))

0   2009-03-01
1   2020-01-15
dtype: datetime64[ns]

# Assembling datetime from multiple DF columns

In [31]:
# pd.to_datetime can assemble timestamps from a frame whose columns are named
# after date/time components (year, month, day, hour).
df = pd.DataFrame({
    'year': [2015, 2016],
    'month': [2, 3],
    'day': [4, 5],
    'hour': [2, 3],
})

pd.to_datetime(df)

0   2015-02-04 02:00:00
1   2016-03-05 03:00:00
dtype: datetime64[ns]

In [33]:
# Build the date from the named year/month/day columns only; the hour is left
# out, matching the original "all but the last column" slice. Selecting by
# name rather than by position keeps the cell idempotent: once 'ts' exists,
# a positional slice would pick up 'hour' on a re-run and change the result.
df['ts'] = pd.to_datetime(df[['year', 'month', 'day']])
df

Unnamed: 0,year,month,day,hour,ts
0,2015,2,4,2,2015-02-04
1,2016,3,5,3,2016-03-05


In [34]:
# Confirm the assembled 'ts' column has a datetime64 dtype.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2 entries, 0 to 1
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   year    2 non-null      int64         
 1   month   2 non-null      int64         
 2   day     2 non-null      int64         
 3   hour    2 non-null      int64         
 4   ts      2 non-null      datetime64[ns]
dtypes: datetime64[ns](1), int64(4)
memory usage: 208.0 bytes


In [35]:
# NOTE(review): passing freq='M' replaces bdate_range's business-day
# frequency, so the result is plain calendar month-ends and includes weekend
# dates (e.g. 2020-02-29, a Saturday). If business month-ends were intended, a
# business-month-end alias is needed instead -- confirm the intent.
pd.bdate_range('1/1/2020','1/1/2025',freq='M')

DatetimeIndex(['2020-01-31', '2020-02-29', '2020-03-31', '2020-04-30',
               '2020-05-31', '2020-06-30', '2020-07-31', '2020-08-31',
               '2020-09-30', '2020-10-31', '2020-11-30', '2020-12-31',
               '2021-01-31', '2021-02-28', '2021-03-31', '2021-04-30',
               '2021-05-31', '2021-06-30', '2021-07-31', '2021-08-31',
               '2021-09-30', '2021-10-31', '2021-11-30', '2021-12-31',
               '2022-01-31', '2022-02-28', '2022-03-31', '2022-04-30',
               '2022-05-31', '2022-06-30', '2022-07-31', '2022-08-31',
               '2022-09-30', '2022-10-31', '2022-11-30', '2022-12-31',
               '2023-01-31', '2023-02-28', '2023-03-31', '2023-04-30',
               '2023-05-31', '2023-06-30', '2023-07-31', '2023-08-31',
               '2023-09-30', '2023-10-31', '2023-11-30', '2023-12-31',
               '2024-01-31', '2024-02-29', '2024-03-31', '2024-04-30',
               '2024-05-31', '2024-06-30', '2024-07-31', '2024-08-31',
      

In [38]:
# Calendar month-end dates between the two bounds; the output is the same as
# the bdate_range call above because both use freq='M'.
pd.date_range('1/1/2020','1/1/2025',freq='M')

DatetimeIndex(['2020-01-31', '2020-02-29', '2020-03-31', '2020-04-30',
               '2020-05-31', '2020-06-30', '2020-07-31', '2020-08-31',
               '2020-09-30', '2020-10-31', '2020-11-30', '2020-12-31',
               '2021-01-31', '2021-02-28', '2021-03-31', '2021-04-30',
               '2021-05-31', '2021-06-30', '2021-07-31', '2021-08-31',
               '2021-09-30', '2021-10-31', '2021-11-30', '2021-12-31',
               '2022-01-31', '2022-02-28', '2022-03-31', '2022-04-30',
               '2022-05-31', '2022-06-30', '2022-07-31', '2022-08-31',
               '2022-09-30', '2022-10-31', '2022-11-30', '2022-12-31',
               '2023-01-31', '2023-02-28', '2023-03-31', '2023-04-30',
               '2023-05-31', '2023-06-30', '2023-07-31', '2023-08-31',
               '2023-09-30', '2023-10-31', '2023-11-30', '2023-12-31',
               '2024-01-31', '2024-02-29', '2024-03-31', '2024-04-30',
               '2024-05-31', '2024-06-30', '2024-07-31', '2024-08-31',
      

In [40]:
# pd.Timedelta rejects the month unit 'M' because it does not describe an
# unambiguous duration, so this call raises ValueError. The error is the point
# of the demonstration, so it is caught and printed; that way the notebook
# still runs to completion under "Restart & Run All".
try:
    pd.Timedelta('2020/1/1', unit='M')
except ValueError as err:
    print(f'{type(err).__name__}: {err}')

ValueError: Units 'M' and 'Y' are no longer supported, as they do not represent unambiguous timedelta values durations.

In [41]:
# 50 rows of unseeded random integers in [0, 100) in two columns, A and B.
df = pd.DataFrame(
    np.random.randint(100, size=(50, 2)),
    columns=['A', 'B'],
)
# Preview the first rows only instead of dumping the whole frame.
df.head(10)

Unnamed: 0,A,B
0,21,6
1,19,79
2,17,42
3,27,16
4,70,61
5,14,41
6,42,33
7,96,80
8,47,8
9,45,80


In [42]:
# One month-end date per row, starting in January 2002. The number of periods
# is taken from the frame itself, so the assignment cannot fail with a length
# mismatch if the row count above is changed.
df['time'] = pd.date_range('1/1/2002', freq='M', periods=len(df))

In [44]:
# Check the dtypes: the new 'time' column should be datetime64.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   A       50 non-null     int32         
 1   B       50 non-null     int32         
 2   time    50 non-null     datetime64[ns]
dtypes: datetime64[ns](1), int32(2)
memory usage: 928.0 bytes


In [46]:
# Weekday name of each row's month-end date, via the .dt accessor.
df['dateName'] = df['time'].dt.day_name()

In [49]:
# Shift every date forward by three business days and record the weekday it
# lands on.
three_business_days = pd.offsets.BDay(3)
df['offset'] = df['time'] + three_business_days
df['offsetName'] = df['offset'].dt.day_name()
df

Unnamed: 0,A,B,time,dateName,offset,offsetName
0,21,6,2002-01-31,Thursday,2002-02-05,Tuesday
1,19,79,2002-02-28,Thursday,2002-03-05,Tuesday
2,17,42,2002-03-31,Sunday,2002-04-03,Wednesday
3,27,16,2002-04-30,Tuesday,2002-05-03,Friday
4,70,61,2002-05-31,Friday,2002-06-05,Wednesday
5,14,41,2002-06-30,Sunday,2002-07-03,Wednesday
6,42,33,2002-07-31,Wednesday,2002-08-05,Monday
7,96,80,2002-08-31,Saturday,2002-09-04,Wednesday
8,47,8,2002-09-30,Monday,2002-10-03,Thursday
9,45,80,2002-10-31,Thursday,2002-11-05,Tuesday


In [51]:
# Random date components, one per row. np.random.randint excludes its upper
# bound, so years fall in 2000-2020, months in 1-12 and days in 1-27.
n_rows = len(df)
df['year'] = np.random.randint(2000, 2021, size=n_rows)
df['month'] = np.random.randint(1, 13, size=n_rows)
df['day'] = np.random.randint(1, 28, size=n_rows)

In [52]:
# Preview the frame with the new random year/month/day columns.
df.head()

Unnamed: 0,A,B,time,dateName,offset,offsetName,year,month,day
0,21,6,2002-01-31,Thursday,2002-02-05,Tuesday,2006,5,3
1,19,79,2002-02-28,Thursday,2002-03-05,Tuesday,2005,7,15
2,17,42,2002-03-31,Sunday,2002-04-03,Wednesday,2004,2,27
3,27,16,2002-04-30,Tuesday,2002-05-03,Friday,2019,3,1
4,70,61,2002-05-31,Friday,2002-06-05,Wednesday,2016,1,27


In [57]:
# Assemble a date from the year/month/day columns. The columns are selected by
# name rather than as "the last three columns": after this cell runs,
# 'combinedDate' is itself the last column, so a positional slice would no
# longer contain 'year' and a re-run would fail.
df['combinedDate'] = pd.to_datetime(df[['year', 'month', 'day']])

In [58]:
# Preview the frame with the new combinedDate column.
df.head()

Unnamed: 0,A,B,time,dateName,offset,offsetName,year,month,day,combinedDate
0,21,6,2002-01-31,Thursday,2002-02-05,Tuesday,2006,5,3,2006-05-03
1,19,79,2002-02-28,Thursday,2002-03-05,Tuesday,2005,7,15,2005-07-15
2,17,42,2002-03-31,Sunday,2002-04-03,Wednesday,2004,2,27,2004-02-27
3,27,16,2002-04-30,Tuesday,2002-05-03,Friday,2019,3,1,2019-03-01
4,70,61,2002-05-31,Friday,2002-06-05,Wednesday,2016,1,27,2016-01-27


In [59]:
# Verify that combinedDate was parsed as a datetime64 column.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   A             50 non-null     int32         
 1   B             50 non-null     int32         
 2   time          50 non-null     datetime64[ns]
 3   dateName      50 non-null     object        
 4   offset        50 non-null     datetime64[ns]
 5   offsetName    50 non-null     object        
 6   year          50 non-null     int32         
 7   month         50 non-null     int32         
 8   day           50 non-null     int32         
 9   combinedDate  50 non-null     datetime64[ns]
dtypes: datetime64[ns](3), int32(5), object(2)
memory usage: 3.1+ KB


In [73]:
# Shift each combined date forward by a fixed five calendar days.
five_days = pd.Timedelta(5, unit='D')
df['dateoffset2'] = df['combinedDate'] + five_days
df.head()

Unnamed: 0,A,B,time,dateName,offset,offsetName,year,month,day,combinedDate,dateoffset2
0,21,6,2002-01-31,Thursday,2002-02-05,Tuesday,2006,5,3,2006-05-03,2006-05-08
1,19,79,2002-02-28,Thursday,2002-03-05,Tuesday,2005,7,15,2005-07-15,2005-07-20
2,17,42,2002-03-31,Sunday,2002-04-03,Wednesday,2004,2,27,2004-02-27,2004-03-03
3,27,16,2002-04-30,Tuesday,2002-05-03,Friday,2019,3,1,2019-03-01,2019-03-06
4,70,61,2002-05-31,Friday,2002-06-05,Wednesday,2016,1,27,2016-01-27,2016-02-01


# Resample

In [75]:
# 100 one-second timestamps starting at midnight on 2020-01-01, each paired
# with an unseeded random integer in [0, 550).
rng = pd.date_range(start='1/1/2020', periods=100, freq='S')

ts = pd.Series(data=np.random.randint(0, 550, size=len(rng)), index=rng)
ts

2020-01-01 00:00:00    134
2020-01-01 00:00:01    446
2020-01-01 00:00:02     66
2020-01-01 00:00:03    169
2020-01-01 00:00:04    327
                      ... 
2020-01-01 00:01:35    531
2020-01-01 00:01:36    374
2020-01-01 00:01:37    536
2020-01-01 00:01:38    292
2020-01-01 00:01:39    314
Freq: S, Length: 100, dtype: int32

In [76]:
# Downsample the per-second series into 5-minute bins and sum each bin. All
# 100 seconds fall inside the first bin, so a single total is returned.
ts.resample('5Min').sum()

2020-01-01    27106
Freq: 5T, dtype: int32