In [1]:
import pandas as pd
from datetime import datetime
import numpy as np

In [2]:
df = pd.date_range(start='11/01/2019', end='11/07/2019', freq='H')
df

DatetimeIndex(['2019-11-01 00:00:00', '2019-11-01 01:00:00',
               '2019-11-01 02:00:00', '2019-11-01 03:00:00',
               '2019-11-01 04:00:00', '2019-11-01 05:00:00',
               '2019-11-01 06:00:00', '2019-11-01 07:00:00',
               '2019-11-01 08:00:00', '2019-11-01 09:00:00',
               ...
               '2019-11-06 15:00:00', '2019-11-06 16:00:00',
               '2019-11-06 17:00:00', '2019-11-06 18:00:00',
               '2019-11-06 19:00:00', '2019-11-06 20:00:00',
               '2019-11-06 21:00:00', '2019-11-06 22:00:00',
               '2019-11-06 23:00:00', '2019-11-07 00:00:00'],
              dtype='datetime64[ns]', length=145, freq='H')

In [3]:
len(df)

145

In [6]:
df = pd.DataFrame(df,columns=['date'])
df[:5]

Unnamed: 0,date
0,2019-11-01 00:00:00
1,2019-11-01 01:00:00
2,2019-11-01 02:00:00
3,2019-11-01 03:00:00
4,2019-11-01 04:00:00


In [8]:
df['sales'] = np.random.randint(0,1000,size=(len(df)))
df.head()

Unnamed: 0,date,sales
0,2019-11-01 00:00:00,460
1,2019-11-01 01:00:00,685
2,2019-11-01 02:00:00,50
3,2019-11-01 03:00:00,893
4,2019-11-01 04:00:00,76


In [10]:
df= df.set_index('date')
df.head()

Unnamed: 0_level_0,sales
date,Unnamed: 1_level_1
2019-11-01 00:00:00,460
2019-11-01 01:00:00,685
2019-11-01 02:00:00,50
2019-11-01 03:00:00,893
2019-11-01 04:00:00,76


In [12]:
df.loc['2019-11-01 04:00:00	','sales']

76

In [13]:
df.loc['2019-11-01 03:00:00']

sales    893
Name: 2019-11-01 03:00:00, dtype: int32

In [14]:
# Selecting an entire day
df.loc['2019-11-01']

# Similary you an use df.loc['2019-11'] to select and entire month

Unnamed: 0_level_0,sales
date,Unnamed: 1_level_1
2019-11-01 00:00:00,460
2019-11-01 01:00:00,685
2019-11-01 02:00:00,50
2019-11-01 03:00:00,893
2019-11-01 04:00:00,76
2019-11-01 05:00:00,674
2019-11-01 06:00:00,167
2019-11-01 07:00:00,179
2019-11-01 08:00:00,441
2019-11-01 09:00:00,573


In [15]:
df.loc['2019-11-01':'2019-11-02']

Unnamed: 0_level_0,sales
date,Unnamed: 1_level_1
2019-11-01 00:00:00,460
2019-11-01 01:00:00,685
2019-11-01 02:00:00,50
2019-11-01 03:00:00,893
2019-11-01 04:00:00,76
2019-11-01 05:00:00,674
2019-11-01 06:00:00,167
2019-11-01 07:00:00,179
2019-11-01 08:00:00,441
2019-11-01 09:00:00,573


In [16]:
# Using resample to get the average for each day per hour
df.resample('D').mean()
# D specifies DAY

Unnamed: 0_level_0,sales
date,Unnamed: 1_level_1
2019-11-01,452.25
2019-11-02,569.958333
2019-11-03,515.625
2019-11-04,545.875
2019-11-05,552.916667
2019-11-06,516.291667
2019-11-07,410.0


In [17]:
df.resample('T').mean()

Unnamed: 0_level_0,sales
date,Unnamed: 1_level_1
2019-11-01 00:00:00,460.0
2019-11-01 00:01:00,
2019-11-01 00:02:00,
2019-11-01 00:03:00,
2019-11-01 00:04:00,
...,...
2019-11-06 23:56:00,
2019-11-06 23:57:00,
2019-11-06 23:58:00,
2019-11-06 23:59:00,


## Resampling frequencies

- 'min', 'T' - minute
- ‘H’ - hour
- ‘D’ - day
- ‘B’ - business day
- ‘W’ - week
- ‘M’ - month
- ‘Q’ - quarter
- ‘A’ - year
- 'Siyam'

In [18]:
df = pd.DataFrame({'year':[2015,2016],
                  'month':[2,3],
                  'day':[4,5]})
df

Unnamed: 0,year,month,day
0,2015,2,4
1,2016,3,5


In [19]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2 entries, 0 to 1
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   year    2 non-null      int64
 1   month   2 non-null      int64
 2   day     2 non-null      int64
dtypes: int64(3)
memory usage: 176.0 bytes


In [20]:
pd.to_datetime(df)

0   2015-02-04
1   2016-03-05
dtype: datetime64[ns]

In [26]:
pd.to_datetime('2019-01-01', format='%Y-%m-%d', errors='ignore')

Timestamp('2019-01-01 00:00:00')