# Working with dates and time in Pandas

Source article for this example: https://towardsdatascience.com/basic-time-series-manipulation-with-pandas-4432afee64ea

## Creating dummy data

In [1]:
import pandas as pd
from datetime import datetime
import numpy as np

# Create some dummy data to represent a problem
date_rng = pd.date_range(start='1/1/2017', end='5/15/2017', freq='3H')
df = pd.DataFrame(date_rng, columns=['date'])
df['Column 1'] = np.random.randint(0,100,size=(len(date_rng)))
df['Column 2'] = np.random.randint(0,100,size=(len(date_rng)))
df['Column 3'] = np.random.randint(0,100,size=(len(date_rng)))
df['Column 4'] = np.random.randint(0,100,size=(len(date_rng)))
                               
# Set datetimes as index instead of the default '0, 1, 2' etc.  
df['datetime'] = pd.to_datetime(df['date'])
df = df.set_index('datetime')
df.drop(['date'], axis=1, inplace=True)
                               
df

  date_rng = pd.date_range(start='1/1/2017', end='5/15/2017', freq='3H')


Unnamed: 0_level_0,Column 1,Column 2,Column 3,Column 4
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-01-01 00:00:00,47,66,93,67
2017-01-01 03:00:00,29,54,67,44
2017-01-01 06:00:00,82,38,60,32
2017-01-01 09:00:00,97,59,8,40
2017-01-01 12:00:00,44,29,44,86
...,...,...,...,...
2017-05-14 12:00:00,37,70,39,17
2017-05-14 15:00:00,65,47,69,93
2017-05-14 18:00:00,32,40,17,34
2017-05-14 21:00:00,0,26,65,82


## Extract fast statistics

In [2]:
df.describe()

Unnamed: 0,Column 1,Column 2,Column 3,Column 4
count,1073.0,1073.0,1073.0,1073.0
mean,50.434296,49.860205,49.71109,49.899348
std,29.404437,28.706197,29.089742,29.047843
min,0.0,0.0,0.0,0.0
25%,25.0,26.0,25.0,25.0
50%,51.0,50.0,49.0,50.0
75%,77.0,75.0,75.0,75.0
max,99.0,99.0,99.0,99.0


## Extracting data for specific day of the month

In [3]:
df[df.index.day == 2]

Unnamed: 0_level_0,Column 1,Column 2,Column 3,Column 4
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-01-02 00:00:00,64,54,96,41
2017-01-02 03:00:00,24,65,60,60
2017-01-02 06:00:00,77,56,62,16
2017-01-02 09:00:00,86,93,62,6
2017-01-02 12:00:00,53,40,67,81
2017-01-02 15:00:00,51,98,17,32
2017-01-02 18:00:00,89,12,42,16
2017-01-02 21:00:00,19,56,52,96
2017-02-02 00:00:00,86,52,57,12
2017-02-02 03:00:00,44,48,11,99


## Extracting data for specific date

In [4]:
df['2017-01-04']

KeyError: '2017-01-04'

## Extracting data between two dates

In [None]:
df['2017-03-04':'2017-03-06']

## Compute statistics over time interval

There are many built-in arguments for the `resample` method, some basic ones include:  
* `'H'` for hours
* `'D'` for days
* `'B'` for business days (weekdays)
* `'M'` for months
* `'Y'` for years

In [None]:
df.resample('M').mean()

In [None]:
df.resample('5D').max()