# Time Methods

## Python Datetime Review

In [1]:
from datetime import datetime

In [2]:
# Date/time components, named in the same order they are later passed to datetime()
my_year, my_month, my_day = 2017, 1, 2
my_hour, my_minute, my_second = 13, 30, 15

In [3]:
# January 2nd, 2017
my_date = datetime(my_year, my_month, my_day)

In [4]:
# Defaults to 0:00
my_date

datetime.datetime(2017, 1, 2, 0, 0)

In [6]:
# January 2nd, 2017 at 13:30:15
my_date_time = datetime(my_year, my_month, my_day, my_hour, my_minute, my_second)

In [7]:
my_date_time

datetime.datetime(2017, 1, 2, 13, 30, 15)

You can access any component of a datetime object as an attribute:

In [8]:
my_date_time.day

2

In [9]:
my_date_time.hour

13

# Pandas

## Converting to datetime

In [10]:
import pandas as pd

In [11]:
myser = pd.Series(['Nov 3, 2000', '2000-01-01', None])

In [12]:
myser

0    Nov 3, 2000
1     2000-01-01
2           None
dtype: object

## pd.to_datetime()

In [14]:
# The two date strings in myser are written in different styles, so ask pandas
# to infer the format of each element individually with format='mixed'.
# The missing entry (None) comes back as NaT.
pd.to_datetime(myser, format='mixed')

0   2000-11-03
1   2000-01-01
2          NaT
dtype: datetime64[ns]

## Custom Time String Formatting
Sometimes dates come in a non-standard format. Luckily, you can always tell pandas exactly which format to expect via the `format` argument. Note that specifying the format can also speed up the conversion, so it may be worth doing even when pandas can parse the strings on its own.

In [16]:
style_date = '12--Dec--2000'

In [17]:
# Format directives: %d = day of month, %b = abbreviated month name, %Y = four-digit year.
# The literal "--" separators in the format must match the ones in the string.
pd.to_datetime(style_date, format='%d--%b--%Y')

Timestamp('2000-12-12 00:00:00')

In [18]:
strange_date = '12th of Dec 2000'

In [19]:
# No format is passed here: pandas is left to parse this string on its own.
pd.to_datetime(strange_date)

Timestamp('2000-12-12 00:00:00')

## Data

In [20]:
sales = pd.read_csv('../data/RetailSales_BeerWineLiquor.csv')

In [21]:
sales

Unnamed: 0,DATE,MRTSSM4453USN
0,1992-01-01,1509
1,1992-02-01,1541
2,1992-03-01,1597
3,1992-04-01,1675
4,1992-05-01,1822
...,...,...
335,2019-12-01,6630
336,2020-01-01,4388
337,2020-02-01,4533
338,2020-03-01,5562


In [22]:
sales.dtypes

DATE             object
MRTSSM4453USN     int64
dtype: object

In [23]:
# Pick the DATE column, take its first entry, and inspect the Python type
type(sales['DATE'].iloc[0])

str

In [24]:
sales['DATE'] = pd.to_datetime(sales['DATE'])

In [25]:
sales

Unnamed: 0,DATE,MRTSSM4453USN
0,1992-01-01,1509
1,1992-02-01,1541
2,1992-03-01,1597
3,1992-04-01,1675
4,1992-05-01,1822
...,...,...
335,2019-12-01,6630
336,2020-01-01,4388
337,2020-02-01,4533
338,2020-03-01,5562


In [29]:
sales.dtypes

DATE             datetime64[ns]
MRTSSM4453USN             int64
dtype: object

----

## Attempt to Parse Dates Automatically

In [31]:
# Parse the DATE column as datetimes while reading the file. Selecting the
# column by name (rather than by position 0) keeps this correct even if the
# column order in the CSV changes.
sales = pd.read_csv('../data/RetailSales_BeerWineLiquor.csv', parse_dates=['DATE'])

In [32]:
sales

Unnamed: 0,DATE,MRTSSM4453USN
0,1992-01-01,1509
1,1992-02-01,1541
2,1992-03-01,1597
3,1992-04-01,1675
4,1992-05-01,1822
...,...,...
335,2019-12-01,6630
336,2020-01-01,4388
337,2020-02-01,4533
338,2020-03-01,5562


In [33]:
# Type of the first DATE entry after loading the file with parse_dates
type(sales['DATE'].iloc[0])

pandas._libs.tslibs.timestamps.Timestamp

## Resample



In [34]:
# Our index
sales.index

RangeIndex(start=0, stop=340, step=1)

In [35]:
# Set DATE as the index (resample needs a datetime-like index)

In [36]:
sales = sales.set_index("DATE")

In [37]:
sales

Unnamed: 0_level_0,MRTSSM4453USN
DATE,Unnamed: 1_level_1
1992-01-01,1509
1992-02-01,1541
1992-03-01,1597
1992-04-01,1675
1992-05-01,1822
...,...
2019-12-01,6630
2020-01-01,4388
2020-02-01,4533
2020-03-01,5562


In [38]:
# Yearly means, labelled by year-end date.
# 'YE' (year end) replaces the older 'A' alias, which is deprecated since
# pandas 2.2; note that 'YE' itself requires pandas >= 2.2.
sales.resample(rule='YE').mean()

Unnamed: 0_level_0,MRTSSM4453USN
DATE,Unnamed: 1_level_1
1992-12-31,1807.25
1993-12-31,1794.833333
1994-12-31,1841.75
1995-12-31,1833.916667
1996-12-31,1929.75
1997-12-31,2006.75
1998-12-31,2115.166667
1999-12-31,2206.333333
2000-12-31,2375.583333
2001-12-31,2468.416667


## .dt Method Calls

In [39]:
sales = sales.reset_index()

In [40]:
sales

Unnamed: 0,DATE,MRTSSM4453USN
0,1992-01-01,1509
1,1992-02-01,1541
2,1992-03-01,1597
3,1992-04-01,1675
4,1992-05-01,1822
...,...,...
335,2019-12-01,6630
336,2020-01-01,4388
337,2020-02-01,4533
338,2020-03-01,5562


In [41]:
sales['DATE'].dt.month

0       1
1       2
2       3
3       4
4       5
       ..
335    12
336     1
337     2
338     3
339     4
Name: DATE, Length: 340, dtype: int32

In [42]:
sales['DATE'].dt.is_leap_year

0       True
1       True
2       True
3       True
4       True
       ...  
335    False
336     True
337     True
338     True
339     True
Name: DATE, Length: 340, dtype: bool

In [43]:
# Boolean-mask selection: keep only the rows whose DATE falls in a leap year
sales.loc[sales['DATE'].dt.is_leap_year]

Unnamed: 0,DATE,MRTSSM4453USN
0,1992-01-01,1509
1,1992-02-01,1541
2,1992-03-01,1597
3,1992-04-01,1675
4,1992-05-01,1822
...,...,...
299,2016-12-01,6057
336,2020-01-01,4388
337,2020-02-01,4533
338,2020-03-01,5562
