- Basic Python has a datetime object containing date and time information. Pandas allows us to easily extract information from a datetime object to use in feature engineering
- For example, we may have recent timestamped sales data
- Pandas allows the extraction of information from the timestamp such as:
    - Day of the Week
    - Weekend vs Weekday
    - AM vs PM

# Time Methods For Date And Time Data

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime

In [2]:
# Components used to build the datetime objects below:
# the calendar date first, then the time of day.
my_year, my_month, my_day = 2015, 1, 1
my_hour, my_min, my_sec = 2, 30, 15

In [3]:
my_date = datetime(my_year, my_month, my_day)

In [4]:
my_date

datetime.datetime(2015, 1, 1, 0, 0)

In [5]:
my_date_time = datetime(my_year, my_month, my_day, my_hour, my_min, my_sec)

In [6]:
my_date_time

datetime.datetime(2015, 1, 1, 2, 30, 15)

In [7]:
my_date_time.year

2015

In [43]:
my_ser = pd.Series(['Nov 3, 1990','01-04-2000',None])

In [44]:
my_ser

0    Nov 3, 1990
1     01-04-2000
2           None
dtype: object

In [62]:
# format='mixed' lets pandas infer the format of each element individually;
# dayfirst=True makes an ambiguous string such as '01-04-2000' be read as day-month-year
time_ser = pd.to_datetime(my_ser, format='mixed', dayfirst=True)

In [67]:
time_ser[0].year # the year of the first element in time series

1990

In [76]:
euro_date = '20-11-1995'

In [77]:
pd.to_datetime(euro_date, dayfirst=True) # use dayfirst=True when parsing a day-first (European, %d-%m-%Y) date string

Timestamp('1995-11-20 00:00:00')

In [71]:
euro_date_2 = '10-12-2005'

In [73]:
# Without dayfirst=True, pandas reads an ambiguous string month-first (American, %m-%d-%Y),
# so '10-12-2005' becomes October 12th.
# NOTE(review): yearfirst=True is meant for strings that start with the year
# (e.g. '10/11/12' -> 2010-11-12); it does not appear to change the result here — confirm.
pd.to_datetime(euro_date_2, yearfirst=True)

Timestamp('2005-10-12 00:00:00')

In [82]:
style_date = '12--Dec--1998'

In [84]:
# Explicit format for a non-standard layout: %d = day of month, %b = abbreviated month name,
# %Y = four-digit year; the literal '--' separators must match the string exactly
pd.to_datetime(style_date, format='%d--%b--%Y')

Timestamp('1998-12-12 00:00:00')

In [87]:
custom_date = '12th of Nov 2007'

In [91]:
pd.to_datetime(custom_date, dayfirst=True)

Timestamp('2007-11-12 00:00:00')

In [92]:
sales = pd.read_csv('RetailSales_BeerWineLiquor.csv')

In [98]:
sales

Unnamed: 0,DATE,MRTSSM4453USN
0,1992-01-01,1509
1,1992-02-01,1541
2,1992-03-01,1597
3,1992-04-01,1675
4,1992-05-01,1822
...,...,...
335,2019-12-01,6630
336,2020-01-01,4388
337,2020-02-01,4533
338,2020-03-01,5562


In [94]:
sales['DATE'] = pd.to_datetime(sales['DATE'])

In [95]:
sales['DATE']

0     1992-01-01
1     1992-02-01
2     1992-03-01
3     1992-04-01
4     1992-05-01
         ...    
335   2019-12-01
336   2020-01-01
337   2020-02-01
338   2020-03-01
339   2020-04-01
Name: DATE, Length: 340, dtype: datetime64[ns]

In [103]:
sales['DATE'][0].month

1

In [110]:
sales = pd.read_csv('RetailSales_BeerWineLiquor.csv', parse_dates=['DATE']) # you can specify the column by name or index when using parse_dates

In [111]:
sales['DATE']

0     1992-01-01
1     1992-02-01
2     1992-03-01
3     1992-04-01
4     1992-05-01
         ...    
335   2019-12-01
336   2020-01-01
337   2020-02-01
338   2020-03-01
339   2020-04-01
Name: DATE, Length: 340, dtype: datetime64[ns]

In [112]:
sales = sales.set_index('DATE') # make DATE the index so the frame can be resampled by time (pass a rule, then apply an aggregation method)

In [113]:
sales.head()

Unnamed: 0_level_0,MRTSSM4453USN
DATE,Unnamed: 1_level_1
1992-01-01,1509
1992-02-01,1541
1992-03-01,1597
1992-04-01,1675
1992-05-01,1822


In [115]:
sales.resample(rule='YE').mean() # group by year and get the average per year

Unnamed: 0_level_0,MRTSSM4453USN
DATE,Unnamed: 1_level_1
1992-12-31,1807.25
1993-12-31,1794.833333
1994-12-31,1841.75
1995-12-31,1833.916667
1996-12-31,1929.75
1997-12-31,2006.75
1998-12-31,2115.166667
1999-12-31,2206.333333
2000-12-31,2375.583333
2001-12-31,2468.416667


In [116]:
sales = pd.read_csv('RetailSales_BeerWineLiquor.csv', parse_dates=['DATE'])

In [117]:
sales.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 340 entries, 0 to 339
Data columns (total 2 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   DATE           340 non-null    datetime64[ns]
 1   MRTSSM4453USN  340 non-null    int64         
dtypes: datetime64[ns](1), int64(1)
memory usage: 5.4 KB


In [118]:
sales['DATE'].dt.year

0      1992
1      1992
2      1992
3      1992
4      1992
       ... 
335    2019
336    2020
337    2020
338    2020
339    2020
Name: DATE, Length: 340, dtype: int32