# Date and Time Methods

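- Python's standard library provides the `datetime` object for working with a single timestamp
- pandas parses dates with `pd.to_datetime` and exposes datetime components of a Series through the `.dt` accessor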

In [1]:
import numpy as np
import pandas as pd
from datetime import datetime

In [2]:
# Date components (year, month, day) and time components (hour, minute, second)
myyr, mymn, mydy = 2015, 1, 1
myhr, mymin, mysec = 2, 30, 15

In [5]:
# The components can be passed positionally (in year -> second order) or,
# as here, matched explicitly by keyword.
mydate = datetime(
    year=myyr, month=mymn, day=mydy,
    hour=myhr, minute=mymin, second=mysec,
)

In [6]:
mydate

datetime.datetime(2015, 1, 1, 2, 30, 15)

In [7]:
# ctime is a method, so it has to be called to get the formatted date string;
# without the parentheses the cell only displays the function object.
mydate.ctime()

'Thu Jan  1 02:30:15 2015'

In [8]:
mydate.year

2015

In [11]:
mydate.hour

2

In [12]:
# Two differently formatted date strings plus one missing value
myser = pd.Series(data=['Nov 3, 1990', '2000-01-01', None])

In [13]:
myser

0    Nov 3, 1990
1     2000-01-01
2           None
dtype: object

In [14]:
myser[0]

'Nov 3, 1990'

In [21]:
# Ask pandas to work out the date format of each string and convert the
# Series to datetime64; the missing value (None) becomes NaT.
# NOTE(review): newer pandas releases (2.0+) appear to infer a single format
# from the first element, so mixed-format input like this may need
# format='mixed' there — confirm against the installed version.
timeser = pd.to_datetime(myser) # Very powerful!
timeser

0   1990-11-03
1   2000-01-01
2          NaT
dtype: datetime64[ns]

In [22]:
timeser[0].year

1990

In [23]:
# But how does pandas know whether a date is European (day first) or US (month first)?
# Here the first field is 31, which can only be a day.
obvi_euro_d = '31-12-2000'

In [24]:
pd.to_datetime(obvi_euro_d)

Timestamp('2000-12-31 00:00:00')

In [25]:
euro_d = '10-12-2000'

In [26]:
pd.to_datetime(euro_d) # When the order is ambiguous, pandas assumes the US convention (month first)

Timestamp('2000-10-12 00:00:00')

In [29]:
pd.to_datetime(euro_d, dayfirst=True) # Pass dayfirst=True to parse an ambiguous date as European (day first)

Timestamp('2000-12-10 00:00:00')

In [30]:
style_d = '12--Dec--2000'

In [31]:
# For unusual layouts we may need to tell pandas the exact format of the date.
pd.to_datetime(style_d, format='%d--%b--%Y') # The format codes are listed in the Python datetime (strftime/strptime) docs

Timestamp('2000-12-12 00:00:00')

In [32]:
custom_d_code = '12th of Dec 2000'

In [36]:
pd.to_datetime(custom_d_code) # It is really good at understanding dates!

Timestamp('2000-12-12 00:00:00')

In [37]:
sales = pd.read_csv('RetailSales_BeerWineLiquor.csv')

In [38]:
sales

Unnamed: 0,DATE,MRTSSM4453USN
0,1992-01-01,1509
1,1992-02-01,1541
2,1992-03-01,1597
3,1992-04-01,1675
4,1992-05-01,1822
...,...,...
335,2019-12-01,6630
336,2020-01-01,4388
337,2020-02-01,4533
338,2020-03-01,5562


In [39]:
sales['DATE']

0      1992-01-01
1      1992-02-01
2      1992-03-01
3      1992-04-01
4      1992-05-01
          ...    
335    2019-12-01
336    2020-01-01
337    2020-02-01
338    2020-03-01
339    2020-04-01
Name: DATE, Length: 340, dtype: object

In [42]:
# Convert to datetime
sales['DATE'] = pd.to_datetime(sales['DATE'])
sales['DATE']

0     1992-01-01
1     1992-02-01
2     1992-03-01
3     1992-04-01
4     1992-05-01
         ...    
335   2019-12-01
336   2020-01-01
337   2020-02-01
338   2020-03-01
339   2020-04-01
Name: DATE, Length: 340, dtype: datetime64[ns]

In [43]:
# One last way: parse the dates while reading the CSV instead of converting afterwards.
# parse_dates=[0] parses the column at position 0 (DATE) as datetimes.
sales = pd.read_csv('RetailSales_BeerWineLiquor.csv', parse_dates=[0])
sales

Unnamed: 0,DATE,MRTSSM4453USN
0,1992-01-01,1509
1,1992-02-01,1541
2,1992-03-01,1597
3,1992-04-01,1675
4,1992-05-01,1822
...,...,...
335,2019-12-01,6630
336,2020-01-01,4388
337,2020-02-01,4533
338,2020-03-01,5562


In [44]:
# Resampling...

In [45]:
# Move DATE into the index so the frame can be resampled by time.
# NOTE(review): this rebinds `sales`, so re-running the cell fails because DATE
# is no longer a column — reload the CSV first if you need to run it again.
sales = sales.set_index('DATE')

In [48]:
# Group the rows by calendar year and average each group; each result row is
# labelled with the last day of its year.
# NOTE(review): pandas 2.2+ reportedly deprecates the 'A' alias in favor of 'YE' — confirm for your version.
sales.resample(rule='A').mean() # A = year end frequency...

Unnamed: 0_level_0,MRTSSM4453USN
DATE,Unnamed: 1_level_1
1992-12-31,1807.25
1993-12-31,1794.833333
1994-12-31,1841.75
1995-12-31,1833.916667
1996-12-31,1929.75
1997-12-31,2006.75
1998-12-31,2115.166667
1999-12-31,2206.333333
2000-12-31,2375.583333
2001-12-31,2468.416667


In [49]:
# Reload the data so DATE is an ordinary (datetime) column again rather than the index.
sales = pd.read_csv('RetailSales_BeerWineLiquor.csv', parse_dates=[0])
sales.head()

Unnamed: 0,DATE,MRTSSM4453USN
0,1992-01-01,1509
1,1992-02-01,1541
2,1992-03-01,1597
3,1992-04-01,1675
4,1992-05-01,1822


In [50]:
sales.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 340 entries, 0 to 339
Data columns (total 2 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   DATE           340 non-null    datetime64[ns]
 1   MRTSSM4453USN  340 non-null    int64         
dtypes: datetime64[ns](1), int64(1)
memory usage: 5.4 KB


In [51]:
# The .dt accessor exposes datetime components (year, month, hour, ...) for every value in a datetime Series
sales['DATE'].dt.year

0      1992
1      1992
2      1992
3      1992
4      1992
       ... 
335    2019
336    2020
337    2020
338    2020
339    2020
Name: DATE, Length: 340, dtype: int64

In [52]:
sales['DATE'].dt.hour

0      0
1      0
2      0
3      0
4      0
      ..
335    0
336    0
337    0
338    0
339    0
Name: DATE, Length: 340, dtype: int64

In [53]:
sales['DATE'].dt.month

0       1
1       2
2       3
3       4
4       5
       ..
335    12
336     1
337     2
338     3
339     4
Name: DATE, Length: 340, dtype: int64