In [1]:
from datetime import datetime

In [2]:
# Components of the example timestamp: 2 January 2020 at 13:30:15.
year, month, day = 2020, 1, 2
hour, minute, second = 13, 30, 15

In [3]:
# Date-only construction: the time fields are not supplied here.
date = datetime(year=year, month=month, day=day)

In [4]:
# Display the object: the time fields that were not supplied show up as 0 (midnight).
date

datetime.datetime(2020, 1, 2, 0, 0)

In [5]:
# Full timestamp: date components plus hour, minute and second.
date_time = datetime(
    year=year, month=month, day=day,
    hour=hour, minute=minute, second=second,
)

In [6]:
# Display the object: the hour, minute and second passed in are now part of the value.
date_time

datetime.datetime(2020, 1, 2, 13, 30, 15)

In [9]:
# Confirm the class of the object: Python's built-in datetime.datetime.
type(date_time)

datetime.datetime

## NumPy Datetime Arrays
We mentioned that NumPy handles dates more efficiently than Python's datetime format.<br>
The NumPy data type is called <em>datetime64</em> to distinguish it from Python's datetime.

In this section we'll show how to set up datetime arrays in NumPy. These will become useful later on in the course.<br>
For more info on NumPy datetimes, visit https://numpy.org/doc/stable/reference/arrays.datetime.html

In [10]:
import numpy as np

In [11]:
# Without a dtype, NumPy keeps these as fixed-width strings rather than dates.
np.array(['2020-03-15','2020-03-16','2020-03-17'])

array(['2020-03-15', '2020-03-16', '2020-03-17'], dtype='<U10')

In [14]:
# dtype='datetime64' parses the strings as dates; day precision ([D]) is chosen for this input.
np.array(['2020-03-15','2020-03-16','2020-03-17'], dtype='datetime64')

array(['2020-03-15', '2020-03-16', '2020-03-17'], dtype='datetime64[D]')

In [None]:
# A unit in square brackets requests an explicit precision; [Y] asks for year precision.
np.array(['2020-03-15','2020-03-16','2020-03-17'], dtype='datetime64[Y]')

In [15]:
# [h] requests hour precision; since the strings carry no time, each value shows hour 00.
np.array(['2020-03-15','2020-03-16','2020-03-17'], dtype='datetime64[h]')

array(['2020-03-15T00', '2020-03-16T00', '2020-03-17T00'],
      dtype='datetime64[h]')

## NumPy Date Ranges
Just as <tt>np.arange(start,stop,step)</tt> can be used to produce an array of evenly-spaced integers, we can pass a <tt>dtype</tt> argument to obtain an array of dates. Remember that the stop date is <em>exclusive</em>.

In [16]:
# Integer warm-up: start at 0, stop before 10, step by 2.
np.arange(0,10,2)

array([0, 2, 4, 6, 8])

In [18]:
# Same pattern with dates: every 7th day from 2017-06-01 up to, but not including, 2018-06-23.
np.arange('2017-06-01','2018-06-23', 7, dtype='datetime64[D]')

array(['2017-06-01', '2017-06-08', '2017-06-15', '2017-06-22',
       '2017-06-29', '2017-07-06', '2017-07-13', '2017-07-20',
       '2017-07-27', '2017-08-03', '2017-08-10', '2017-08-17',
       '2017-08-24', '2017-08-31', '2017-09-07', '2017-09-14',
       '2017-09-21', '2017-09-28', '2017-10-05', '2017-10-12',
       '2017-10-19', '2017-10-26', '2017-11-02', '2017-11-09',
       '2017-11-16', '2017-11-23', '2017-11-30', '2017-12-07',
       '2017-12-14', '2017-12-21', '2017-12-28', '2018-01-04',
       '2018-01-11', '2018-01-18', '2018-01-25', '2018-02-01',
       '2018-02-08', '2018-02-15', '2018-02-22', '2018-03-01',
       '2018-03-08', '2018-03-15', '2018-03-22', '2018-03-29',
       '2018-04-05', '2018-04-12', '2018-04-19', '2018-04-26',
       '2018-05-03', '2018-05-10', '2018-05-17', '2018-05-24',
       '2018-05-31', '2018-06-07', '2018-06-14', '2018-06-21'],
      dtype='datetime64[D]')

In [20]:
# No step given: values advance one year at a time, and the stop year 2022 is excluded.
np.arange('1954','2022', dtype='datetime64[Y]')

array(['1954', '1955', '1956', '1957', '1958', '1959', '1960', '1961',
       '1962', '1963', '1964', '1965', '1966', '1967', '1968', '1969',
       '1970', '1971', '1972', '1973', '1974', '1975', '1976', '1977',
       '1978', '1979', '1980', '1981', '1982', '1983', '1984', '1985',
       '1986', '1987', '1988', '1989', '1990', '1991', '1992', '1993',
       '1994', '1995', '1996', '1997', '1998', '1999', '2000', '2001',
       '2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009',
       '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017',
       '2018', '2019', '2020', '2021'], dtype='datetime64[Y]')

## Pandas Datetime Index

We'll usually deal with time series as a datetime index when working with pandas dataframes. Fortunately, pandas has a lot of functions and methods to work with time series!<br>
For more on the pandas DatetimeIndex, visit https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html

In [21]:
import pandas as pd

In [22]:
# Seven consecutive daily timestamps starting on 2020-01-01.
pd.date_range('2020-01-01', periods=7, freq='D')

DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04',
               '2020-01-05', '2020-01-06', '2020-01-07'],
              dtype='datetime64[ns]', freq='D')

<div class="alert alert-info"><strong>DatetimeIndex Frequencies:</strong> When we used <tt>pd.date_range()</tt> above, we passed in the frequency parameter <tt>'D'</tt>. This created a series of 7 dates spaced one day apart. We'll cover this topic in depth in upcoming lectures, but for now, a list of time series offset aliases like <tt>'D'</tt> can be found <a href='https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases'>here</a>.</div>

In [23]:
# The start date can also be written in a human-readable form such as 'Jan 01, 2018'.
pd.date_range('Jan 01, 2018', periods=7, freq='D')

DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
               '2018-01-05', '2018-01-06', '2018-01-07'],
              dtype='datetime64[ns]', freq='D')

In [25]:
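# NOTE: pd.date_range('Jan 01,2018', periods=7, freq='D') raised an error when tried here; it differs from the cell above only by the missing space after the comma.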

In [33]:
# Parse each string on its own so the list may mix formats. Passing the whole
# list to pd.to_datetime in one call raises ValueError on pandas >= 2.0, which
# infers a single format from the first element and applies it to every entry.
mixed_format_dates = ['1/2/2018', 'Jan 02, 2018', '2/1/2022']
parsed_dates = pd.DatetimeIndex([pd.to_datetime(text) for text in mixed_format_dates])
parsed_dates

# Ambiguous numeric dates are read month-first (American format):
# '1/2/2018' is January 2nd and '2/1/2022' is February 1st.

DatetimeIndex(['2018-01-02', '2018-01-02', '2022-02-01'], dtype='datetime64[ns]', freq=None)

In [32]:
# An explicit format overrides the month-first default: here the first field is the day,
# so '1/2/2018' is read as 1 February 2018.
pd.to_datetime(['1/2/2018','2/1/2022'], format='%d/%m/%Y')

DatetimeIndex(['2018-02-01', '2022-01-02'], dtype='datetime64[ns]', freq=None)

In [34]:
# The format string can describe any separator, e.g. a literal '--' between the fields.
pd.to_datetime(['1--2--2018','2--1--2022'], format='%d--%m--%Y')

DatetimeIndex(['2018-02-01', '2022-01-02'], dtype='datetime64[ns]', freq=None)

In [36]:
# Draw from a locally seeded generator so that Restart & Run All reproduces the
# same sample values; an unseeded np.random.randn call changes on every run.
SEED = 42
data = np.random.RandomState(SEED).randn(3, 2)  # 3 rows x 2 columns of standard-normal draws
cols = ['A','B']
print(data)

[[-1.96191911  0.36549312]
 [-0.33326045  1.34496014]
 [ 0.17724045 -0.74829175]]


In [37]:
# Three consecutive daily timestamps, used as row labels for the random data.
idx = pd.date_range(start='2020-01-01', periods=3, freq='D')

df = pd.DataFrame(data=data, columns=cols, index=idx)

In [38]:
# Display the frame: each row is labelled by a date from the index.
df

Unnamed: 0,A,B
2020-01-01,-1.961919,0.365493
2020-01-02,-0.33326,1.34496
2020-01-03,0.17724,-0.748292


In [39]:
# The row labels form a DatetimeIndex that keeps the daily frequency it was built with.
df.index

DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03'], dtype='datetime64[ns]', freq='D')

In [40]:
# Latest timestamp in the index.
df.index.max()

Timestamp('2020-01-03 00:00:00', freq='D')

In [41]:
# Integer position (0-based) of the latest timestamp in the index.
df.index.argmax()

2