<h1 style="color:blue" align="center">Pandas Time Series Tutorial: DatetimeIndex</h1>

In [37]:
import pandas as pd

# Read the AAPL price history, parsing the "Date" column into timestamps and
# promoting it to the row index so the frame is indexed by a DatetimeIndex.
df = pd.read_csv(
    "aapl.csv",
    index_col="Date",
    parse_dates=["Date"],
)

# Peek at the first two rows to confirm the load.
df.head(2)

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-07-07,142.9,144.75,142.9,144.18,19201712
2017-07-06,143.02,143.5,142.41,142.73,24128782


In [34]:
# "Date" was consumed as the index by read_csv(index_col="Date"), so it is no
# longer a column and `df.Date` raises AttributeError on a fresh kernel.
# Inspect the first index label instead: it is a pandas Timestamp.
type(df.index[0])

pandas._libs.tslib.Timestamp

In [2]:
# Display the row index; because read_csv parsed "Date" and used it as the
# index, this is a DatetimeIndex (note the dates run newest -> oldest).
df.index

DatetimeIndex(['2017-07-07', '2017-07-06', '2017-07-05', '2017-07-03',
               '2017-06-30', '2017-06-29', '2017-06-28', '2017-06-27',
               '2017-06-26', '2017-06-23',
               ...
               '2016-07-22', '2016-07-21', '2016-07-20', '2016-07-19',
               '2016-07-18', '2016-07-15', '2016-07-14', '2016-07-13',
               '2016-07-12', '2016-07-11'],
              dtype='datetime64[ns]', name='Date', length=251, freq=None)

<h3 style="color:purple">What is a DatetimeIndex, and what are its benefits?</h3>

<h4>(1) Partial Date Index: Select a Specific Month's Data</h4>

In [3]:
# Select the row(s) for one specific day.
# Row selection by date string must go through .loc: plain df['2017-06-30'] is
# treated as a column lookup in current pandas and raises KeyError.
# A same-day slice (instead of a scalar label) keeps the result a one-row
# DataFrame rather than collapsing it to a Series.
df.loc['2017-06-30':'2017-06-30']

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-06-30,144.45,144.96,143.78,144.02,23024107


In [4]:
# Partial-string indexing: a year-month string selects every row in that month.
# Use .loc for row selection; df['2017-06'] is a column lookup in current
# pandas and raises KeyError.
df.loc['2017-06'].head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-06-30,144.45,144.96,143.78,144.02,23024107
2017-06-29,144.71,145.13,142.28,143.68,31499368
2017-06-28,144.49,146.11,143.16,145.83,22082432
2017-06-27,145.01,146.16,143.62,143.73,24761891
2017-06-26,147.17,148.28,145.38,145.82,25692361


<h4>Average closing price of AAPL stock in June 2017</h4>

In [5]:
# Mean of the "Close" column over all June 2017 rows.
# .loc takes the row selector (partial date string) and the column label
# together; df['2017-06'] would be a column lookup and raise KeyError in
# current pandas.
df.loc['2017-06', 'Close'].mean()

147.8313636363636

In [6]:
# A bare year string selects every row in that year; show the first two.
# Use .loc for row selection; df['2017'] is a column lookup in current
# pandas and raises KeyError.
df.loc['2017'].head(2)

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-07-07,142.9,144.75,142.9,144.18,19201712
2017-07-06,143.02,143.5,142.41,142.73,24128782


<h4>(2) Select Date Range</h4>

In [7]:
# Label-based date-range slice; both endpoints are inclusive.
# The index is sorted newest -> oldest, so the later date comes first.
df.loc['2017-01-08':'2017-01-03']

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-01-06,116.78,118.16,116.47,117.91,31751900
2017-01-05,115.92,116.86,115.81,116.61,22193587
2017-01-04,115.85,116.51,115.75,116.02,21118116
2017-01-03,115.8,116.33,114.76,116.15,28781865


In [8]:
# All rows for January 2017 via partial-string indexing.
# Use .loc for row selection; df['2017-01'] is a column lookup in current
# pandas and raises KeyError.
df.loc['2017-01']

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-01-31,121.15,121.39,120.62,121.35,49200993
2017-01-30,120.93,121.63,120.66,121.63,30377503
2017-01-27,122.14,122.35,121.6,121.95,20562944
2017-01-26,121.67,122.44,121.6,121.94,26337576
2017-01-25,120.42,122.1,120.28,121.88,32586673
2017-01-24,119.55,120.1,119.5,119.97,23211038
2017-01-23,120.0,120.81,119.77,120.08,22050218
2017-01-20,120.45,120.45,119.73,120.0,32597892
2017-01-19,119.4,120.09,119.37,119.78,25597291
2017-01-18,120.0,120.5,119.71,119.99,23712961


<h3 style="color:purple">Resampling</h3>

In [42]:
# Downsample the daily closing prices to one value per calendar month
# (labelled by month end) by averaging, then show the first five months.
# NOTE(review): pandas >= 2.2 deprecates the 'M' alias in favour of 'ME';
# 'M' is kept here so the cell still runs on older pandas.
(
    df['Close']
    .resample('M')
    .mean()
    .head()
)

Date
2016-07-31     99.473333
2016-08-31    107.665217
2016-09-30    110.857143
2016-10-31    115.707143
2016-11-30    110.154286
Freq: M, Name: Close, dtype: float64

In [10]:
# All rows for July 2016, to compare against the resampled monthly mean.
# Use .loc for row selection; df['2016-07'] is a column lookup in current
# pandas and raises KeyError.
df.loc['2016-07']

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016-07-29,104.19,104.55,103.68,104.21,27733688
2016-07-28,102.83,104.45,102.82,104.34,39869839
2016-07-27,104.26,104.35,102.75,102.95,92344820
2016-07-26,96.82,97.97,96.42,96.67,56239822
2016-07-25,98.25,98.84,96.92,97.34,40382921
2016-07-22,99.26,99.3,98.31,98.66,28313669
2016-07-21,99.83,101.0,99.13,99.43,32702028
2016-07-20,100.0,100.46,99.74,99.96,26275968
2016-07-19,99.56,100.0,99.34,99.87,23779924
2016-07-18,98.7,100.13,98.6,99.83,36493867


In [None]:
%matplotlib inline
df['Close'].plot()

In [12]:
# Bar chart of the month-by-month average closing price.
(
    df['Close']
    .resample('M')
    .mean()
    .plot(kind='bar')
)

<matplotlib.axes._subplots.AxesSubplot at 0x21037a3c4a8>