In [97]:
import pandas as pd

In this exercise, we will use a small stock price dataset to see how we can use an index made up of more than one column.

In [98]:
# Four dates and, for each stock, one price per date (lists are aligned by position).
dates = pd.to_datetime(['11-Oct-2017', '13-Nov-2017', '24-Nov-2017', '10-Dec-2017'])
g_prices = [300.5, 329.0, 319.7, 287.5]
a_prices = [3300.5, 3129.0, 3319.7, 2987.5]

# One {date: price} mapping per ticker; the tickers become the columns
# and the dates become the row index of the DataFrame.
data = {
    'GOOG': dict(zip(dates, g_prices)),
    'AAPL': dict(zip(dates, a_prices)),
}

df = pd.DataFrame(data)
df.index.name = 'Date'

# Show the frame and the (single-level) DatetimeIndex it is stored under.
print(df)
print(df.index)

              AAPL   GOOG
Date                     
2017-10-11  3300.5  300.5
2017-11-13  3129.0  329.0
2017-11-24  3319.7  319.7
2017-12-10  2987.5  287.5
DatetimeIndex(['2017-10-11', '2017-11-13', '2017-11-24', '2017-12-10'], dtype='datetime64[ns]', name=u'Date', freq=None)


Now suppose we want to see this data in a slightly different format: we want each row to be identified by a combination of stock and date, used together as the index. We can do that using a MultiIndex.

## Multilevel Index

In [99]:
# Single 'price' column: the four GOOG prices followed by the four AAPL prices.
prices = {'price': g_prices + a_prices}

# Let's create a MultiIndex from a list of (stock, date) tuples.
# The tuples are generated in the same order as the rows of the price column.
index_tuples = [(ticker, day) for ticker in ['GOOG', 'AAPL'] for day in dates]
stockIndex = pd.MultiIndex.from_tuples(index_tuples, names=['stock', 'date'])

df = pd.DataFrame(prices, index=stockIndex)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,price
stock,date,Unnamed: 2_level_1
GOOG,2017-10-11,300.5
GOOG,2017-11-13,329.0
GOOG,2017-11-24,319.7
GOOG,2017-12-10,287.5
AAPL,2017-10-11,3300.5
AAPL,2017-11-13,3129.0
AAPL,2017-11-24,3319.7
AAPL,2017-12-10,2987.5


In [100]:
# Now let's see what kind of index is used for this DataFrame.
# Displaying df.index shows the index type together with its levels and level names.
df.index

MultiIndex(levels=[[u'AAPL', u'GOOG'], [2017-10-11 00:00:00, 2017-11-13 00:00:00, 2017-11-24 00:00:00, 2017-12-10 00:00:00]],
           labels=[[1, 1, 1, 1, 0, 0, 0, 0], [0, 1, 2, 3, 0, 1, 2, 3]],
           names=[u'stock', u'date'])

In [101]:
# Now let's sort the hierarchical index: rows end up ordered by stock, then by date.
# NOTE(review): the label-range slices further down presumably rely on this sorted order — confirm.
df = df.sort_index()
df

Unnamed: 0_level_0,Unnamed: 1_level_0,price
stock,date,Unnamed: 2_level_1
AAPL,2017-10-11,3300.5
AAPL,2017-11-13,3129.0
AAPL,2017-11-24,3319.7
AAPL,2017-12-10,2987.5
GOOG,2017-10-11,300.5
GOOG,2017-11-13,329.0
GOOG,2017-11-24,319.7
GOOG,2017-12-10,287.5


## Slicing DataFrame using multilevel index

In [102]:
# We can use loc to read data from a DataFrame with a multi-level index

# Let's get all records for Google: a single label is matched against the outermost level
df.loc['GOOG']

Unnamed: 0_level_0,price
date,Unnamed: 1_level_1
2017-10-11,300.5
2017-11-13,329.0
2017-11-24,319.7
2017-12-10,287.5


This method uses the outermost index level to slice the data.

In [103]:
# We can also wrap the label in parentheses to get the same records.
# Note: ('GOOG') is just a parenthesised string, not a tuple; a one-element
# tuple would be written ('GOOG',).
df.loc[('GOOG')]

Unnamed: 0_level_0,price
date,Unnamed: 1_level_1
2017-10-11,300.5
2017-11-13,329.0
2017-11-24,319.7
2017-12-10,287.5


In [104]:
# Now let's get a specific day's record by passing a (stock, date) tuple,
# i.e. one label for each level of the index
df.loc[('GOOG', '2017-10-11')]

price    300.5
Name: (GOOG, 2017-10-11 00:00:00), dtype: float64

In [105]:
# To get the price for that day, we can also specify the column to be extracted.
# NOTE(review): in the run recorded below this returned a one-row Series rather than
# a bare scalar; passing pd.Timestamp('2017-10-11') instead of the date string
# presumably yields the single element — confirm.
df.loc[('GOOG', '2017-10-11'), 'price']

stock  date      
GOOG   2017-10-11    300.5
Name: price, dtype: float64

In [106]:
# Once we have a sorted hierarchical index, we can do a variety of slicing.

# Outer-level labels reused by the list-based selections below
stock_keys = ['AAPL', 'GOOG']

# Label range over the outer level
print("\n Using Range Syntax : ")
range_rows = df.loc['AAPL':'GOOG']
print(range_rows)

# Explicit list of outer-level labels
print("\n\n Using Array notation : ")
listed_rows = df.loc[stock_keys]
print(listed_rows)

# List of stocks combined with one date, restricted to the 'price' column
print("\n\n Using Array notation and specific column filter: ")
df.loc[(stock_keys, '2017-10-11'), 'price']


 Using Range Syntax : 
                   price
stock date              
AAPL  2017-10-11  3300.5
      2017-11-13  3129.0
      2017-11-24  3319.7
      2017-12-10  2987.5
GOOG  2017-10-11   300.5
      2017-11-13   329.0
      2017-11-24   319.7
      2017-12-10   287.5


 Using Array notation : 
                   price
stock date              
AAPL  2017-10-11  3300.5
      2017-11-13  3129.0
      2017-11-24  3319.7
      2017-12-10  2987.5
GOOG  2017-10-11   300.5
      2017-11-13   329.0
      2017-11-24   319.7
      2017-12-10   287.5


 Using Array notation and specific column filter: 


stock  date      
AAPL   2017-10-11    3300.5
GOOG   2017-10-11     300.5
Name: price, dtype: float64

In [107]:
# This sort of indexing also works for the inner index:
# the tuple pairs one stock label with a list of date labels, and ':' keeps every column
filterdates = [ '2017-10-11', '2017-12-10' ]
df.loc[('AAPL', filterdates ), :]

Unnamed: 0_level_0,Unnamed: 1_level_0,price
stock,date,Unnamed: 2_level_1
AAPL,2017-10-11,3300.5
AAPL,2017-12-10,2987.5


In [115]:
# Now let's look at the scenario where we want to slice the DataFrame using only the innermost index.
# slice(None) plays the role of ':' for the stock level, so no stock is filtered out
df.loc[ ( slice(None), '2017-10-11'), : ]

Unnamed: 0_level_0,Unnamed: 1_level_0,price
stock,date,Unnamed: 2_level_1
AAPL,2017-10-11,3300.5
GOOG,2017-10-11,300.5


In [109]:
# To select a range of dates, pass a slice of labels for the date level.
# The end label is included: the result below contains the rows for 2017-11-24
df.loc[ ( slice(None), slice('2017-10-11', '2017-11-24')), : ]

Unnamed: 0_level_0,Unnamed: 1_level_0,price
stock,date,Unnamed: 2_level_1
AAPL,2017-10-11,3300.5
AAPL,2017-11-13,3129.0
AAPL,2017-11-24,3319.7
GOOG,2017-10-11,300.5
GOOG,2017-11-13,329.0
GOOG,2017-11-24,319.7


## swaplevel

swaplevel can be used to rearrange the order of the levels 

In [112]:
# We can swap the index levels using the function swaplevel, making 'date' the outer level.
# Only the level order changes; the rows keep their previous order (still grouped by stock)
df2 = df.swaplevel('date', 'stock')
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,price
date,stock,Unnamed: 2_level_1
2017-10-11,AAPL,3300.5
2017-11-13,AAPL,3129.0
2017-11-24,AAPL,3319.7
2017-12-10,AAPL,2987.5
2017-10-11,GOOG,300.5
2017-11-13,GOOG,329.0
2017-11-24,GOOG,319.7
2017-12-10,GOOG,287.5


## sortlevel

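`sortlevel` sorted the data by the values of a chosen index level. It was deprecated in pandas 0.20 and removed in pandas 1.0; use `sort_index(level=...)` instead, which sorts by the given level first and then, by default (`sort_remaining=True`), by the remaining levels.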

In [111]:
# Sort by the outer level (date); the default sort_remaining=True then orders ties by stock.
# DataFrame.sortlevel was deprecated in pandas 0.20 and removed in 1.0, so
# sort_index(level=...) is used as the supported replacement.
df2.sort_index(level=0)

Unnamed: 0_level_0,Unnamed: 1_level_0,price
date,stock,Unnamed: 2_level_1
2017-10-11,AAPL,3300.5
2017-10-11,GOOG,300.5
2017-11-13,AAPL,3129.0
2017-11-13,GOOG,329.0
2017-11-24,AAPL,3319.7
2017-11-24,GOOG,319.7
2017-12-10,AAPL,2987.5
2017-12-10,GOOG,287.5


In [113]:
# Sort by the inner level (stock); the default sort_remaining=True then orders ties by date.
# DataFrame.sortlevel was deprecated in pandas 0.20 and removed in 1.0, so
# sort_index(level=...) is used as the supported replacement.
df2.sort_index(level=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,price
date,stock,Unnamed: 2_level_1
2017-10-11,AAPL,3300.5
2017-11-13,AAPL,3129.0
2017-11-24,AAPL,3319.7
2017-12-10,AAPL,2987.5
2017-10-11,GOOG,300.5
2017-11-13,GOOG,329.0
2017-11-24,GOOG,319.7
2017-12-10,GOOG,287.5
