In [1]:
import pandas as pd

In [2]:
# Toy spending data, one record per (name, year) observation.
# Note that ("star", 2017) occurs twice, so the pair is not unique.
df = pd.DataFrame(
    {
        "name": ["star", "star", "circle", "circle"],
        "year": [2017, 2017, 2017, 2016],
        "spending": [10, 20, 10, 20],
    }
)

In [3]:
# Show the freshly built frame; at this point the rows carry the default integer index.
print(df)

     name  year  spending
0    star  2017        10
1    star  2017        20
2  circle  2017        10
3  circle  2016        20


In [4]:
# Promote the "name" and "year" columns to a two-level (Multi)Index.
# With inplace=True the existing object bound to `df` is modified and the
# call returns None, rather than handing back a new, re-indexed copy.
# Caveat: re-running this cell fails with a KeyError, because the two
# columns no longer exist once they have been moved into the index.
df.set_index(keys=["name", "year"], inplace=True)

In [5]:
# Show the frame again: name/year now label the rows and spending is the only remaining column.
print(df)

             spending
name   year          
star   2017        10
       2017        20
circle 2017        10
       2016        20


In [9]:
# The row labels are no longer the default integers: they are now the
# (name, year) values that used to live in ordinary columns.
# .loc therefore takes those labels -- one value per index level, written
# here as an explicit tuple -- instead of a row number such as df.loc[0].
# More background on hierarchical indexing:
# https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html#advanced-indexing-with-hierarchical-index
df.loc[("circle", 2017)]



             spending
name   year          
circle 2017        10


In [10]:
# Aggregate spending for every (name, year) pair by grouping on the index
# levels and summing within each group.
# `DataFrame.sum(level=...)` was deprecated in pandas 1.3 and removed in
# pandas 2.0; `groupby(level=...).sum()` is the supported equivalent and
# yields the same result, sorted by the group keys.
agg_df = df.groupby(level=["name", "year"]).sum()
print(agg_df)

             spending
name   year          
circle 2016        20
       2017        10
star   2017        30


In [25]:
# The aggregated frame is indexed by the same (name, year) levels as the
# original, so a single group can be looked up with .loc in the same way,
# passing one label per level.
agg_df.loc[("circle", 2016)]

spending    20
Name: (circle, 2016), dtype: int64