# Multi-Index DataFrames
### Multi-Index DataFrames are generally created through aggregation operations
* They are stored as a list of tuples, with an item for each layer of the index

In [2]:
import numpy as np
import pandas as pd

In [3]:
# Multi-index DataFrames generally show up when we perform aggregation operations.
# We can often avoid them by passing as_index=False to .groupby(), which keeps the
# grouping keys as regular columns instead of building a multi-level index.
# Load the match-level data that the rest of this notebook aggregates.
premier_league_full = pd.read_excel("../Agg_&_Reshape_DataFrames/premier_league_games_full.xlsx")


In [4]:
# Grouping by two keys puts both of them into the row index, producing a
# two-level (season, HomeTeam) MultiIndex with the summed HomeGoals as the only column.
agg_prem_league = (
    premier_league_full
    .groupby(['season', 'HomeTeam'])[['HomeGoals']]
    .sum()
)
agg_prem_league.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,HomeGoals
season,HomeTeam,Unnamed: 2_level_1
2008/2009,Arsenal,31
2008/2009,Aston Villa,27
2008/2009,Blackburn Rovers,22
2008/2009,Bolton Wanderers,21
2008/2009,Chelsea,33


In [5]:
# Pass a tuple with one label per index level to .loc to select a single row;
# slice between two such tuples to select a range of rows (both endpoints are included).
agg_prem_league.loc[("2008/2009", "Arsenal"):("2008/2009", "Bolton Wanderers")]

Unnamed: 0_level_0,Unnamed: 1_level_0,HomeGoals
season,HomeTeam,Unnamed: 2_level_1
2008/2009,Arsenal,31
2008/2009,Aston Villa,27
2008/2009,Blackburn Rovers,22
2008/2009,Bolton Wanderers,21


In [6]:
# Slice on the outer level (season) alone to grab multiple rows:
# every team row for each season in the range is returned.
agg_prem_league.loc["2008/2009": "2010/2011"]

Unnamed: 0_level_0,Unnamed: 1_level_0,HomeGoals
season,HomeTeam,Unnamed: 2_level_1
2008/2009,Arsenal,31
2008/2009,Aston Villa,27
2008/2009,Blackburn Rovers,22
2008/2009,Bolton Wanderers,21
2008/2009,Chelsea,33
2008/2009,Everton,31
2008/2009,Fulham,28
2008/2009,Hull City,18
2008/2009,Liverpool,41
2008/2009,Manchester City,40


In [7]:
# .iloc selects rows by integer position regardless of the index labels;
# pass a slice instead of a single integer to grab multiple rows.
agg_prem_league.iloc[1]

HomeGoals    27
Name: (2008/2009, Aston Villa), dtype: int64

In [8]:
# .agg() with a {column: [functions]} mapping applies several aggregations at once.
# The result has two-level columns as well: (HomeGoals, sum) and (HomeGoals, mean).
agg_prem_league = (
    premier_league_full
    .groupby(['season', 'HomeTeam'])[['HomeGoals']]
    .agg({"HomeGoals": ['sum', 'mean']})
)
agg_prem_league.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,HomeGoals,HomeGoals
Unnamed: 0_level_1,Unnamed: 1_level_1,sum,mean
season,HomeTeam,Unnamed: 2_level_2,Unnamed: 3_level_2
2008/2009,Arsenal,31,1.631579
2008/2009,Aston Villa,27,1.421053
2008/2009,Blackburn Rovers,22,1.157895
2008/2009,Bolton Wanderers,21,1.105263
2008/2009,Chelsea,33,1.736842


In [9]:
# .iloc uses integer-based (row position, column position) indexing to grab a single value:
# here row 0 and column 1, i.e. the first row's (HomeGoals, mean) entry.
agg_prem_league.iloc[0, 1]

1.631578947368421

In [10]:
# .loc selects by label: the first argument picks rows by the outer index level (season),
# the second is a tuple naming one column in the two-level column index.
agg_prem_league.loc["2010/2011", ("HomeGoals", "mean")]

HomeTeam
Arsenal                    1.736842
Aston Villa                1.368421
Birmingham City            1.000000
Blackburn Rovers           1.157895
Blackpool                  1.578947
Bolton Wanderers           1.789474
Chelsea                    2.052632
Everton                    1.631579
Fulham                     1.578947
Liverpool                  1.947368
Manchester City            1.789474
Manchester United          2.578947
Newcastle United           2.157895
Stoke City                 1.631579
Sunderland                 1.315789
Tottenham Hotspur          1.578947
West Bromwich Albion       1.578947
West Ham United            1.263158
Wigan Athletic             1.157895
Wolverhampton Wanderers    1.578947
Name: (HomeGoals, mean), dtype: float64

# Modifying Multi-Index DataFrames
### There are several ways to modify multi-index DataFrames
* Reset the index. Moves the index levels back to DataFrame columns <b>.reset_index()</b>
* Swap the index level. Changes the hierarchy for the index levels <b>.swaplevel()</b>
* Drop an index level. Drops an index level from the DataFrame entirely <b>.droplevel()</b>

### PRO TIP: In most cases it's best to reset the index and avoid multi-index DataFrames - they're not very intuitive!


In [18]:
# Rebuild the aggregated frame: rows indexed by (season, HomeTeam),
# columns indexed by (HomeGoals, sum) and (HomeGoals, mean).
agg_prem_league = (
    premier_league_full
    .groupby(['season', 'HomeTeam'])
    .agg({"HomeGoals": ['sum', 'mean']})
)
agg_prem_league.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,HomeGoals,HomeGoals
Unnamed: 0_level_1,Unnamed: 1_level_1,sum,mean
season,HomeTeam,Unnamed: 2_level_2,Unnamed: 3_level_2
2008/2009,Arsenal,31,1.631579
2008/2009,Aston Villa,27,1.421053
2008/2009,Blackburn Rovers,22,1.157895
2008/2009,Bolton Wanderers,21,1.105263
2008/2009,Chelsea,33,1.736842


In [24]:
# Drop the outer column level (level 0, "HomeGoals") so the columns are just "sum" and "mean".
# This only previews the result: droplevel returns a new DataFrame and nothing is assigned here.
agg_prem_league.droplevel(0, axis=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,sum,mean
season,HomeTeam,Unnamed: 2_level_1,Unnamed: 3_level_1
2008/2009,Arsenal,31,1.631579
2008/2009,Aston Villa,27,1.421053
2008/2009,Blackburn Rovers,22,1.157895
2008/2009,Bolton Wanderers,21,1.105263
2008/2009,Chelsea,33,1.736842
...,...,...,...
2015/2016,Swansea City,20,1.052632
2015/2016,Tottenham Hotspur,35,1.842105
2015/2016,Watford,20,1.052632
2015/2016,West Bromwich Albion,20,1.052632


In [25]:
# Flatten the columns by dropping the outer level ("HomeGoals"), keeping "sum" and "mean".
# The nlevels guard makes this cell safe to re-run: once the columns are single-level,
# calling droplevel again would raise a ValueError (at least one level must remain).
if agg_prem_league.columns.nlevels > 1:
    agg_prem_league = agg_prem_league.droplevel(0, axis=1)

In [28]:
# swaplevel() with no arguments swaps the two row-index levels, making HomeTeam the outer level,
# so .loc["Arsenal"] returns one row per season for that team.
agg_prem_league.swaplevel().loc["Arsenal"]

Unnamed: 0_level_0,sum,mean
season,Unnamed: 1_level_1,Unnamed: 2_level_1
2008/2009,31,1.631579
2009/2010,48,2.526316
2010/2011,33,1.736842
2011/2012,39,2.052632
2012/2013,47,2.473684
2013/2014,36,1.894737
2014/2015,41,2.157895
2015/2016,31,1.631579


In [29]:
# reset_index() moves the index levels (season, HomeTeam) back into regular columns
# and replaces them with a default integer index. The result is displayed, not assigned.
agg_prem_league.reset_index()

Unnamed: 0,season,HomeTeam,sum,mean
0,2008/2009,Arsenal,31,1.631579
1,2008/2009,Aston Villa,27,1.421053
2,2008/2009,Blackburn Rovers,22,1.157895
3,2008/2009,Bolton Wanderers,21,1.105263
4,2008/2009,Chelsea,33,1.736842
...,...,...,...,...
155,2015/2016,Swansea City,20,1.052632
156,2015/2016,Tottenham Hotspur,35,1.842105
157,2015/2016,Watford,20,1.052632
158,2015/2016,West Bromwich Albion,20,1.052632
