# MultiIndex

In [12]:
import pandas as pd

In [13]:
# Load the Big Mac price data; parse_dates turns the Date column into datetimes.
bigmac = pd.read_csv("bigmac.csv", parse_dates=["Date"])
bigmac.head(3)

Unnamed: 0,Date,Country,Price in US Dollars
0,2016-01-01,Argentina,2.39
1,2016-01-01,Australia,3.74
2,2016-01-01,Brazil,3.35


In [14]:
# Check the column dtypes: Date is datetime64[ns] because the load cell passed parse_dates.
bigmac.dtypes

Date                   datetime64[ns]
Country                        object
Price in US Dollars           float64
dtype: object

In [15]:
# Summary of the frame: index range, non-null count and dtype per column, memory usage.
bigmac.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 652 entries, 0 to 651
Data columns (total 3 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   Date                 652 non-null    datetime64[ns]
 1   Country              652 non-null    object        
 2   Price in US Dollars  652 non-null    float64       
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 15.4+ KB


## Create a MultiIndex with the set_index Method

Tip: Put the column with the fewest unique values in the outermost level.

In [16]:
# Reload the flat frame (default integer index) as the starting point for set_index.
bigmac = pd.read_csv("bigmac.csv", parse_dates=["Date"])
bigmac.head(3)

Unnamed: 0,Date,Country,Price in US Dollars
0,2016-01-01,Argentina,2.39
1,2016-01-01,Australia,3.74
2,2016-01-01,Brazil,3.35


In [17]:
# Distinct values per column; used to decide which column becomes the outer index level.
bigmac.nunique()

Date                    12
Country                 58
Price in US Dollars    330
dtype: int64

In [18]:
# Date has fewer unique values than Country, so it goes in the outer level.
# None of these calls is assigned, so the name bigmac keeps pointing at the flat frame.
bigmac.set_index(["Date", "Country"])

# Alternative ordering: Country as the outer level, Date as the inner level.
bigmac.set_index(["Country", "Date"])

# Only this last expression is rendered as the cell output.
bigmac.set_index(["Date", "Country"])


Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2016-01-01,Argentina,2.39
2016-01-01,Australia,3.74
2016-01-01,Brazil,3.35
2016-01-01,Britain,4.22
2016-01-01,Canada,4.14
...,...,...
2010-01-01,Turkey,3.83
2010-01-01,UAE,2.99
2010-01-01,Ukraine,1.83
2010-01-01,United States,3.58


## The sort_index Method on a MultiIndex DataFrame

In [19]:
# Build the (Date, Country) MultiIndex at load time via index_col instead of calling set_index.
bigmac = pd.read_csv("bigmac.csv", parse_dates=["Date"], index_col= ["Date", "Country"])
bigmac.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2016-01-01,Argentina,2.39
2016-01-01,Australia,3.74
2016-01-01,Brazil,3.35


In [20]:
# Three variations; none is assigned, and only the last one is rendered below.
bigmac.sort_index()
# Same call with the flag spelled out (NOTE(review): ascending=True is presumably the default — confirm).
bigmac.sort_index(ascending=True)
# Every level descending: newest Date first, Country from Z to A (see the output).
bigmac.sort_index(ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2016-01-01,Vietnam,2.67
2016-01-01,Venezuela,0.66
2016-01-01,Uruguay,3.74
2016-01-01,United States,4.93
2016-01-01,Ukraine,1.54
...,...,...
2010-01-01,Canada,3.97
2010-01-01,Britain,3.67
2010-01-01,Brazil,4.76
2010-01-01,Australia,3.98


Sort according to level:

In [21]:
# `ascending` takes one flag per index level, in level order (Date, Country).
# Date ascending, Country descending -- result discarded, shown only as a variation.
bigmac.sort_index(ascending=[True, False])
# Keep this ordering for the following cells: Date descending, Country ascending.
bigmac = bigmac.sort_index(ascending=[False, True])

In [22]:
# Sort using the Date level as the sort key (result not displayed).
bigmac.sort_index(level = "Date")

# Sort using the Country level as the sort key; this is the output shown below.
bigmac.sort_index(level = "Country")

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-07-01,Argentina,3.56
2011-07-01,Argentina,4.84
2012-01-01,Argentina,4.64
2012-07-01,Argentina,4.16
...,...,...
2014-01-01,Vietnam,2.84
2014-07-01,Vietnam,2.83
2015-01-01,Vietnam,2.81
2015-07-01,Vietnam,2.75


In [23]:
# bigmac still has the ordering persisted in cell 21 (Date descending, Country ascending);
# the level-based sorts in cell 22 were not assigned.
bigmac.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2016-01-01,Argentina,2.39
2016-01-01,Australia,3.74
2016-01-01,Austria,3.76


## Extract Rows from a MultiIndex DataFrame

In [24]:
# Reload with (Date, Country) as the row MultiIndex and sort it in the same expression,
# so the frame is ordered by Date and then Country before any label lookups.
bigmac = (
    pd.read_csv("bigmac.csv", parse_dates=["Date"], index_col=["Date", "Country"])
    .sort_index()
)
bigmac.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76


In [25]:
# "Argentina" is not a column: it is a value of the second index level, so both arguments
# are row labels here. This form is ambiguous to read; the tuple form used in the next
# cells makes the row key explicit.
bigmac.loc["2010-01-01", "Argentina"]

# Here the second argument IS a column label, so this returns a Series that keeps the MultiIndex.
bigmac.loc["2010-01-01", "Price in US Dollars"]

Date        Country       
2010-01-01  Argentina         1.84
            Australia         3.98
            Brazil            4.76
            Britain           3.67
            Canada            3.97
            Chile             3.18
            China             1.83
            Colombia          3.91
            Costa Rica        3.52
            Czech Republic    3.71
            Denmark           5.99
            Egypt             2.38
            Euro area         4.84
            Hong Kong         1.91
            Hungary           3.86
            Indonesia         2.24
            Israel            3.99
            Japan             3.50
            Latvia            3.09
            Lithuania         2.87
            Malaysia          2.08
            Mexico            2.50
            New Zealand       3.61
            Norway            7.02
            Pakistan          2.42
            Peru              2.81
            Philippines       2.21
            Poland          

In [34]:
# A tuple supplies one label per index level and selects a single row (returned as a Series).
bigmac.loc[("2010-01-01", "Argentina")]

Price in US Dollars    1.84
Name: (2010-01-01 00:00:00, Argentina), dtype: float64

In [39]:
# General pattern: bigmac.loc[(row-level labels, ...), column label]
bigmac.loc[("2010-01-01", "Argentina"), "Price in US Dollars"]

Date        Country  
2010-01-01  Argentina    1.84
Name: Price in US Dollars, dtype: float64

In [37]:
# The second tuple sits in the column position; as the output shows, each entry is looked up
# as a column label, so the same column comes back twice.
# NOTE(review): a list is the usual way to request several columns; tuple handling may
# differ across pandas versions — confirm.
bigmac.loc[("2010-01-01", "Argentina"), ("Price in US Dollars", "Price in US Dollars")]

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars,Price in US Dollars
Date,Country,Unnamed: 2_level_1,Unnamed: 3_level_1
2010-01-01,Argentina,1.84,1.84


In [41]:
# Parentheses without a trailing comma do not create a tuple: this is the plain string
# "2010-01-01", which selects every row for that date.
bigmac.loc[("2010-01-01")]

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76
2010-01-01,Britain,3.67
2010-01-01,Canada,3.97
2010-01-01,Chile,3.18
2010-01-01,China,1.83
2010-01-01,Colombia,3.91
2010-01-01,Costa Rica,3.52
2010-01-01,Czech Republic,3.71


In [43]:
# Peek at the first rows again before switching to position-based access.
bigmac.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76


In [45]:
# iloc is position-based: 0 is the first row, whatever its (Date, Country) labels are.
bigmac.iloc[0]

Price in US Dollars    1.84
Name: (2010-01-01 00:00:00, Argentina), dtype: float64

## The transpose Method

It swaps the axes of the DataFrame: the row labels become the column headers, and the column headers become the row labels.

In [47]:
# Fresh, sorted copy of the data with (Date, Country) as the row MultiIndex,
# built in one chained expression rather than sorting in place afterwards.
bigmac = (
    pd.read_csv("bigmac.csv", parse_dates=["Date"], index_col=["Date", "Country"])
    .sort_index()
)
bigmac.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76


In [51]:
# Swap rows and columns: the (Date, Country) MultiIndex now labels the columns.
# NOTE: this rebinds bigmac to the transposed frame, so re-running this cell on its own
# transposes it back; re-run the load cell above first.
bigmac = bigmac.transpose()
bigmac.head(3)

Date,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,...,2016-01-01,2016-01-01,2016-01-01,2016-01-01,2016-01-01,2016-01-01,2016-01-01,2016-01-01,2016-01-01,2016-01-01
Country,Argentina,Australia,Brazil,Britain,Canada,Chile,China,Colombia,Costa Rica,Czech Republic,...,Switzerland,Taiwan,Thailand,Turkey,UAE,Ukraine,United States,Uruguay,Venezuela,Vietnam
Price in US Dollars,1.84,3.98,4.76,3.67,3.97,3.18,1.83,3.91,3.52,3.71,...,6.44,2.08,3.09,3.41,3.54,1.54,4.93,3.74,0.66,2.67


In [59]:
# bigmac is the transposed frame here: one row ("Price in US Dollars") and MultiIndex columns.
# Only the last expression is rendered.
bigmac.loc["Price in US Dollars"]
# Rows tuple first, then columns tuple: every column for the date 2010-01-01.
bigmac.loc[("Price in US Dollars",), ("2010-01-01",)]
# A single (Date, Country) column.
bigmac.loc[("Price in US Dollars",), ("2010-01-01", "Sri Lanka")]
# Slice between two column keys; as the output shows, both endpoints (Sri Lanka and Ukraine) are included.
bigmac.loc[("Price in US Dollars",), ("2010-01-01", "Sri Lanka"):("2010-01-01","Ukraine")]

Date,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01,2010-01-01
Country,Sri Lanka,Sweden,Switzerland,Taiwan,Thailand,Turkey,UAE,Ukraine
Price in US Dollars,1.83,5.51,6.3,2.36,2.11,3.83,2.99,1.83


## The swaplevel Method

As the name says, it swaps two levels of the index.

In [61]:
# Start this section from a freshly loaded frame, indexed by (Date, Country)
# and sorted via a chained call instead of an in-place sort.
bigmac = (
    pd.read_csv("bigmac.csv", parse_dates=["Date"], index_col=["Date", "Country"])
    .sort_index()
)
bigmac.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76


In [68]:
# All five calls below produce the same result: with only two levels, the order in which
# they are named (or numbered) does not matter -- Date and Country simply trade places.
bigmac.swaplevel() 
bigmac.swaplevel("Date", "Country")
bigmac.swaplevel("Country", "Date")

bigmac.swaplevel(0, 1)
# Only this last call is assigned. NOTE: re-running this cell swaps the levels back again.
bigmac = bigmac.swaplevel(1, 0)

In [69]:
# Country is now the outer level and Date the inner one.
bigmac.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Country,Date,Unnamed: 2_level_1
Argentina,2010-01-01,1.84
Australia,2010-01-01,3.98
Brazil,2010-01-01,4.76


## The stack Method

In [76]:
# Load the world statistics with (country, year) as a two-level row index.
world = pd.read_csv("worldstats.csv", index_col=["country", "year"])
world.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Population,GDP
country,year,Unnamed: 2_level_1,Unnamed: 3_level_1
Arab World,2015,392022276.0,2530102000000.0
Arab World,2014,384222592.0,2873600000000.0
Arab World,2013,376504253.0,2846994000000.0


In [79]:
# stack() moves the column labels (Population, GDP) into a third, innermost index level,
# giving a pandas Series with a three-level MultiIndex (result not displayed).
world.stack()
# to_frame() wraps that Series in a one-column DataFrame; the column is labelled 0.
world.stack().to_frame()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,0
country,year,Unnamed: 2_level_1,Unnamed: 3_level_1
Arab World,2015,Population,3.920223e+08
Arab World,2015,GDP,2.530102e+12
Arab World,2014,Population,3.842226e+08
Arab World,2014,GDP,2.873600e+12
Arab World,2013,Population,3.765043e+08
...,...,...,...
Zimbabwe,1962,GDP,1.117602e+09
Zimbabwe,1961,Population,3.876638e+06
Zimbabwe,1961,GDP,1.096647e+09
Zimbabwe,1960,Population,3.752390e+06


## The unstack Method

As the name says, it does the reverse of the stack method.

In [82]:
# Reload the world statistics with (country, year) as a two-level row index.
world = pd.read_csv("worldstats.csv", index_col=["country", "year"])
world.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Population,GDP
country,year,Unnamed: 2_level_1,Unnamed: 3_level_1
Arab World,2015,392022276.0,2530102000000.0
Arab World,2014,384222592.0,2873600000000.0
Arab World,2013,376504253.0,2846994000000.0


### Part 1

In [80]:
# Reminder: stack() turns the frame into a Series indexed by (country, year, column name).
s = world.stack()
s.head(3)


country     year            
Arab World  2015  Population    3.920223e+08
                  GDP           2.530102e+12
            2014  Population    3.842226e+08
dtype: float64

In [84]:
# Unstacking twice yields MultiIndex columns (result not displayed).
s.unstack().unstack()
# Unstacking a third time: as the output shows, the result is a Series again, now indexed by
# (column name, year, country), with NaN for combinations that have no value.
s.unstack().unstack().unstack()

            year  country           
Population  1960  Afghanistan           8.994793e+06
                  Albania                        NaN
                  Algeria               1.112489e+07
                  Andorra                        NaN
                  Angola                         NaN
                                            ...     
GDP         2015  West Bank and Gaza    1.267740e+10
                  World                 7.343364e+13
                  Yemen, Rep.                    NaN
                  Zambia                2.120156e+10
                  Zimbabwe              1.389294e+10
Length: 28224, dtype: float64

### Part 2

In [82]:
# Part 2 starts from a fresh load of the world statistics, indexed by (country, year).
world = pd.read_csv("worldstats.csv", index_col=["country", "year"])
world.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Population,GDP
country,year,Unnamed: 2_level_1,Unnamed: 3_level_1
Arab World,2015,392022276.0,2530102000000.0
Arab World,2014,384222592.0,2873600000000.0
Arab World,2013,376504253.0,2846994000000.0


### Part 3

In [82]:
# Part 3 starts from a fresh load of the world statistics, indexed by (country, year).
world = pd.read_csv("worldstats.csv", index_col=["country", "year"])
world.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Population,GDP
country,year,Unnamed: 2_level_1,Unnamed: 3_level_1
Arab World,2015,392022276.0,2530102000000.0
Arab World,2014,384222592.0,2873600000000.0
Arab World,2013,376504253.0,2846994000000.0
