# Introduction

This notebook covers the idea of multi-indexing and the advantages of using such an indexing method.

In [15]:
import pandas as pd
import numpy as np

In [2]:
# Toy frame with one row per restaurant entry. Every column passed to the
# DataFrame constructor must have the same number of entries (four here);
# with only two locations pandas raises
# "ValueError: All arrays must be of the same length".
# NOTE(review): the last two coordinates simply repeat the original (4, 2)
# value as placeholders -- replace them with the real "Pandas" locations.
df = pd.DataFrame({
    "restaurant": ["Diner", "Diner", "Pandas", "Pandas"],
    "location": [(4, 2), (4, 2), (4, 2), (4, 2)],
})

(California, 2000)    33871648
(California, 2010)    37253956
(New York, 2000)      18976457
(New York, 2010)      19378102
(Texas, 2000)         20851820
(Texas, 2010)         25145561
dtype: int64

In [19]:
# Naive approach: label every population figure with a plain (state, year) tuple.
index = [
    (state, year)
    for state in ('California', 'New York', 'Texas')
    for year in (2000, 2010)
]
populations = [
    33871648, 37253956,  # California: 2000, 2010
    18976457, 19378102,  # New York:   2000, 2010
    20851820, 25145561,  # Texas:      2000, 2010
]
# The Series pairs each figure with its tuple label, in the order listed above.
pop = pd.Series(populations, index=index)
pop


(California, 2000)    33871648
(California, 2010)    37253956
(New York, 2000)      18976457
(New York, 2010)      19378102
(Texas, 2000)         20851820
(Texas, 2010)         25145561
dtype: int64

In [8]:
# Promote the plain list of (state, year) tuples to a hierarchical MultiIndex.
# This cell rebinds both `index` and `pop`, so the cell that first defines
# them must have been run beforehand.
index = pd.MultiIndex.from_tuples(index)
print(index)
# Reindexing the tuple-labelled Series with the MultiIndex keeps the same
# values but displays them grouped by state, then year.
pop = pop.reindex(index)
pop

MultiIndex([('California', 2000),
            ('California', 2010),
            (  'New York', 2000),
            (  'New York', 2010),
            (     'Texas', 2000),
            (     'Texas', 2010)],
           )


California  2000    33871648
            2010    37253956
New York    2000    18976457
            2010    19378102
Texas       2000    20851820
            2010    25145561
dtype: int64

In [11]:
# Selecting on the outer-level label alone returns that state's rows,
# indexed by the remaining (year) level.
pop.loc['Texas']

2000    20851820
2010    25145561
dtype: int64

## `stack()` and `unstack()`

In [12]:
# unstack() pivots the inner index level (year) into columns, producing a
# DataFrame with one row per state.
pop_df = pop.unstack()
pop_df

Unnamed: 0,2000,2010
California,33871648,37253956
New York,18976457,19378102
Texas,20851820,25145561


In [13]:
# stack() reverses the pivot: the year columns are folded back into an inner
# index level, giving a Series indexed by (state, year) again.
pop_df.stack()

California  2000    33871648
            2010    37253956
New York    2000    18976457
            2010    19378102
Texas       2000    20851820
            2010    25145561
dtype: int64

## Creating a MultiIndex

In [16]:
# Use a dedicated, seeded generator so the demo values are identical on every
# Restart & Run All; an unseeded np.random.rand produces new numbers each run.
rng = np.random.default_rng(42)
# Passing a list of two parallel label lists as `index` makes pandas build a
# two-level MultiIndex: outer labels 'a'/'b', inner labels 1/2.
df = pd.DataFrame(rng.random((4, 2)),
                  index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],
                  columns=['data1', 'data2'])
df

Unnamed: 0,Unnamed: 1,data1,data2
a,1,0.01676,0.574883
a,2,0.160097,0.381914
b,1,0.04091,0.256056
b,2,0.330309,0.529177


In [22]:
# Population counts keyed by (state, census year) tuples, one state per line.
# Insertion order is California, Texas, New York.
data = {
    ('California', 2000): 33871648, ('California', 2010): 37253956,
    ('Texas', 2000): 20851820, ('Texas', 2010): 25145561,
    ('New York', 2000): 18976457, ('New York', 2010): 19378102,
}
data


{('California', 2000): 33871648,
 ('California', 2010): 37253956,
 ('Texas', 2000): 20851820,
 ('Texas', 2010): 25145561,
 ('New York', 2000): 18976457,
 ('New York', 2010): 19378102}

In [23]:
# Building a Series directly from the tuple-keyed dict yields a hierarchical
# (state, year) index, as the grouped display of the result shows.
# Note: this rebinds `data` from the dict to the Series.
data = pd.Series(data)
data

California  2000    33871648
            2010    37253956
Texas       2000    20851820
            2010    25145561
New York    2000    18976457
            2010    19378102
dtype: int64

In [36]:
# Positional selection: take the first two rows regardless of their labels.
data.iloc[[0,1]]

California  2000    33871648
            2010    37253956
dtype: int64

In [44]:
# Select several outer-level labels at once by passing a list of plain
# state-name strings; all years for each listed state are returned.
data.loc[['California', 'Texas']]

California  2000    33871648
            2010    37253956
Texas       2000    20851820
            2010    25145561
dtype: int64

In [51]:
# Label-based selection of two states, followed by a positional slice that
# keeps only the first three of the selected rows.
data.loc[['California', 'Texas']].iloc[:3]

California  2000    33871648
            2010    37253956
Texas       2000    20851820
dtype: int64

In [52]:
# Aggregating without specifying a level reduces over every entry: this is
# the mean of all six population figures.
data.mean()

25912924.0