# **Chapter 8**
# **Data Wrangling: Join, Combine, and Reshape**

## **8.1 Hierarchical Indexing**

In [9]:
import pandas as pd
import numpy as np
# Nine values from np.random.randn, labelled by a two-level index:
# the first list supplies the outer labels, the second the inner labels.
# No seed is set here, so the values change on every run.
data = pd.Series(
    data=np.random.randn(9),
    index=[
        ["a", "a", "a", "b", "b", "c", "c", "d", "d"],
        [1, 2, 3, 1, 3, 1, 2, 2, 3],
    ],
)

In [10]:
# Display the Series; each outer label is printed once for its run of inner labels.
data

a  1   -0.156989
   2    1.353544
   3    1.057333
b  1   -0.007627
   3   -1.053630
c  1    1.227407
   2    0.153180
d  2    0.152872
   3   -1.138854
dtype: float64

In [11]:
# Inspect the index object: a MultiIndex holding one (outer, inner) tuple per row.
data.index

MultiIndex([('a', 1),
            ('a', 2),
            ('a', 3),
            ('b', 1),
            ('b', 3),
            ('c', 1),
            ('c', 2),
            ('d', 2),
            ('d', 3)],
           )

In [12]:
# Partial indexing: an outer-level label returns the entries under it,
# indexed by the remaining inner level.
data["b"]

1   -0.007627
3   -1.053630
dtype: float64

In [13]:
# Label slice over the outer level; both endpoints ("b" and "c") are included.
data["b":"c"]

b  1   -0.007627
   3   -1.053630
c  1    1.227407
   2    0.153180
dtype: float64

In [14]:
# Pick several outer-level groups at once by passing a list of labels.
data.loc[["b", "d"]]

b  1   -0.007627
   3   -1.053630
d  2    0.152872
   3   -1.138854
dtype: float64

In [15]:
# Select on the inner level: all outer groups, inner key 2 only.
# Groups without an inner key of 2 are absent from the result.
data.loc[:, 2]

a    1.353544
c    0.153180
d    0.152872
dtype: float64

In [16]:
# Pivot the inner index level into columns, giving a DataFrame;
# (outer, inner) pairs that are not in the index become missing values.
data.unstack()

Unnamed: 0,1,2,3
a,-0.156989,1.353544,1.057333
b,-0.007627,,-1.05363
c,1.227407,0.15318,
d,,0.152872,-1.138854


In [17]:
# stack() is the inverse of unstack(): the columns go back into an inner index level.
# NOTE(review): the 9-row result shown relies on stack() dropping the missing
# cells created by unstack(); newer pandas stack implementations may keep
# them -- confirm against the installed pandas version.
data.unstack().stack()

a  1   -0.156989
   2    1.353544
   3    1.057333
b  1   -0.007627
   3   -1.053630
c  1    1.227407
   2    0.153180
d  2    0.152872
   3   -1.138854
dtype: float64

In [22]:
# 4x3 frame holding 0..11, with two-level labels on both the rows and the columns.
frame = pd.DataFrame(
    data=np.arange(12).reshape(4, 3),
    index=[
        ["a", "a", "b", "b"],
        [1, 2, 1, 2],
    ],
    columns=[
        ["Ohio", "Ohio", "Colorado"],
        ["Green", "Red", "Green"],
    ],
)

In [23]:
# Display the frame; at this point neither the row levels nor the column levels are named.
frame

Unnamed: 0_level_0,Unnamed: 1_level_0,Ohio,Ohio,Colorado
Unnamed: 0_level_1,Unnamed: 1_level_1,Green,Red,Green
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [24]:
# Name the two row-index levels (modifies frame's index in place).
frame.index.names = ["key1", "key2"]

In [25]:
# Name the two column-index levels (modifies frame's columns in place).
frame.columns.names = ["state", "color"]

In [26]:
# Display the frame again; the level names assigned above now appear in the header.
frame

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [27]:
# Partial column indexing: an outer-level column label returns the columns
# under it, labelled by the remaining "color" level.
frame["Ohio"]

Unnamed: 0_level_0,color,Green,Red
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,0,1
a,2,3,4
b,1,6,7
b,2,9,10


In [30]:
from pandas import MultiIndex
# Build a MultiIndex directly from one array per level; `names` labels the levels.
MultiIndex.from_arrays(
    [
        ["Ohio", "Ohio", "Colorado"],
        ["Green", "Red", "Green"],
    ],
    names=["state", "color"],
)

MultiIndex([(    'Ohio', 'Green'),
            (    'Ohio',   'Red'),
            ('Colorado', 'Green')],
           names=['state', 'color'])

### **Reordering and Sorting Levels**