# Intro to Pandas

## Index Hierarchy

In [2]:
# Standard Imports
import numpy as np
from pandas import Series,DataFrame
import pandas as pd

from numpy.random import randn

In [57]:
# Pandas supports hierarchical (multi-level) indexes: passing a list of arrays
# as `index` builds one index level per array (numbers outer, letters inner).
# A seeded generator keeps the values identical on every Restart & Run All.
rng = np.random.default_rng(42)

ser = Series(rng.standard_normal(6),
             index=[[1, 1, 1, 2, 2, 2], ['a', 'b', 'c', 'a', 'b', 'c']])

In [58]:
# Display the Series; the two index levels are rendered as nested row labels
ser

1  a   -0.579311
   b    1.043526
   c    1.380974
2  a   -0.101427
   b    0.338554
   c   -0.165089
dtype: float64

In [5]:
# Inspect the index object itself: the two levels are stored as a MultiIndex
ser.index

MultiIndex(levels=[[1, 2], ['a', 'b', 'c']],
           codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]])

Select specific subsets

In [59]:
# Select a subset by outer-level label: every row whose first index level is 1
ser.loc[1]

a   -0.579311
b    1.043526
c    1.380974
dtype: float64

In [60]:
# Same selection for outer-level label 2
ser.loc[2]

a   -0.101427
b    0.338554
c   -0.165089
dtype: float64

In [18]:
# Deeper selection: give one label per level to reach a single value
ser.loc[2, 'a']

-1.8801751008090657

In [17]:
# Same element as the label lookup (2, 'a'), fetched by position instead.
# .iloc states the positional intent explicitly: a bare integer key on a
# string-labelled Series relies on a deprecated positional fallback.
ser[2].iloc[0]

-1.8801751008090657

In [7]:
# Select on the inner level: take every outer label, keep only inner label 'a'
ser.loc[:, 'a']

1   -1.000178
2   -1.880175
dtype: float64

### Unstack 

We can also create a DataFrame from a Series that has multiple index levels.

In [20]:
# Unstack the Series into a DataFrame: the innermost index level (the letters)
# is pivoted into the columns
dframe = ser.unstack(level=-1)

# Display the resulting frame
dframe

Unnamed: 0,a,b,c
1,-1.000178,1.390784,-2.260721
2,-1.880175,1.590288,-1.194955


Convert a DataFrame back into a Series

In [21]:
# unstack() on the DataFrame pivots the column labels into the index and
# returns a Series. The levels come back swapped relative to `ser`
# (letters outer, numbers inner); dframe.stack() is the exact inverse of
# ser.unstack() if the original level order is wanted.
dframe.unstack()

a  1   -1.000178
   2   -1.880175
b  1    1.390784
   2    1.590288
c  1   -2.260721
   2   -1.194955
dtype: float64

### Multi Index DF

In [52]:
# Hierarchical labels work on both axes of a DataFrame: each axis receives a
# list of arrays, one array per level.
row_levels = [['a', 'a', 'b', 'b'], [1, 2, 1, 2]]
col_levels = [['London', 'London', 'Rome', 'Rome', 'Paris', 'Paris'],
              ['cold', 'hot', 'cold', 'hot', 'hot', 'cold']]

# 4x6 grid holding the integers 0..23
values = np.arange(24).reshape(4, 6)

dframe2 = DataFrame(values, index=row_levels, columns=col_levels)

dframe2

Unnamed: 0_level_0,Unnamed: 1_level_0,London,London,Rome,Rome,Paris,Paris
Unnamed: 0_level_1,Unnamed: 1_level_1,cold,hot,cold,hot,hot,cold
a,1,0,1,2,3,4,5
a,2,6,7,8,9,10,11
b,1,12,13,14,15,16,17
b,2,18,19,20,21,22,23


Give these index levels names

In [53]:
# Name the two row-index levels (assigned in place on dframe2's index)
dframe2.index.names = ['Index_1','Index_2']

# Name the two column levels the same way
dframe2.columns.names = ['Cities','Temp']

# Display the frame; the level names now appear in the header
dframe2

Unnamed: 0_level_0,Cities,London,London,Rome,Rome,Paris,Paris
Unnamed: 0_level_1,Temp,cold,hot,cold,hot,hot,cold
Index_1,Index_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
a,1,0,1,2,3,4,5
a,2,6,7,8,9,10,11
b,1,12,13,14,15,16,17
b,2,18,19,20,21,22,23


In [54]:
# Swap the order of the two column levels; axis=1 targets the columns.
# The swapped frame is returned for display only and is not assigned.
dframe2.swaplevel(i='Cities', j='Temp', axis=1)

Unnamed: 0_level_0,Temp,cold,hot,cold,hot,hot,cold
Unnamed: 0_level_1,Cities,London,London,Rome,Rome,Paris,Paris
Index_1,Index_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
a,1,0,1,2,3,4,5
a,2,6,7,8,9,10,11
b,1,12,13,14,15,16,17
b,2,18,19,20,21,22,23


#### Sort Levels

Ascending

In [55]:
# Sort the rows by the values of the 'Index_2' index level (ascending by default)
dframe2.sort_values(by=['Index_2'])

Unnamed: 0_level_0,Cities,London,London,Rome,Rome,Paris,Paris
Unnamed: 0_level_1,Temp,cold,hot,cold,hot,hot,cold
Index_1,Index_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
a,1,0,1,2,3,4,5
b,1,12,13,14,15,16,17
a,2,6,7,8,9,10,11
b,2,18,19,20,21,22,23


Descending

In [56]:
# Same sort on the 'Index_2' level, largest label first
dframe2.sort_values(by=['Index_2'], ascending=False)

Unnamed: 0_level_0,Cities,London,London,Rome,Rome,Paris,Paris
Unnamed: 0_level_1,Temp,cold,hot,cold,hot,hot,cold
Index_1,Index_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
a,2,6,7,8,9,10,11
b,2,18,19,20,21,22,23
a,1,0,1,2,3,4,5
b,1,12,13,14,15,16,17


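**Note:** The row order has changed: the result is now sorted by the `Index_2` level. `sort_values` returns a new DataFrame, so `dframe2` itself keeps its original order, as the next cell shows.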

In [45]:
# Re-display dframe2: the sorts returned new frames, so the original row order is unchanged
dframe2

Unnamed: 0_level_0,Cities,London,London,Rome,Rome,Paris,Paris
Unnamed: 0_level_1,Temp,cold,hot,cold,hot,hot,cold
Index_1,Index_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
a,1,0,1,2,3,4,5
a,2,6,7,8,9,10,11
b,1,12,13,14,15,16,17
b,2,18,19,20,21,22,23


In [44]:
# Aggregate across one column level: add up every city's values per 'Temp' label.
# The `level=` argument of DataFrame.sum was deprecated in pandas 1.3 and
# removed in 2.0; grouping on the level is the supported equivalent.
# Transposing first lets the column level be grouped as an ordinary row level,
# and the final .T restores the original orientation.
dframe2.T.groupby(level='Temp').sum().T

Unnamed: 0_level_0,Temp,cold,hot
Index_1,Index_2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,7,8
a,2,25,26
b,1,43,44
b,2,61,62


End of this section! 