## 1. Hierarchical indexing

In [1]:
import numpy as np 
import pandas as pd

In [3]:
# Six random values labelled by a two-level index: a year string (outer level)
# and a position word (inner level).
ps1 = pd.Series(
    np.random.randn(6),
    index=pd.MultiIndex.from_arrays([
        ['2000', '2000', '2000', '2005', '2005', '2010'],
        ['one', 'two', 'three', 'one', 'two', 'one'],
    ]),
)
ps1

2000  one      0.670359
      two      0.178112
      three   -1.018234
2005  one      0.720011
      two      0.381903
2010  one     -1.626837
dtype: float64

In [5]:
# Inspect the index: the two label lists are stored as a MultiIndex of (outer, inner) tuples
ps1.index

MultiIndex([('2000',   'one'),
            ('2000',   'two'),
            ('2000', 'three'),
            ('2005',   'one'),
            ('2005',   'two'),
            ('2010',   'one')],
           )

In [7]:
# Partial indexing: select every entry whose outer label is '2000'
ps1.loc['2000']

one      0.670359
two      0.178112
three   -1.018234
dtype: float64

In [8]:
# Label slice on the outer level; both end labels are included in the result
ps1.loc['2000':'2005']

2000  one      0.670359
      two      0.178112
      three   -1.018234
2005  one      0.720011
      two      0.381903
dtype: float64

In [9]:
# Select on the inner level: keep the 'one' entry of every outer label
ps1.loc[:, 'one']

2000    0.670359
2005    0.720011
2010   -1.626837
dtype: float64

In [11]:
# unstack() moves the inner index level into the columns, turning the
# multi-index Series into a DataFrame
df1 = ps1.unstack(level=-1)
df1

Unnamed: 0,one,three,two
2000,0.670359,-1.018234,0.178112
2005,0.720011,,0.381903
2010,-1.626837,,


In [12]:
# stack() is the inverse operation: it converts the DataFrame back into a multi-index Series
df1.stack()

2000  one      0.670359
      three   -1.018234
      two      0.178112
2005  one      0.720011
      two      0.381903
2010  one     -1.626837
dtype: float64

In [14]:
# Show df1 again: the empty cells are (year, label) combinations that do not exist in ps1
df1

Unnamed: 0,one,three,two
2000,0.670359,-1.018234,0.178112
2005,0.720011,,0.381903
2010,-1.626837,,


In [18]:
# Display df1 with the missing values replaced by 0 (the result is not assigned back to df1)
df1.fillna(value=0)

Unnamed: 0,one,three,two
2000,0.670359,-1.018234,0.178112
2005,0.720011,0.0,0.381903
2010,-1.626837,0.0,0.0


In [None]:
# TODO: set_index() is demonstrated in section 4 below

In [20]:
# The same hierarchical (multi-level) indexing can also be applied to the columns


## 2. Reordering and sorting index levels

In [21]:
# 4x3 table of random integers in [0, 50), indexed by every (colour, position) pair
df2 = pd.DataFrame(
    np.random.randint(0, 50, size=(4, 3)),
    index=pd.MultiIndex.from_product([['black', 'red'], ['one', 'two']]),
)
df2

Unnamed: 0,Unnamed: 1,0,1,2
black,one,17,25,1
black,two,26,49,47
red,one,12,20,33
red,two,19,29,20


In [23]:
# Swap the two index levels so the 'one'/'two' labels become the outer level
df2v2 = df2.swaplevel(i=0, j=1)
df2v2

Unnamed: 0,Unnamed: 1,0,1,2
one,black,17,25,1
two,black,26,49,47
one,red,12,20,33
two,red,19,29,20


In [26]:
# Sort the rows by the outer index level; the sorted frame is displayed, not stored
df2v2.sort_index(axis=0, level=0)

Unnamed: 0,Unnamed: 1,0,1,2
one,black,17,25,1
one,red,12,20,33
two,black,26,49,47
two,red,19,29,20


In [34]:
# Give a name to each of the two index levels
df2.index = df2.index.set_names(['color', 'number'])

## 3. Summary statistics by level

In [30]:
# df2 now shows the level names 'color' and 'number' above its index
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,0,1,2
color,number,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
black,one,17,25,1
black,two,26,49,47
red,one,12,20,33
red,two,19,29,20


In [33]:
# Average each column within every value of the 'color' index level
df2.groupby(level='color').mean()

Unnamed: 0_level_0,0,1,2
color,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
black,21.5,37.0,24.0
red,15.5,24.5,26.5


## 4. Indexing with columns in a DataFrame

In [36]:
# Load the country population table (relative path, so the CSV is looked up
# in the current working directory) and preview the first rows.
df4 = pd.read_csv('population_by_country_2020.csv')
df4.head()

Unnamed: 0,Country (or dependency),Population (2020),Yearly Change,Net Change,Density (P/Km²),Land Area (Km²),Migrants (net),Fert. Rate,Med. Age,Urban Pop %,World Share
0,China,1438207241,0.39 %,5540090,153,9388211,-348399.0,1.7,38,61 %,18.47 %
1,India,1377233523,0.99 %,13586631,464,2973190,-532687.0,2.2,28,35 %,17.70 %
2,United States,330610570,0.59 %,1937734,36,9147420,954806.0,1.8,38,83 %,4.25 %
3,Indonesia,272931713,1.07 %,2898047,151,1811570,-98955.0,2.3,30,56 %,3.51 %
4,Pakistan,219992900,2.00 %,4327022,287,770880,-233379.0,3.6,23,35 %,2.83 %


In [37]:
# List the column labels of df4
df4.columns

Index(['Country (or dependency)', 'Population (2020)', 'Yearly Change',
       'Net Change', 'Density (P/Km²)', 'Land Area (Km²)', 'Migrants (net)',
       'Fert. Rate', 'Med. Age', 'Urban Pop %', 'World Share'],
      dtype='object')

In [40]:
# Use the country name column as the row index. set_index returns a new frame,
# so df4 is rebound to it rather than mutated in place.
df4 = df4.set_index('Country (or dependency)')

In [41]:
# 'Country (or dependency)' is now the row index instead of a regular column
df4

Unnamed: 0_level_0,Population (2020),Yearly Change,Net Change,Density (P/Km²),Land Area (Km²),Migrants (net),Fert. Rate,Med. Age,Urban Pop %,World Share
Country (or dependency),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
China,1438207241,0.39 %,5540090,153,9388211,-348399.0,1.7,38,61 %,18.47 %
India,1377233523,0.99 %,13586631,464,2973190,-532687.0,2.2,28,35 %,17.70 %
United States,330610570,0.59 %,1937734,36,9147420,954806.0,1.8,38,83 %,4.25 %
Indonesia,272931713,1.07 %,2898047,151,1811570,-98955.0,2.3,30,56 %,3.51 %
Pakistan,219992900,2.00 %,4327022,287,770880,-233379.0,3.6,23,35 %,2.83 %
...,...,...,...,...,...,...,...,...,...,...
Montserrat,4991,0.06 %,3,50,100,,N.A.,N.A.,10 %,0.00 %
Falkland Islands,3458,3.05 %,103,0,12170,,N.A.,N.A.,66 %,0.00 %
Niue,1624,0.68 %,11,6,260,,N.A.,N.A.,46 %,0.00 %
Tokelau,1354,1.27 %,17,136,10,,N.A.,N.A.,0 %,0.00 %


In [44]:
# Move the country names from the index back into a regular column and restore
# the default integer index. reset_index returns a new frame, so df4 is rebound
# to it rather than mutated in place.
df4 = df4.reset_index()

In [45]:
# The country names are an ordinary column again and the rows carry an integer index
df4

Unnamed: 0,Country (or dependency),Population (2020),Yearly Change,Net Change,Density (P/Km²),Land Area (Km²),Migrants (net),Fert. Rate,Med. Age,Urban Pop %,World Share
0,China,1438207241,0.39 %,5540090,153,9388211,-348399.0,1.7,38,61 %,18.47 %
1,India,1377233523,0.99 %,13586631,464,2973190,-532687.0,2.2,28,35 %,17.70 %
2,United States,330610570,0.59 %,1937734,36,9147420,954806.0,1.8,38,83 %,4.25 %
3,Indonesia,272931713,1.07 %,2898047,151,1811570,-98955.0,2.3,30,56 %,3.51 %
4,Pakistan,219992900,2.00 %,4327022,287,770880,-233379.0,3.6,23,35 %,2.83 %
...,...,...,...,...,...,...,...,...,...,...,...
230,Montserrat,4991,0.06 %,3,50,100,,N.A.,N.A.,10 %,0.00 %
231,Falkland Islands,3458,3.05 %,103,0,12170,,N.A.,N.A.,66 %,0.00 %
232,Niue,1624,0.68 %,11,6,260,,N.A.,N.A.,46 %,0.00 %
233,Tokelau,1354,1.27 %,17,136,10,,N.A.,N.A.,0 %,0.00 %
