In [1]:
import pandas as pd

# Load the 2015 large-countries dataset from the working directory.
df = pd.read_csv(filepath_or_buffer='large_countries_2015.csv')

In [11]:
# Relabel the 'Unnamed: 0' column as 'countries' (in place), then preview the
# first rows of the frame.
df.rename({'Unnamed: 0': 'countries'}, axis='columns', inplace=True)
df.head(n=5)

Unnamed: 0,countries,population,fertility,continent
0,Bangladesh,160995600.0,2.12,Asia
1,Brazil,207847500.0,1.78,South America
2,China,1376049000.0,1.57,Asia
3,India,1311051000.0,2.43,Asia
4,Indonesia,257563800.0,2.28,Asia


### 1) Set index

In [12]:
# Promote 'countries' to the index. Guarded so the cell is safe to re-run:
# once 'countries' is the index it is no longer a column, and an unguarded
# set_index('countries') would raise KeyError on the second execution.
if 'countries' in df.columns:
    df = df.set_index('countries')

In [13]:
# Preview the frame to confirm 'countries' is now the index, not a column.
df.head(n=5)

Unnamed: 0_level_0,population,fertility,continent
countries,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Bangladesh,160995600.0,2.12,Asia
Brazil,207847500.0,1.78,South America
China,1376049000.0,1.57,Asia
India,1311051000.0,2.43,Asia
Indonesia,257563800.0,2.28,Asia


### 2) Reset index

In [14]:
# Move 'countries' back from the index into an ordinary column. Guarded so a
# re-run does not push the default integer index into a stray 'index' column.
if df.index.name == 'countries':
    df = df.reset_index()
df.head()

Unnamed: 0,countries,population,fertility,continent
0,Bangladesh,160995600.0,2.12,Asia
1,Brazil,207847500.0,1.78,South America
2,China,1376049000.0,1.57,Asia
3,India,1311051000.0,2.43,Asia
4,Indonesia,257563800.0,2.28,Asia


### 3) Use groupby() to set the index

Unnamed: 0,countries,population,fertility,continent
0,Bangladesh,160995600.0,2.12,Asia
1,Brazil,207847500.0,1.78,South America
2,China,1376049000.0,1.57,Asia
3,India,1311051000.0,2.43,Asia
4,Indonesia,257563800.0,2.28,Asia
5,Japan,126573500.0,1.45,Asia
6,Mexico,127017200.0,2.13,North America
7,Nigeria,182202000.0,5.89,Africa
8,Pakistan,188924900.0,3.04,Asia
9,Philippines,100699400.0,2.98,Asia


In [15]:
# Average the numeric columns per continent; the grouping key becomes the index.
# numeric_only=True keeps the string 'countries' column out of the aggregation:
# pandas >= 2.0 no longer drops non-numeric columns silently and raises
# TypeError without it.
df.groupby('continent').mean(numeric_only=True)

Unnamed: 0_level_0,population,fertility
continent,Unnamed: 1_level_1,Unnamed: 2_level_1
Africa,182202000.0,5.89
Asia,503122400.0,2.267143
Europe,143456900.0,1.61
North America,224395400.0,2.05
South America,207847500.0,1.78


### 4) Hierarchical Indexing

In [23]:
# 'continent' and 'countries' are still ordinary columns at this point, so
# order the rows by those columns first and only then promote both to a
# two-level index (outer level: continent, inner level: countries).
sort = df.sort_values(by=['continent', 'countries'])
h = sort.set_index(keys=['continent', 'countries'])

In [24]:
# A bare label passed to .loc is matched against the outer level ('continent'),
# so this returns every Asian country, indexed by the remaining 'countries' level.
# A single country needs both levels in index order, e.g. h.loc[('Asia', 'China')];
# to look up by the inner level alone, use h.xs('China', level='countries').
h.loc['Asia']

Unnamed: 0_level_0,population,fertility
countries,Unnamed: 1_level_1,Unnamed: 2_level_1
Bangladesh,160995600.0,2.12
China,1376049000.0,1.57
India,1311051000.0,2.43
Indonesia,257563800.0,2.28
Japan,126573500.0,1.45
Pakistan,188924900.0,3.04
Philippines,100699400.0,2.98


In [25]:
# Preview the hierarchically indexed frame (continent, then countries).
h.head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,population,fertility
continent,countries,Unnamed: 2_level_1,Unnamed: 3_level_1
Africa,Nigeria,182202000.0,5.89
Asia,Bangladesh,160995600.0,2.12
Asia,China,1376049000.0,1.57
Asia,India,1311051000.0,2.43
Asia,Indonesia,257563800.0,2.28


In [30]:
# Alternative route to a (continent, countries) index: group on both columns
# and keep the first row of each group. The grouping keys become the index
# levels; presumably each pair occurs only once here, so no rows are lost.
grouped = df.groupby(by=['continent', 'countries']).first()
grouped.head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,population,fertility
continent,countries,Unnamed: 2_level_1,Unnamed: 3_level_1
Africa,Nigeria,182202000.0,5.89
Asia,Bangladesh,160995600.0,2.12
Asia,China,1376049000.0,1.57
Asia,India,1311051000.0,2.43
Asia,Indonesia,257563800.0,2.28


In [28]:
# The groupby result was bound to a new name, so df itself still has its
# original flat layout; preview it to confirm.
df.head(n=5)

Unnamed: 0,countries,population,fertility,continent
0,Bangladesh,160995600.0,2.12,Asia
1,Brazil,207847500.0,1.78,South America
2,China,1376049000.0,1.57,Asia
3,India,1311051000.0,2.43,Asia
4,Indonesia,257563800.0,2.28,Asia


# Stacking and Unstacking

In [31]:
# Reload the raw CSV so this section starts from a clean frame, index it by
# country and keep only the numeric columns.
# The rename repeats the one applied after the first load: rename() silently
# ignores a missing key, so this works whether the CSV's country column comes
# in as 'Unnamed: 0' or is already called 'countries'. Without it,
# set_index('countries') raises KeyError on a freshly read unnamed column.
df = (
    pd.read_csv('large_countries_2015.csv')
    .rename(columns={'Unnamed: 0': 'countries'})
    .set_index('countries')
    .drop(columns='continent')  # removes a column (drop() targets rows by default)
)

In [35]:
# Stack the column labels into an inner index level, producing a long-form
# Series with one value per (country, variable) pair.
long = df.stack(level=-1)
long.head(n=5)

countries             
Bangladesh  population    1.609956e+08
            fertility     2.120000e+00
Brazil      population    2.078475e+08
            fertility     1.780000e+00
China       population    1.376049e+09
dtype: float64

In [36]:
# Pivot the innermost index level back into columns, restoring the wide layout.
long.unstack(level=-1)

Unnamed: 0_level_0,population,fertility
countries,Unnamed: 1_level_1,Unnamed: 2_level_1
Bangladesh,160995600.0,2.12
Brazil,207847500.0,1.78
China,1376049000.0,1.57
India,1311051000.0,2.43
Indonesia,257563800.0,2.28
Japan,126573500.0,1.45
Mexico,127017200.0,2.13
Nigeria,182202000.0,5.89
Pakistan,188924900.0,3.04
Philippines,100699400.0,2.98
