# Pandas: Intro by DataCamp - 2

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the tidy Gapminder table; the cells below all derive from this frame.
gapminder = pd.read_csv(filepath_or_buffer='data/gapminder_tidy.csv')

## Setting & removing indexes

```python
DataFrame.set_index(self, keys, drop=True, append=False, inplace=False, verify_integrity=False)
```
Set the DataFrame index using existing columns.

Set the DataFrame index (row labels) using one or more existing columns or arrays (of the correct length). The index can replace the existing index or expand on it.

In [3]:
# Promote the Country column to the row index, then preview the first five rows.
gapminder_ind = gapminder.set_index(keys='Country')
gapminder_ind.head(n=5)

Unnamed: 0_level_0,Year,fertility,life,population,child_mortality,gdp,region
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Afghanistan,1964,7.671,33.639,10474903.0,339.7,1182.0,South Asia
Afghanistan,1965,7.671,34.152,10697983.0,334.1,1182.0,South Asia
Afghanistan,1966,7.671,34.662,10927724.0,328.7,1168.0,South Asia
Afghanistan,1967,7.671,35.17,11163656.0,323.3,1173.0,South Asia
Afghanistan,1968,7.671,35.674,11411022.0,318.1,1187.0,South Asia


```python
DataFrame.reset_index(self, level: Union[Hashable, Sequence[Hashable], NoneType] = None, drop: bool = False, inplace: bool = False, col_level: Hashable = 0, col_fill: Union[Hashable, NoneType] = '') → Union[ForwardRef('DataFrame'), NoneType]
```

Reset the index, or a level of it.

Reset the index of the DataFrame, and use the default one instead. If the DataFrame has a MultiIndex, this method can remove one or more levels.

In [4]:
# drop=True discards the Country index rather than restoring it as a column,
# leaving a default integer index.
gapminder_ind.reset_index(drop=True).head(n=5)

Unnamed: 0,Year,fertility,life,population,child_mortality,gdp,region
0,1964,7.671,33.639,10474903.0,339.7,1182.0,South Asia
1,1965,7.671,34.152,10697983.0,334.1,1182.0,South Asia
2,1966,7.671,34.662,10927724.0,328.7,1168.0,South Asia
3,1967,7.671,35.17,11163656.0,323.3,1173.0,South Asia
4,1968,7.671,35.674,11411022.0,318.1,1187.0,South Asia


## Subsetting with .loc[]

Access a group of rows and columns by label(s) or a boolean array.

In [5]:
# Index labels to look up with .loc in the next cell.
countries = [
    'Afghanistan',
]

In [6]:
# Passing a list of labels to .loc keeps every row whose index matches one of them.
gapminder_ind.loc[countries].head(n=5)

Unnamed: 0_level_0,Year,fertility,life,population,child_mortality,gdp,region
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Afghanistan,1964,7.671,33.639,10474903.0,339.7,1182.0,South Asia
Afghanistan,1965,7.671,34.152,10697983.0,334.1,1182.0,South Asia
Afghanistan,1966,7.671,34.662,10927724.0,328.7,1168.0,South Asia
Afghanistan,1967,7.671,35.17,11163656.0,323.3,1173.0,South Asia
Afghanistan,1968,7.671,35.674,11411022.0,318.1,1187.0,South Asia


## Setting multi-level indexes

In [7]:
# Build a two-level (Country, Year) index from the existing columns.
gapminder_ind2 = gapminder.set_index(keys=['Country', 'Year'])

In [8]:
# Preview the first five rows of the multi-indexed frame.
gapminder_ind2.head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,fertility,life,population,child_mortality,gdp,region
Country,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Afghanistan,1964,7.671,33.639,10474903.0,339.7,1182.0,South Asia
Afghanistan,1965,7.671,34.152,10697983.0,334.1,1182.0,South Asia
Afghanistan,1966,7.671,34.662,10927724.0,328.7,1168.0,South Asia
Afghanistan,1967,7.671,35.17,11163656.0,323.3,1173.0,South Asia
Afghanistan,1968,7.671,35.674,11411022.0,318.1,1187.0,South Asia


In [9]:
# Each tuple is one full (Country, Year) key of the two-level index.
rows_to_keep = [
    ('Afghanistan', 1964),
    ('Afghanistan', 1966),
]

In [10]:
# A list of (Country, Year) tuples selects exactly those index entries.
gapminder_ind2.loc[rows_to_keep].head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,fertility,life,population,child_mortality,gdp,region
Country,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Afghanistan,1964,7.671,33.639,10474903.0,339.7,1182.0,South Asia
Afghanistan,1966,7.671,34.662,10927724.0,328.7,1168.0,South Asia


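```python
DataFrame.sort_index(self, axis=0, level=None, ascending=True, inplace=False, kind='quicksort', na_position='last', sort_remaining=True, ignore_index: bool = False)
```

Sort the DataFrame by its index labels (along an axis). With a MultiIndex, `level` selects which index levels to sort on, and `ascending` can be given as a list with one entry per level.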

In [11]:
# Default sort: row index (axis=0), ascending, across all index levels.
gapminder_ind2.sort_index(axis=0, ascending=True).head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,fertility,life,population,child_mortality,gdp,region
Country,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Afghanistan,1964,7.671,33.639,10474903.0,339.7,1182.0,South Asia
Afghanistan,1965,7.671,34.152,10697983.0,334.1,1182.0,South Asia
Afghanistan,1966,7.671,34.662,10927724.0,328.7,1168.0,South Asia
Afghanistan,1967,7.671,35.17,11163656.0,323.3,1173.0,South Asia
Afghanistan,1968,7.671,35.674,11411022.0,318.1,1187.0,South Asia


In [12]:
# Sort by Year descending first, then by Country ascending within each year.
gapminder_ind2.sort_index(
    level=['Year', 'Country'],
    ascending=[False, True],
).head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,fertility,life,population,child_mortality,gdp,region
Country,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Afghanistan,2013,4.9,60.947,34499915.0,96.7,1884.0,South Asia
Albania,2013,1.771,77.392,3238316.0,14.9,9961.0,Europe & Central Asia
Algeria,2013,2.795,71.0,36983924.0,25.2,12893.0,Middle East & North Africa
Angola,2013,5.863,51.899,20714494.0,167.1,7488.0,Sub-Saharan Africa
Antigua and Barbuda,2013,2.089,75.954,91404.0,8.7,20353.0,America


## Slicing and subsetting with .loc and .iloc


In [13]:
# Range slicing a MultiIndex with .loc requires a lexsorted index. Sort first so
# the lookup does not depend on the row order of the source CSV (an unsorted
# index raises UnsortedIndexError here).
# Rows: (Afghanistan, 1964) through (Afghanistan, 1968), both ends inclusive.
# Columns: 'fertility' through 'gdp', both ends inclusive.
gapminder_ind2.sort_index().loc[
    ('Afghanistan', 1964):('Afghanistan', 1968),
    'fertility':'gdp'
]

Unnamed: 0_level_0,Unnamed: 1_level_0,fertility,life,population,child_mortality,gdp
Country,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Afghanistan,1964,7.671,33.639,10474903.0,339.7,1182.0
Afghanistan,1965,7.671,34.152,10697983.0,334.1,1182.0
Afghanistan,1966,7.671,34.662,10927724.0,328.7,1168.0
Afghanistan,1967,7.671,35.17,11163656.0,323.3,1173.0
Afghanistan,1968,7.671,35.674,11411022.0,318.1,1187.0


In [14]:
# Position-based selection: rows at positions 1 and 2 (end is exclusive)
# and the first two columns.
gapminder.iloc[1:3, :2]

Unnamed: 0,Country,Year
1,Afghanistan,1965
2,Afghanistan,1966
