# 06_03: Pandas DataFrame Indexing

In [1]:
import math
import collections

import numpy as np
import pandas as pd
import matplotlib.pyplot as pp

%matplotlib inline

In [2]:
# Cap how many rows pandas renders for a frame; longer frames are elided with "...".
pd.set_option('display.max_rows', 16)

In [3]:
# Load the laureates table. The file is read as headerless (every row is data),
# and the three columns are named explicitly here.
nobels = pd.read_csv(
    'nobels.csv',
    header=None,
    names=['year', 'discipline', 'nobelist'],
)

In [4]:
# No index was requested when loading, so the rows carry the default integer RangeIndex.
nobels.index

RangeIndex(start=0, stop=950, step=1)

In [5]:
# Build a new frame whose row labels are the values of the 'year' column;
# the result gets its own name, and `nobels` is still used unchanged further down.
nobels_by_year = nobels.set_index('year')

In [6]:
# Display the re-indexed frame: 'year' now labels the rows instead of being a regular column.
nobels_by_year

Unnamed: 0_level_0,discipline,nobelist
year,Unnamed: 1_level_1,Unnamed: 2_level_1
1901,Chemistry,Jacobus Henricus van 't Hoff
1901,Literature,Sully Prudhomme
1901,Medicine,Emil Adolf von Behring
1901,Peace,Frédéric Passy
1901,Peace,Henry Dunant
1901,Physics,Wilhelm Röntgen
1902,Chemistry,Hermann Emil Fischer
1902,Literature,Theodor Mommsen
...,...,...
2019,Literature,Peter Handke


In [7]:
# Inspect the new index: it is named 'year' and its labels repeat (one entry per laureate).
nobels_by_year.index

Int64Index([1901, 1901, 1901, 1901, 1901, 1901, 1902, 1902, 1902, 1902,
            ...
            2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019],
           dtype='int64', name='year', length=950)

In [8]:
# .loc selects by index label; because the label repeats, every row labeled 1901 comes back.
nobels_by_year.loc[1901]

Unnamed: 0_level_0,discipline,nobelist
year,Unnamed: 1_level_1,Unnamed: 2_level_1
1901,Chemistry,Jacobus Henricus van 't Hoff
1901,Literature,Sully Prudhomme
1901,Medicine,Emil Adolf von Behring
1901,Peace,Frédéric Passy
1901,Peace,Henry Dunant
1901,Physics,Wilhelm Röntgen


In [9]:
# Row label plus column name: the result is a Series holding only the 'nobelist' column.
nobels_by_year.loc[1901, 'nobelist']

year
1901    Jacobus Henricus van 't Hoff
1901                 Sully Prudhomme
1901          Emil Adolf von Behring
1901                  Frédéric Passy
1901                    Henry Dunant
1901                 Wilhelm Röntgen
Name: nobelist, dtype: object

In [None]:
# Label slices with .loc include both endpoints, so this covers 1914 through 1918.
nobels_by_year.loc[1914:1918]

In [None]:
# Index the rows by discipline instead, and sort the labels so that the
# label-range slice used further down has well-defined bounds.
nobels_by_discipline = nobels.set_index('discipline').sort_index()

In [None]:
# Peek at the first few rows of the discipline-indexed frame.
nobels_by_discipline.head()

In [None]:
# String labels work the same way as integer ones: all rows labeled 'Physics'.
nobels_by_discipline.loc['Physics']

In [None]:
# Label slice over the sorted string index: from 'Medicine' through 'Peace', both included.
nobels_by_discipline.loc['Medicine':'Peace']

In [10]:
# .iloc selects by integer position and ignores the labels; as with Python
# slicing the stop position is excluded, so this returns the first 10 rows.
nobels_by_year.iloc[0:10]

Unnamed: 0_level_0,discipline,nobelist
year,Unnamed: 1_level_1,Unnamed: 2_level_1
1901,Chemistry,Jacobus Henricus van 't Hoff
1901,Literature,Sully Prudhomme
1901,Medicine,Emil Adolf von Behring
1901,Peace,Frédéric Passy
1901,Peace,Henry Dunant
1901,Physics,Wilhelm Röntgen
1902,Chemistry,Hermann Emil Fischer
1902,Literature,Theodor Mommsen
1902,Medicine,Ronald Ross
1902,Peace,Charles Albert Gobat


In [11]:
# Passing a list of columns builds a two-level index: 'year' first, then 'discipline'.
nobels_multi = nobels.set_index(['year','discipline'])

In [12]:
# Display the multi-indexed frame: both 'year' and 'discipline' now label the rows.
nobels_multi

Unnamed: 0_level_0,Unnamed: 1_level_0,nobelist
year,discipline,Unnamed: 2_level_1
1901,Chemistry,Jacobus Henricus van 't Hoff
1901,Literature,Sully Prudhomme
1901,Medicine,Emil Adolf von Behring
1901,Peace,Frédéric Passy
1901,Peace,Henry Dunant
1901,Physics,Wilhelm Röntgen
1902,Chemistry,Hermann Emil Fischer
1902,Literature,Theodor Mommsen
...,...,...
2019,Literature,Peter Handke


In [None]:
# Inspect the two-level index object itself.
nobels_multi.index

In [None]:
# Per-row labels of the first index level ('year').
nobels_multi.index.get_level_values(0)

In [None]:
# Per-row labels of the second index level ('discipline').
nobels_multi.index.get_level_values(1)

In [None]:
# A tuple supplies one label per index level: the rows for year 2017, discipline 'Physics'.
nobels_multi.loc[(2017, 'Physics')]

In [None]:
# The ":" slice shorthand is only legal directly inside [...]; written inside a
# tuple it is a SyntaxError, so the cell could never execute and "Run All" would
# halt here. Compile the expression from a string instead, so the error is shown
# as the demonstration without stopping the notebook.
try:
    compile("nobels_multi.loc[(1901:1910, 'Chemistry')]", '<cell>', 'eval')
except SyntaxError as err:
    print('SyntaxError:', err.msg)

In [None]:
# slice() fixes the syntax, but the lookup is still ambiguous: with no column
# indexer, .loc reads the tuple as (rows, columns) and looks for a *column* named
# 'Chemistry'. Catch the resulting KeyError and show it as the cell's output, so
# the demonstration does not halt "Run All"; the next cell gives the explicit form.
try:
    outcome = nobels_multi.loc[(slice(1901,1910), 'Chemistry')]
except KeyError as err:
    outcome = 'KeyError: {}'.format(err)
outcome

In [None]:
# To avoid multi-indexing ambiguity, give .loc both axes explicitly: the tuple
# indexes the MultiIndex rows (years 1901 through 1910, discipline 'Chemistry'),
# and the trailing ":" selects all columns.
nobels_multi.loc[(slice(1901,1910), 'Chemistry'), :]

In [None]:
# slice(None) is longhand for the end-to-end slice ":", so no year is filtered out;
# the list keeps rows whose discipline is either 'Chemistry' or 'Physics'.
nobels_multi.loc[(slice(None), ['Chemistry','Physics']), :]

In [None]:
# Boolean-mask selection on the original (un-indexed) frame: build one mask per
# condition (two on year, one on discipline), AND them element by element, and
# use the combined mask as a fancy index.
since_1901 = nobels.year >= 1901
through_1910 = nobels.year <= 1910
is_chemistry = nobels.discipline == 'Chemistry'

nobels[since_1901 & through_1910 & is_chemistry]

In [None]:
# The same three conditions as the Boolean-mask cell above, written as a single
# query expression string that refers to the columns by name.
nobels.query('year >= 1901 and year <= 1910 and discipline == "Chemistry"')