# Four Ways to Select Data in a pandas.DataFrame

In [1]:
import pandas
# Build the example gradebook in a single expression: the three raw columns
# come from the constructor, the row labels replace the default 0..4 index,
# and the two derived score columns are computed from Midterm.
df = pandas.DataFrame(
    {
        'Last': ['Eenie', 'Meenie', 'Miney', 'Mo', 'Catcha'],
        'Given': ['Al', 'Bo', 'Cat', 'D', 'El'],
        'Midterm': [90, 80, 90, 95, 100],
    },
    index=['a', 'b', 'c', 'dee', 'e'],
).assign(
    Final=lambda scores: scores['Midterm'] + 1,
    Grade=lambda scores: scores['Midterm'] + 2,
)
df

Unnamed: 0,Last,Given,Midterm,Final,Grade
a,Eenie,Al,90,91,92
b,Meenie,Bo,80,81,82
c,Miney,Cat,90,91,92
dee,Mo,D,95,96,97
e,Catcha,El,100,101,102


# 1. Select column(s)

In [2]:
# Bracket indexing with one column label returns that column as a Series.
df['Midterm']

a       90
b       80
c       90
dee     95
e      100
Name: Midterm, dtype: int64

In [3]:
# Attribute access selects the same column as df['Midterm'].
df.Midterm

a       90
b       80
c       90
dee     95
e      100
Name: Midterm, dtype: int64

In [4]:
# A list of column labels returns a DataFrame with those columns, in the listed order.
df[['Midterm','Last']]

Unnamed: 0,Midterm,Last
a,90,Eenie
b,80,Meenie
c,90,Miney
dee,95,Mo
e,100,Catcha


# 2. Select row(s)

In [5]:
# .loc with a single index label returns that row as a Series named after the label.
df.loc['a']

Last       Eenie
Given         Al
Midterm       90
Final         91
Grade         92
Name: a, dtype: object

In [6]:
# A list of labels returns the rows in the listed order; a repeated label repeats its row.
df.loc[['dee','a','a']]

Unnamed: 0,Last,Given,Midterm,Final,Grade
dee,Mo,D,95,96,97
a,Eenie,Al,90,91,92
a,Eenie,Al,90,91,92


In [7]:
# Label slices with .loc include both endpoints, so row 'c' is part of the result.
df.loc['a':'c']

Unnamed: 0,Last,Given,Midterm,Final,Grade
a,Eenie,Al,90,91,92
b,Meenie,Bo,80,81,82
c,Miney,Cat,90,91,92


In [8]:
# Positional slices with .iloc exclude the stop position: only rows 0 and 1 are returned.
df.iloc[0:2]

Unnamed: 0,Last,Given,Midterm,Final,Grade
a,Eenie,Al,90,91,92
b,Meenie,Bo,80,81,82


# 3. Select column(s) and row(s)

In [9]:
# .iloc[row, column] with two integer positions returns a single scalar value.
df.iloc[0,0]

'Eenie'

In [10]:
# .loc[row_label, column_label] returns the scalar stored at that row and column.
df.loc['a', 'Grade']

92

In [11]:
# Wrapping both labels in lists returns a one-row, one-column DataFrame instead of a scalar.
df.loc[['a'], ['Grade']]

Unnamed: 0,Grade
a,92


In [12]:
# ':' keeps every row; the open-ended label slice keeps 'Midterm' and every column after it.
df.loc[:, 'Midterm':]

Unnamed: 0,Midterm,Final,Grade
a,90,91,92
b,80,81,82
c,90,91,92
dee,95,96,97
e,100,101,102


In [13]:
# Lists on both axes pick rows and columns in the listed order.
df.loc[['e','a'], ['Given','Last']]

Unnamed: 0,Given,Last
e,El,Catcha
a,Al,Eenie


## **Warning.** Avoid "chained indexing" (two back-to-back `[]` lookups); use a single `.loc[row, column]` call instead:

In [14]:
df['Given']['dee'] # Not recommended: two separate lookups (column, then row); prefer df.loc['dee', 'Given']

'D'

In [15]:
df.Given['dee'] # Not recommended: still chained (attribute lookup, then row lookup); prefer df.loc['dee', 'Given']

'D'

# 4. Select row(s) with a boolean Series

In [16]:
# df.Given=='Cat' is a boolean Series; indexing df with it keeps only the rows where it is True.
df[df.Given=='Cat']

Unnamed: 0,Last,Given,Midterm,Final,Grade
c,Miney,Cat,90,91,92
