In [1]:
import pandas as pd
from pandas import Series, DataFrame
import numpy as np

# Series indexing

In [3]:
# Float Series holding 0.0 through 3.0, labelled 'a' through 'd'
obj = pd.Series(np.arange(4.0), index=list('abcd'))
obj

a    0.0
b    1.0
c    2.0
d    3.0
dtype: float64

In [4]:
# Single entry, selected by its index label
obj['b']

1.0

In [7]:
# Single entry by integer position.
# .iloc is used for positions: plain obj[0] on a string-labelled index relies on
# a positional fallback that pandas has deprecated (the integer is meant to be
# read as a label), so the explicit positional indexer is the reliable spelling.
obj.iloc[0]

0.0

In [8]:
# A range of positions
# An integer slice is positional and, like normal Python slicing, excludes the endpoint.
obj[1:3]

b    1.0
c    2.0
dtype: float64

In [9]:
# Multiple labels
# A list of labels returns the entries in the order listed.
obj[['b', 'a', 'd']]

b    1.0
a    0.0
d    3.0
dtype: float64

In [10]:
# Multiple entries by integer position.
# .iloc is used because passing integers to plain [] on a string-labelled index
# relies on a deprecated positional fallback.
obj.iloc[[1, 3]]

b    1.0
d    3.0
dtype: float64

In [16]:
# Boolean index
# obj<2 builds a boolean Series (second print); indexing with it keeps only
# the entries where it is True (first print).
print(obj[obj<2])
print(obj<2)

a    0.0
b    1.0
dtype: float64
a     True
b     True
c    False
d    False
dtype: bool


In [17]:
# Slicing with labels differs from normal Python slicing: the endpoint is inclusive,
# so 'b':'c' returns both 'b' and 'c'.
obj['b':'c']

b    1.0
c    2.0
dtype: float64

In [19]:
# Assign values with slicing
# The label slice is inclusive, so both 'b' and 'c' are set to 5; obj itself is modified.
obj['b':'c'] = 5
obj

a    0.0
b    5.0
c    5.0
d    3.0
dtype: float64

# DataFrame indexing

In [20]:
# 4x4 frame of the integers 0..15, filled row by row
data = pd.DataFrame(
    np.arange(16).reshape(4, 4),
    index=['Ohio', 'Colorado', 'Utah', 'New York'],
    columns=['one', 'two', 'three', 'four'],
)
data

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [29]:
# Single column
# Passing a single label or a list of labels to the [] operator selects columns;
# a single label returns that column as a Series named after the column.
data['two']

Ohio         1
Colorado     5
Utah         9
New York    13
Name: two, dtype: int32

In [28]:
# Single row
# Rows are selected by label with .loc; the row comes back as a Series
# indexed by the column names (see the printed type).
print(data.loc['Ohio'])
print(type(data.loc['Ohio']))

one      0
two      1
three    2
four     3
Name: Ohio, dtype: int32
<class 'pandas.core.series.Series'>


In [22]:
# Multiple columns
# A list of labels returns a DataFrame with the columns in the order listed.
data[['three', 'one']]

Unnamed: 0,three,one
Ohio,2,0
Colorado,6,4
Utah,10,8
New York,14,12


In [23]:
# Row slicing
# Unlike a label or a list of labels (which select columns), a slice inside []
# selects rows; data[:2] returns the first two rows.
data[:2]

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7


In [24]:
# Boolean indexing
# A boolean Series inside [] filters rows: keep the rows where 'three' > 5.
data[data['three']>5]

Unnamed: 0,one,two,three,four
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [30]:
# Comparing the whole frame with a scalar gives a boolean DataFrame of the same shape.
data<5

Unnamed: 0,one,two,three,four
Ohio,True,True,True,True
Colorado,True,False,False,False
Utah,False,False,False,False
New York,False,False,False,False


In [31]:
# Indexing with a boolean DataFrame keeps the original shape; entries where the
# condition is False come back as missing values (the blank cells in the output).
data[data<5]

Unnamed: 0,one,two,three,four
Ohio,0.0,1.0,2.0,3.0
Colorado,4.0,,,
Utah,,,,
New York,,,,


In [33]:
# data[data < 5] on its own returns a DataFrame of the original shape, but used as
# an assignment target it only overwrites the entries where the condition holds.
data[data < 5] = 0
data

Unnamed: 0,one,two,three,four
Ohio,0,0,0,0
Colorado,0,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


# loc, iloc

In [34]:
# Rebuild the original 4x4 frame (integers 0..15, filled row by row)
# so the examples below start from unmodified values.
data = pd.DataFrame(
    np.arange(16).reshape(4, 4),
    index=['Ohio', 'Colorado', 'Utah', 'New York'],
    columns=['one', 'two', 'three', 'four'],
)
data

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [36]:
# 'loc': label-based indexing
# Note that it is an indexer, not a function, so we use [] instead of ().
# We can specify row and column labels at the same time.
data.loc['Colorado', ['two','three']]

two      5
three    6
Name: Colorado, dtype: int32

In [37]:
# 'iloc': integer-position-based indexing (0-based row/column positions, not labels).
# Here: the row at position 2 ('Utah') and the columns at positions 1 and 3 ('two', 'four').
data.iloc[2, [1,3]]

two      9
four    11
Name: Utah, dtype: int32

In [39]:
# When a single integer is provided, iloc returns the row at that position as a Series.
data.iloc[2]

one       8
two       9
three    10
four     11
Name: Utah, dtype: int32

In [40]:
# Lists of positions select several rows and columns at once, in the order given.
data.iloc[[1, 2], [3, 0, 1]]

Unnamed: 0,four,one,two
Colorado,7,4,5
Utah,11,8,9


In [42]:
# Index slicing
# Again, label slicing with loc is inclusive, so the 'Utah' row is included.
data.loc[:'Utah', 'two']

Ohio        1
Colorado    5
Utah        9
Name: two, dtype: int32

In [53]:
# Combine a column selection with a boolean row filter
print(data)
print(data.iloc[:, :3])              # first three columns, all rows
print(data.loc[data['four'] > 10])   # rows where 'four' > 10, all columns
# Both selections at once (the intersection of the two above).
# A single .loc call replaces the chained form data.iloc[:, :3][mask], which
# indexes twice through an intermediate object; the printed result is the same.
print(data.loc[data['four'] > 10, data.columns[:3]])

          one  two  three  four
Ohio        0    1      2     3
Colorado    4    5      6     7
Utah        8    9     10    11
New York   12   13     14    15
          one  two  three
Ohio        0    1      2
Colorado    4    5      6
Utah        8    9     10
New York   12   13     14
          one  two  three  four
Utah        8    9     10    11
New York   12   13     14    15
          one  two  three
Utah        8    9     10
New York   12   13     14
