# .loc()
Pandas provides several methods for purely label-based indexing. When slicing by label, both the start and the stop bound are included. Integers are valid labels, but they refer to the label and not the position.

.loc() has multiple access methods like −

A single scalar label

A list of labels

A slice object

A Boolean array

.loc takes up to two selectors (a single label, a list of labels, or a slice) separated by ','. The first one selects rows and the second one selects columns.

In [4]:
#import the pandas library and aliasing as pd
import pandas as pd
import numpy as np

# Build an 8x4 frame of random normals: rows labelled 'a'..'h', columns 'A'..'D'.
row_labels = list('abcdefgh')
col_labels = list('ABCD')
df = pd.DataFrame(np.random.randn(8, 4), index=row_labels, columns=col_labels)
print(df)

# .loc[rows, columns]: a bare ':' keeps every row and 'A' picks one column,
# so the result is a Series.
print(df.loc[:, 'A'])

          A         B         C         D
a  1.096278 -1.647415 -0.251057  1.954675
b -0.580774  0.395721  0.393233 -1.973930
c  1.410728 -0.240370 -1.214701 -0.172524
d -0.628234 -1.880941 -1.412224  1.118922
e -0.158342  0.878600 -0.241386  1.936614
f -0.836674 -0.688087  1.004928 -0.356123
g  0.438677  1.502420 -0.664426  1.164653
h  1.953061  0.461443  2.655314  0.224409
a    1.096278
b   -0.580774
c    1.410728
d   -0.628234
e   -0.158342
f   -0.836674
g    0.438677
h    1.953061
Name: A, dtype: float64


In [5]:
# import the pandas library and aliasing as pd
import pandas as pd
import numpy as np

# Random 8x4 frame with string row labels and upper-case column labels.
values = np.random.randn(8, 4)
df = pd.DataFrame(values, index=list('abcdefgh'), columns=list('ABCD'))
print(df)

# Keep every row (':') but only the columns named in the list;
# the result is a DataFrame.
wanted_columns = ['A', 'C']
print(df.loc[:, wanted_columns])

          A         B         C         D
a  1.753347  0.548918  0.221170 -0.469401
b  0.126962  1.269080  0.193590 -0.528623
c -0.155304  0.078160 -0.825097  1.122959
d  1.971200 -2.889288 -1.551213 -0.717457
e  0.774243 -0.060985  1.511442  0.784624
f  1.380526  0.073035 -0.027343  0.071038
g -0.981993 -0.686952 -0.564366 -1.600899
h  0.555015 -0.332697  1.016569 -0.081587
          A         C
a  1.753347  0.221170
b  0.126962  0.193590
c -0.155304 -0.825097
d  1.971200 -1.551213
e  0.774243  1.511442
f  1.380526 -0.027343
g -0.981993 -0.564366
h  0.555015  1.016569


In [6]:
# import the pandas library and aliasing as pd
import pandas as pd
import numpy as np

df = pd.DataFrame(
    np.random.randn(8, 4),
    index=['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'],
    columns=['A', 'B', 'C', 'D'],
)
print(df)

# Both selectors are lists: pick rows 'a', 'b', 'f', 'h' and columns 'A', 'C'.
picked_rows = ['a', 'b', 'f', 'h']
picked_cols = ['A', 'C']
print(df.loc[picked_rows, picked_cols])

          A         B         C         D
a  1.832813  0.180657 -0.284105  0.305496
b -1.445140 -1.382291 -1.590597 -0.317220
c -0.853815 -0.814720 -0.705065  1.973263
d  0.635805  0.206310 -0.462340 -2.090601
e  0.043988  1.746199 -0.321536  0.618607
f -1.535004  0.582930 -0.435163 -0.031373
g  0.814033 -0.458521  0.600361 -2.901787
h  1.285314  0.974239  0.350725  0.102968
          A         C
a  1.832813 -0.284105
b -1.445140 -1.590597
f -1.535004 -0.435163
h  1.285314  0.350725


In [9]:
# import the pandas library and aliasing as pd
import pandas as pd
import numpy as np

labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
df = pd.DataFrame(np.random.randn(len(labels), 4), index=labels, columns=list('ABCD'))
print(df)

# A label slice with no column selector returns all columns; unlike positional
# slicing, the stop label 'd' is part of the result.
first_four = df.loc['a':'d']
print(first_four)

          A         B         C         D
a -0.563802 -0.415113 -0.162519 -0.507117
b  1.587827 -0.593185 -0.362173  1.138198
c -0.469905 -1.217095  1.155412  1.597632
d -1.218572  0.827482  0.410244 -0.496775
e  2.084680  0.610526 -1.146113 -0.726076
f  2.675339  0.296635  0.168502 -1.587805
g -0.473146  0.886598  0.281522  0.715216
h -0.704196 -1.208114  0.872015 -2.034957
          A         B         C         D
a -0.563802 -0.415113 -0.162519 -0.507117
b  1.587827 -0.593185 -0.362173  1.138198
c -0.469905 -1.217095  1.155412  1.597632
d -1.218572  0.827482  0.410244 -0.496775


In [11]:
# import the pandas library and aliasing as pd
import pandas as pd
import numpy as np

df = pd.DataFrame(np.random.randn(8, 4), index=list('abcdefgh'), columns=list('ABCD'))
print(df)

# Build a boolean array: for row 'a', flag each column whose value is positive.
# This prints the mask itself (a bool Series indexed by column name); it does
# not filter df. The mask could be passed back to .loc, e.g. df.loc[:, mask].
mask = df.loc['a'] > 0
print(mask)

          A         B         C         D
a  1.015093 -0.895725  3.193281 -0.578019
b  0.673328 -0.113202  0.992497 -0.196117
c -1.056500  0.868540  1.975858 -0.408475
d  0.752579  1.684633 -0.349043 -1.266912
e -0.412426 -0.930945  0.926463  0.131171
f -0.485452  0.808752 -0.693638 -0.924010
g -0.081315 -0.798329 -0.079063 -0.110479
h  2.036262 -1.544525 -1.034428 -0.636214
A     True
B    False
C     True
D    False
Name: a, dtype: bool


# .iloc()
Pandas provides several methods for purely integer-based indexing. As in Python and NumPy, positions are 0-based.

The various access methods are as follows −

An Integer

A list of integers

A range of values

In [12]:
# import the pandas library and aliasing as pd
import pandas as pd
import numpy as np

df = pd.DataFrame(np.random.randn(8, 4), columns=list('ABCD'))
print(df)

# Select the first four rows (positions 0-3) with all columns; the stop
# position 4 is excluded.
print(df.iloc[:4])

          A         B         C         D
0  0.268583 -0.548744  0.407880 -1.623845
1 -0.225715 -0.121146  0.565861 -0.401561
2 -1.365244  1.017681 -0.931980 -1.468910
3 -0.734514  0.064865  0.730856 -0.018607
4 -1.984188 -0.025511 -0.635995  1.278972
5 -0.870295  1.649157  0.459523  0.596338
6  0.396406  0.267798 -1.074098  1.146011
7  1.331481 -1.362131 -0.578477  2.793404
          A         B         C         D
0  0.268583 -0.548744  0.407880 -1.623845
1 -0.225715 -0.121146  0.565861 -0.401561
2 -1.365244  1.017681 -0.931980 -1.468910
3 -0.734514  0.064865  0.730856 -0.018607


In [13]:
import pandas as pd
import numpy as np

column_names = ['A', 'B', 'C', 'D']
df = pd.DataFrame(np.random.randn(8, len(column_names)), columns=column_names)
print(df)

# Positional slices exclude the stop position.
top_rows = df.iloc[:4]           # rows 0-3, every column
inner_block = df.iloc[1:5, 2:4]  # rows 1-4, columns at positions 2-3 ('C', 'D')
print(top_rows)
print(inner_block)

          A         B         C         D
0  2.034693  0.347546  1.181646  0.946756
1  1.580094  1.467627  0.007093  0.349166
2  0.468708 -1.094926 -0.298761 -0.855611
3 -0.355183 -0.764617 -1.357742  0.643384
4  0.154332  0.544894  1.222597  1.682360
5 -0.585404  0.876634 -0.881914  0.419987
6  0.154115 -0.309241 -0.539036 -0.903217
7  0.316781 -1.202801  0.714396 -2.130788
          A         B         C         D
0  2.034693  0.347546  1.181646  0.946756
1  1.580094  1.467627  0.007093  0.349166
2  0.468708 -1.094926 -0.298761 -0.855611
3 -0.355183 -0.764617 -1.357742  0.643384
          C         D
1  0.007093  0.349166
2 -0.298761 -0.855611
3 -1.357742  0.643384
4  1.222597  1.682360


In [14]:
import pandas as pd
import numpy as np

df = pd.DataFrame(np.random.randn(8, 4), columns=list('ABCD'))
print(df)

# Positional selectors, printed in turn:
#   1. lists of positions -> rows 1, 3, 5 and columns 1, 3 ('B', 'D')
#   2. a row slice with every column -> rows 1-2
#   3. every row with a column slice -> columns 1-2 ('B', 'C')
selections = (
    df.iloc[[1, 3, 5], [1, 3]],
    df.iloc[1:3, :],
    df.iloc[:, 1:3],
)
for selected in selections:
    print(selected)

          A         B         C         D
0 -0.621386 -0.678477 -2.101714 -1.930692
1  0.701737  0.829515 -0.235221 -0.228226
2 -0.132358 -0.695124 -0.557506 -0.615735
3  0.343472 -1.435389 -0.589702 -0.665028
4  2.133530 -0.528777 -1.087123 -0.157017
5 -0.750968  0.625724  1.195847  0.023428
6  0.234251 -0.048307  0.779209 -0.794262
7 -0.998000  0.450051 -1.702466  1.806221
          B         D
1  0.829515 -0.228226
3 -1.435389 -0.665028
5  0.625724  0.023428
          A         B         C         D
1  0.701737  0.829515 -0.235221 -0.228226
2 -0.132358 -0.695124 -0.557506 -0.615735
          B         C
0 -0.678477 -2.101714
1  0.829515 -0.235221
2 -0.695124 -0.557506
3 -1.435389 -0.589702
4 -0.528777 -1.087123
5  0.625724  1.195847
6 -0.048307  0.779209
7  0.450051 -1.702466


# Let us now see how each operation can be performed on the DataFrame object. We will use the basic indexing operator '[ ]' −

In [22]:
import pandas as pd
import numpy as np

df = pd.DataFrame(np.random.randn(8, 4), columns=list('ABCD'))

# Select a single column by its label with the basic [] operator; the result
# is a Series named after the column.
print(df['A'])

# Note − We can pass a list of values to [ ] to select those columns.

0   -0.189627
1    0.973172
2    0.753107
3    0.026381
4   -0.649309
5   -0.385841
6    1.376621
7    0.120143
Name: A, dtype: float64


In [23]:
import pandas as pd
import numpy as np
df = pd.DataFrame(np.random.randn(8, 4), columns=list('ABCD'))

# A list of labels inside [] selects several columns and returns a DataFrame.
chosen = ['A', 'B']
print(df[chosen])

          A         B
0 -0.009118  1.375696
1 -0.240983  0.094958
2  1.010420  0.740407
3  0.698165 -0.996659
4 -1.600820 -1.896138
5  1.197526  1.644210
6 -0.148386  0.506974
7  0.648166  0.304064


# Attribute Access
Columns can be selected using the attribute operator '.'

In [25]:
import pandas as pd
import numpy as np
df = pd.DataFrame(np.random.randn(8, 4), columns=list('ABCD'))

# Attribute access: df.A returns the same column as df['A'].
col_a = df.A
print(col_a)

0   -1.462288
1    0.318190
2   -0.967824
3   -1.626723
4   -0.281840
5    0.124161
6   -0.287092
7   -0.102288
Name: A, dtype: float64
