### Indexing, selection, and filtering

In [1]:
import numpy as np
import pandas as pd

Series indexing (obj[...]) works analogously to NumPy array indexing, except you can
use the Series’s index values instead of only integers. Here are some examples this:

In [2]:
obj=pd.Series(np.arange(4),dtype=int,index=['a','b','c','d'])

In [3]:
obj

a    0
b    1
c    2
d    3
dtype: int32

In [4]:
obj['b']

1

In [5]:
obj[1]

1

In [6]:
obj[1:]

b    1
c    2
d    3
dtype: int32

In [7]:
obj['b':]

b    1
c    2
d    3
dtype: int32

In [8]:
obj[['b','c','d']]

b    1
c    2
d    3
dtype: int32

In [9]:
obj[obj>1]

c    2
d    3
dtype: int32

In [10]:
obj[1:2]

b    1
dtype: int32

In [11]:
obj['b':'d']

b    1
c    2
d    3
dtype: int32

In [15]:
obj.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [16]:
obj[1:3]

b    5
c    5
dtype: int32

In [17]:
obj['b':'c']=5

In [18]:
obj

a    0
b    5
c    5
d    3
dtype: int32

In [27]:
data = pd.DataFrame(np.arange(16).reshape((4, 4)),
index=['Trichy','Namakkal','Chennai','Erode'],
columns=['one', 'two', 'three', 'four'])

In [28]:
data

Unnamed: 0,one,two,three,four
Trichy,0,1,2,3
Namakkal,4,5,6,7
Chennai,8,9,10,11
Erode,12,13,14,15


In [30]:
data['one']

Trichy       0
Namakkal     4
Chennai      8
Erode       12
Name: one, dtype: int32

In [31]:
data[:2]

Unnamed: 0,one,two,three,four
Trichy,0,1,2,3
Namakkal,4,5,6,7


In [32]:
data[1:]

Unnamed: 0,one,two,three,four
Namakkal,4,5,6,7
Chennai,8,9,10,11
Erode,12,13,14,15


In [33]:
data[3:]

Unnamed: 0,one,two,three,four
Erode,12,13,14,15


In [34]:
data[data['three']>10]

Unnamed: 0,one,two,three,four
Erode,12,13,14,15


In [35]:
data<5

Unnamed: 0,one,two,three,four
Trichy,True,True,True,True
Namakkal,True,False,False,False
Chennai,False,False,False,False
Erode,False,False,False,False


In [38]:
data.ix['Namakkal',['two','three']]

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  """Entry point for launching an IPython kernel.


two      5
three    6
Name: Namakkal, dtype: int32

In [40]:
data.ix[['Namakkal','Trichy'], [3,1,0,2]]


.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  """Entry point for launching an IPython kernel.


Unnamed: 0,four,two,one,three
Namakkal,7,5,4,6
Trichy,3,1,0,2


In [41]:
data

Unnamed: 0,one,two,three,four
Trichy,0,1,2,3
Namakkal,4,5,6,7
Chennai,8,9,10,11
Erode,12,13,14,15


In [43]:
data

Unnamed: 0,one,two,three,four
Trichy,0,1,2,3
Namakkal,4,5,6,7
Chennai,8,9,10,11
Erode,12,13,14,15


In [48]:
data.ix[data.three >8 ,:3]

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  """Entry point for launching an IPython kernel.


Unnamed: 0,one,two,three
Chennai,8,9,10
Erode,12,13,14


### Arithmetic and data alignment

In [50]:
s1 = pd.Series([7.3, -2.5, 3.4, 1.5], index=['a', 'c', 'd', 'e'])
s2 = pd.Series([-2.1, 3.6, -1.5, 4, 3.1], index=['a', 'c', 'e', 'f', 'g'])

In [51]:
s1+s2

a    5.2
c    1.1
d    NaN
e    0.0
f    NaN
g    NaN
dtype: float64

In [52]:
s2+s1

a    5.2
c    1.1
d    NaN
e    0.0
f    NaN
g    NaN
dtype: float64

Arithmetic methods with fill values

In [57]:
df1 = pd.DataFrame(np.arange(12.).reshape((3, 4)), columns=list('abcd'))
df2 = pd.DataFrame(np.arange(20.).reshape((4, 5)), columns=list('abcde'))

In [58]:
df1

Unnamed: 0,a,b,c,d
0,0.0,1.0,2.0,3.0
1,4.0,5.0,6.0,7.0
2,8.0,9.0,10.0,11.0


In [59]:
df2

Unnamed: 0,a,b,c,d,e
0,0.0,1.0,2.0,3.0,4.0
1,5.0,6.0,7.0,8.0,9.0
2,10.0,11.0,12.0,13.0,14.0
3,15.0,16.0,17.0,18.0,19.0


In [61]:
df1+df2

Unnamed: 0,a,b,c,d,e
0,0.0,2.0,4.0,6.0,
1,9.0,11.0,13.0,15.0,
2,18.0,20.0,22.0,24.0,
3,,,,,


In [64]:
print(df1+df2)

      a     b     c     d   e
0   0.0   2.0   4.0   6.0 NaN
1   9.0  11.0  13.0  15.0 NaN
2  18.0  20.0  22.0  24.0 NaN
3   NaN   NaN   NaN   NaN NaN


In [65]:
df1.add(df2,fill_value=0)

Unnamed: 0,a,b,c,d,e
0,0.0,2.0,4.0,6.0,4.0
1,9.0,11.0,13.0,15.0,9.0
2,18.0,20.0,22.0,24.0,14.0
3,15.0,16.0,17.0,18.0,19.0


In [66]:
df1

Unnamed: 0,a,b,c,d
0,0.0,1.0,2.0,3.0
1,4.0,5.0,6.0,7.0
2,8.0,9.0,10.0,11.0


Relatedly, when reindexing a Series or DataFrame, you can also specify a different fill
value:

In [67]:
df1.reindex(columns=df2.columns,fill_value=0)

Unnamed: 0,a,b,c,d,e
0,0.0,1.0,2.0,3.0,0
1,4.0,5.0,6.0,7.0,0
2,8.0,9.0,10.0,11.0,0


### Operations between DataFrame and Series

In [68]:
arr=np.arange(9).reshape(3,3)

In [69]:
arr

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [71]:
arr[0]

array([0, 1, 2])

In [77]:
arr[1],arr[2]

(array([3, 4, 5]), array([6, 7, 8]))

In [24]:
new=arr-arr[0]
new

array([[0, 0, 0],
       [3, 3, 3],
       [6, 6, 6]])

In [78]:
frame =pd.DataFrame(np.arange(12.).reshape((4, 3)), columns=list('bde'),
index=['Trichy','Namakkal','Chennai','Erode'])

In [79]:
frame

Unnamed: 0,b,d,e
Trichy,0.0,1.0,2.0
Namakkal,3.0,4.0,5.0
Chennai,6.0,7.0,8.0
Erode,9.0,10.0,11.0


In [82]:
series=frame.ix[1]

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  """Entry point for launching an IPython kernel.


In [83]:
series

b    3.0
d    4.0
e    5.0
Name: Namakkal, dtype: float64

In [84]:
frame

Unnamed: 0,b,d,e
Trichy,0.0,1.0,2.0
Namakkal,3.0,4.0,5.0
Chennai,6.0,7.0,8.0
Erode,9.0,10.0,11.0


In [85]:
series2=pd.Series(np.arange(3),index=['b','d','e'])
series2

b    0
d    1
e    2
dtype: int32

In [87]:
frame

Unnamed: 0,b,d,e
Trichy,0.0,1.0,2.0
Namakkal,3.0,4.0,5.0
Chennai,6.0,7.0,8.0
Erode,9.0,10.0,11.0


In [89]:
series2

b    0
d    1
e    2
dtype: int32

In [90]:
frame-series2

Unnamed: 0,b,d,e
Trichy,0.0,0.0,0.0
Namakkal,3.0,3.0,3.0
Chennai,6.0,6.0,6.0
Erode,9.0,9.0,9.0


In [91]:
series3=frame['d']

In [92]:
series3

Trichy       1.0
Namakkal     4.0
Chennai      7.0
Erode       10.0
Name: d, dtype: float64

In [99]:
c=frame.sub(series3,axis=0)
c

Unnamed: 0,b,d,e
Trichy,-1.0,0.0,1.0
Namakkal,-1.0,0.0,1.0
Chennai,-1.0,0.0,1.0
Erode,-1.0,0.0,1.0


### Function application and mapping


In [100]:
np.abs(c)

Unnamed: 0,b,d,e
Trichy,1.0,0.0,1.0
Namakkal,1.0,0.0,1.0
Chennai,1.0,0.0,1.0
Erode,1.0,0.0,1.0


In [102]:
f=lambda x:x.max()-x.min()

In [103]:
frame

Unnamed: 0,b,d,e
Trichy,0.0,1.0,2.0
Namakkal,3.0,4.0,5.0
Chennai,6.0,7.0,8.0
Erode,9.0,10.0,11.0


In [105]:
frame.apply(f)

b    9.0
d    9.0
e    9.0
dtype: float64

In [106]:
frame.apply(f,axis=1)

Trichy      2.0
Namakkal    2.0
Chennai     2.0
Erode       2.0
dtype: float64

In [109]:
def f(x):
    return pd.Series([x.min(), x.max(),x.max()-x.min()], index=['min', 'max','diff'])
 

In [110]:
frame.apply(f)

Unnamed: 0,b,d,e
min,0.0,1.0,2.0
max,9.0,10.0,11.0
diff,9.0,9.0,9.0


In [113]:
frame

Unnamed: 0,b,d,e
Trichy,0.0,1.0,2.0
Namakkal,3.0,4.0,5.0
Chennai,6.0,7.0,8.0
Erode,9.0,10.0,11.0


In [114]:
format = lambda x: '%d' % x
frame.applymap(format)

Unnamed: 0,b,d,e
Trichy,0,1,2
Namakkal,3,4,5
Chennai,6,7,8
Erode,9,10,11


In [115]:
frame['b'].map(format)

Trichy      0
Namakkal    3
Chennai     6
Erode       9
Name: b, dtype: object