In [1]:
import pandas as pd
import numpy as np

# fast scalar value getting and setting
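### at  ~ loc  (label-based, but for a single scalar value only)
### iat ~ iloc (integer-position-based, but for a single scalar value only)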

In [2]:
# Small labelled Series used by the scalar-access examples.
s = pd.Series([1, 2, 3, 4], index=list('abcd'))

In [3]:
# Show the Series: index labels on the left, values on the right.
s

a    1
b    2
c    3
d    4
dtype: int64

In [4]:
s.iat[1]  # iat takes an integer position: 1 is the second element

2

In [7]:
# Two integer columns on the default 0..3 row index.
df = pd.DataFrame({
    'A': list(range(1, 5)),
    'B': list(range(4, 8)),
})
df

Unnamed: 0,A,B
0,1,4
1,2,5
2,3,6
3,4,7


In [11]:
df.at[1 , 'A']  # at takes labels: row label 1, column label 'A'

2

In [12]:
df.iat[0,1]  # iat takes integer positions: first row, second column

4

In [13]:
# at / iat can also be used to assign a single value.
df.at[1,'B'] = 53  # addressed by label
# Positional alternative, left disabled: df.iat[1 , 1] = 53
df

Unnamed: 0,A,B
0,1,4
1,2,53
2,3,6
3,4,7


In [14]:
# If the requested label does not exist yet, assigning through at adds it.
# 'C' is not an existing column here, so the assignment creates it; the rows
# other than 0 are left missing in the new column.
df.at[0 , 'C'] = 52
df

Unnamed: 0,A,B,C
0,1,4,52.0
1,2,53,
2,3,6,
3,4,7,


# boolean indexing
#### | = or
#### & = and
#### ~ = not

In [15]:
# Index a Series with a boolean vector.
s = pd.Series(range(-3 , 4))
s

0   -3
1   -2
2   -1
3    0
4    1
5    2
6    3
dtype: int32

In [16]:
s[s>0]  # keep only the elements where the mask s>0 is True

4    1
5    2
6    3
dtype: int32

In [17]:
s[(s<-1)|(s>0.5)]  # | combines two masks element-wise; each comparison is parenthesised

0   -3
1   -2
4    1
5    2
6    3
dtype: int32

In [18]:
s[~(s<0)]  # ~ inverts the mask: keeps the elements that are not negative

3    0
4    1
5    2
6    3
dtype: int32

In [20]:
# The mask holds one boolean per row; rows where column 'A' is even are kept.
df[df['A']%2==0]

Unnamed: 0,A,B,C
1,2,53,
3,4,7,


In [21]:
# list and map method
# Column 'c' is drawn from a seeded generator so the frame is identical on
# every Restart & Run All (an unseeded np.random.randn gives new numbers each run).
df2 = pd.DataFrame({'a': ['one', 'one', 'two', 'three', 'two', 'one', 'six'],
                    'b': ['x', 'y', 'y', 'x', 'y', 'x', 'x'],
                    'c': np.random.RandomState(0).randn(7)})
df2

Unnamed: 0,a,b,c
0,one,x,1.40531
1,one,y,0.74084
2,two,y,1.75596
3,three,x,-3.181328
4,two,y,0.878512
5,one,x,0.05771
6,six,x,2.359227


In [25]:
# Rows whose column 'a' is 'two' or 'three'; each comparison is wrapped in
# parentheses before | combines them.
df2[ (df2['a'] == 'two') | (df2['a'] == 'three')]

Unnamed: 0,a,b,c
2,two,y,1.75596
3,three,x,-3.181328
4,two,y,0.878512


In [27]:
# Build a boolean mask by mapping a predicate over column 'a':
# True where the string starts with 't'.
criterion = df2['a'].map(lambda x: x.startswith('t'))
df2[criterion]

Unnamed: 0,a,b,c
2,two,y,1.75596
3,three,x,-3.181328
4,two,y,0.878512


In [28]:
# Same selection as the map-based mask, built with a list comprehension instead
# (noted by the original author as somewhat less efficient).
df2[[x.startswith('t') for x in df2['a']]]

Unnamed: 0,a,b,c
2,two,y,1.75596
3,three,x,-3.181328
4,two,y,0.878512


In [29]:
# Several conditions can be combined in a single filter with &.
df2[criterion & (df2['b'] == 'x')]

Unnamed: 0,a,b,c
3,three,x,-3.181328


In [30]:
# Boolean expressions used directly on a DataFrame filter rows only; with .loc
# the same row mask is paired with a column selection (the label slice 'b':'c').
df2.loc[criterion & (df2['b'] == 'x') , 'b':'c']

Unnamed: 0,b,c
3,x,-3.181328


# indexing with isin
##### Pass a list of values: isin returns a boolean mask marking which elements of the Series are contained in that list

In [31]:
# Values 0..4 stored under the descending labels 4..0.
s = pd.Series(
    np.arange(5),
    index=np.arange(4, -1, -1),
    dtype='int64',
)
s

4    0
3    1
2    2
1    3
0    4
dtype: int64

In [32]:
s.isin([2,4,6])  # True where the element of s is one of 2, 4, 6


4    False
3    False
2     True
1    False
0     True
dtype: bool

In [33]:
s[s.isin([2,4,6])]  # use the isin mask to keep only the matching elements

2    2
0    4
dtype: int64

In [34]:
# Series whose index mixes string and integer labels.
s1 = pd.Series(
    data=[1, 2, 3, 4],
    index=['fei', 2, '2k', 4],
)
s1

fei    1
2      2
2k     3
4      4
dtype: int64

In [35]:
s1.index.isin(['fei' , 4])  # isin also works on the index: it tests each label

array([ True, False, False,  True], dtype=bool)

In [37]:
# The arguments to isin are actual values / index labels, not integer positions.
s1[s1.index.isin(['fei' , 4])]

fei    1
4      4
dtype: int64

In [38]:
# Multi-level index: every combination of outer level {0, 1} and inner level {'a', 'b', 'c'}.
s_mi = pd.Series(
    np.arange(6),
    index=pd.MultiIndex.from_product([[0, 1], list('abc')]),
)
s_mi

0  a    0
   b    1
   c    2
1  a    3
   b    4
   c    5
dtype: int32

In [39]:
s_mi.index  # inspect the MultiIndex of s_mi

MultiIndex(levels=[[0, 1], ['a', 'b', 'c']],
           labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]])

In [40]:
# With full tuples, isin tests whole index entries; (2,'b') is not in the
# index, so it simply matches nothing.
s_mi.iloc[s_mi.index.isin([(1,'a'),(2,'b'),(0,'c')])]

0  c    2
1  a    3
dtype: int32

In [42]:
# level=1 tests only the second index level; 'e' does not occur there.
s_mi.iloc[s_mi.index.isin(['a','b','e'] , level=1)]

0  a    0
   b    1
1  a    3
   b    4
dtype: int32

In [43]:
# isin works on a DataFrame as well; this frame is the subject of the examples.
df = pd.DataFrame({
    'vals': [1, 2, 3, 4],
    'ids': list('abfn'),
    'ids2': list('ancn'),
})
df

Unnamed: 0,ids,ids2,vals
0,a,a,1
1,b,n,2
2,f,c,3
3,n,n,4


In [44]:
# A flat list is tested against every cell of the frame, whatever its column.
values = ['a' , 'b', 1 , 3]
df.isin(values)

Unnamed: 0,ids,ids2,vals
0,True,True,True
1,True,False,False
2,False,False,True
3,False,False,False


In [45]:
df[df.isin(values)]  # cells where the mask is False come back as missing values

Unnamed: 0,ids,ids2,vals
0,a,a,1.0
1,b,,
2,,,3.0
3,,,


Oftentimes you’ll want to match certain values with certain columns. Just make values a dict where the key is the column, and the value is a list of items you want to check for.

In [46]:
# Dict form: each key is a column name and its list holds the values accepted in that column.
values = {'ids':['a', 'b'] , 'vals':[1,3]}
df.isin(values)

Unnamed: 0,ids,ids2,vals
0,True,False,True
1,True,False,False
2,False,False,True
3,False,False,False


In [47]:
df[df.isin(values)]  # a column with no entry in the dict (ids2) is masked out entirely

Unnamed: 0,ids,ids2,vals
0,a,,1.0
1,b,,
2,,,3.0
3,,,


In [48]:
# One list of accepted values per column, covering all three columns.
values = {'ids':['a','b'],
         'ids2':['a','c'],
         'vals':[1,3]}
df.isin(values)

Unnamed: 0,ids,ids2,vals
0,True,True,True
1,True,False,False
2,False,True,True
3,False,False,False


In [52]:
# DataFrame.all returns whether all elements are True over the requested axis.
# axis=1 reduces across the columns of each row, so a row is True only if every
# column matched. The axis is named explicitly rather than passed as a bare
# positional 1, which is easy to misread and is being phased out in newer pandas.
df.isin(values).all(axis=1)

0     True
1    False
2    False
3    False
dtype: bool

In [55]:
# Keep only the rows in which every column matched its accepted values.
# The axis is passed by keyword so the row-wise direction is explicit.
df[df.isin(values).all(axis=1)]

Unnamed: 0,ids,ids2,vals
0,a,a,1


In [54]:
# Row-wise truthiness of the frame itself (not of an isin mask).
# The axis is passed by keyword so the row-wise direction is explicit.
df.all(axis=1)

0    True
1    True
2    True
3    True
dtype: bool

# the where method and masking
https://pandas.pydata.org/docs/user_guide/indexing.html#the-where-method-and-masking