In [1]:
import pandas as pd
import numpy as np

In [2]:
# Demo frame used throughout the notebook: four numeric columns, where
# A, B and D each contain exactly one missing value (None becomes NaN).
df = pd.DataFrame(
    dict(
        A=[5, 3, None, 4],
        B=[None, 2, 4, 3],
        C=[4, 3, 8, 5],
        D=[5, 4, 2, None],
    )
)
df

Unnamed: 0,A,B,C,D
0,5.0,,4,5.0
1,3.0,2.0,3,4.0
2,,4.0,8,2.0
3,4.0,3.0,5,


### 定位

In [3]:
# Single-bracket selection with one label returns column A as a Series.
df['A']

0    5.0
1    3.0
2    NaN
3    4.0
Name: A, dtype: float64

In [4]:
# Column selection: to pick several columns, pass a list of labels.
# Even a one-element list returns a DataFrame rather than a Series.
df[['A']]

Unnamed: 0,A
0,5.0
1,3.0
2,
3,4.0


In [5]:
# A list of labels selects columns A and B and returns a DataFrame.
df[['A', 'B']]

Unnamed: 0,A,B
0,5.0,
1,3.0,2.0
2,,4.0
3,4.0,3.0


In [6]:
# Select a range of columns by position: columns 1 and 2 (B and C).
# The row stop of 5 exceeds the 4 rows present; positional slices are
# clipped to the available rows instead of raising.
df.iloc[:5, 1:3]

Unnamed: 0,B,C
0,,4
1,2.0,3
2,4.0,8
3,3.0,5


In [7]:
# Show the full frame again for reference before the .loc examples.
df

Unnamed: 0,A,B,C,D
0,5.0,,4,5.0
1,3.0,2.0,3,4.0
2,,4.0,8,2.0
3,4.0,3.0,5,


In [8]:
# Label-based selection. With the default integer index, the slice :3 is
# matched against labels and includes label 3, so all four rows come back.
df.loc[:3, ['A', 'B']]

Unnamed: 0,A,B
0,5.0,
1,3.0,2.0
2,,4.0
3,4.0,3.0


In [9]:
# Replace the default integer index with string labels. This mutates df in
# place; the .loc examples that follow depend on these labels.
df.index = ['a', 'b', 'c', 'd']

In [10]:
# With string labels, .loc takes a list of row labels and a list of column labels.
df.loc[['a','b'], ['A', 'B']]

Unnamed: 0,A,B
a,5.0,
b,3.0,2.0


In [11]:
# Intentional error demo: the index now holds string labels, so the integer
# slice bound 3 is not a valid label and .loc raises TypeError.
# The exception is caught and printed so that "Restart & Run All" can
# continue past this cell while still showing the error message.
try:
    df.loc[:3, ['A', 'B']]
except TypeError as exc:
    print(f"{type(exc).__name__}: {exc}")

TypeError: cannot do slice indexing on <class 'pandas.core.indexes.base.Index'> with these indexers [3] of <class 'int'>

### 选择

In [12]:
# Current state of the frame (string index a-d) before filtering.
df

Unnamed: 0,A,B,C,D
a,5.0,,4,5.0
b,3.0,2.0,3,4.0
c,,4.0,8,2.0
d,4.0,3.0,5,


In [13]:
# Boolean-mask filtering: keep rows where A < 5 and C < 4.
# Each comparison is parenthesised because & binds tighter than <.
df[(df['A'] < 5) & (df['C'] < 4)]

Unnamed: 0,A,B,C,D
b,3.0,2.0,3,4.0


### 数值替换

In [14]:
# Series.replace returns a new Series with 4 swapped for 'DD'; df itself is
# not changed here. Mixing a string into the column makes the dtype object.
df['C'].replace(4, 'DD')

a    DD
b     3
c     8
d     5
Name: C, dtype: object

In [15]:
# Assign the result back to persist the replacement in df.
# From here on, column C holds a mix of the string 'DD' and integers.
df['C'] = df['C'].replace(4, 'DD')
df

Unnamed: 0,A,B,C,D
a,5.0,,DD,5.0
b,3.0,2.0,3,4.0
c,,4.0,8,2.0
d,4.0,3.0,5,


In [16]:
# Replace every NaN in the frame with 0. A new frame is returned; df keeps its NaNs.
df.replace(np.nan, 0)

Unnamed: 0,A,B,C,D
a,5.0,0.0,DD,5.0
b,3.0,2.0,3,4.0
c,0.0,4.0,8,2.0
d,4.0,3.0,5,0.0


In [17]:
# Paired lists: each value in the first list is replaced by the value at the
# same position in the second list (2 -> 200, 4 -> 400).
df.replace([2, 4], [200, 400])

Unnamed: 0,A,B,C,D
a,5.0,,DD,5.0
b,3.0,200.0,3,400.0
c,,400.0,8,200.0
d,400.0,3.0,5,


In [18]:
# Dict form of the same mapping: keys are the old values, values are the replacements.
df.replace({2: 200, 4: 400})

Unnamed: 0,A,B,C,D
a,5.0,,DD,5.0
b,3.0,200.0,3,400.0
c,,400.0,8,200.0
d,400.0,3.0,5,


In [19]:
# A list of old values with one scalar replaces all of them with that single value.
df.replace([2, 4], 1000)

Unnamed: 0,A,B,C,D
a,5.0,,DD,5.0
b,3.0,1000.0,3,1000.0
c,,1000.0,8,1000.0
d,1000.0,3.0,5,


### 排序

In [20]:
# Sort rows by column A in ascending order; the row whose A is NaN is placed last.
df.sort_values(by=['A'], ascending=True)

Unnamed: 0,A,B,C,D
b,3.0,2.0,3,4.0
d,4.0,3.0,5,
a,5.0,,DD,5.0
c,,4.0,8,2.0


In [21]:
# Add one row and renumber the index 0..n-1 (the string labels a-d are dropped).
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so the row
# is wrapped in a one-row frame and joined with pd.concat instead.
# Note: this cell is not idempotent; re-running it adds the row again.
df = pd.concat(
    [df, pd.DataFrame([{'A': 4.0, 'B': 5.0, 'C': 2.0, 'D': 8.0}])],
    ignore_index=True,
)

In [22]:
# Frame after adding the fifth row; the index is now the integers 0-4.
df

Unnamed: 0,A,B,C,D
0,5.0,,DD,5.0
1,3.0,2.0,3,4.0
2,,4.0,8,2.0
3,4.0,3.0,5,
4,4.0,5.0,2,8.0


In [23]:
# Multi-key sort: A ascending, with ties broken by B descending
# (visible in the two rows where A == 4.0).
df.sort_values(by=['A', 'B'], ascending=[True, False])

Unnamed: 0,A,B,C,D
1,3.0,2.0,3,4.0
4,4.0,5.0,2,8.0
3,4.0,3.0,5,
0,5.0,,DD,5.0
2,,4.0,8,2.0


### 删除

In [24]:
# Drop column A (axis=1 targets columns). A new frame is returned; df still has A.
df.drop('A', axis=1)

Unnamed: 0,B,C,D
0,,DD,5.0
1,2.0,3,4.0
2,4.0,8,2.0
3,3.0,5,
4,5.0,2,8.0


In [25]:
# Drop the row labelled 3 (axis=0 targets the index). A new frame is returned.
df.drop(3, axis=0)

Unnamed: 0,A,B,C,D
0,5.0,,DD,5.0
1,3.0,2.0,3,4.0
2,,4.0,8,2.0
4,4.0,5.0,2,8.0


In [26]:
# Element-wise comparison yields a boolean Series; the NaN in row 2 compares as False.
df['A'] < 4

0    False
1     True
2    False
3    False
4    False
Name: A, dtype: bool

In [27]:
# Use the boolean Series as a mask: only rows where A < 4 are kept,
# which effectively removes every other row from the result.
df[df['A'] < 4]

Unnamed: 0,A,B,C,D
1,3.0,2.0,3,4.0


### 行列互换

In [28]:
# Transpose: column labels become the index and row labels become the columns.
df.T

Unnamed: 0,0,1,2,3,4
A,5,3,,4.0,4
B,,2,4.0,3.0,5
C,DD,3,8.0,5.0,2
D,5,4,2.0,,8


In [29]:
# Transposing twice restores the original row/column layout.
df.T.T

Unnamed: 0,A,B,C,D
0,5.0,,DD,5.0
1,3.0,2.0,3,4.0
2,,4.0,8,2.0
3,4.0,3.0,5,
4,4.0,5.0,2,8.0


### 索引重塑

In [30]:
# Small labelled frame of single letters (2 rows x 3 columns) used by the
# stack / unstack examples that follow.
df2 = pd.DataFrame(
    data=[list("abc"), list("def")],
    index=["first", "second"],
    columns=["one", "two", "three"],
)
df2

Unnamed: 0,one,two,three
first,a,b,c
second,d,e,f


In [31]:
# stack() moves the column labels into an inner index level, producing a
# Series keyed by (row label, column label).
df2.stack()

first   one      a
        two      b
        three    c
second  one      d
        two      e
        three    f
dtype: object

In [32]:
# Inspect the two-level MultiIndex created by stack(): row label first, column label second.
df2.stack().index

MultiIndex([( 'first',   'one'),
            ( 'first',   'two'),
            ( 'first', 'three'),
            ('second',   'one'),
            ('second',   'two'),
            ('second', 'three')],
           )

In [33]:
# reset_index() turns both index levels into ordinary columns (level_0, level_1);
# the stacked values end up in the column named 0.
df2.stack().reset_index()

Unnamed: 0,level_0,level_1,0
0,first,one,a
1,first,two,b
2,first,three,c
3,second,one,d
4,second,two,e
5,second,three,f


In [34]:
# On this single-level-index frame, unstack() also yields a Series, but with the
# column label as the outer level and the row label as the inner level.
df2.unstack()

one    first     a
       second    d
two    first     b
       second    e
three  first     c
       second    f
dtype: object

In [35]:
# Inspect the MultiIndex created by unstack(): column label first, row label second.
df2.unstack().index

MultiIndex([(  'one',  'first'),
            (  'one', 'second'),
            (  'two',  'first'),
            (  'two', 'second'),
            ('three',  'first'),
            ('three', 'second')],
           )

In [36]:
# Flatten the unstacked Series into a frame: level_0 holds the column label,
# level_1 the row label, and column 0 the values.
df2.unstack().reset_index()

Unnamed: 0,level_0,level_1,0
0,one,first,a
1,one,second,d
2,two,first,b
3,two,second,e
4,three,first,c
5,three,second,f


In [37]:
# Same expression as the earlier stack().reset_index() cell, repeated here,
# presumably to compare its row-major ordering with the unstack() result above.
df2.stack().reset_index()

Unnamed: 0,level_0,level_1,0
0,first,one,a
1,first,two,b
2,first,three,c
3,second,one,d
4,second,two,e
5,second,three,f
