In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## 5.1 行和列的添加与删除

### 5.1.1 添加行和列

5.1 使用方括号（[ ]）和 loc 属性添加新的列

In [2]:
# Two-row frame with columns 'a' and 'b', built column by column
df = pd.DataFrame({'a': [1, 3], 'b': [2, 4]})
df

Unnamed: 0,a,b
0,1,2
1,3,4


In [3]:
# Add columns with []: a list supplies one value per row,
# a scalar is repeated for every row.
df['c'] = [1,2]
df['d'] = 3
# Assigning through .loc with a new column label also creates the column.
df.loc[:,'e'] = [5,6]
df

Unnamed: 0,a,b,c,d,e
0,1,2,1,3,5
1,3,4,2,3,6


5.2 iloc 属性和扩展赋值

In [4]:
# No column labels are given, so the columns get the default labels 0 and 1.
df2 = pd.DataFrame([[1,2],[3,4]])
df2

Unnamed: 0,0,1
0,1,2
1,3,4


In [5]:
# iloc is purely positional and cannot create a new column: position 2 does
# not exist in the two-column df2, so pandas raises IndexError. Catch it and
# show the message instead of leaving the demonstration commented out.
try:
    df2.iloc[:, 2] = [5, 6]
except IndexError as err:
    print(f'IndexError: {err}')

5.3 DataFrame 类的 assign 方法

In [6]:
# Display the current contents of df (columns a-e).
df

Unnamed: 0,a,b,c,d,e
0,1,2,1,3,5
1,3,4,2,3,6


In [7]:

# assign returns a new frame with column 'f' added; rebind df to keep it.
df = df.assign(f=[7,8])
df

Unnamed: 0,a,b,c,d,e,f
0,1,2,1,3,5,7
1,3,4,2,3,6,8


5.4 DataFrame 类的 assign 方法（同时添加多个列）

In [8]:
# Several keyword arguments add several columns in one call;
# the scalars 9 and 10 are repeated for every row.
df = df.assign(g=9,h=10)
df

Unnamed: 0,a,b,c,d,e,f,g,h
0,1,2,1,3,5,7,9,10
1,3,4,2,3,6,8,9,10


5.5 使用 loc 属性添加新的行

In [9]:
# Display df (columns a-h) before a row is added.
df

Unnamed: 0,a,b,c,d,e,f,g,h
0,1,2,1,3,5,7,9,10
1,3,4,2,3,6,8,9,10


In [10]:
# Assigning to a row label that does not exist yet (2) appends a new row;
# the list provides one value for each of the 8 columns.
df.loc[2] = list(range(8))
df

Unnamed: 0,a,b,c,d,e,f,g,h
0,1,2,1,3,5,7,9,10
1,3,4,2,3,6,8,9,10
2,0,1,2,3,4,5,6,7


5.6 DataFrame 类的 append 方法

In [11]:
# Start again from a fresh two-row frame with columns 'a' and 'b'
df = pd.DataFrame({'a': [1, 3], 'b': [2, 4]})
df

Unnamed: 0,a,b
0,1,2
1,3,4


In [12]:
# Single-row frame that shares df's column labels
df2 = pd.DataFrame({'a': [5], 'b': [6]})
df2

Unnamed: 0,a,b
0,5,6


In [13]:
# DataFrame.append was removed in pandas 2.0 and _append is a private helper
# that may change without notice. pd.concat is the public replacement: it
# stacks df2 below df and keeps the original row labels (0, 1, 0).
pd.concat([df, df2])

Unnamed: 0,a,b
0,1,2
1,3,4
0,5,6


5.7 append 方法的 ignore_index 参数

In [14]:
# Public replacement for the removed append(..., ignore_index=True):
# ignore_index=True discards the old row labels and renumbers them 0..n-1.
pd.concat([df, df2], ignore_index=True)

Unnamed: 0,a,b
0,1,2
1,3,4
2,5,6


5.8 添加不同列标签的 DataFrame 对象

In [15]:
# Display df again: the calls above returned new frames and left df unchanged.
df

Unnamed: 0,a,b
0,1,2
1,3,4


In [16]:
df3 = pd.DataFrame([[7,8,9]],columns=['a','b','c'])
# pd.concat is the public replacement for the removed DataFrame.append
# (_append is private). Column 'c' exists only in df3, so the rows that come
# from df get NaN in that column.
pd.concat([df, df3])

Unnamed: 0,a,b,c
0,1,2,
1,3,4,
0,7,8,9.0


### 5.1.2 删除行和列

5.9 使用 del 语句删除列

In [17]:
# 3x4 grid holding the integers 0..11, labelled with columns a-d
val = np.arange(12).reshape(3, 4)
df = pd.DataFrame(data=val, columns=['a', 'b', 'c', 'd'])
df

Unnamed: 0,a,b,c,d
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11


In [18]:
# del removes column 'd' from df itself (no new frame is created).
del df['d']
df

Unnamed: 0,a,b,c
0,0,1,2
1,4,5,6
2,8,9,10


5.10 DataFrame 类的 drop 方法（axis=1）

In [19]:
# axis=1 makes drop look up 'b' among the column labels.
# The result is a new frame; df is not modified.
df.drop(labels='b', axis=1)

Unnamed: 0,a,c
0,0,2
1,4,6
2,8,10


5.11 DataFrame 类的 drop 方法（axis=0）

In [20]:
# axis=0 makes drop look up 2 among the row labels.
df.drop(labels=2, axis=0)

Unnamed: 0,a,b,c
0,0,1,2
1,4,5,6


5.12 drop 方法的 columns 参数

In [21]:
# columns=... names the columns to drop directly, without labels/axis.
df.drop(columns=['a','c'])

Unnamed: 0,b
0,1
1,5
2,9


5.13 drop 方法的 index 参数

In [22]:
# index=... names the row labels to drop directly, without labels/axis.
df.drop(index=2)

Unnamed: 0,a,b,c
0,0,1,2
1,4,5,6


5.14 DataFrame 类的 pop 方法

In [23]:
# df still has columns a, b, c: the drop calls above returned new frames.
df

Unnamed: 0,a,b,c
0,0,1,2
1,4,5,6
2,8,9,10


In [24]:
# pop takes column 'c' out of df and returns it as a Series.
popped = df.pop('c')
popped

0     2
1     6
2    10
Name: c, dtype: int32

5.15 使用索引引用删除行和列

In [25]:
# 3x3 frame holding 1..9 row by row, written out per column
df = pd.DataFrame({
    'a': [1, 4, 7],
    'b': [2, 5, 8],
    'c': [3, 6, 9],
})
df

Unnamed: 0,a,b,c
0,1,2,3
1,4,5,6
2,7,8,9


In [26]:
# "Delete" by selecting what should remain: rows 0 and 2, columns 'a'
# through 'b' (a label slice includes its end label), then rebind df.
df = df.loc[[0,2],'a':'b']
df

Unnamed: 0,a,b
0,1,2
2,7,8


## 5.2 数据的连接和合并

### 5.2.1 concat函数

5.16 DataFrame 对象的创建

In [27]:
# Each column holds its own label followed by the row number 0..2
df1 = pd.DataFrame(
    {col: [f'{col}{i}' for i in range(3)] for col in ('a', 'b', 'c')}
)

df1

Unnamed: 0,a,b,c
0,a0,b0,c0
1,a1,b1,c1
2,a2,b2,c2


In [28]:
# Same layout as df1, with the row numbers continuing at 3..5
df2 = pd.DataFrame(
    {col: [f'{col}{i}' for i in range(3, 6)] for col in ('a', 'b', 'c')}
)

df2

Unnamed: 0,a,b,c
0,a3,b3,c3
1,a4,b4,c4
2,a5,b5,c5


5.17 concat 函数

In [29]:
# axis=0 stacks the frames vertically; each keeps its own row labels,
# so the labels 0, 1, 2 appear twice in the result.
pd.concat([df1, df2], axis=0)

Unnamed: 0,a,b,c
0,a0,b0,c0
1,a1,b1,c1
2,a2,b2,c2
0,a3,b3,c3
1,a4,b4,c4
2,a5,b5,c5


5.18 3 个 DataFrame 对象的连接

In [30]:
# Third frame with the same columns, holding the values numbered 6..8.
df3 = pd.DataFrame({
    'a':['a6','a7','a8'],
    'b':['b6','b7','b8'],
    'c':['c6','c7','c8'],
})

# The list passed to concat may contain more than two objects.
pd.concat([df1,df2,df3], axis=0)

Unnamed: 0,a,b,c
0,a0,b0,c0
1,a1,b1,c1
2,a2,b2,c2
0,a3,b3,c3
1,a4,b4,c4
2,a5,b5,c5
0,a6,b6,c6
1,a7,b7,c7
2,a8,b8,c8


5.19 Series 对象的连接

In [31]:
# Concatenating Series objects: with axis=0 the result is a longer Series
# that keeps the original labels (0, 1, 0, 1).
ser1 = pd.Series(['s1','s2'])
ser2 = pd.Series(['s3','s4'])
pd.concat([ser1,ser2], axis=0)

0    s1
1    s2
0    s3
1    s4
dtype: object

5.20 concat 函数和 axis 参数

In [32]:
# axis=1 places the frames side by side, matching rows by index label.
pd.concat([df1,df2], axis=1)

Unnamed: 0,a,b,c,a.1,b.1,c.1
0,a0,b0,c0,a3,b3,c3
1,a1,b1,c1,a4,b4,c4
2,a2,b2,c2,a5,b5,c5


5.21 Series 对象和 concat 函数（axis=1）

In [33]:
# With axis=1 each Series becomes one column of a DataFrame;
# the unnamed Series get the column labels 0 and 1.
pd.concat([ser1,ser2], axis=1)

Unnamed: 0,0,1
0,s1,s3
1,s2,s4


In [34]:
# Once the Series are named, their names are used as the column labels.
ser1.name = 'series1'
ser2.name = 'series2'
pd.concat([ser1,ser2], axis=1)

Unnamed: 0,series1,series2
0,s1,s3
1,s2,s4


5.22 DataFrame 对象和 concat 函数（axis=0）

In [35]:
# Copy of df1 whose third column is relabelled from 'c' to 'd'.
dft = df1.copy()
dft.columns = ['a','b','d']
# The result has the union of the column labels (a, b, c, d); cells for a
# column that a frame does not have are filled with NaN.
pd.concat([df1,dft], axis=0)

Unnamed: 0,a,b,c,d
0,a0,b0,c0,
1,a1,b1,c1,
2,a2,b2,c2,
0,a0,b0,,c0
1,a1,b1,,c1
2,a2,b2,,c2


5.23 DataFrame 对象和 concat 函数（axis=1）

In [36]:
# Copy of df1 with the row labels changed to 0, 2, 4.
dft = df1.copy()
dft.index = [0,2,4]
# Rows are matched by label; the result has the union of the labels
# (0, 1, 2, 4) with NaN where a frame has no row for that label.
pd.concat([df1,dft], axis=1)

Unnamed: 0,a,b,c,a.1,b.1,c.1
0,a0,b0,c0,a0,b0,c0
1,a1,b1,c1,,,
2,a2,b2,c2,a1,b1,c1
4,,,,a2,b2,c2


5.24 concat 函数的 join 参数

In [37]:
# join='outer' keeps the union of the row labels; the result is the same
# as the previous call that did not pass join.
pd.concat([df1,dft], axis=1, join='outer')

Unnamed: 0,a,b,c,a.1,b.1,c.1
0,a0,b0,c0,a0,b0,c0
1,a1,b1,c1,,,
2,a2,b2,c2,a1,b1,c1
4,,,,a2,b2,c2


In [38]:
# join='inner' keeps only the row labels present in both frames (0 and 2).
pd.concat([df1,dft], axis=1, join='inner')

Unnamed: 0,a,b,c,a.1,b.1,c.1
0,a0,b0,c0,a0,b0,c0
2,a2,b2,c2,a1,b1,c1


5.25 concat 函数的 join_axes 参数

In [39]:
# The join_axes parameter of concat was removed in pandas 1.0. The documented
# replacement is to concatenate first and then reindex to the wanted labels:
# this keeps exactly the row labels of df1.
pd.concat([df1, dft], axis=1).reindex(df1.index)

5.26 concat 函数的 ignore_index 参数（axis=0）

In [40]:
# ignore_index=True discards the original row labels and renumbers 0..5.
pd.concat([df1,df2], axis=0, ignore_index=True)

Unnamed: 0,a,b,c
0,a0,b0,c0
1,a1,b1,c1
2,a2,b2,c2
3,a3,b3,c3
4,a4,b4,c4
5,a5,b5,c5


5.27 concat 函数的 ignore_index 参数（axis=1）

In [41]:
# With axis=1, ignore_index=True renumbers the column labels instead (0..5).
pd.concat([df1,df2], axis=1, ignore_index=True)

Unnamed: 0,0,1,2,3,4,5
0,a0,b0,c0,a3,b3,c3
1,a1,b1,c1,a4,b4,c4
2,a2,b2,c2,a5,b5,c5


5.28 DataFrame类的append方法

In [42]:
# DataFrame.append was removed in pandas 2.0 and _append is a private helper;
# pd.concat is the public equivalent and gives the same stacked frame.
pd.concat([df1, df2])

Unnamed: 0,a,b,c
0,a0,b0,c0
1,a1,b1,c1
2,a2,b2,c2
0,a3,b3,c3
1,a4,b4,c4
2,a5,b5,c5


5.29 append 方法的 ignore_index 参数

In [43]:
# Public replacement for the removed append(..., ignore_index=True):
# the rows are stacked and relabelled 0..5.
pd.concat([df1, df2], ignore_index=True)

Unnamed: 0,a,b,c
0,a0,b0,c0
1,a1,b1,c1
2,a2,b2,c2
3,a3,b3,c3
4,a4,b4,c4
5,a5,b5,c5


5.30 DataFrame 类的 append 方法

In [44]:
# Copy of df2 whose third column is relabelled from 'c' to 'd'.
dft = df2.copy()
dft.columns = ['a','b','d']
# Add only the first row of dft to df1. DataFrame.append was removed in
# pandas 2.0 (_append is private), so select the row as a one-row frame with
# .loc[[0], :] and stack it with pd.concat; the row keeps its label 0.
pd.concat([df1, dft.loc[[0], :]])

Unnamed: 0,a,b,c,d
0,a0,b0,c0,
1,a1,b1,c1,
2,a2,b2,c2,
0,a3,b3,,c3


5.31 Series 类的 append 方法

In [45]:
ser1 = pd.Series(['s1','s2','s3'])
ser2 = pd.Series(['s4','s5','s6'])
# Series.append was removed in pandas 2.0 and _append is a private helper;
# pd.concat is the public way to join the two Series end to end.
pd.concat([ser1, ser2])

0    s1
1    s2
2    s3
0    s4
1    s5
2    s6
dtype: object

5.32 DataFrame 类的 append 方法

In [46]:
# Relabel the Series so that its labels match df1's columns, and name it:
# the name becomes the row label of the added row.
ser1.index = ['a','b','c']
ser1.name = 'ser1'
# DataFrame.append was removed in pandas 2.0 (_append is private). Turn the
# Series into a one-row frame with to_frame().T and stack it with pd.concat.
pd.concat([df1, ser1.to_frame().T])

Unnamed: 0,a,b,c
0,a0,b0,c0
1,a1,b1,c1
2,a2,b2,c2
ser1,s1,s2,s3


### 5.2.2 merge函数

5.33 DataFrame 对象的创建

In [47]:
# Left table: value columns 'a' and 'b' plus the join column 'key' (k0..k2)
leftdf = pd.DataFrame({
    col: [f'{prefix}{i}' for i in range(3)]
    for col, prefix in (('a', 'a'), ('b', 'b'), ('key', 'k'))
})

leftdf

Unnamed: 0,a,b,key
0,a0,b0,k0
1,a1,b1,k1
2,a2,b2,k2


In [48]:
# Right table: value columns 'c' and 'd' plus the same 'key' values k0..k2
rightdf = pd.DataFrame({
    col: [f'{prefix}{i}' for i in range(3)]
    for col, prefix in (('c', 'c'), ('d', 'd'), ('key', 'k'))
})

rightdf

Unnamed: 0,c,d,key
0,c0,d0,k0
1,c1,d1,k1
2,c2,d2,k2


5.34 merge 函数

In [49]:
# Join the two tables on the values of their shared 'key' column.
pd.merge(leftdf, rightdf, on='key')

Unnamed: 0,a,b,key,c,d
0,a0,b0,k0,c0,d0
1,a1,b1,k1,c1,d1
2,a2,b2,k2,c2,d2


5.35 merge 函数的 left_on 参数和 right_on 参数

In [50]:
# Copy of leftdf whose key column is named 'key_left' instead of 'key'.
left2 = leftdf.copy()
left2.columns = ['a','b','key_left']
# left_on/right_on name the join column of each side separately;
# both key columns are kept in the result.
pd.merge(left2, rightdf, left_on='key_left', right_on='key')

Unnamed: 0,a,b,key_left,c,d,key
0,a0,b0,k0,c0,d0,k0
1,a1,b1,k1,c1,d1,k1
2,a2,b2,k2,c2,d2,k2


5.36 append 方法

In [51]:
# Extra left row whose key ('left_k3') has no match in the right table.
ser1 = pd.Series(['a3','b3','left_k3'],index=['a','b','key'])
# DataFrame.append was removed in pandas 2.0 (_append is private). Turn the
# Series into a one-row frame and stack it with pd.concat; ignore_index=True
# gives the new row the next integer label (3).
leftdf = pd.concat([leftdf, ser1.to_frame().T], ignore_index=True)
leftdf

Unnamed: 0,a,b,key
0,a0,b0,k0
1,a1,b1,k1
2,a2,b2,k2
3,a3,b3,left_k3


In [52]:
# Extra right row whose key ('right_k3') has no match in the left table.
ser2 = pd.Series(['c3','d3','right_k3'],index=['c','d','key'])
# DataFrame.append was removed in pandas 2.0 (_append is private). Turn the
# Series into a one-row frame and stack it with pd.concat; ignore_index=True
# gives the new row the next integer label (3).
rightdf = pd.concat([rightdf, ser2.to_frame().T], ignore_index=True)
rightdf

Unnamed: 0,c,d,key
0,c0,d0,k0
1,c1,d1,k1
2,c2,d2,k2
3,c3,d3,right_k3


5.37 merge 函数的 how 参数（inner）

In [53]:
# how='inner' keeps only the keys found in both tables (k0, k1, k2).
pd.merge(leftdf, rightdf, on=['key'], how='inner')

Unnamed: 0,a,b,key,c,d
0,a0,b0,k0,c0,d0
1,a1,b1,k1,c1,d1
2,a2,b2,k2,c2,d2


5.38 merge 函数的 how 参数（outer）

In [54]:
# how='outer' keeps the keys of both tables; the unmatched rows
# (left_k3, right_k3) get NaN in the other side's columns.
pd.merge(leftdf, rightdf, on=['key'], how='outer')

Unnamed: 0,a,b,key,c,d
0,a0,b0,k0,c0,d0
1,a1,b1,k1,c1,d1
2,a2,b2,k2,c2,d2
3,a3,b3,left_k3,,
4,,,right_k3,c3,d3


5.39 merge 函数的 how 参数（left 和 right）

In [55]:
# how='left' keeps every row of leftdf; left_k3 has no match, so c and d are NaN.
pd.merge(leftdf, rightdf, on=['key'], how='left')

Unnamed: 0,a,b,key,c,d
0,a0,b0,k0,c0,d0
1,a1,b1,k1,c1,d1
2,a2,b2,k2,c2,d2
3,a3,b3,left_k3,,


In [56]:
# how='right' keeps every row of rightdf; right_k3 has no match, so a and b are NaN.
pd.merge(leftdf, rightdf, on=['key'], how='right')

Unnamed: 0,a,b,key,c,d
0,a0,b0,k0,c0,d0
1,a1,b1,k1,c1,d1
2,a2,b2,k2,c2,d2
3,,,right_k3,c3,d3


5.40 使用多个键值的合并

In [57]:
# Copy of leftdf with a second key column 'key2'.
left2 = leftdf.copy()
left2['key2'] = ['k20','k21','k22','k23']
left2

Unnamed: 0,a,b,key,key2
0,a0,b0,k0,k20
1,a1,b1,k1,k21
2,a2,b2,k2,k22
3,a3,b3,left_k3,k23


In [58]:
# Copy of rightdf with a second key column 'key2'; its values differ from
# left2's in the last two rows, so only the first two rows match on both keys.
right2 = rightdf.copy()
right2['key2'] = ['k20','k21','k21','k22']
right2

Unnamed: 0,c,d,key,key2
0,c0,d0,k0,k20
1,c1,d1,k1,k21
2,c2,d2,k2,k21
3,c3,d3,right_k3,k22


In [59]:
# With a list of keys, rows match only when both 'key' and 'key2' are equal.
pd.merge(left2,right2, on=['key','key2'], how='inner')

Unnamed: 0,a,b,key,key2,c,d
0,a0,b0,k0,k20,c0,d0
1,a1,b1,k1,k21,c1,d1


5.41 merge 函数与索引标签

In [60]:
# Give both frames custom row labels.
left2.index = ['あ','い','う','え']
right2.index = ['か','き','く','け']
# A merge on columns does not carry the row labels over:
# the result is numbered 0..5.
pd.merge(left2, right2, on=['key','key2'], how='outer')

Unnamed: 0,a,b,key,key2,c,d
0,a0,b0,k0,k20,c0,d0
1,a1,b1,k1,k21,c1,d1
2,a2,b2,k2,k22,,
3,a3,b3,left_k3,k23,,
4,,,k2,k21,c2,d2
5,,,right_k3,k22,c3,d3


5.42 将列 key 变更为索引标签

In [61]:
# set_index moves the 'key' column into the row index and returns a new
# frame, so leftdf stays untouched and no copy or in-place drop is needed.
left2 = leftdf.set_index('key')
left2

Unnamed: 0_level_0,a,b
key,Unnamed: 1_level_1,Unnamed: 2_level_1
k0,a0,b0
k1,a1,b1
k2,a2,b2
left_k3,a3,b3


In [62]:
# set_index moves the 'key' column into the row index and returns a new
# frame, so rightdf stays untouched and no copy or in-place drop is needed.
right2 = rightdf.set_index('key')
right2

Unnamed: 0_level_0,c,d
key,Unnamed: 1_level_1,Unnamed: 2_level_1
k0,c0,d0
k1,c1,d1
k2,c2,d2
right_k3,c3,d3


5.43 merge 函数的 left_index 参数和 right_index 参数

In [63]:
# left_index/right_index=True join on the row labels instead of a column.
pd.merge(left2, right2, left_index=True, right_index=True, how='outer')

Unnamed: 0_level_0,a,b,c,d
key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
k0,a0,b0,c0,d0
k1,a1,b1,c1,d1
k2,a2,b2,c2,d2
left_k3,a3,b3,,
right_k3,,,c3,d3


5.44 基于索引标签和列进行合并

In [64]:
# Match left2's row labels against rightdf's 'key' column. In the output the
# row labels are rightdf's, with NaN for the row that exists only in left2.
pd.merge(left2, rightdf, left_index=True, right_on='key', how='outer')

Unnamed: 0,a,b,c,d,key
0.0,a0,b0,c0,d0,k0
1.0,a1,b1,c1,d1,k1
2.0,a2,b2,c2,d2,k2
,a3,b3,,,left_k3
3.0,,,c3,d3,right_k3


5.45 DataFrame 类的 join 方法

In [65]:
# join matches on the row labels of both frames; the output is the same as
# the merge with left_index=True, right_index=True above.
left2.join(right2, how='outer')

Unnamed: 0_level_0,a,b,c,d
key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
k0,a0,b0,c0,d0
k1,a1,b1,c1,d1
k2,a2,b2,c2,d2
left_k3,a3,b3,,
right_k3,,,c3,d3


5.46 DataFrame 对象的创建

In [66]:
# First two rows of rightdf, with every key set to 'k0' (duplicate keys).
right2 = rightdf[0:2].copy()
right2['key'] = 'k0'
right2

Unnamed: 0,c,d,key
0,c0,d0,k0
1,c1,d1,k0


In [67]:
# First two rows of leftdf, with every key set to 'k0' (duplicate keys).
left2 = leftdf[0:2].copy()
left2['key'] = 'k0'
left2

Unnamed: 0,a,b,key
0,a0,b0,k0
1,a1,b1,k0


5.47 包含重复键值的 DataFrame 对象的合并

In [68]:
# Each of the 2 left rows matches each of the 2 right rows, giving 4 rows.
pd.merge(left2,right2, on='key')

Unnamed: 0,a,b,key,c,d
0,a0,b0,k0,c0,d0
1,a0,b0,k0,c1,d1
2,a1,b1,k0,c0,d0
3,a1,b1,k0,c1,d1


5.48 merge 函数的 validate 参数（one_to_one）

In [69]:
# Both left2 and right2 repeat the key 'k0', so validate='one_to_one' rejects
# the merge with a MergeError. Catch it and show the message instead of
# leaving the demonstration commented out.
try:
    pd.merge(left2, right2, on='key', validate='one_to_one')
except pd.errors.MergeError as err:
    print(f'MergeError: {err}')

5.49 merge 函数的 validate 参数（many_to_many）

In [70]:
# validate='many_to_many' accepts the duplicate keys; the output is the same
# as the merge without validate.
pd.merge(left2,right2, on='key', validate='many_to_many')

Unnamed: 0,a,b,key,c,d
0,a0,b0,k0,c0,d0
1,a0,b0,k0,c1,d1
2,a1,b1,k0,c0,d0
3,a1,b1,k0,c1,d1


5.50 包含重复列标签的 DataFrame 对象的合并

In [71]:
# Rename rightdf's columns to a, b, key so that they collide with leftdf's.
# NOTE: this changes rightdf itself, so later cells see the renamed columns.
rightdf.columns = leftdf.columns
# Non-key columns present on both sides are told apart by the default
# suffixes _x (left) and _y (right).
pd.merge(leftdf, rightdf, on='key')

Unnamed: 0,a_x,b_x,key,a_y,b_y
0,a0,b0,k0,c0,d0
1,a1,b1,k1,c1,d1
2,a2,b2,k2,c2,d2


5.51 merge 函数的 suffixes 参数

In [72]:
# suffixes replaces the default _x/_y with custom endings for the left and
# right versions of the overlapping columns.
pd.merge(leftdf, rightdf, on='key', suffixes=['_左','_右'])

Unnamed: 0,a_左,b_左,key,a_右,b_右
0,a0,b0,k0,c0,d0
1,a1,b1,k1,c1,d1
2,a2,b2,k2,c2,d2


## 5.3 其他的数据变形

### 5.3.1 随机抽样

5.52 DataFrame 对象的创建

In [73]:
# Seed NumPy's global random generator so the random values are repeatable.
np.random.seed(seed=1)
# 16 random integers from 0 to 99 (100 is excluded), arranged as a 4x4 grid.
val = np.random.randint(0,100,size=16).reshape(4, 4)
df = pd.DataFrame(val, index=list('efgh'), columns=list('abcd'))
df

Unnamed: 0,a,b,c,d
e,37,12,72,9
f,75,5,79,64
g,16,1,76,71
h,6,25,50,20


5.53 Series 类的 sample 方法

In [74]:
# Draw 3 random elements from column 'a'; the result keeps their labels.
df['a'].sample(n=3)

h     6
e    37
f    75
Name: a, dtype: int32

5.54 sample 方法的 axis 参数

In [75]:
# axis=0 samples rows: 2 random rows with all columns.
df.sample(n=2, axis=0)

Unnamed: 0,a,b,c,d
f,75,5,79,64
g,16,1,76,71


In [76]:
# axis=1 samples columns: 2 random columns with all rows.
df.sample(n=2, axis=1)

Unnamed: 0,c,d
e,72,9
f,79,64
g,76,71
h,50,20


5.55 sample 方法的 frac 参数

In [77]:
# frac gives the sample size as a fraction of the rows; 0.3 of the 4 rows
# yields a single row here.
df.sample(frac=0.3)

Unnamed: 0,a,b,c,d
h,6,25,50,20


5.56 sample 方法的 random_state 参数

In [78]:
# random_state fixes the seed used for this one call.
df.sample(n=1, random_state=1)

Unnamed: 0,a,b,c,d
h,6,25,50,20


5.57 使用 sample 方法对整体数据进行随机排序

In [79]:
# frac=1 samples every row once, i.e. returns all rows in random order.
df.sample(frac=1, axis=0)

Unnamed: 0,a,b,c,d
e,37,12,72,9
g,16,1,76,71
f,75,5,79,64
h,6,25,50,20


5.58 sample 方法和 replace 参数

In [80]:
# replace=True samples with replacement, so the same row can be drawn more
# than once (row 'h' appears twice in the output).
df.sample(n=3, random_state=5, replace=True)

Unnamed: 0,a,b,c,d
h,6,25,50,20
g,16,1,76,71
h,6,25,50,20


### 5.3.2 虚拟变量

5.59 DataFrame 对象的创建

In [81]:
# City/prefecture pairs, split into the two parallel lists used as columns
cities, prefs = map(list, zip(
    ('Shibuya', 'Tokyo'),
    ('Sapporo', 'Hokkaido'),
    ('Toyota', 'Aichi'),
    ('Shibuya', 'Tokyo'),
    ('Sapporo', 'Hokkaido'),
))
df = pd.DataFrame(dict(city=cities, pref=prefs))
df

Unnamed: 0,city,pref
0,Shibuya,Tokyo
1,Sapporo,Hokkaido
2,Toyota,Aichi
3,Shibuya,Tokyo
4,Sapporo,Hokkaido


5.60 get_dummies 函数

In [82]:
# One indicator column per distinct city; True marks the city of each row.
pd.get_dummies(df['city'])

Unnamed: 0,Sapporo,Shibuya,Toyota
0,False,True,False
1,True,False,False
2,False,False,True
3,False,True,False
4,True,False,False


5.61 get_dummies 函数的 drop_first 参数

In [83]:
# drop_first=True leaves out the first category's column (Sapporo);
# a row with all False therefore means Sapporo.
pd.get_dummies(df['city'], drop_first=True)

Unnamed: 0,Shibuya,Toyota
0,True,False
1,False,False
2,False,True
3,True,False
4,False,False


5.62 get_dummies 函数

In [84]:
# Applied to the whole frame, both text columns are encoded and the new
# column names are prefixed with the source column name (city_, pref_).
pd.get_dummies(df)

Unnamed: 0,city_Sapporo,city_Shibuya,city_Toyota,pref_Aichi,pref_Hokkaido,pref_Tokyo
0,False,True,False,False,False,True
1,True,False,False,False,True,False
2,False,False,True,True,False,False
3,False,True,False,False,False,True
4,True,False,False,False,True,False


5.63 get_dummies 函数

In [85]:
# Add a numeric column; get_dummies leaves it as it is and encodes only the
# text columns.
df['sales'] = [120000,80000,90000,170000,55000]
pd.get_dummies(df)

Unnamed: 0,sales,city_Sapporo,city_Shibuya,city_Toyota,pref_Aichi,pref_Hokkaido,pref_Tokyo
0,120000,False,True,False,False,False,True
1,80000,True,False,False,False,True,False
2,90000,False,False,True,True,False,False
3,170000,False,True,False,False,False,True
4,55000,True,False,False,False,True,False


5.64 get_dummies 函数的 columns 参数和 prefix 参数

In [86]:
# columns limits the encoding to 'city'; prefix sets the start of the new
# column names ('c_' instead of 'city_').
pd.get_dummies(df, columns=['city'], prefix='c')

Unnamed: 0,pref,sales,c_Sapporo,c_Shibuya,c_Toyota
0,Tokyo,120000,False,True,False
1,Hokkaido,80000,True,False,False
2,Aichi,90000,False,False,True
3,Tokyo,170000,False,True,False
4,Hokkaido,55000,True,False,False


5.65 qcut 函数

In [87]:
# qcut splits 'sales' into 3 quantile-based bins and names them
# low / mid / high; the result is stored as a new column of df.
df['label'] = pd.qcut(df['sales'], 3, labels=['low','mid','high'])
df

Unnamed: 0,city,pref,sales,label
0,Shibuya,Tokyo,120000,high
1,Sapporo,Hokkaido,80000,low
2,Toyota,Aichi,90000,mid
3,Shibuya,Tokyo,170000,high
4,Sapporo,Hokkaido,55000,low


5.66 get_dummies 函数

In [88]:
# Encode only the 'label' column; the indicator columns follow the label
# order low, mid, high.
pd.get_dummies(df, columns=['label'])

Unnamed: 0,city,pref,sales,label_low,label_mid,label_high
0,Shibuya,Tokyo,120000,False,False,True
1,Sapporo,Hokkaido,80000,True,False,False
2,Toyota,Aichi,90000,False,True,False
3,Shibuya,Tokyo,170000,False,False,True
4,Sapporo,Hokkaido,55000,True,False,False


5.67 DataFrame 对象的创建

### 5.3.3 长型数据和宽型数据的变形

In [89]:
# Long format: one [name, attribute, value] row per attribute of each person
data = [
    [name, attribute, value]
    for name, attributes in (
        ('A', (('出生地', 'Tokyo'), ('年龄', 28), ('性別', 'M'))),
        ('B', (('出生地', 'Osaka'), ('年龄', 32), ('性別', 'F'))),
    )
    for attribute, value in attributes
]

df_long = pd.DataFrame(data=data, columns=['name', 'attribute', 'value'])
df_long

Unnamed: 0,name,attribute,value
0,A,出生地,Tokyo
1,A,年龄,28
2,A,性別,M
3,B,出生地,Osaka
4,B,年龄,32
5,B,性別,F


5.68 DataFrame 类的 pivot 方法

In [90]:
# Long to wide: one row per name, one column per attribute. No values
# argument is given, so the columns get an extra top level named 'value'.
df_long.pivot(index='name', columns='attribute')

Unnamed: 0_level_0,value,value,value
attribute,出生地,年龄,性別
name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
A,Tokyo,28,M
B,Osaka,32,F


5.69 pivot 方法的 values 参数

In [91]:
# Add a second value column, then pivot only that one: values='flag' selects
# the column to spread, and the result has a single level of column labels.
df_long['flag'] = [1,1,0,0,1,1]
df_long.pivot(index='name', columns='attribute', values='flag')

attribute,出生地,年龄,性別
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A,1,1,0
B,0,1,1


5.70 DataFrame 对象的创建

In [92]:
# Wide format: one row per person, one column per attribute.
data = [
    ['A','Tokyo',28,'M'],
    ['B','Osaka',32,'F'],
]
		
df_wide = pd.DataFrame(data, columns=['name','出生地','年龄','性別'])
df_wide

Unnamed: 0,name,出生地,年龄,性別
0,A,Tokyo,28,M
1,B,Osaka,32,F


5.71 DataFrame 类的 melt 方法

In [93]:
# Wide to long: 'name' stays as an identifier column; every other column
# becomes a (variable, value) pair on its own row.
df_wide.melt(id_vars='name')

Unnamed: 0,name,variable,value
0,A,出生地,Tokyo
1,B,出生地,Osaka
2,A,年龄,28
3,B,年龄,32
4,A,性別,M
5,B,性別,F


5.72 melt 方法的 var_name 参数和 value_name 参数

In [94]:
# var_name/value_name replace the default column names 'variable' and 'value'.
df_wide.melt(id_vars='name', var_name='项目名称', value_name='值')

Unnamed: 0,name,项目名称,值
0,A,出生地,Tokyo
1,B,出生地,Osaka
2,A,年龄,28
3,B,年龄,32
4,A,性別,M
5,B,性別,F


5.73 melt 方法的 value_vars 参数

In [95]:
# value_vars limits the melt to the listed columns; '性別' is left out.
df_wide.melt(id_vars='name', value_vars=['出生地','年龄'])

Unnamed: 0,name,variable,value
0,A,出生地,Tokyo
1,B,出生地,Osaka
2,A,年龄,28
3,B,年龄,32


5.74 melt 函数

In [96]:
# Function form of melt; the output is the same as df_wide.melt(id_vars='name').
pd.melt(df_wide, id_vars='name')

Unnamed: 0,name,variable,value
0,A,出生地,Tokyo
1,B,出生地,Osaka
2,A,年龄,28
3,B,年龄,32
4,A,性別,M
5,B,性別,F
