In [397]:
import pandas as pd
import numpy as np

# See also: array的修改与共享内存.ipynb (modifying arrays and shared memory)

In [398]:
# Column-oriented sample data: one list per column, all of equal length.
data = {
    'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada', 'Nevada'],
    'year': [2000, 2001, 2002, 2001, 2002, 2003],
    'pop': [1.5, 1.7, 3.6, 2.4, 2.9, 3.2],
}
# Attach the row labels at construction time rather than reassigning df.index.
df = pd.DataFrame(data, index=['one', 'two', 'three', 'four', 'five', 'six'])
df

Unnamed: 0,state,year,pop
one,Ohio,2000,1.5
two,Ohio,2001,1.7
three,Ohio,2002,3.6
four,Nevada,2001,2.4
five,Nevada,2002,2.9
six,Nevada,2003,3.2


In [399]:
# Add a new column from a scalar: the value is broadcast to every row,
# and df itself is modified.
df['debt'] = 16.5
df

Unnamed: 0,state,year,pop,debt
one,Ohio,2000,1.5,16.5
two,Ohio,2001,1.7,16.5
three,Ohio,2002,3.6,16.5
four,Nevada,2001,2.4,16.5
five,Nevada,2002,2.9,16.5
six,Nevada,2003,3.2,16.5


In [400]:
# Add a column from a list; its length must equal the number of rows.
df['debt_1'] = list(range(1, 7))
df

Unnamed: 0,state,year,pop,debt,debt_1
one,Ohio,2000,1.5,16.5,1
two,Ohio,2001,1.7,16.5,2
three,Ohio,2002,3.6,16.5,3
four,Nevada,2001,2.4,16.5,4
five,Nevada,2002,2.9,16.5,5
six,Nevada,2003,3.2,16.5,6


In [401]:
# Boolean flag column: True for the rows whose state is 'Ohio'.
df['eastern'] = df['state'].eq('Ohio')
df

Unnamed: 0,state,year,pop,debt,debt_1,eastern
one,Ohio,2000,1.5,16.5,1,True
two,Ohio,2001,1.7,16.5,2,True
three,Ohio,2002,3.6,16.5,3,True
four,Nevada,2001,2.4,16.5,4,False
five,Nevada,2002,2.9,16.5,5,False
six,Nevada,2003,3.2,16.5,6,False


In [402]:
# Overwrite an existing column with an array of floats 0.0 .. 5.0;
# its length must equal the number of rows.
df['debt_1'] = np.arange(6, dtype=np.float64)
df

Unnamed: 0,state,year,pop,debt,debt_1,eastern
one,Ohio,2000,1.5,16.5,0.0,True
two,Ohio,2001,1.7,16.5,1.0,True
three,Ohio,2002,3.6,16.5,2.0,True
four,Nevada,2001,2.4,16.5,3.0,False
five,Nevada,2002,2.9,16.5,4.0,False
six,Nevada,2003,3.2,16.5,5.0,False


In [403]:
# Adding a column from a Series aligns on index labels: only labels present in
# both val.index and df.index receive a value; every other row becomes NaN.
# 'severn' is not a row label of df, so its value (3) is not assigned anywhere.
val = pd.Series({'two': -1.2, 'four': -1.5, 'five': -1.7, 'severn': 3})
df['debt_2'] = val
df

Unnamed: 0,state,year,pop,debt,debt_1,eastern,debt_2
one,Ohio,2000,1.5,16.5,0.0,True,
two,Ohio,2001,1.7,16.5,1.0,True,-1.2
three,Ohio,2002,3.6,16.5,2.0,True,
four,Nevada,2001,2.4,16.5,3.0,False,-1.5
five,Nevada,2002,2.9,16.5,4.0,False,-1.7
six,Nevada,2003,3.2,16.5,5.0,False,


In [404]:
# A pair of positional slices overwrites several rows and columns at once:
# rows at positions 2-3, columns at positions 1-2 ('year' and 'pop').
row_span, col_span = slice(2, 4), slice(1, 3)
df.iloc[row_span, col_span] = 2019
df

Unnamed: 0,state,year,pop,debt,debt_1,eastern,debt_2
one,Ohio,2000,1.5,16.5,0.0,True,
two,Ohio,2001,1.7,16.5,1.0,True,-1.2
three,Ohio,2019,2019.0,16.5,2.0,True,
four,Nevada,2019,2019.0,16.5,3.0,False,-1.5
five,Nevada,2002,2.9,16.5,4.0,False,-1.7
six,Nevada,2003,3.2,16.5,5.0,False,


In [405]:
del df['eastern']  # remove a column with the `del` statement
df.columns

Index(['state', 'year', 'pop', 'debt', 'debt_1', 'debt_2'], dtype='object')

In [406]:
# 4x4 frame holding 0..15 in row-major order; rows are labelled by state,
# columns by ordinal name.
frame = pd.DataFrame(
    np.arange(16).reshape(4, 4),
    index=['Ohio', 'Colorado', 'Utah', 'New York'],
    columns=['one', 'two', 'three', 'four'],
)
frame

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [407]:
# Select the third column by position; the result is a Series named 'three'
# that carries frame's row labels as its index.
ser = frame.iloc[:, 2]
ser

Ohio         2
Colorado     6
Utah        10
New York    14
Name: three, dtype: int32

In [408]:
# Overwrite the third element by position. `ser` is indexed by string labels,
# so plain `ser[2]` only works through the integer-as-position fallback that
# pandas has deprecated; `.iloc` states the positional intent explicitly.
ser.iloc[2] = -1000
ser

Ohio           2
Colorado       6
Utah       -1000
New York      14
Name: three, dtype: int32

In [409]:
# NOTE(review): the change showing up here relies on `ser` being a view of
# frame's data; with pandas Copy-on-Write enabled, frame would presumably stay
# unchanged - confirm against the pandas version in use.
frame  # frame changed at the corresponding position ('Utah', 'three')


Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,-1000,11
New York,12,13,14,15
