In [2]:
import pandas as pd

# A Series works like a specialised dict: every index label maps to one value.
data = pd.Series(
    data=[0.25, 0.5, 0.75, 1.0],
    index=['a', 'b', 'c', 'd'],
)
print(data)

# The familiar dict-style queries all work: membership test, keys, values, items.
print('a' in data, data.keys(), data.values, list(data.items()), sep='\n')

# Assigning to a label that does not exist yet adds a new entry, just like a dict.
data['e'] = 1.25
print(data)

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64
True
Index(['a', 'b', 'c', 'd'], dtype='object')
[0.25 0.5  0.75 1.  ]
[('a', 0.25), ('b', 0.5), ('c', 0.75), ('d', 1.0)]
a    0.25
b    0.50
c    0.75
d    1.00
e    1.25
dtype: float64


In [3]:
# Slicing by label: the end label 'c' IS part of the result.
print(data['a':'c'])

# Slicing by implicit integer position: the end position 2 is NOT part of the result.
print(data[0:2])

# Boolean mask: keep only the entries strictly between 0.3 and 0.8.
in_range = (data > 0.3) & (data < 0.8)
print(data[in_range])

# Fancy indexing: pass a list of labels to pick several entries at once.
wanted_labels = ['a', 'e']
print(data[wanted_labels])

a    0.25
b    0.50
c    0.75
dtype: float64
a    0.25
b    0.50
dtype: float64
b    0.50
c    0.75
dtype: float64
a    0.25
e    1.25
dtype: float64


In [4]:
# With integer labels, plain [] behaves differently for a single key and for a slice.
data = pd.Series(data=['a', 'b', 'c'], index=[1, 3, 5])
print(data)

# A single integer is resolved against the explicit index (the labels).
by_label = data[1]
print(by_label)

# An integer slice is resolved against the implicit index (the positions).
by_position = data[1:3]
print(by_position)

1    a
3    b
5    c
dtype: object
a
3    b
5    c
dtype: object


In [5]:
# loc always uses the explicit index (labels); a slice includes its end label.
print(data.loc[1], data.loc[1:3], sep='\n')

# iloc always uses the implicit index (positions); a slice excludes its end position.
print(data.iloc[1], data.iloc[1:3], sep='\n')

a
1    a
3    b
dtype: object
b
3    b
5    c
dtype: object


In [8]:
# Two Series that share the same state labels; each one becomes a column below.
area = pd.Series({
    'California': 423967,
    'Texas': 695662,
    'Florida': 170312,
    'New York': 141297,
    'Pennsylvania': 119280,
})
pop = pd.Series({
    'California': 39538223,
    'Texas': 29145505,
    'Florida': 21438187,
    'New York': 20201249,
    'Pennsylvania': 13002700,
})
data = pd.DataFrame({'area': area, 'pop': pop})
print(data)

# Every column is a Series; the column name works as a dict-style key.
area_by_key = data['area']
print(area_by_key)

# A column is also exposed as an attribute, so attribute access returns the same values.
area_by_attribute = data.area
print(area_by_attribute)

# Caution: DataFrame already has a method named pop, so data.pop is that method
# and not the column. Avoid such column names, or read the column as data['pop'].
pop_attribute = data.pop
print(pop_attribute is data["pop"])

# A new column is added by assigning to a key that does not exist yet.
data['density'] = data['pop'].div(data['area'])
print(data)

                area       pop
California    423967  39538223
Texas         695662  29145505
Florida       170312  21438187
New York      141297  20201249
Pennsylvania  119280  13002700
California      423967
Texas           695662
Florida         170312
New York        141297
Pennsylvania    119280
Name: area, dtype: int64
California      423967
Texas           695662
Florida         170312
New York        141297
Pennsylvania    119280
Name: area, dtype: int64
False
                area       pop     density
California    423967  39538223   93.257784
Texas         695662  29145505   41.896072
Florida       170312  21438187  125.875963
New York      141297  20201249  142.970120
Pennsylvania  119280  13002700  109.009893


In [9]:
# Inspect every value as the underlying raw array.
raw_values = data.values
print(raw_values)

# Transpose: rows and columns swap places.
print(data.transpose())

# Fetching a single row (data.values[i]) or a single column (data['colname'])
# works a little differently from plain NumPy indexing.
print(raw_values[0], data['area'], sep='\n')

# To index in every way NumPy allows, go through loc / iloc.
# iloc follows Python slice rules: it stops one before the given end position.
print(data.iloc[:3, :2])

# loc includes the given end label, for rows and columns alike.
print(data.loc[:'Florida', :'pop'])

# loc can also combine a boolean mask (rows) with fancy indexing (columns).
is_dense = data['density'] > 120
print(data.loc[is_dense, ['pop', 'density']])

# The same indexers can be assigned to, which changes the value in place.
data.iloc[0, 2] = 90
print(data)

[[4.23967000e+05 3.95382230e+07 9.32577842e+01]
 [6.95662000e+05 2.91455050e+07 4.18960717e+01]
 [1.70312000e+05 2.14381870e+07 1.25875963e+02]
 [1.41297000e+05 2.02012490e+07 1.42970120e+02]
 [1.19280000e+05 1.30027000e+07 1.09009893e+02]]
           California         Texas       Florida      New York  Pennsylvania
area     4.239670e+05  6.956620e+05  1.703120e+05  1.412970e+05  1.192800e+05
pop      3.953822e+07  2.914550e+07  2.143819e+07  2.020125e+07  1.300270e+07
density  9.325778e+01  4.189607e+01  1.258760e+02  1.429701e+02  1.090099e+02
[4.23967000e+05 3.95382230e+07 9.32577842e+01]
California      423967
Texas           695662
Florida         170312
New York        141297
Pennsylvania    119280
Name: area, dtype: int64
              area       pop
California  423967  39538223
Texas       695662  29145505
Florida     170312  21438187
              area       pop
California  423967  39538223
Texas       695662  29145505
Florida     170312  21438187
               pop     densi

In [14]:
# Without loc / iloc, plain [] given a slice of row labels selects rows.
# This works for slices only: a single row still has to be read via loc / iloc.
print(data['Florida':'New York'])

# Integer position slices select rows the same way.
print(data[1:3])

# A boolean mask passed to plain [] is also applied row by row.
dense_rows = data['density'] > 120
print(data[dense_rows])

            area       pop     density
Florida   170312  21438187  125.875963
New York  141297  20201249  142.970120
           area       pop     density
Texas    695662  29145505   41.896072
Florida  170312  21438187  125.875963
            area       pop     density
Florida   170312  21438187  125.875963
New York  141297  20201249  142.970120
