In [2]:
import pandas as pd

data = pd.Series([0.2, 0.4, 0.6, 0.8, 1.0],
                               index = ['a', 'b', 'c', 'd', 'e'])


data

a    0.2
b    0.4
c    0.6
d    0.8
e    1.0
dtype: float64

In [3]:
data['a']

0.2

In [5]:
'c' in data

True

In [6]:
data.keys()

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

In [8]:
list(data.items())

[('a', 0.2), ('b', 0.4), ('c', 0.6), ('d', 0.8), ('e', 1.0)]

In [10]:
data['f'] = 1.2
data['g'] = 1.4

data

a    0.2
b    0.4
c    0.6
d    0.8
e    1.0
f    1.2
g    1.4
dtype: float64

In [12]:
## 顯式索引 - 指定索引值進行切片
data['a':'d']

a    0.2
b    0.4
c    0.6
d    0.8
dtype: float64

In [14]:
## 隱式索引
data[0:3]

a    0.2
b    0.4
c    0.6
dtype: float64

In [23]:
## 遮罩
data[(data > 0.2) & (data < 1.4)]

b    0.4
c    0.6
d    0.8
e    1.0
f    1.2
dtype: float64

In [25]:
## 高級索引
data[['a', 'g']]

a    0.2
g    1.4
dtype: float64

In [27]:
## 拿整數值當索引
data = pd.Series(['a', 'b', 'c', 'd', 'e'], index = [1, 3, 5, 7, 9])
data

1    a
3    b
5    c
7    d
9    e
dtype: object

In [29]:
## 顯式索引
data[1]

'a'

In [31]:
## 隱是索引
data[1:5]

3    b
5    c
7    d
9    e
dtype: object

In [34]:
data.loc[1]

'a'

In [35]:
data.loc[1:5]

1    a
3    b
5    c
dtype: object

In [37]:
data.iloc[1]

'b'

In [39]:
data.iloc[1:5]

3    b
5    c
7    d
9    e
dtype: object

In [41]:
city_population = pd.Series({'Hsinchu': 543286, 'Taipei': 612458, 'Taichung': 530456, 'Kaohsiung': 632937, 'Hualien': 264385})

city_house = pd.Series({'Hsinchu': 600000, 'Taipei': 8012450, 'Taichung': 1220468, 'Kaohsiung': 6242358, 'Hualien': 2584328})

## 組合成DataFrame
data = pd.DataFrame({'pop': city_population, 'house': city_house})

data

Unnamed: 0,pop,house
Hsinchu,543286,600000
Taipei,612458,8012450
Taichung,530456,1220468
Kaohsiung,632937,6242358
Hualien,264385,2584328


In [43]:
## 關鍵字索引方式
data['house']

Hsinchu       600000
Taipei       8012450
Taichung     1220468
Kaohsiung    6242358
Hualien      2584328
Name: house, dtype: int64

In [44]:
## 屬性方式
data.house

Hsinchu       600000
Taipei       8012450
Taichung     1220468
Kaohsiung    6242358
Hualien      2584328
Name: house, dtype: int64

In [46]:
data.house is data['house']

True

In [48]:
data.pop is data['pop']

False

In [50]:
## 增加一個新的列, 記錄每個城市平均一間房子要給幾個人住
data['average'] = data['pop'] / data['house']

data

Unnamed: 0,pop,house,average
Hsinchu,543286,600000,0.905477
Taipei,612458,8012450,0.076438
Taichung,530456,1220468,0.434633
Kaohsiung,632937,6242358,0.101394
Hualien,264385,2584328,0.102303


In [51]:
## 轉換成底層數組
data.values

array([[5.43286000e+05, 6.00000000e+05, 9.05476667e-01],
       [6.12458000e+05, 8.01245000e+06, 7.64382929e-02],
       [5.30456000e+05, 1.22046800e+06, 4.34633272e-01],
       [6.32937000e+05, 6.24235800e+06, 1.01393896e-01],
       [2.64385000e+05, 2.58432800e+06, 1.02303191e-01]])

In [53]:
## 倒置 - 將行和列交換
data.T

Unnamed: 0,Hsinchu,Taipei,Taichung,Kaohsiung,Hualien
pop,543286.0,612458.0,530456.0,632937.0,264385.0
house,600000.0,8012450.0,1220468.0,6242358.0,2584328.0
average,0.905477,0.07643829,0.4346333,0.1013939,0.1023032


In [55]:
## 取得第一行數據
data.values[0]

array([5.43286000e+05, 6.00000000e+05, 9.05476667e-01])

In [57]:
## 獲取一個列
data['pop']

Hsinchu      543286
Taipei       612458
Taichung     530456
Kaohsiung    632937
Hualien      264385
Name: pop, dtype: int64

In [59]:
data.iloc[:4, :2]

Unnamed: 0,pop,house
Hsinchu,543286,600000
Taipei,612458,8012450
Taichung,530456,1220468
Kaohsiung,632937,6242358


In [61]:
data.loc[:'Kaohsiung', :'house']

Unnamed: 0,pop,house
Hsinchu,543286,600000
Taipei,612458,8012450
Taichung,530456,1220468
Kaohsiung,632937,6242358


In [62]:
## 找到average大於0.2的城市, 並顯示其'pop'和'house'列
data.loc[data.average > 0.2, ['pop', 'house']]


Unnamed: 0,pop,house
Hsinchu,543286,600000
Taichung,530456,1220468


In [64]:
data.iloc[1, 0] = 100

data

Unnamed: 0,pop,house,average
Hsinchu,543286,600000,0.905477
Taipei,100,8012450,0.076438
Taichung,530456,1220468,0.434633
Kaohsiung,632937,6242358,0.101394
Hualien,264385,2584328,0.102303


In [66]:
## 透過行的索引值
data['Hsinchu':'Taichung']

Unnamed: 0,pop,house,average
Hsinchu,543286,600000,0.905477
Taipei,100,8012450,0.076438
Taichung,530456,1220468,0.434633


In [68]:
data[2:4]

Unnamed: 0,pop,house,average
Taichung,530456,1220468,0.434633
Kaohsiung,632937,6242358,0.101394


In [70]:
data[data.average > 0.2]

Unnamed: 0,pop,house,average
Hsinchu,543286,600000,0.905477
Taichung,530456,1220468,0.434633


In [72]:
## 導入所需的套件
import pandas as pd
import numpy as np

In [74]:
## 構建一個Series, 裡面的元素從1到50中隨機取六個數
rng = np.random.RandomState(6)
series_obj = pd.Series(rng.randint(0, 50, 6))

series_obj

0    10
1     9
2    35
3    20
4    42
5    45
dtype: int32

In [76]:
## 構建一個3X5的DataFrame, 並隨機從0到50取數填入
df = pd.DataFrame(rng.randint(0, 50, (3, 5)), columns = ['A', 'B', 'C', 'D', 'E']

)

df

Unnamed: 0,A,B,C,D,E
0,2,12,20,27,31
1,2,5,26,15,25
2,22,43,5,37,49


In [78]:
## 計算指數函數
np.exp(series_obj)

0    2.202647e+04
1    8.103084e+03
2    1.586013e+15
3    4.851652e+08
4    1.739275e+18
5    3.493427e+19
dtype: float64

In [80]:
## 對df進行複雜的運算
np.sin(df * np.pi / 6)

Unnamed: 0,A,B,C,D,E
0,0.866025,-2.449294e-16,-0.866025,1.0,-0.5
1,0.866025,0.5,0.866025,1.0,0.5
2,-0.866025,-0.5,0.5,0.5,0.5


In [82]:
## 構建一個Series(人口數據集)
ciity_population = pd.Series({'Hsinchu': 534266, 'Taipei': 632450, 'Taichung': 560266}, name = 'pop')

## 構建一個Series(房屋數據集)
ciity_house = pd.Series({'Taipei': 8212456,  'Taichung': 1420566, 'Kaohsiung': 6842738 }, name = 'house')

In [84]:
city_population / city_house

Hsinchu      0.905477
Taipei       0.076438
Taichung     0.434633
Kaohsiung    0.101394
Hualien      0.102303
dtype: float64

In [86]:
city_population.index | city_house.index

Index(['Hsinchu', 'Taipei', 'Taichung', 'Kaohsiung', 'Hualien'], dtype='object')

In [88]:
x = pd.Series([1, 3, 5, 7], index = [0, 1, 2, 3])
y = pd.Series([2, 4, 6, 8], index = [0, 2, 3, 4])
x + y

0     3.0
1     NaN
2     9.0
3    13.0
4     NaN
dtype: float64

In [90]:
x.add(y, fill_value = 0)

0     3.0
1     3.0
2     9.0
3    13.0
4     8.0
dtype: float64

In [92]:
## 構建一個 3 X 3 的df, 隨機從0到50取數
x = pd.DataFrame(rng.randint(0, 50, (3, 3)),
                                  columns = list('ABC'))
                                  
x

Unnamed: 0,A,B,C
0,5,26,14
1,40,5,6
2,42,26,10


In [94]:
## 構建一個 4 X 4 的df, 隨機從0到50取數
y = pd.DataFrame(rng.randint(0, 50, (4, 4)),
                                  columns = list('BACD'))
                                  
y

Unnamed: 0,B,A,C,D
0,33,12,4,33
1,15,5,47,21
2,6,1,9,48
3,37,42,40,13


In [96]:
## 拿3 X 3的df 去加油4 X 4的df
x + y

Unnamed: 0,A,B,C,D
0,17.0,59.0,18.0,
1,45.0,20.0,53.0,
2,43.0,32.0,19.0,
3,,,,


In [98]:
fill = x.stack().mean()
x.mul(y, fill_value = fill)

Unnamed: 0,A,B,C,D
0,60.0,858.0,56.0,638.0
1,200.0,75.0,282.0,406.0
2,42.0,156.0,90.0,928.0
3,812.0,715.333333,773.333333,251.333333


In [99]:
## 構建一個3 X 5的二維數組
x = rng.randint(0, 10, size = (3, 5))

x

array([[9, 1, 2, 2, 9],
       [5, 4, 1, 6, 3],
       [1, 4, 9, 8, 4]])

In [101]:
## 將x中的所有行減掉第二行的數據
x - x[1]

array([[ 4, -3,  1, -4,  6],
       [ 0,  0,  0,  0,  0],
       [-4,  0,  8,  2,  1]])

In [103]:
df = pd.DataFrame(x, columns = list('ABCDE'))
print(df)

## df中的所有行減掉其第二行
df - df.iloc[1]

   A  B  C  D  E
0  9  1  2  2  9
1  5  4  1  6  3
2  1  4  9  8  4


Unnamed: 0,A,B,C,D,E
0,4,-3,1,-4,6
1,0,0,0,0,0
2,-4,0,8,2,1


In [105]:
## 依照列來進行減法
df.subtract(df['C'], axis = 0)

Unnamed: 0,A,B,C,D,E
0,7,-1,0,0,7
1,4,3,0,5,2
2,-8,-5,0,-1,-5


In [107]:
## 取第一行的B、D列
half_row = df.iloc[0, 1::2]

half_row

B    1
D    2
Name: 0, dtype: int32

In [109]:
df - half_row

Unnamed: 0,A,B,C,D,E
0,,0.0,,0.0,
1,,3.0,,4.0,
2,,3.0,,6.0,
