In [54]:
import pandas as pd
import numpy as np

# Build the 4x3 demo frame from a fixed-seed generator so that Restart & Run All
# reproduces the same numbers. The original unseeded np.random.randn call produced
# different values on every run, so the saved outputs could never be regenerated.
# 42 is an arbitrary seed; any fixed integer works.
frame = pd.DataFrame(np.random.RandomState(seed=42).randn(4, 3),
                     columns=list('bde'),
                     index=['台北', '台中', '台南', '高雄'])
frame

Unnamed: 0,b,d,e
台北,-1.131775,0.883916,-0.735546
台中,1.154421,-0.028497,-1.352493
台南,1.389021,0.869516,-0.737622
高雄,-0.581889,0.873594,2.148636


In [55]:
np.abs(frame) # NumPy functions can also be applied directly to pandas objects

Unnamed: 0,b,d,e
台北,1.131775,0.883916,0.735546
台中,1.154421,0.028497,1.352493
台南,1.389021,0.869516,0.737622
高雄,0.581889,0.873594,2.148636


In [56]:
# Display frame again: the np.abs result above was not assigned, so frame still holds its signed values.
frame

Unnamed: 0,b,d,e
台北,-1.131775,0.883916,-0.735546
台中,1.154421,-0.028497,-1.352493
台南,1.389021,0.869516,-0.737622
高雄,-0.581889,0.873594,2.148636


In [57]:
# Custom (user-defined) function
def subValue(x):
    """Return the spread of x: its largest value minus its smallest."""
    largest, smallest = x.max(), x.min()
    return largest - smallest

# axis=0 (the default) passes each column to subValue, giving one result per column.
frame.apply(subValue, axis=0)

b    2.520796
d    0.912413
e    3.501129
dtype: float64

In [58]:
# Anonymous function (lambda): the per-column max - min written inline
frame.apply(lambda col: col.max() - col.min())

b    2.520796
d    0.912413
e    3.501129
dtype: float64

In [59]:
# axis=1 passes each row to the lambda, giving one max - min value per index label.
frame.apply(lambda row: row.max() - row.min(), axis=1)

台北    2.015691
台中    2.506914
台南    2.126643
高雄    2.730524
dtype: float64

In [60]:
def subValue1(x):
    """Return a two-element Series holding x's minimum and maximum, labelled 'min' and 'max'."""
    labels = ['min', 'max']
    extremes = [x.min(), x.max()]
    return pd.Series(extremes, index=labels)
# subValue1 returns a Series, so applying it per column (axis=0) yields a DataFrame
# with rows 'min' and 'max'.
frame.apply(subValue1, axis=0)

Unnamed: 0,b,d,e
min,-1.131775,-0.028497,-1.352493
max,1.389021,0.883916,2.148636


In [61]:
# Apply subValue1 per row, then transpose so 'min'/'max' become the rows
# and the original index labels become the columns.
frame.apply(subValue1, axis=1).transpose()

Unnamed: 0,台北,台中,台南,高雄
min,-1.131775,-1.352493,-0.737622,-0.581889
max,0.883916,1.154421,1.389021,2.148636


In [62]:
def subValue2(x):
    """Round x to two decimal places by formatting it as text and parsing it back to float."""
    two_decimals = format(x, ".2f")
    return float(two_decimals)
# Apply subValue2 to every element of frame.
# DataFrame.applymap was renamed to DataFrame.map in pandas 2.1 and applymap is
# deprecated there, so use the new name when this pandas version provides it and
# fall back to applymap on older versions.
(frame.map if hasattr(pd.DataFrame, "map") else frame.applymap)(subValue2)


Unnamed: 0,b,d,e
台北,-1.13,0.88,-0.74
台中,1.15,-0.03,-1.35
台南,1.39,0.87,-0.74
高雄,-0.58,0.87,2.15


In [63]:
# Element-wise rounding to two decimals, written inline as a lambda.
# DataFrame.applymap was renamed to DataFrame.map in pandas 2.1 and applymap is
# deprecated there, so use the new name when this pandas version provides it and
# fall back to applymap on older versions.
(frame.map if hasattr(pd.DataFrame, "map") else frame.applymap)(
    lambda x: float("{:.2f}".format(x))
)

Unnamed: 0,b,d,e
台北,-1.13,0.88,-0.74
台中,1.15,-0.03,-1.35
台南,1.39,0.87,-0.74
高雄,-0.58,0.87,2.15


In [64]:
# Display frame again: the element-wise calls above returned new frames, so frame keeps its full-precision values.
frame

Unnamed: 0,b,d,e
台北,-1.131775,0.883916,-0.735546
台中,1.154421,-0.028497,-1.352493
台南,1.389021,0.869516,-0.737622
高雄,-0.581889,0.873594,2.148636


In [65]:
# Series.map applies the function to each element of column 'b'.
# Despite its name, newFrame is a Series (a single column), not a DataFrame.
newFrame = frame['b'].map(lambda value: float(format(value, ".2f")))
# Total of the rounded column.
newFrame.sum()

0.83

In [66]:
# Small frame with missing values (NaN), specified column by column:
# row 'a' lacks 'two', and row 'c' is missing in both columns.
df = pd.DataFrame(
    {
        'one': [1.4, 7.1, np.nan, 0.75],
        'two': [np.nan, -4.5, np.nan, -1.3],
    },
    index=list('abcd'),
)
df

Unnamed: 0,one,two
a,1.4,
b,7.1,-4.5
c,,
d,0.75,-1.3


In [67]:
# Column totals (axis=0 is the default); NaN entries are skipped.
df.sum(axis=0)

one    9.25
two   -5.80
dtype: float64

In [68]:
# Row totals; NaN entries are skipped, so the all-NaN row 'c' sums to 0.
df.sum(axis='columns')

a    1.40
b    2.60
c    0.00
d   -0.55
dtype: float64

In [69]:
# Display df again for reference; the sum() calls above did not modify it.
df

Unnamed: 0,one,two
a,1.4,
b,7.1,-4.5
c,,
d,0.75,-1.3


In [70]:
# Per-column summary statistics; count covers only the non-NaN entries
# (3 for 'one', 2 for 'two').
df.describe()

Unnamed: 0,one,two
count,3.0,2.0
mean,3.083333,-2.9
std,3.493685,2.262742
min,0.75,-4.5
25%,1.075,-3.7
50%,1.4,-2.9
75%,4.25,-2.1
max,7.1,-1.3


In [71]:
# First sample: four integers whose mean is 7.0.
s1 = pd.Series(data=[0, 5, 9, 14])
s1.mean()

7.0

In [72]:
# Second sample: four integers clustered closer together, also with mean 7.0.
s2 = pd.Series(data=[5, 6, 8, 9])
s2.mean()

7.0

In [73]:
# Sample standard deviation (ddof=1 is the pandas default, spelled out here).
s1.std(ddof=1)

5.94418483337567

In [74]:
# Sample standard deviation (ddof=1 is the pandas default, spelled out here).
s2.std(ddof=1)

1.8257418583505538

In [75]:
# Use the township/city/district population-density data to compute the total population and total land area
# NOTE(review): pandas and numpy are already imported in the first cell; these re-imports are redundant.
import pandas as pd
import numpy as np
# The path is relative, so the CSV must sit in the notebook's working directory.
# As the output shows, row 0 holds the Chinese column labels and the last rows are footnote text, not data.
dataFrame = pd.read_csv('各鄉鎮市區人口密度.csv')
dataFrame

Unnamed: 0,statistic_yyy,site_id,people_total,area,population_density
0,統計年,區域別,年底人口數,土地面積,人口密度
1,106,新北市板橋區,551480,23.1373,23835
2,106,新北市三重區,387484,16.317,23747
3,106,新北市中和區,413590,20.144,20532
4,106,新北市永和區,222585,5.7138,38956
...,...,...,...,...,...
371,,,,,
372,,說明：1.人口密度係指每單位土地面積內之人口數。,,,
373,,2.96年12月起，我國土地面積增列東沙群島(2.38平方公里)及南沙群島(0.4896平方公里),,,
374,,，由高雄市代管；原金門縣烏坵鄉面積，因重測由2.6平方公里修正為1.2平方公里。,,,


In [76]:
# Every column was read as object dtype, presumably because of the label row and
# footnote rows mixed into the data; numeric columns need converting before any arithmetic.
dataFrame.dtypes

statistic_yyy         object
site_id               object
people_total          object
area                  object
population_density    object
dtype: object

In [77]:
# Promote row 0 (the Chinese column labels) to the header.
# Pass a plain list: assigning the row Series directly would also copy its name
# (the row label 0) onto the columns Index, where it shows up as a stray "0" in the header.
# NOTE: this reads row label 0, so it raises KeyError if re-run after that row has been dropped.
dataFrame.columns = dataFrame.loc[0].tolist()

In [78]:
# Remove the row with index label 0 and rebind the name to the result.
# NOTE: not idempotent; re-running this cell raises KeyError because label 0 is already gone.
dataFrame = dataFrame.drop(index=0)

In [81]:
# Inspect the result: the header now carries the labels from the former row 0;
# the trailing empty/footnote rows (371-374) are still present.
dataFrame

Unnamed: 0,統計年,區域別,年底人口數,土地面積,人口密度
1,106,新北市板橋區,551480,23.1373,23835
2,106,新北市三重區,387484,16.317,23747
3,106,新北市中和區,413590,20.144,20532
4,106,新北市永和區,222585,5.7138,38956
5,106,新北市新莊區,416524,19.7383,21102
...,...,...,...,...,...
371,,,,,
372,,說明：1.人口密度係指每單位土地面積內之人口數。,,,
373,,2.96年12月起，我國土地面積增列東沙群島(2.38平方公里)及南沙群島(0.4896平方公里),,,
374,,，由高雄市代管；原金門縣烏坵鄉面積，因重測由2.6平方公里修正為1.2平方公里。,,,
