In [90]:
import pandas as pd
import numpy as np

# Build a 4x3 demo frame of standard-normal draws, indexed by city name.
# The global NumPy RNG is seeded first so that "Restart Kernel -> Run All"
# reproduces the same numbers instead of a new random table on every run.
np.random.seed(42)
frame = pd.DataFrame(np.random.randn(4, 3),
                     columns=list('bde'),
                     index=['台北', '台中', '台南', '高雄'])
frame

Unnamed: 0,b,d,e
台北,1.361643,2.024838,0.497987
台中,-0.428679,-0.964359,1.797869
台南,0.998392,-1.016219,-1.059831
高雄,-0.220295,-0.750427,0.110221


In [91]:
np.abs(frame)  # NumPy operations can also be applied to pandas objects

Unnamed: 0,b,d,e
台北,1.361643,2.024838,0.497987
台中,0.428679,0.964359,1.797869
台南,0.998392,1.016219,1.059831
高雄,0.220295,0.750427,0.110221


In [92]:
# Re-display frame: it still holds its negative values, i.e. np.abs(frame)
# returned a new object rather than modifying frame.
frame

Unnamed: 0,b,d,e
台北,1.361643,2.024838,0.497987
台中,-0.428679,-0.964359,1.797869
台南,0.998392,-1.016219,-1.059831
高雄,-0.220295,-0.750427,0.110221


In [93]:
# Custom function
def subValue(x):
    """Return the spread of ``x``: its largest value minus its smallest."""
    highest = x.max()
    lowest = x.min()
    return highest - lowest

# With the default axis, apply() hands each column to subValue, so the
# result has one entry per column label (b, d, e).
frame.apply(subValue)

b    1.790322
d    3.041056
e    2.857699
dtype: float64

In [94]:
# Anonymous function (lambda): the same per-column max - min without a named def
frame.apply(lambda x: x.max() - x.min())

b    1.790322
d    3.041056
e    2.857699
dtype: float64

In [95]:
# axis=1 hands each row to the lambda instead, so the result has one entry
# per city.
frame.apply(lambda x: x.max() - x.min(), axis=1)

台北    1.526851
台中    2.762227
台南    2.058223
高雄    0.860648
dtype: float64

In [96]:
def subValue1(x):
    """Return the minimum and maximum of ``x`` as a Series labelled 'min'/'max'."""
    lowest, highest = x.min(), x.max()
    labels = ['min', 'max']
    return pd.Series([lowest, highest], index=labels)
# Because subValue1 returns a Series, apply() assembles a DataFrame with a
# 'min' row and a 'max' row for every column.
frame.apply(subValue1)

Unnamed: 0,b,d,e
min,-0.428679,-1.016219,-1.059831
max,1.361643,2.024838,1.797869


In [97]:
# Row-wise min/max, then transposed so the cities become the columns and
# 'min'/'max' become the rows.
frame.apply(subValue1,axis=1).T

Unnamed: 0,台北,台中,台南,高雄
min,0.497987,-0.964359,-1.059831,-0.750427
max,2.024838,1.797869,0.998392,0.110221


In [98]:
def subValue2(x):
    """Round ``x`` to two decimals by formatting it as text and parsing it back."""
    two_decimal_text = format(x, ".2f")
    return float(two_decimal_text)
# applymap() calls subValue2 on every individual element of the frame.
# NOTE(review): DataFrame.applymap is deprecated in pandas >= 2.1 (renamed to
# DataFrame.map) — confirm the pandas version in use before upgrading.
frame.applymap(subValue2)


Unnamed: 0,b,d,e
台北,1.36,2.02,0.5
台中,-0.43,-0.96,1.8
台南,1.0,-1.02,-1.06
高雄,-0.22,-0.75,0.11


In [99]:
# Same element-wise two-decimal rounding, written inline as a lambda.
# NOTE(review): DataFrame.applymap is deprecated in pandas >= 2.1 (renamed to
# DataFrame.map) — confirm the pandas version in use before upgrading.
frame.applymap(lambda x:float("{:.2f}".format(x)))

Unnamed: 0,b,d,e
台北,1.36,2.02,0.5
台中,-0.43,-0.96,1.8
台南,1.0,-1.02,-1.06
高雄,-0.22,-0.75,0.11


In [100]:
# Re-display frame: the values still have full precision, i.e. the applymap()
# calls returned new frames rather than modifying frame.
frame

Unnamed: 0,b,d,e
台北,1.361643,2.024838,0.497987
台中,-0.428679,-0.964359,1.797869
台南,0.998392,-1.016219,-1.059831
高雄,-0.220295,-0.750427,0.110221


In [101]:
# Series.map applies the lambda to each value of column 'b'. Despite its
# name, newFrame holds a Series (a single column), not a DataFrame.
newFrame = frame['b'].map(lambda x: float("{:.2f}".format(x)))
# The sum of the rounded values is itself not an exact two-decimal number,
# as the displayed result shows.
newFrame.sum()

1.7100000000000002

In [102]:
# Small frame containing missing values (NaN) for the reduction examples,
# built column by column.
df = pd.DataFrame(
    {
        'one': [1.4, 7.1, np.nan, 0.75],
        'two': [np.nan, -4.5, np.nan, -1.3],
    },
    index=list('abcd'),
)
df

Unnamed: 0,one,two
a,1.4,
b,7.1,-4.5
c,,
d,0.75,-1.3


In [103]:
# Column totals; NaN entries are skipped rather than propagated.
df.sum()

one    9.25
two   -5.80
dtype: float64

In [104]:
# Row totals; the all-NaN row 'c' sums to 0.0.
df.sum(axis=1)

a    1.40
b    2.60
c    0.00
d   -0.55
dtype: float64

In [105]:
# Re-display df before summarising it.
df

Unnamed: 0,one,two
a,1.4,
b,7.1,-4.5
c,,
d,0.75,-1.3


In [106]:
# Summary statistics per column; 'count' reports the non-NaN entries
# (3 for 'one', 2 for 'two').
df.describe()

Unnamed: 0,one,two
count,3.0,2.0
mean,3.083333,-2.9
std,3.493685,2.262742
min,0.75,-4.5
25%,1.075,-3.7
50%,1.4,-2.9
75%,4.25,-2.1
max,7.1,-1.3


In [107]:
# Four widely spread values; their mean is 7.0.
s1 = pd.Series([0, 5, 9, 14])
s1.mean()

7.0

In [108]:
# Four tightly clustered values; their mean is also 7.0.
s2 = pd.Series([5, 6, 8, 9])
s2.mean()

7.0

In [109]:
# Sample standard deviation (n - 1 denominator) of the spread-out series.
s1.std()

5.94418483337567

In [110]:
# Sample standard deviation (n - 1 denominator) of the clustered series.
s2.std()

1.8257418583505538

In [111]:
# Use the per-township/district population-density data to compute the total
# population and the total land area.
# (pandas and numpy are imported again here; they were already imported in
# the first cell of the notebook.)
import pandas as pd
import numpy as np
# The path is relative, so the CSV must sit in the working directory.
dataFrame = pd.read_csv('各鄉鎮市區人口密度.csv')
dataFrame

Unnamed: 0,statistic_yyy,site_id,people_total,area,population_density
0,統計年,區域別,年底人口數,土地面積,人口密度
1,106,新北市板橋區,551480,23.1373,23835
2,106,新北市三重區,387484,16.317,23747
3,106,新北市中和區,413590,20.144,20532
4,106,新北市永和區,222585,5.7138,38956
...,...,...,...,...,...
371,,,,,
372,,說明：1.人口密度係指每單位土地面積內之人口數。,,,
373,,2.96年12月起，我國土地面積增列東沙群島(2.38平方公里)及南沙群島(0.4896平方公里),,,
374,,，由高雄市代管；原金門縣烏坵鄉面積，因重測由2.6平方公里修正為1.2平方公里。,,,


In [112]:
# Every column was read with dtype object, including the numeric-looking ones.
dataFrame.dtypes

statistic_yyy         object
site_id               object
people_total          object
area                  object
population_density    object
dtype: object

In [113]:
# Promote row 0 (the Chinese header labels) to be the column names.
# NOTE(review): assigning a Series here presumably also carries its name (0)
# onto the columns axis, which looks like the source of the stray "0" line in
# the later dtypes output — confirm.
dataFrame.columns = dataFrame.loc[0]

In [114]:
# Drop row 0, which held the Chinese header labels now used as column names.
dataFrame = dataFrame.drop(0)

In [115]:
# Inspect the last 20 rows.
dataFrame[-20:]

Unnamed: 0,統計年,區域別,年底人口數,土地面積,人口密度
356,106.0,新竹市香山區,77943,54.8491,1421
357,106.0,嘉義市東區,122103,30.1556,4049
358,106.0,嘉義市西區,147295,29.87,4931
359,106.0,金門縣金城鎮,43285,21.713,1994
360,106.0,金門縣金湖鎮,29413,41.696,705
361,106.0,金門縣金沙鎮,20535,41.19,499
362,106.0,金門縣金寧鄉,30838,29.854,1033
363,106.0,金門縣烈嶼鄉,12700,16.003,794
364,106.0,金門縣烏坵鄉,685,1.2,571
365,106.0,連江縣南竿鄉,7544,10.4,725


In [116]:
# Keep index labels 1 through 370 (.loc slices by label and includes both
# ends) and only the district, population and land-area columns.
# NOTE(review): 370 is hard-coded; presumably it is the last data row before
# the blank/footnote rows (371 onwards) — confirm against the CSV.
dataFrame=dataFrame.loc[1:370,['區域別','年底人口數','土地面積']]

In [117]:
# Display the trimmed frame; the population of 東沙群島 is the placeholder '…'.
dataFrame

Unnamed: 0,區域別,年底人口數,土地面積
1,新北市板橋區,551480,23.1373
2,新北市三重區,387484,16.317
3,新北市中和區,413590,20.144
4,新北市永和區,222585,5.7138
5,新北市新莊區,416524,19.7383
...,...,...,...
366,連江縣北竿鄉,2360,9.9
367,連江縣莒光鄉,1624,4.7
368,連江縣東引鄉,1352,3.8
369,東沙群島,…,2.38


In [118]:
# All three remaining columns still have dtype object.
dataFrame.dtypes

0
區域別      object
年底人口數    object
土地面積     object
dtype: object

In [122]:
def changeValue(x):
    """Convert ``x`` to ``int``, returning 0 when it is not a valid integer.

    Parameters
    ----------
    x : object
        Raw cell value, e.g. the text ``'551480'`` or a placeholder such
        as ``'…'``.

    Returns
    -------
    int
        ``int(x)`` when the conversion succeeds, otherwise ``0``.

    Notes
    -----
    Only conversion failures (``TypeError``, ``ValueError``,
    ``OverflowError``) are mapped to 0. A bare ``except:`` would also
    swallow ``KeyboardInterrupt``/``SystemExit`` and hide unrelated bugs.
    """
    try:
        return int(x)
    except (TypeError, ValueError, OverflowError):
        # Placeholder text, None, NaN or infinity: treat as "no count".
        return 0
    
# Convert the population column to integers with changeValue; values that
# cannot be converted (such as the '…' placeholder for 東沙群島) become 0.
dataFrame['年底人口數'] = dataFrame['年底人口數'].map(changeValue)

In [124]:
# The population column is now int64; land area is still object.
dataFrame.dtypes

0
區域別      object
年底人口數     int64
土地面積     object
dtype: object

In [125]:
# Display the frame again: the '…' placeholder for 東沙群島 now shows as 0.
dataFrame

Unnamed: 0,區域別,年底人口數,土地面積
1,新北市板橋區,551480,23.1373
2,新北市三重區,387484,16.317
3,新北市中和區,413590,20.144
4,新北市永和區,222585,5.7138
5,新北市新莊區,416524,19.7383
...,...,...,...
366,連江縣北竿鄉,2360,9.9
367,連江縣莒光鄉,1624,4.7
368,連江縣東引鄉,1352,3.8
369,東沙群島,0,2.38


In [126]:
# Parse every land-area entry into a float (the built-in is passed directly
# as the mapper).
dataFrame['土地面積'] = dataFrame['土地面積'].map(float)

In [128]:
# The land-area column is now float64.
dataFrame.dtypes

0
區域別       object
年底人口數      int64
土地面積     float64
dtype: object

In [131]:
# Total year-end population over all retained rows (unparseable entries
# were converted to 0 earlier, so they add nothing).
dataFrame['年底人口數'].sum()

23571227

In [132]:
# Total land area over all retained rows.
dataFrame['土地面積'].sum()

36197.0669

In [142]:
# The 20 rows with the largest year-end population, largest first.
dataFrame.sort_values(by='年底人口數', ascending=False).head(20)

Unnamed: 0,區域別,年底人口數,土地面積
1,新北市板橋區,551480,23.1373
42,桃園市桃園區,440840,34.8046
5,新北市新莊區,416524,19.7383
3,新北市中和區,413590,20.144
43,桃園市中壢區,405216,76.52
2,新北市三重區,387484,16.317
132,高雄市鳳山區,359120,26.759
125,高雄市三民區,343243,19.7866
32,臺北市大安區,309969,11.3614
6,新北市新店區,302089,120.2255


In [143]:
# The 20 rows with the largest land area, largest first.
dataFrame.sort_values(by='土地面積', ascending=False).head(20)

Unnamed: 0,區域別,年底人口數,土地面積
338,花蓮縣秀林鄉,15889,1641.8555
239,南投縣信義鄉,16253,1422.4188
240,南投縣仁愛鄉,15748,1273.5312
83,臺中市和平區,10949,1037.8192
340,花蓮縣卓溪鄉,6066,1021.313
157,高雄市桃源區,4246,928.98
324,臺東縣海端鄉,4222,880.0382
170,宜蘭縣南澳鄉,5900,740.652
169,宜蘭縣大同鄉,6102,657.5442
339,花蓮縣萬榮鄉,6384,618.491


In [144]:
# Yearly advertising spend and average monthly sales, one row per year,
# indexed from 2020 down to 2011.
# NOTE(review): this rebinds `dataFrame`, replacing the population table
# built in the earlier cells.
data = {
    '年廣告費投入': [12.5, 15.3, 23.2, 26.4, 33.5, 34.4, 39.4, 45.2, 55.4, 60.9],
    '月均銷售額': [21.2, 23.9, 32.9, 34.1, 42.5, 43.2, 49.0, 52.8, 59.4, 63.5],
}
dataFrame = pd.DataFrame(data, index=list(range(2020, 2010, -1)))
dataFrame

Unnamed: 0,年廣告費投入,月均銷售額
2020,12.5,21.2
2019,15.3,23.9
2018,23.2,32.9
2017,26.4,34.1
2016,33.5,42.5
2015,34.4,43.2
2014,39.4,49.0
2013,45.2,52.8
2012,55.4,59.4
2011,60.9,63.5
