## Create a DataFrame from a list of dicts

In [2]:
import pandas as pd

In [3]:
# Six records that cycle through the keys A, B, C while `data` counts 1..6;
# each dict becomes one row of the frame.
records = [
    {'key': label, 'data': value}
    for value, label in enumerate('ABCABC', start=1)
]
dfmm = pd.DataFrame(records)


In [4]:
# print() writes the frame's plain-text representation to the cell output.
print(dfmm)

   data key
0     1   A
1     2   B
2     3   C
3     4   A
4     5   B
5     6   C


In [5]:
# Add up `data` within each `key` group; the trailing bare name displays the result.
totals_by_key = dfmm.groupby(by='key').sum()
totals_by_key

Unnamed: 0_level_0,data
key,Unnamed: 1_level_1
A,5
B,7
C,9


In [6]:
# Number of rows in each `key` group; the trailing bare name displays the result.
counts_by_key = dfmm.groupby(by='key').count()
counts_by_key

Unnamed: 0_level_0,data
key,Unnamed: 1_level_1
A,2
B,2
C,2


## Example: analyze the data

In [1]:
import json
import requests

# JSON feed downloaded for this example.
DATA_URL = "https://od.cdc.gov.tw/eic/NHI_EnteroviralInfection.json"

# requests applies no timeout unless one is given, so an unresponsive server
# would block this cell forever; 60 seconds bounds the wait.
res = requests.get(DATA_URL, timeout=60)
# Stop here on a 4xx/5xx answer instead of failing later while parsing an
# error page as JSON.
res.raise_for_status()

In [2]:
# Parse the body with the response's own JSON decoder rather than
# json.loads(res.text): res.text depends on the character set requests
# guesses for the body, which matters for the Chinese field names.
result = res.json()
print(type(result))

<class 'list'>


In [3]:
# Number of records in the parsed list.
len(result)

112173

In [4]:
# Peek at the first record to see the field names and how the values are encoded.
result[0]

{'健保就診總人次': '105',
 '就診類別': '住院',
 '年': '2008',
 '年齡別': '0-2',
 '縣市': '台中市',
 '腸病毒健保就診人次': '0',
 '週': '14'}

In [5]:
import pandas as pd
# Build the frame from the list of record dicts: one row per record, one column per field.
df=pd.DataFrame(result)

In [11]:
# Summary of row count, columns and dtypes.
# NOTE(review): the saved output already lists 腸病毒健保就診人次 as int64 even
# though the astype() conversion sits in a later cell, and the execution
# counts are not sequential — this looks like out-of-order execution;
# confirm with Restart Kernel -> Run All.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 112173 entries, 0 to 112172
Data columns (total 7 columns):
健保就診總人次      112173 non-null object
就診類別         112173 non-null object
年            112173 non-null object
年齡別          112173 non-null object
縣市           112173 non-null object
腸病毒健保就診人次    112173 non-null int64
週            112173 non-null object
dtypes: int64(1), object(6)
memory usage: 6.0+ MB


# Convert the 腸病毒健保就診人次 column from strings to integers

In [12]:
# The counts arrive as quoted strings in the JSON records, so they must be
# converted before they can be summed. An explicit 'int64' keeps the dtype
# identical on every platform; plain `int` follows the platform's default
# integer width.
df['腸病毒健保就診人次'] = df['腸病毒健保就診人次'].astype('int64')

In [13]:
# Check the per-column dtypes; only 腸病毒健保就診人次 is expected to be numeric here.
df.dtypes

健保就診總人次      object
就診類別         object
年            object
年齡別          object
縣市           object
腸病毒健保就診人次     int64
週            object
dtype: object

## groupby('年') 分析：每年 Taiwan（2008~2018）腸病毒健保就診人次多少？

In [9]:
# Yearly total of enterovirus visits; the double brackets keep the result a
# one-column DataFrame rather than a Series.
visits_by_year = df.groupby(by='年')[['腸病毒健保就診人次']].sum()
visits_by_year

Unnamed: 0_level_0,腸病毒健保就診人次
年,Unnamed: 1_level_1
2008,360000
2009,270178
2010,855397
2011,409542
2012,483404
2013,578879
2014,503170
2015,535773
2016,590212
2017,455533


## Let's verify the one-line groupby above with an explicit loop:

In [14]:
# Number of rows in the frame, taken from one of its columns; the loop cells
# below use `leng` as their upper bound.
leng = df['縣市'].size
print(leng)

112173


In [15]:


# Manual cross-check of the groupby total: accumulate the visit counts per
# year in a single pass over the two columns, instead of two index-based
# passes that look up df[column][i] for every row.
d = {}
for year, visits in zip(df['年'], df['腸病毒健保就診人次']):
    # .get() starts a year at 0 the first time it is seen; int() stores a
    # plain Python int so the printed dict shows bare numbers.
    d[year] = d.get(year, 0) + int(visits)
print(d)

{'2008': 360000, '2009': 270178, '2010': 855397, '2011': 409542, '2012': 483404, '2013': 578879, '2014': 503170, '2015': 535773, '2016': 590212, '2017': 455533, '2018': 90211}


## 樞紐分析
## 不同縣市每年腸病毒就診類別(住院,門診)的總人次是多少 ? 

In [16]:
# Rows: (year, visit type); columns: county; cells: summed enterovirus visits.
df.pivot_table(
    index=['年', '就診類別'],
    columns='縣市',
    values='腸病毒健保就診人次',
    aggfunc='sum',
)

Unnamed: 0_level_0,縣市,南投縣,台中市,台北市,台南市,台東縣,嘉義市,嘉義縣,基隆市,宜蘭縣,屏東縣,...,新竹市,新竹縣,桃園市,澎湖縣,花蓮縣,苗栗縣,連江縣,金門縣,雲林縣,高雄市
年,就診類別,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
2008,住院,14,765,739,62,6,11,30,78,13,36,...,158,31,152,0,60,2,1,47,10,291
2008,門診,10056,55128,26905,41456,2934,8274,4016,4760,7288,8292,...,7864,10312,34864,1526,2605,10261,81,1694,14893,34811
2009,住院,153,986,820,115,59,186,237,124,297,136,...,243,29,728,0,130,18,0,17,8,620
2009,門診,6645,46014,17825,24913,2806,6779,2948,4229,7042,7758,...,5540,7781,24582,2211,3159,7701,90,1458,11097,23736
2010,住院,352,2327,2305,251,68,408,588,388,1279,228,...,545,220,1838,1,122,202,1,25,54,1557
2010,門診,23468,137646,68862,72263,4521,19335,8712,13457,14426,19609,...,23046,26741,84853,2581,6415,19104,142,3098,30818,78846
2011,住院,165,891,1110,142,63,189,339,207,783,138,...,277,96,1188,2,106,24,0,30,61,698
2011,門診,8758,55265,34228,36128,2371,10101,5179,6580,6767,9455,...,10525,11189,45576,1117,3552,8617,20,1061,19495,34705
2012,住院,128,1193,1256,212,68,149,264,206,583,199,...,327,93,1152,1,101,101,0,25,20,783
2012,門診,8768,68450,44295,39727,2339,8245,3790,8448,9785,11674,...,13669,14022,48974,1036,4122,12364,27,905,15852,39225


## Let's check the pivot-table totals for Yilan County (宜蘭縣), both visit types combined, with an explicit loop:

In [17]:
# Manual cross-check for Yilan County (宜蘭縣): per-year visit totals in a
# single pass over the three columns.
d = {}
for year, county, visits in zip(df['年'], df['縣市'], df['腸病毒健保就診人次']):
    # Register every year, even one without a Yilan row, so it still
    # appears in the result with a total of 0.
    d.setdefault(year, 0)
    if county == '宜蘭縣':
        # int() stores a plain Python int so the printed dict shows bare numbers.
        d[year] += int(visits)
print(d)

{'2008': 7301, '2009': 7339, '2010': 15705, '2011': 7550, '2012': 10368, '2013': 11486, '2014': 11816, '2015': 11529, '2016': 11781, '2017': 10905, '2018': 1889}


In [20]:
# Year-by-county totals (the visit type is not part of the index, so both
# types are summed together), then select the Yilan County column.
dfm = df.pivot_table(
    index=['年'],
    columns='縣市',
    values='腸病毒健保就診人次',
    aggfunc='sum',
)
dfm['宜蘭縣']

年
2008     7301
2009     7339
2010    15705
2011     7550
2012    10368
2013    11486
2014    11816
2015    11529
2016    11781
2017    10905
2018     1889
Name: 宜蘭縣, dtype: int64