# 보건복지부 코로나 19 감염현황 

```
(1) https://data.go.kr 회원가입
(2) 보건복지부_코로나19 감염_현황
https://data.go.kr/data/15043376/openapi.do
[활용신청] 후 [마이페이지] 에서 신청한 OpenAPI 서비스가 승인상태인지 확인
(3) 서비스키 정보 확인 (일반 인증키 중 Decoding 키 사용)
: VM/dd7n1AmsYq+idWxsgXQgUKW2nVYVg+wISPkG3ArmJPPtKZBizTBlzylZ3v8wLrVPIldiOZG5F4/x5qxTpuQ==
```


# XML 이란?

- eXtensible Markup Language
- 다목적의 성격을 가진 마크업 언어 
- https://www.w3schools.com/Xml/

```
xml 문법
<태그>내용</태그>
<태그 />

최상위 태그는 반드시 1개만 존재 => 루트 태그(루트 요소), 나머지 모든 요소의 부모 요소
```

In [1]:
# 관련 모듈 임포트 

#import xml.etree.ElementTree
from xml.etree.ElementTree import *
import pandas as pd


### xml.etree.ElementTree
https://docs.python.org/ko/3/library/xml.etree.elementtree.html


In [6]:
# ls

In [7]:
# Load the XML file and parse it into an ElementTree object.
tree = parse('country_data_xml')
tree_type = type(tree)
print(tree_type)
tree

<class 'xml.etree.ElementTree.ElementTree'>


<xml.etree.ElementTree.ElementTree at 0x19c53976a30>

In [9]:
# Get the root element of the parsed tree via ElementTree.getroot(),
# then show the element itself and its tag name on separate lines.
root = tree.getroot()
print(root, root.tag, sep='\n')

<Element 'data' at 0x0000019C53973B80>
data


In [11]:
# Print the tag and attribute dict of every direct child of the root.
for child in root:
    tag, attributes = child.tag, child.attrib
    print(tag, attributes)

country {'name': 'Liechtenstein'}
country {'name': 'Singapore'}
country {'name': 'Panama'}


In [20]:
# findall(tag) -> list of all matching elements
# find(tag)    -> the first matching element only
print(root.findall('country'))
first_country = root.find('country')
print(first_country)
first_gdppc = first_country.find('gdppc')
print(first_gdppc)
# Print only the text content of the element.
print(first_gdppc.text)
# Value of the `name` attribute on the first country tag.
print(first_country.get('name'))

[<Element 'country' at 0x0000019C539739A0>, <Element 'country' at 0x0000019C5399D220>, <Element 'country' at 0x0000019C5399D3B0>]
<Element 'country' at 0x0000019C539739A0>
<Element 'gdppc' at 0x0000019C5399D130>
141100
Liechtenstein


In [24]:
# Collect the rank / year / gdppc text of every country element as a list of rows.
country_list = []
for country in root.findall('country'):
    rank, year, gdppc = (country.find(tag).text for tag in ('rank', 'year', 'gdppc'))
    country_list.append([rank, year, gdppc])
country_list

[['1', '2008', '141100'], ['4', '2011', '59900'], ['68', '2011', '13600']]

In [26]:
# Build a DataFrame from the collected rows, one column per extracted field.
column_names = ['rank', 'year', 'gdppc']
df_country = pd.DataFrame(data=country_list, columns=column_names)
df_country

Unnamed: 0,rank,year,gdppc
0,1,2008,141100
1,4,2011,59900
2,68,2011,13600


# 보건복지부 코로나 19
- xml 활용

In [36]:
from xml.etree.ElementTree import *
import pandas as pd
import datetime 
import requests

In [37]:


# Convert date/time values into strings in YYYYMMDD form.
# Equivalent to datetime.date(year, month, day).strftime('%Y%m%d').

today = datetime.datetime.now()
start_day = format(datetime.date(2021, 1, 4), '%Y%m%d')
end_day = format(today, '%Y%m%d')

print(today)
print(start_day, end_day)
print(type(start_day), type(end_day))


2021-09-14 16:54:05.442505
20210104 20210914
<class 'str'> <class 'str'>


In [52]:
# 2) Request the page

# Build the request URL.
url = 'http://openapi.data.go.kr/openapi/service/rest/Covid19/getCovid19InfStateJson'

# 'ServiceKey' takes the general authentication key (Decoding form).
# NOTE(review): the key is hard-coded in this cell — consider loading it from
# an environment variable or a config file instead of committing it.
payload = {
    'ServiceKey': 'Yhq6n9vlpaKZsCOw0jec8zIdZ8p+Bpuku6WLjVgDhqXRW6dHnfXoauSEj19jpnjv59CUGspyTxVCgCeXOua7dg==',
    'startCreateDt': start_day,
    'endCreateDt': end_day,
}

# Without a timeout, requests.get() can block indefinitely on an unresponsive server.
res = requests.get(url, params=payload, timeout=30)

print(res)
# Stop here on an HTTP error status instead of handing an error page to the XML parser later.
res.raise_for_status()
print(type(res.text))
print(res.text)


<Response [200]>
<class 'str'>
<?xml version="1.0" encoding="UTF-8" standalone="yes"?><response><header><resultCode>00</resultCode><resultMsg>NORMAL SERVICE.</resultMsg></header><body><items><item><accDefRate>2.1428523165</accDefRate><accExamCnt>13790229</accExamCnt><accExamCompCnt>12875829</accExamCompCnt><careCnt>25896</careCnt><clearCnt>247647</clearCnt><createDt>2021-09-14 09:45:02.459</createDt><deathCnt>2367</deathCnt><decideCnt>275910</decideCnt><examCnt>914400</examCnt><resutlNegCnt>12599919</resutlNegCnt><seq>635</seq><stateDt>20210914</stateDt><stateTime>00:00</stateTime><updateDt>2021-09-14 15:21:23.528</updateDt></item><item><accDefRate>2.1421776514</accDefRate><accExamCnt>13735741</accExamCnt><accExamCompCnt>12810002</accExamCompCnt><careCnt>26548</careCnt><clearCnt>245505</clearCnt><createDt>2021-09-13 09:40:59.629</createDt><deathCnt>2360</deathCnt><decideCnt>274413</decideCnt><examCnt>925739</examCnt><resutlNegCnt>12535589</resutlNegCnt><seq>634</seq><stateDt>20210913</

In [53]:
# 3) Parse the response body into an XML object.
# Pass the raw bytes so the parser honours the encoding named in the XML
# declaration, rather than relying on the charset requests guessed for `.text`.
tree = ElementTree(fromstring(res.content))

# Root element
root = tree.getroot()
print(root)


<Element 'response' at 0x0000019C5506ADB0>


In [54]:
# 4) Filtering
# Choose the element the filtering starts from.
# Path: body > items > item > accDefRate | DECIDE_CNT ....
# NOTE(review): this rebinds `res`, which held the requests.get() response, to the <body> element.
res = root.find('body')
first_item = res.find('items').find('item')
acc_def_rate = first_item.find('accDefRate')
print(acc_def_rate)
print(acc_def_rate.text)


<Element 'accDefRate' at 0x0000019C54FDD7C0>
2.1428523165


In [55]:
# Gather every <item> element found under <items>.
items_node = res.find('items')
item_list = items_node.findall('item')
item_list[0]
len(item_list)

254

In [56]:
# Store the selected fields as a 2-D list (one row per item element).
field_list = ['기준일', '확진자수', '사망자수', '검사진행 수']
# XML tags read for each row, in row order.
tag_names = ('stateDt', 'decideCnt', 'deathCnt', 'examCnt')
result_list = []
for item in item_list:
    row = [item.find(tag).text for tag in tag_names]
    result_list.append(row)
len(result_list)


254