# Python 下載XML檔案與解析


* 了解 xml 檔案格式與內容
* 能夠利用套件存取 xml 格式的檔案


## 作業目標

* 比較一下範例檔案中的「File I/O」與「xmltodict」讀出來的內容有什麼差異

* 根據範例檔案的結果：
    1. 請問高雄市有多少地區有溫度資料？
    2. 請取出每一個地區所記錄的第一個時間點跟溫度
    3. 請取出第一個地區所記錄的每一個時間點跟溫度

In [17]:
# Download the forecast archive, unpack it, and list the extracted files.
import os
import sys
import urllib.request
import zipfile

res = "http://opendata.cwb.gov.tw/govdownload?dataid=F-D0047-093&authorizationkey=rdec-key-123-45678-011121314"

# urlretrieve does not create missing parent directories, so make sure
# the target folder exists before downloading into it.
os.makedirs('./data', exist_ok=True)
urllib.request.urlretrieve(res, "./data/d3.zip")

# Open the archive in a context manager so the file handle is closed
# as soon as extraction finishes.
with zipfile.ZipFile('./data/d3.zip') as f:
    f.extractall('./data/d3')

# List the extraction directory.
dirs = os.listdir('./data/d3')

# Print every file and folder name.
for file in dirs:
    print(file)

66_Week24_EN.xml
68_Week24_CH.xml
67_Weekday_EN.xml
65_Weekday_CH.xml
09020_72hr_CH.xml
10002_72hr_EN.xml
10017_72hr_EN.xml
10014_Week24_EN.xml
67_72hr_EN.xml
10008_72hr_CH.xml
TAIWAN_72hr_EN.xml
10009_Week24_CH.xml
10007_Week24_EN.xml
10010_Week24_EN.xml
09007_72hr_CH.xml
10014_72hr_CH.xml
68_Weekday_EN.xml
64_72hr_CH.xml
10017_Week24_EN.xml
10020_Week24_CH.xml
10013_Week24_EN.xml
10004_Week24_EN.xml
10020_Weekday_EN.xml
63_Weekday_CH.xml
10020_72hr_EN.xml
65_Week24_EN.xml
09020_Weekday_EN.xml
10004_72hr_CH.xml
10018_72hr_CH.xml
66_Weekday_EN.xml
10007_72hr_EN.xml
68_72hr_CH.xml
TAIWAN_Week24_EN.xml
64_Weekday_CH.xml
65_Week24_CH.xml
09020_Weekday_CH.xml
10004_72hr_EN.xml
66_Weekday_CH.xml
10018_72hr_EN.xml
10007_72hr_CH.xml
68_72hr_EN.xml
TAIWAN_Week24_CH.xml
64_Weekday_EN.xml
10017_Week24_CH.xml
10020_Week24_EN.xml
10013_Week24_CH.xml
10020_Weekday_CH.xml
10004_Week24_CH.xml
10020_72hr_CH.xml
63_Weekday_EN.xml
10014_Week24_CH.xml
10017_72hr_CH.xml
67_72hr_CH.xml
10008_72hr_EN.xml
TA

### 比較一下範例檔案中的「File I/O」與「xmltodict」讀出來的內容有什麼差異


In [1]:
# File I/O
# Read the sample as plain text. The file declares encoding="UTF-8" in its
# XML prolog and contains CJK characters, so pass the encoding explicitly
# instead of relying on the platform default.
with open('./data/d3/sample.xml', encoding='utf-8') as a:
    b = a.read()
    print(b)

<?xml version="1.0" encoding="UTF-8"?>
<CUPOY>
    <Title>爬蟲馬拉松</Title>
    <Author>Wei</Author>
    <Chapters>
        <Chapter name="01">資料來源與存取</Chapter>
        <Chapter name="02">靜態網頁爬蟲</Chapter>
        <Chapter name="03">動態網頁爬蟲</Chapter>
    </Chapters>
</CUPOY>


In [15]:
# xml.dom

import xml.dom.minidom

# Parse the sample file into a DOM document.
doc = xml.dom.minidom.parse("./data/d3/sample.xml")

# Print the text held by the first <Title> element.
title_node = doc.getElementsByTagName("Title")[0]
print(title_node.firstChild.nodeValue)

# Walk every <Chapter> element and print its "name" attribute next to its text.
chapters = doc.getElementsByTagName("Chapter")
for chapter in chapters:
    chapter_name = chapter.getAttribute('name')
    chapter_text = chapter.firstChild.nodeValue
    print(chapter_name, chapter_text)

<DOM Element: Title at 0x7fe63b677280>
爬蟲馬拉松
01 資料來源與存取
02 靜態網頁爬蟲
03 動態網頁爬蟲


In [27]:
# xmltodict

import xmltodict

# The sample file is UTF-8 with CJK text; decode it explicitly rather than
# with the platform default encoding.
with open("./data/d3/sample.xml", encoding="utf-8") as fd:
    doc = dict(xmltodict.parse(fd.read()))
    print(doc)


# Each <Chapter> entry is a mapping: the XML attribute is stored under
# '@name' and the element text under '#text'.
chapters = doc['CUPOY']['Chapters']['Chapter']
for chapter in chapters:
    print(chapter)

for chapter in chapters:
    print(chapter['@name'], chapter['#text'])


{'CUPOY': OrderedDict([('Title', '爬蟲馬拉松'), ('Author', 'Wei'), ('Chapters', OrderedDict([('Chapter', [OrderedDict([('@name', '01'), ('#text', '資料來源與存取')]), OrderedDict([('@name', '02'), ('#text', '靜態網頁爬蟲')]), OrderedDict([('@name', '03'), ('#text', '動態網頁爬蟲')])])]))])}
OrderedDict([('@name', '01'), ('#text', '資料來源與存取')])
OrderedDict([('@name', '02'), ('#text', '靜態網頁爬蟲')])
OrderedDict([('@name', '03'), ('#text', '動態網頁爬蟲')])
01 資料來源與存取
02 靜態網頁爬蟲
03 動態網頁爬蟲


### 根據範例檔案的結果：

1. 請問高雄市有多少地區有溫度資料？
2. 請取出每一個地區所記錄的第一個時間點跟溫度
3. 請取出第一個地區所記錄的每一個時間點跟溫度

In [53]:
# 1. How many districts in Kaohsiung have temperature data?
# The archive is extracted to ./data/d3 (lower-case "data"); use the same
# spelling here so the path also resolves on case-sensitive filesystems.
with open('./data/d3/64_72hr_CH.xml', 'r', encoding='utf-8') as f:
    dictcontent = dict(xmltodict.parse(f.read()))
    locations = dictcontent['cwbopendata']['dataset']['locations']['location']

# Collect the name of every <location> entry.
# NOTE(review): this counts every location in the file; it assumes each one
# carries a temperature element -- confirm against the data.
district = [location['locationName'] for location in locations]
print(district)

# The question asks for a count, so report it explicitly.
print(len(district))


{'cwbopendata': OrderedDict([('@xmlns', 'urn:cwb:gov:tw:cwbcommon:0.1'), ('identifier', '6aaafb9b-4f0b-4cbe-b0e6-ba443ff0d863'), ('sender', 'weather@cwb.gov.tw'), ('sent', '2021-02-09T22:36:01+08:00'), ('status', 'Actual'), ('scope', 'Public'), ('msgType', 'Issue'), ('dataid', 'D0047-065'), ('source', 'MFC'), ('dataset', OrderedDict([('datasetInfo', OrderedDict([('datasetDescription', '臺灣各縣市鄉鎮未來3天(72小時)逐3小時天氣預報'), ('datasetLanguage', 'zh-TW'), ('issueTime', '2021-02-09T23:00:00+08:00'), ('validTime', OrderedDict([('startTime', '2021-02-10T00:00:00+08:00'), ('endTime', '2021-02-12T23:00:00+08:00')])), ('update', '2021-02-09T22:36:01+08:00')])), ('locations', OrderedDict([('locationsName', '高雄市'), ('location', [OrderedDict([('locationName', '鹽埕區'), ('geocode', '6400100'), ('lat', '22.626497'), ('lon', '120.278707'), ('weatherElement', [OrderedDict([('elementName', 'T'), ('description', '溫度'), ('time', [OrderedDict([('dataTime', '2021-02-10T00:00:00+08:00'), ('elementValue', OrderedDict([

In [71]:
# 2. Print the first recorded time point and temperature for every district.
# The archive is extracted to ./data/d3 (lower-case "data"); use the same
# spelling here so the path also resolves on case-sensitive filesystems.
with open('./data/d3/64_72hr_CH.xml', 'r', encoding='utf-8') as f:
    dictcontent = dict(xmltodict.parse(f.read()))
    locations = dictcontent['cwbopendata']['dataset']['locations']['location']

for location in locations:
    district = location['locationName']
    # NOTE(review): assumes the first weatherElement is the temperature
    # series (elementName 'T') for every location -- confirm.
    first_record = location['weatherElement'][0]['time'][0]
    time = first_record['dataTime']
    temp = first_record['elementValue']['value'] + " 度C"
    print(district + "," + temp + "," + time)

鹽埕區,20 度C,2021-02-10T00:00:00+08:00
鼓山區,20 度C,2021-02-10T00:00:00+08:00
左營區,19 度C,2021-02-10T00:00:00+08:00
楠梓區,18 度C,2021-02-10T00:00:00+08:00
三民區,19 度C,2021-02-10T00:00:00+08:00
新興區,20 度C,2021-02-10T00:00:00+08:00
前金區,20 度C,2021-02-10T00:00:00+08:00
苓雅區,20 度C,2021-02-10T00:00:00+08:00
前鎮區,20 度C,2021-02-10T00:00:00+08:00
旗津區,20 度C,2021-02-10T00:00:00+08:00
小港區,19 度C,2021-02-10T00:00:00+08:00
鳳山區,19 度C,2021-02-10T00:00:00+08:00
林園區,20 度C,2021-02-10T00:00:00+08:00
大寮區,20 度C,2021-02-10T00:00:00+08:00
大樹區,18 度C,2021-02-10T00:00:00+08:00
大社區,18 度C,2021-02-10T00:00:00+08:00
仁武區,18 度C,2021-02-10T00:00:00+08:00
鳥松區,19 度C,2021-02-10T00:00:00+08:00
岡山區,18 度C,2021-02-10T00:00:00+08:00
橋頭區,18 度C,2021-02-10T00:00:00+08:00
燕巢區,17 度C,2021-02-10T00:00:00+08:00
田寮區,16 度C,2021-02-10T00:00:00+08:00
阿蓮區,16 度C,2021-02-10T00:00:00+08:00
路竹區,18 度C,2021-02-10T00:00:00+08:00
湖內區,18 度C,2021-02-10T00:00:00+08:00
茄萣區,17 度C,2021-02-10T00:00:00+08:00
永安區,17 度C,2021-02-10T00:00:00+08:00
彌陀區,17 度C,2021-02-10T00:00:0

In [94]:
# 3. Print every recorded time point and temperature for the first district.
# The archive is extracted to ./data/d3 (lower-case "data"); use the same
# spelling here so the path also resolves on case-sensitive filesystems.
with open('./data/d3/64_72hr_CH.xml', 'r', encoding='utf-8') as f:
    dictcontent = dict(xmltodict.parse(f.read()))

locations = dictcontent['cwbopendata']['dataset']['locations']['location']
first_l = locations[0]
# NOTE(review): assumes the first weatherElement is the temperature
# series (elementName 'T') -- confirm.
times = first_l['weatherElement'][0]['time']
print(first_l['locationName'])

for t in times:
    time = t['dataTime']
    temp = t['elementValue']['value'] + " 度C"
    print(time + "," + temp)

    

鹽埕區
2021-02-10T00:00:00+08:00,20 度C
2021-02-10T03:00:00+08:00,19 度C
2021-02-10T06:00:00+08:00,19 度C
2021-02-10T09:00:00+08:00,20 度C
2021-02-10T12:00:00+08:00,24 度C
2021-02-10T15:00:00+08:00,24 度C
2021-02-10T18:00:00+08:00,23 度C
2021-02-10T21:00:00+08:00,21 度C
2021-02-11T00:00:00+08:00,20 度C
2021-02-11T03:00:00+08:00,19 度C
2021-02-11T06:00:00+08:00,18 度C
2021-02-11T09:00:00+08:00,20 度C
2021-02-11T12:00:00+08:00,24 度C
2021-02-11T15:00:00+08:00,25 度C
2021-02-11T18:00:00+08:00,23 度C
2021-02-11T21:00:00+08:00,21 度C
2021-02-12T00:00:00+08:00,19 度C
2021-02-12T03:00:00+08:00,18 度C
2021-02-12T06:00:00+08:00,17 度C
2021-02-12T09:00:00+08:00,19 度C
2021-02-12T12:00:00+08:00,23 度C
2021-02-12T15:00:00+08:00,24 度C
2021-02-12T18:00:00+08:00,22 度C
2021-02-12T21:00:00+08:00,20 度C
