<a href="https://colab.research.google.com/github/Yjaywang/test/blob/main/website_repeat_crawl.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import requests
import csv
# 引用 BlockingScheduler 類別
from apscheduler.schedulers.blocking import BlockingScheduler


In [5]:
def get_api_data(API_URL, timeout=10):
    """Fetch ``API_URL`` with an HTTP GET and return the decoded JSON body.

    Args:
        API_URL: Full URL of the endpoint to request.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may wait forever, which would stall the
            caller (for example a scheduled job) indefinitely.

    Returns:
        The response body parsed from JSON into Python objects.

    Raises:
        requests.RequestException: On connection problems, timeouts, or an
            HTTP error status (4xx/5xx).
        ValueError: If the response body is not valid JSON.
    """
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36'
    }
    # Send the request with a browser-like user agent and a bounded wait.
    resp = requests.get(API_URL, headers=headers, timeout=timeout)

    # Fail loudly on HTTP errors instead of trying to parse an error page.
    resp.raise_for_status()

    # .json() converts the JSON payload into Python dicts/lists.
    return resp.json()

def get_parse_data(data):
    """Flatten the station records of an API payload into CSV-ready rows.

    Reads ``data['records']['location']`` and builds one dict per entry
    with the keys ``lat``, ``lon``, ``locationName``, ``stationId``,
    ``obstime``, ``ELE``, ``WDIR``, ``WDSD``, ``TEMP``, ``HUMD`` and
    ``PRES`` (in that order).

    The six measurement columns are read from the entry's
    ``weatherElement`` list by position (indexes 0-5), not by element
    name, so they rely on the payload keeping that ordering.

    Args:
        data: Decoded JSON payload (nested dicts/lists).

    Returns:
        A list of row dicts, one per location entry.

    Raises:
        KeyError, IndexError: If an expected key or list position is
            missing from the payload.
    """
    # Output column -> position inside the entry's 'weatherElement' list.
    element_slots = (
        ('ELE', 0),
        ('WDIR', 1),
        ('WDSD', 2),
        ('TEMP', 3),
        ('HUMD', 4),
        ('PRES', 5),
    )

    rows = []
    for station in data['records']['location']:
        # Identification and timestamp fields come straight from the entry.
        row = {
            'lat': station['lat'],
            'lon': station['lon'],
            'locationName': station['locationName'],
            'stationId': station['stationId'],
            'obstime': station['time']['obsTime'],
        }

        # Measurements are picked out of the element list by index.
        elements = station['weatherElement']
        for column, slot in element_slots:
            row[column] = elements[slot]['elementValue']

        # Collected rows are later written out as CSV lines.
        rows.append(row)

    return rows

def save_data_to_csv(row_list, output_path='weather.csv'):
    """Write the parsed rows to a CSV file, replacing any previous content.

    Args:
        row_list: Iterable of dicts whose keys match the column names
            listed in ``headers`` below; ``csv.DictWriter`` raises
            ``ValueError`` for a row containing any other key.
        output_path: Destination file. Defaults to ``'weather.csv'`` in
            the current working directory.
    """
    # The header row must use the same names as the dict keys.
    headers = ['lat', 'lon', 'locationName', 'stationId', 'obstime', 'ELE', 'WDIR', 'WDSD', 'TEMP', 'HUMD', 'PRES']

    # newline='' lets the csv module control line endings (otherwise
    # Windows gets an extra blank line after every row); an explicit
    # UTF-8 encoding keeps non-ASCII text independent of the platform's
    # default codec.
    with open(output_path, 'w', newline='', encoding='utf-8') as output_file:
        dict_writer = csv.DictWriter(output_file, fieldnames=headers)
        # Header line first, then one line per row.
        dict_writer.writeheader()
        dict_writer.writerows(row_list)


In [6]:
# Build the scheduler instance, configured for the Asia/Taipei timezone.
sched = BlockingScheduler({'apscheduler.timezone': 'Asia/Taipei'})


# The decorator registers the function below as an 'interval' job that
# the scheduler runs every 10 seconds.
@sched.scheduled_job('interval', seconds=10)
def timed_job():
    """Run one fetch -> parse -> save cycle for the weather endpoint."""
    print('每 10 秒執行一次程式工作區塊')
    endpoint = 'https://opendata.cwb.gov.tw/api/v1/rest/datastore/O-A0001-001?Authorization=rdec-key-123-45678-011121314'
    # Download the payload, flatten it into rows, then write the CSV file.
    save_data_to_csv(get_parse_data(get_api_data(endpoint)))


# Start the scheduler; this call blocks while jobs keep running.
sched.start()

每 10 秒執行一次程式工作區塊
每 10 秒執行一次程式工作區塊
每 10 秒執行一次程式工作區塊


KeyboardInterrupt: ignored