In [6]:
import requests
import pandas as pd
pd.set_option("display.max_columns", None)
pd.set_option("mode.chained_assignment", None)
from datetime import datetime, date
import playwright
from concurrent.futures import ThreadPoolExecutor


 
### 車輛登記數&新領牌照數量
- 檔案路徑：`"D:\kc.hsu\OneDrive - Bridgestone\數據\市場資訊\交通運輸市場數據\車輛市場數據_power_bi.xlsx"`


|大類|小類|城市|資料類型|數量|年月日|區域|
|----|---|----|-------|----|-----|----|

1. 大類：大客車，大貨車，小客車，小貨車
2. 小類：大客車自用……分類
3. 資料類型：車輛登記數，新領牌照數量
4. 年月日：西元


#### Function 設計概念：以一個 dictionary（`site_list`）儲存各資料類型（車輛登記數、新領牌照數量等）的 API 網址，並準備一個將全台各縣市對應到區域的 dictionary（`area_dict`）



In [40]:
# Endpoints to scrape, keyed by dataset name. Every URL carries one "{}" slot
# (the `ymt` query parameter) that is filled with the end year-month below.
today = date.today().strftime("%Y%m")
# The query uses the Gregorian year minus 1911, followed by the two-digit month.
query_date = f"{int(today[:4]) - 1911}{today[4:]}"
site_list = {
    "registered_vehicle": "https://stat.motc.gov.tw/mocdb/stmain.jsp?sys=220&ym=10201&ymt={}&kind=21&type=9&funid=b330102&cycle=1&outmode=0&compmode=0&outkind=6&fldlst=0001011011010111010110000000000&codspc0=2,4,8,1,11,1,14,14,29,2,&rdm=ULceLxXx",
    "new_vehicle": "https://stat.motc.gov.tw/mocdb/stmain.jsp?sys=220&ym=10201&ymt={}&kind=21&type=9&funid=b330105&cycle=1&outmode=0&compmode=0&outkind=6&fldlst=0001011011010111010110000000000&codspc0=2,20,23,2,&rdm=A7Wniqtn",
    "tourist": "https://stat.motc.gov.tw/mocdb/stmain.jsp?sys=220&ym=10201&ymt={}&kind=21&type=1&funid=b710401&cycle=1&outmode=0&compmode=0&outkind=6&fldspc=2,10,13,2,16,5,22,1,24,1,&codspc0=1,8,&rdm=rrxgjjqj",
    "trans_num": "https://stat.thb.gov.tw/hb01/webMain.aspx?sys=220&ym=10201&ymt={}&kind=21&type=1&funid=3130005&cycle=1&outmode=0&compmode=0&outkind=6&fldspc=0,17,&codspc0=1,22,&rdm=R49947",
    "trans_vehicle": "https://stat.thb.gov.tw/hb01/webMain.aspx?sys=220&ym=10201&ymt={}&kind=21&type=1&funid=3130003&cycle=1&outmode=0&compmode=0&outkind=6&fldspc=0,17,&codspc0=1,22,&rdm=R156357",
    "trans_business": "https://stat.thb.gov.tw/hb01/webMain.aspx?sys=220&ym=10201&ymt={}&kind=21&type=1&funid=3120001&cycle=1&outmode=0&compmode=0&outkind=6&fldspc=0,13,&rdm=R161428",
    "bus_business": "https://stat.thb.gov.tw/hb01/webMain.aspx?sys=220&ym=10201&ymt={}&kind=21&type=1&funid=3110001&cycle=1&outmode=0&compmode=0&outkind=6&fldspc=0,16,&rdm=R166517",
    "sea_shipment": "https://stat.motc.gov.tw/mocdb/stmain.jsp?sys=220&ym=10201&ymt={}&kind=21&type=1&funid=b420502&cycle=1&outmode=0&compmode=0&outkind=6&fldlst=011&codspc0=1,5,&codlst1=011&rdm=hyiom9pa",
}

# Request headers sent with every fetch (browser-like User-Agent).
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36"
}

# City/county name -> sales region. Keys use "台" (not "臺"), matching the
# normalisation the parser functions apply to the city column.
area_dict = {
    '新北市': '北北基',
    '台北市': '北北基',
    '桃園市': '桃竹苗',
    '台中市': '中彰投',
    '台南市': '雲嘉南',
    '高雄市': '高屏東',
    '宜蘭縣': '宜花',
    '新竹縣': '桃竹苗',
    '苗栗縣': '桃竹苗',
    '彰化縣': '中彰投',
    '南投縣': '中彰投',
    '雲林縣': '雲嘉南',
    '嘉義縣': '雲嘉南',
    '屏東縣': '高屏東',
    '台東縣': '高屏東',
    '花蓮縣': '宜花',
    '澎湖縣': '離島',
    '基隆市': '北北基',
    '新竹市': '桃竹苗',
    '金門縣': '外島',
    '連江縣': '外島',
    '嘉義市': '雲嘉南',
}

# Dataset name -> fully formatted URL whose query range ends at `query_date`.
latest_date = {
    dataset: template.format(query_date) for dataset, template in site_list.items()
}


In [41]:
# Fetch one page and return its body as text
def get_data(url, timeout=30):
    """Download ``url`` using the notebook-level ``headers`` and return the body as text.

    Parameters
    ----------
    url : str
        Address to request.
    timeout : float or tuple, optional
        Seconds passed to ``requests.get`` as its ``timeout``. Defaults to 30 so
        that an unresponsive server cannot block the notebook indefinitely
        (requests applies no timeout unless one is given).

    Returns
    -------
    str
        ``Response.text`` of the reply.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status; raised here so an error
        page is not handed to the HTML parsers as if it were data.
    requests.Timeout
        If the server does not respond within ``timeout``.
    """
    res = requests.get(url, headers=headers, timeout=timeout)
    res.raise_for_status()
    return res.text

In [42]:
# Fetch every URL in a {name: url} dict and collect the responses in a new dict
def retrieveData(data_dict):
    """Fetch each URL in ``data_dict`` with ``get_data`` and return the results.

    Parameters
    ----------
    data_dict : dict
        Mapping of dataset name to URL.

    Returns
    -------
    dict
        New mapping with the same keys, in the same order, whose values are
        whatever ``get_data`` returned for the corresponding URL.

    Notes
    -----
    ``data_dict`` itself is left unmodified. Overwriting the URLs in place made
    the calling cell non-repeatable: a second run would pass the downloaded
    page text to ``get_data`` as if it were a URL.
    """
    return {name: get_data(url) for name, url in data_dict.items()}

In [45]:
def _vehicle_sub_category(vehicle_type):
    """Map a slash-free vehicle-type label to the sub-category name used in the report."""
    if "(不含遊覽車)" in vehicle_type:
        return "其他大客車"
    if "(不含租賃)" in vehicle_type:
        return "小貨車"
    return vehicle_type


def _tidy_vehicle_table(content, data_type):
    """Reshape one vehicle statistics page into a long table with one row per month.

    Shared implementation behind ``registered_vehicle`` and ``new_vehicle``, which
    differ only in the ``資料類型`` label they attach.

    Parameters
    ----------
    content : str or anything ``pandas.read_html`` accepts
        HTML of the statistics page (normally the text returned by ``get_data``).
    data_type : str
        Constant written to the ``資料類型`` column of every row.

    Returns
    -------
    pandas.DataFrame
        Columns ``大類, 用途, 小類, 城市, 資料類型, 數量, 年月日, 區域``. Rows whose
        ``數量`` is the placeholder ``"-"`` are removed. ``區域`` is looked up in the
        notebook-level ``area_dict`` and is NaN for cities missing from it.
    """
    from io import StringIO  # local import so the block does not depend on the import cell

    # Recent pandas releases deprecate passing literal HTML as a plain string,
    # so wrap markup in StringIO. Anything that does not look like markup
    # (URL, path, file object) is passed through unchanged.
    if isinstance(content, str) and content.lstrip().startswith("<"):
        source = StringIO(content)
    else:
        source = content

    # The statistics grid is taken from the second table that read_html finds.
    raw = pd.read_html(source)[1]
    table = raw.rename(columns={"Unnamed: 0": "大類", "Unnamed: 1": "城市"})
    # Normalise "臺" to "台" so city names match the keys of area_dict.
    table["城市"] = table["城市"].str.replace("臺", "台", regex=False)

    # Wide -> long: one row per (category, city, period column).
    long_form = table.groupby(["大類", "城市"]).sum().stack().reset_index(name="數量")

    # Keep only single-month columns: drop labels ending in "年" and labels
    # containing "~".
    period = long_form["level_2"]
    is_year_total = period.str.endswith("年")
    is_range = period.str.contains("~", regex=False)
    # Explicit copy: columns are added below and must not write into a slice.
    monthly = long_form[~(is_year_total | is_range)].copy()

    label = monthly["level_2"]
    # Last space-separated token holds the month, e.g. "1月" -> "01".
    month = label.str.split(" ").str[-1].str.replace("月", "", regex=False).str.zfill(2)
    # Text before "年" is the year offset by 1911 from the Gregorian year.
    year = (label.str.split("年").str[0].astype(int) + 1911).astype(str)

    # Category labels are space-separated; with three tokens the last one names
    # the vehicle type, otherwise the first one does.
    parts = monthly["大類"].str.split(" ")
    vehicle_type = parts.map(lambda tokens: tokens[-1] if len(tokens) == 3 else tokens[0])
    sub_category = vehicle_type.str.replace("/", "", regex=False).map(_vehicle_sub_category)
    usage = (
        parts.str[-1]
        .str.replace("/", "", regex=False)
        .map(lambda text: "營業" if "營業" in text else text)
    )

    tidy = monthly.assign(
        **{
            "大類": parts.str[0],
            "用途": usage,
            "小類": sub_category,
            "資料類型": data_type,
            "年月日": pd.to_datetime(year + month, format="%Y%m"),
            "區域": monthly["城市"].map(area_dict),
        }
    )[["大類", "用途", "小類", "城市", "資料類型", "數量", "年月日", "區域"]]

    # "-" marks a cell with no figure; those rows carry no quantity.
    return tidy[tidy["數量"] != "-"]


def registered_vehicle(content):
    """Parse the registered-vehicle page into the tidy table, labelled ``車輛登記數``.

    ``content`` is the page HTML; see ``_tidy_vehicle_table`` for the returned columns.
    """
    return _tidy_vehicle_table(content, "車輛登記數")


def new_vehicle(content):
    """Parse the newly-licensed-vehicle page into the tidy table, labelled ``新領牌照數量``.

    ``content`` is the page HTML; see ``_tidy_vehicle_table`` for the returned columns.
    """
    return _tidy_vehicle_table(content, "新領牌照數量")

def tourist(data):
    """Placeholder parser, presumably for the ``tourist`` entry of ``site_list``.

    Not implemented yet: ``data`` is ignored and ``None`` is returned.
    """
    return None

def trans_num(data):
    """Placeholder parser, presumably for the ``trans_num`` entry of ``site_list``.

    Not implemented yet: ``data`` is ignored and ``None`` is returned.
    """
    return None

def trans_vehicle(data):
    """Placeholder parser, presumably for the ``trans_vehicle`` entry of ``site_list``.

    Not implemented yet: ``data`` is ignored and ``None`` is returned.
    """
    return None

def trans_business(data):
    """Placeholder parser, presumably for the ``trans_business`` entry of ``site_list``.

    Not implemented yet: ``data`` is ignored and ``None`` is returned.
    """
    return None

def bus_business(data):
    """Placeholder parser, presumably for the ``bus_business`` entry of ``site_list``.

    Not implemented yet: ``data`` is ignored and ``None`` is returned.
    """
    return None

def sea_shipment(data):
    """Placeholder parser, presumably for the ``sea_shipment`` entry of ``site_list``.

    Not implemented yet: ``data`` is ignored and ``None`` is returned.
    """
    return None

dict_keys(['registerd_vehicle', 'new_vehicle', 'tourist', 'trans_num', 'trans_vehicle', 'trans_business', 'bus_business', 'sea_shipment'])

In [None]:
# 建立key為數據類型名稱，value為對應處理function的字典
