In [1]:
import abc
import datetime
import io
import urllib
import urllib.request
import warnings

import lxml
import pandas as pd
import requests
from selectolax.parser import HTMLParser

pd.set_option("display.max_columns", None)
pd.set_option("mode.chained_assignment", None)
warnings.filterwarnings("ignore")

In [None]:
class GovData(abc.ABC):
    """Shared lookup tables and HTTP fetch logic for the statistics queries.

    Subclasses supply a query-string dict and call :meth:`parse` to download
    the result page from ``url`` and get its table back as a DataFrame.
    """

    # City/county name -> region label, built by pairing the two lists below
    # position by position.
    city_keys = ['新北市', '台北市', '桃園市', '台中市', '台南市', '高雄市', '宜蘭縣', '新竹縣', '苗栗縣',
       '彰化縣', '南投縣', '雲林縣', '嘉義縣', '屏東縣', '台東縣', '花蓮縣', '澎湖縣', '基隆市',
       '新竹市', '金門縣', '連江縣']
    city_values = ["北北基", "北北基", "桃竹苗", "中彰投", "雲嘉南", "高屏東", "宜花", "桃竹苗", "桃竹苗", "中彰投", "中彰投", "雲嘉南", "雲嘉南", "高屏東", "高屏東", "宜花", "離島", "北北基", "桃竹苗", "外島", "外島"]
    area_dict = dict(zip(city_keys, city_values))

    # Detailed vehicle category -> coarse vehicle class, paired the same way.
    category_keys = ['大客車自用', '遊覽車', '大客車營業(不含遊覽車)', '大貨車自用', '大貨車營業', '小客車自用', '小客車租賃',
       '計程車', '其他營業小客車', '小貨車', '小貨車租賃', '小貨車營業(不含租賃)']
    category_values = ["大客車", "大客車", "大客車", "大貨車", "大貨車", "小客車", "小客車", "小客車", "小客車", "小貨車", "小貨車", "小貨車"]
    category_dict = dict(zip(category_keys, category_values))

    url = "https://stat.motc.gov.tw/mocdb/stmain.jsp"
    # NOTE(review): hard-coded session cookies -- they presumably expire and
    # should not live in source; confirm whether the endpoint needs them at all.
    headers = {"cookie": "JSESSIONID=1107E8C014813D59A34F08779B9FFC8D; TS01933308=019f8797941a2907282b453f4118d8dd5029cef9402978d929deb57040d9185335949ba6846b9228245b822de4d644d9f61722140b; TS016fe53d=019f8797941a2907282b453f4118d8dd5029cef9402978d929deb57040d9185335949ba6846b9228245b822de4d644d9f61722140b; TSc42f079b027=08a6890af4ab2000572c6c32fe1142c668a79dc26a5ee595bff0f0ad595df5a2400e0b694dea6b4e08a50f0bb41130006dde59173619c1494c392c8939be2c898ec28ca73d55a75635fa7a1843508836fcbb91761bf21b8a93d5f64edf3e7baa"}
    # Seconds to wait for the server; without a timeout the request can block forever.
    request_timeout = 30

    def __init__(self):
        pass

    def get_latest_month(self):
        """Placeholder: not implemented, currently returns ``None``."""
        pass

    def parse(self, query):
        """Download the page for ``query`` and return its data table.

        Args:
            query: Mapping of query-string parameters sent with the GET request.

        Returns:
            DataFrame built from the second HTML table on the page, restricted
            to columns whose label is longer than five characters and contains
            no "(", with "Unnamed: 0" / "Unnamed: 1" renamed to "小類" / "城市".

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        response = requests.get(
            self.url,
            headers=self.headers,
            params=query,
            timeout=self.request_timeout,
        )
        # Fail on HTTP errors here instead of trying to parse an error page.
        response.raise_for_status()
        # read_html wants a file-like object; passing literal HTML is deprecated.
        df = pd.read_html(io.StringIO(response.text))[1]
        keep_columns = [i for i in df.columns if len(i) > 5 and "(" not in i]
        # Rename on a fresh frame rather than in place on a column slice.
        return df[keep_columns].rename(columns={"Unnamed: 0": "小類", "Unnamed: 1": "城市"})


class CarPark(GovData):
    """Long-format table for the ``b330102`` query, tagged "車輛登記數".

    The fetched table is unpivoted to one row per (小類, 城市, month) and
    enriched with the 大類 and 區域 lookups inherited from ``GovData``.
    """

    car_park_query = {"sys":"220","ym":"10701","ymt":"11109","kind":"21","type":"9","funid":"b330102","cycle":"41","outmode":"0","compmode":"0","outkind":"6","fldspc":"3,1,5,2,8,2,11,1,13,4,19,2,","codspc0":"2,4,8,1,11,1,14,14,29,2,","rdm":"ayaehpie"}

    def __init__(self, ymt="11109"):
        """Fetch the table with ``ymt`` applied to the query.

        Args:
            ymt: ROC year-month string such as "11109", sent as the query's
                ``ymt`` parameter (presumably the last month requested).
        """
        ymt = str(ymt)
        if len(ymt) == 4:
            # Tolerate an un-padded month ("1143" -> "11403"); every query
            # template in this file uses five characters.
            ymt = ymt[:3] + "0" + ymt[3:]
        # Build a per-instance query BEFORE fetching. Previously the data was
        # downloaded first, so ``ymt`` was ignored, and the shared class-level
        # dict was mutated for every later instance.
        self.car_park_query = dict(self.car_park_query, ymt=ymt)
        self.df = self.get_data()

    @staticmethod
    def _normalize_city(name):
        """Replace "臺" with "台"; otherwise fold "嘉義市" into "嘉義縣"."""
        if "臺" in name:
            return name.replace("臺", "台")
        if "嘉義市" in name:
            return name.replace("嘉義市", "嘉義縣")
        return name

    @staticmethod
    def _roc_label_to_yyyymm(label):
        """Turn a "<ROC year>年<month>月" label into a Gregorian "YYYYMM" string."""
        pieces = label.split("年")
        month = int(pieces[-1].strip().replace("月", ""))
        return str(int(pieces[0]) + 1911) + str(month).zfill(2)

    def get_data(self):
        """Download, unpivot and annotate the table.

        Returns:
            DataFrame with columns 大類, 小類, 城市, 資料類型, 數量, 年月日, 區域,
            where 年月日 is the first day of each month as a timestamp.
        """
        df = self.parse(self.car_park_query)
        df.insert(2, "資料類型", "車輛登記數")
        df["城市"] = df["城市"].map(self._normalize_city)
        id_columns = ["小類", "城市", "資料類型"]
        # One row per id combination and month column.
        df = (
            df.set_index(id_columns)
            .stack()
            .rename_axis(id_columns + ["年月日"])
            .reset_index(name="數量")
        )
        df["年月日"] = pd.to_datetime(df["年月日"].map(self._roc_label_to_yyyymm), format="%Y%m")
        df = df[["小類", "城市", "資料類型", "數量", "年月日"]]
        df.insert(0, "大類", df["小類"].map(self.category_dict))
        df["區域"] = df["城市"].map(self.area_dict)
        # NOTE(review): NewCar turns "-" into "0" while this class blanks it --
        # confirm which placeholder handling is intended.
        df["數量"] = df["數量"].str.replace("-", "")
        return df
        
               
    
class NewCar(GovData):
    """Long-format table for the ``b330105`` query, tagged "新領牌照數量".

    The fetched table is unpivoted to one row per (小類, 城市, month) and
    enriched with the 大類 and 區域 lookups inherited from ``GovData``.
    """

    querystring = {"sys":"220","ym":"10701","ymt":"11109","kind":"21","type":"9","funid":"b330105","cycle":"41","outmode":"0","compmode":"0","outkind":"6","fldlst":"0001011011010111010110000000000","codspc0":"2,20,23,2,","rdm":"jyrnlU5h"}

    def __init__(self, ymt="11109"):
        """Fetch the table with ``ymt`` applied to the query.

        Args:
            ymt: ROC year-month string such as "11109", sent as the query's
                ``ymt`` parameter (presumably the last month requested).
        """
        ymt = str(ymt)
        if len(ymt) == 4:
            # Tolerate an un-padded month ("1143" -> "11403"); every query
            # template in this file uses five characters.
            ymt = ymt[:3] + "0" + ymt[3:]
        # Build a per-instance query BEFORE fetching. Previously the data was
        # downloaded first, so ``ymt`` was ignored, and the shared class-level
        # dict was mutated for every later instance.
        self.querystring = dict(self.querystring, ymt=ymt)
        self.df = self.get_data()

    @staticmethod
    def _normalize_city(name):
        """Replace "臺" with "台"; otherwise fold "嘉義市" into "嘉義縣"."""
        if "臺" in name:
            return name.replace("臺", "台")
        if "嘉義市" in name:
            return name.replace("嘉義市", "嘉義縣")
        return name

    @staticmethod
    def _roc_label_to_yyyymm(label):
        """Turn a "<ROC year>年<month>月" label into a Gregorian "YYYYMM" string."""
        pieces = label.split("年")
        month = int(pieces[-1].strip().replace("月", ""))
        return str(int(pieces[0]) + 1911) + str(month).zfill(2)

    def get_data(self):
        """Download, unpivot and annotate the table.

        Returns:
            DataFrame with columns 大類, 小類, 城市, 資料類型, 數量, 年月日, 區域,
            where 年月日 is the first day of each month as a timestamp.
        """
        df = self.parse(self.querystring)
        df.insert(2, "資料類型", "新領牌照數量")
        df["城市"] = df["城市"].map(self._normalize_city)
        id_columns = ["小類", "城市", "資料類型"]
        # One row per id combination and month column.
        df = (
            df.set_index(id_columns)
            .stack()
            .rename_axis(id_columns + ["年月日"])
            .reset_index(name="數量")
        )
        df["年月日"] = pd.to_datetime(df["年月日"].map(self._roc_label_to_yyyymm), format="%Y%m")
        df = df[["小類", "城市", "資料類型", "數量", "年月日"]]
        df.insert(0, "大類", df["小類"].map(self.category_dict))
        df["區域"] = df["城市"].map(self.area_dict)
        # NOTE(review): CarPark blanks "-" while this class writes "0" --
        # confirm which placeholder handling is intended.
        df["數量"] = df["數量"].str.replace("-", "0")
        return df
    
class CombineData:
    """Holds a collection of DataFrames and stacks them into one."""

    def __init__(self, data):
        """Store ``data``, the frames to be concatenated later."""
        self.data = data

    def combined_data(self):
        """Return all stored frames concatenated row-wise with a fresh 0..n-1 index."""
        stacked = pd.concat(self.data)
        # Discard the per-frame indexes so row labels do not repeat.
        return stacked.reset_index(drop=True)
    

class RoadShipment(GovData):
    """Long-format table for the ``b310401`` query: one row per 類型 and month."""

    querystring = {"sys":"220","ym":"10700","ymt":"11109","kind":"21","type":"1","funid":"b310401","cycle":"41","outmode":"0","compmode":"0","outkind":"6","fldspc":"0,5,","rdm":"xloIJjil"}

    def __init__(self, ymt="11109"):
        """Fetch the table with ``ymt`` applied to the query.

        Args:
            ymt: ROC year-month string such as "11109", sent as the query's
                ``ymt`` parameter (presumably the last month requested).
        """
        ymt = str(ymt)
        if len(ymt) == 4:
            # Tolerate an un-padded month ("1143" -> "11403"); the query
            # templates in this file use five characters.
            ymt = ymt[:3] + "0" + ymt[3:]
        # Build a per-instance query BEFORE fetching. Previously the data was
        # downloaded first, so ``ymt`` was ignored, and the shared class-level
        # dict was mutated for every later instance.
        self.querystring = dict(self.querystring, ymt=ymt)
        self.df = self.get_data()

    @staticmethod
    def _roc_label_to_yyyymm(label):
        """Turn a "<ROC year>年<month>月" label into a Gregorian "YYYYMM" string."""
        pieces = label.split("年")
        month = int(pieces[-1].strip().replace("月", ""))
        return str(int(pieces[0]) + 1911) + str(month).zfill(2)

    def parse(self, query):
        """Download the page for ``query`` and return its data table.

        Returns:
            DataFrame from the second HTML table on the page, limited to
            columns whose label is longer than five characters and has no "(",
            with "Unnamed: 1" dropped and "Unnamed: 0" renamed to "類型".

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        # A timeout keeps a stalled server from hanging the notebook forever.
        response = requests.get(self.url, headers=self.headers, params=query, timeout=30)
        response.raise_for_status()
        # read_html wants a file-like object; passing literal HTML is deprecated.
        df = pd.read_html(io.StringIO(response.text))[1]
        keep_columns = [i for i in df.columns if len(i) > 5 and "(" not in i]
        # Chain on a fresh frame instead of mutating a column slice in place.
        return (
            df[keep_columns]
            .drop(columns="Unnamed: 1")
            .rename(columns={"Unnamed: 0": "類型"})
        )

    def get_data(self):
        """Download and unpivot the table.

        Returns:
            DataFrame with columns 類型, 數值, 年月日, where 年月日 is the first
            day of each month as a timestamp.
        """
        df = self.parse(self.querystring)
        df = df.set_index(["類型"]).stack().rename_axis(["類型", "年月日"]).reset_index(name="數值")
        df["年月日"] = pd.to_datetime(df["年月日"].map(self._roc_label_to_yyyymm), format="%Y%m")
        return df[["類型", "數值", "年月日"]]
    

class RoadPassenger(GovData):
    """Long-format table for the ``b310101`` query: one row per 類型, 公路類型 and month."""

    querystring = {"sys":"220","ym":"10701","ymt":"11109","kind":"21","type":"1","funid":"b310101","cycle":"41","outmode":"0","compmode":"0","outkind":"6","fldspc":"0,11,","codlst0":"011","rdm":"bLcyBlnb"}

    def __init__(self, ymt="11109"):
        """Fetch the table with ``ymt`` applied to the query.

        Args:
            ymt: ROC year-month string such as "11109", sent as the query's
                ``ymt`` parameter (presumably the last month requested).
        """
        ymt = str(ymt)
        if len(ymt) == 4:
            # Tolerate an un-padded month ("1143" -> "11403"); the query
            # templates in this file use five characters.
            ymt = ymt[:3] + "0" + ymt[3:]
        # Build a per-instance query BEFORE fetching. Previously the data was
        # downloaded first, so ``ymt`` was ignored, and the shared class-level
        # dict was mutated for every later instance.
        self.querystring = dict(self.querystring, ymt=ymt)
        self.df = self.get_data()

    @staticmethod
    def _roc_label_to_yyyymm(label):
        """Turn a "<ROC year>年<month>月" label into a Gregorian "YYYYMM" string."""
        pieces = label.split("年")
        month = int(pieces[-1].strip().replace("月", ""))
        return str(int(pieces[0]) + 1911) + str(month).zfill(2)

    def parse(self, query):
        """Download the page for ``query`` and return its data table.

        Returns:
            DataFrame from the second HTML table on the page, limited to
            columns whose label is longer than five characters and has no "(",
            with "Unnamed: 0" / "Unnamed: 1" renamed to "類型" / "公路類型".

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        # A timeout keeps a stalled server from hanging the notebook forever.
        response = requests.get(self.url, headers=self.headers, params=query, timeout=30)
        response.raise_for_status()
        # read_html wants a file-like object; passing literal HTML is deprecated.
        df = pd.read_html(io.StringIO(response.text))[1]
        keep_columns = [i for i in df.columns if len(i) > 5 and "(" not in i]
        # Rename on a fresh frame rather than in place on a column slice.
        return df[keep_columns].rename(columns={"Unnamed: 0": "類型", "Unnamed: 1": "公路類型"})

    def get_data(self):
        """Download and unpivot the table.

        Returns:
            DataFrame with columns 類型, 公路類型, 數值, 年月日, where 年月日 is
            the first day of each month as a timestamp.
        """
        df = self.parse(self.querystring)
        id_columns = ["類型", "公路類型"]
        df = (
            df.set_index(id_columns)
            .stack()
            .rename_axis(id_columns + ["年月日"])
            .reset_index(name="數值")
        )
        df["年月日"] = pd.to_datetime(df["年月日"].map(self._roc_label_to_yyyymm), format="%Y%m")
        return df[["類型", "公路類型", "數值", "年月日"]]
        
class SeaShipment(GovData):
    """Long-format TEU table for the ``b420502`` query, with 港口 split out of 櫃別."""

    querystring = {"pg":"3","sys":"220","ym":"10701","ymt":"11109","kind":"21","type":"1","funid":"b420502","cycle":"41","outmode":"0","compmode":"0","outkind":"6","fldlst":"011","codspc0":"1,5,","codlst1":"011","rdm":"mxeaXb5I"}

    def __init__(self, ymt="11109"):
        """Fetch the table with ``ymt`` applied to the query.

        Args:
            ymt: ROC year-month string such as "11109", sent as the query's
                ``ymt`` parameter (presumably the last month requested).
        """
        ymt = str(ymt)
        if len(ymt) == 4:
            # Tolerate an un-padded month ("1143" -> "11403"); the query
            # templates in this file use five characters.
            ymt = ymt[:3] + "0" + ymt[3:]
        # Build a per-instance query BEFORE fetching. Previously the data was
        # downloaded first, so ``ymt`` was ignored, and the shared class-level
        # dict was mutated for every later instance.
        self.querystring = dict(self.querystring, ymt=ymt)
        self.df = self.get_data()

    @staticmethod
    def _roc_label_to_yyyymm(label):
        """Turn a "<ROC year>年<month>月" label into a Gregorian "YYYYMM" string."""
        pieces = label.split("年")
        month = int(pieces[-1].strip().replace("月", ""))
        return str(int(pieces[0]) + 1911) + str(month).zfill(2)

    def parse(self, query):
        """Download the page for ``query`` and return its data table.

        Returns:
            DataFrame from the second HTML table on the page, limited to
            columns whose label is longer than five characters and has no "(",
            with "Unnamed: 0" / "Unnamed: 1" renamed to "進出港" / "櫃別".

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        # A timeout keeps a stalled server from hanging the notebook forever.
        response = requests.get(self.url, headers=self.headers, params=query, timeout=30)
        response.raise_for_status()
        # read_html wants a file-like object; passing literal HTML is deprecated.
        df = pd.read_html(io.StringIO(response.text))[1]
        keep_columns = [i for i in df.columns if len(i) > 5 and "(" not in i]
        # Rename on a fresh frame rather than in place on a column slice.
        return df[keep_columns].rename(columns={"Unnamed: 0": "進出港", "Unnamed: 1": "櫃別"})

    def get_data(self):
        """Download and unpivot the table, then split the combined 櫃別 label.

        Returns:
            DataFrame with columns 進出港, 港口, 櫃別, TEU, 年月日, where 年月日
            is the first day of each month as a timestamp.
        """
        df = self.parse(self.querystring)
        id_columns = ["進出港", "櫃別"]
        df = (
            df.set_index(id_columns)
            .stack()
            .rename_axis(id_columns + ["年月日"])
            .reset_index(name="TEU")
        )
        df["年月日"] = pd.to_datetime(df["年月日"].map(self._roc_label_to_yyyymm), format="%Y%m")
        # The raw 櫃別 value is "<first part>/<second part>": the first part
        # becomes 港口 and the second replaces 櫃別.
        port_status = df["櫃別"].str.split("/", expand=True)
        df["港口"] = port_status[0]
        df["櫃別"] = port_status[1]
        return df[["進出港", "港口", "櫃別", "TEU", "年月日"]]

class FreewayData(GovData):
    """Long-format 延車公里 table for the ``b320601`` query (``fldspc`` "4,2,")."""

    querystring = {"pg":"3","sys":"220","ym":"10701","ymt":"11109","kind":"21","type":"1","funid":"b320601","cycle":"41","outmode":"0","compmode":"0","outkind":"6","fldspc":"4,2,","codspc0":"1,5,","codlst1":"0111","rdm":"yjid9pWz"}

    def __init__(self, ymt="11109"):
        """Fetch the table with ``ymt`` applied to the query.

        Args:
            ymt: ROC year-month string such as "11109", sent as the query's
                ``ymt`` parameter (presumably the last month requested).
        """
        ymt = str(ymt)
        if len(ymt) == 4:
            # Tolerate an un-padded month ("1143" -> "11403"); the query
            # templates in this file use five characters.
            ymt = ymt[:3] + "0" + ymt[3:]
        # Build a per-instance query BEFORE fetching. Previously the data was
        # downloaded first, so ``ymt`` was ignored, and the shared class-level
        # dict was mutated for every later instance.
        self.querystring = dict(self.querystring, ymt=ymt)
        self.df = self.get_data()

    @staticmethod
    def _roc_label_to_yyyymm(label):
        """Turn a "<ROC year>年<month>月" label into a Gregorian "YYYYMM" string."""
        pieces = label.split("年")
        month = int(pieces[-1].strip().replace("月", ""))
        return str(int(pieces[0]) + 1911) + str(month).zfill(2)

    def parse(self, query):
        """Download the page for ``query`` and return its data table.

        Returns:
            DataFrame from the second HTML table on the page, limited to
            columns whose label is longer than five characters and has no "(",
            with "Unnamed: 0" / "Unnamed: 1" renamed to "方向" / "車輛類型".

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        # A timeout keeps a stalled server from hanging the notebook forever.
        response = requests.get(self.url, headers=self.headers, params=query, timeout=30)
        response.raise_for_status()
        # read_html wants a file-like object; passing literal HTML is deprecated.
        df = pd.read_html(io.StringIO(response.text))[1]
        keep_columns = [i for i in df.columns if len(i) > 5 and "(" not in i]
        # Rename on a fresh frame rather than in place on a column slice.
        return df[keep_columns].rename(columns={"Unnamed: 0": "方向", "Unnamed: 1": "車輛類型"})

    def get_data(self):
        """Download and unpivot the table, then split the combined 車輛類型 label.

        Returns:
            DataFrame with columns 方向, 國道編號, 車輛類型, 延車公里, 年月日,
            where 年月日 is the first day of each month as a timestamp.
        """
        df = self.parse(self.querystring)
        id_columns = ["方向", "車輛類型"]
        df = (
            df.set_index(id_columns)
            .stack()
            .rename_axis(id_columns + ["年月日"])
            .reset_index(name="延車公里")
        )
        df["年月日"] = pd.to_datetime(df["年月日"].map(self._roc_label_to_yyyymm), format="%Y%m")
        # The raw 車輛類型 value is "<first part>/<second part>": the first
        # part becomes 國道編號 and the second replaces 車輛類型.
        highway_id = df["車輛類型"].str.split("/", expand=True)
        df["國道編號"] = highway_id[0]
        df["車輛類型"] = highway_id[1]
        return df[["方向", "國道編號", "車輛類型", "延車公里", "年月日"]]
    
class FreewayVol(GovData):
    """Long-format 通行輛次 table for the ``b320601`` query (``fldspc`` "1,2,")."""

    querystring = {"pg":"3","sys":"220","ym":"10701","ymt":"11109","kind":"21","type":"1","funid":"b320601","cycle":"41","outmode":"0","compmode":"0","outkind":"6","fldspc":"1,2,","codspc0":"1,5,","codlst1":"0111","rdm":"epxN9WXl"}

    def __init__(self, ymt="11109"):
        """Fetch the table with ``ymt`` applied to the query.

        Args:
            ymt: ROC year-month string such as "11109", sent as the query's
                ``ymt`` parameter (presumably the last month requested).
        """
        ymt = str(ymt)
        if len(ymt) == 4:
            # Tolerate an un-padded month ("1143" -> "11403"); the query
            # templates in this file use five characters.
            ymt = ymt[:3] + "0" + ymt[3:]
        # Build a per-instance query BEFORE fetching. Previously the data was
        # downloaded first, so ``ymt`` was ignored, and the shared class-level
        # dict was mutated for every later instance.
        self.querystring = dict(self.querystring, ymt=ymt)
        self.df = self.get_data()

    @staticmethod
    def _roc_label_to_yyyymm(label):
        """Turn a "<ROC year>年<month>月" label into a Gregorian "YYYYMM" string."""
        pieces = label.split("年")
        month = int(pieces[-1].strip().replace("月", ""))
        return str(int(pieces[0]) + 1911) + str(month).zfill(2)

    def parse(self, query):
        """Download the page for ``query`` and return its data table.

        Returns:
            DataFrame from the second HTML table on the page, limited to
            columns whose label is longer than five characters and has no "(",
            with "Unnamed: 0" / "Unnamed: 1" renamed to "方向" / "車輛類型".

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        # A timeout keeps a stalled server from hanging the notebook forever.
        response = requests.get(self.url, headers=self.headers, params=query, timeout=30)
        response.raise_for_status()
        # read_html wants a file-like object; passing literal HTML is deprecated.
        df = pd.read_html(io.StringIO(response.text))[1]
        keep_columns = [i for i in df.columns if len(i) > 5 and "(" not in i]
        # Rename on a fresh frame rather than in place on a column slice.
        return df[keep_columns].rename(columns={"Unnamed: 0": "方向", "Unnamed: 1": "車輛類型"})

    def get_data(self):
        """Download and unpivot the table, then split the combined 車輛類型 label.

        Returns:
            DataFrame with columns 方向, 國道編號, 車輛類型, 通行輛次, 年月日,
            where 年月日 is the first day of each month as a timestamp.
        """
        df = self.parse(self.querystring)
        id_columns = ["方向", "車輛類型"]
        df = (
            df.set_index(id_columns)
            .stack()
            .rename_axis(id_columns + ["年月日"])
            .reset_index(name="通行輛次")
        )
        df["年月日"] = pd.to_datetime(df["年月日"].map(self._roc_label_to_yyyymm), format="%Y%m")
        # The raw 車輛類型 value is "<first part>/<second part>": the first
        # part becomes 國道編號 and the second replaces 車輛類型.
        highway_id = df["車輛類型"].str.split("/", expand=True)
        df["國道編號"] = highway_id[0]
        df["車輛類型"] = highway_id[1]
        return df[["方向", "國道編號", "車輛類型", "通行輛次", "年月日"]]
    
class Tourism(FreewayVol):
    """Long-format 人數 table for the ``b710401`` query: one row per 國家, 來台目的 and month."""

    querystring = {"sys":"220","ym":"10701","ymt":"11109","kind":"21","type":"1","funid":"b710401","cycle":"41","outmode":"0","compmode":"0","outkind":"6","fldspc":"2,10,13,2,16,5,22,1,24,1,","codspc0":"1,9,","rdm":"rej3blcn"}

    def __init__(self, ymt):
        """Fetch the table with ``ymt`` applied to the query.

        Args:
            ymt: ROC year-month string such as "11109", sent as the query's
                ``ymt`` parameter (presumably the last month requested).
        """
        ymt = str(ymt)
        if len(ymt) == 4:
            # Tolerate an un-padded month ("1143" -> "11403"); the query
            # templates in this file use five characters.
            ymt = ymt[:3] + "0" + ymt[3:]
        # The previous ``super().__init__()`` call dropped ``ymt`` entirely, so
        # the caller's value never reached the request. Set up the query and
        # fetch here so the argument is always honoured.
        self.querystring = dict(self.querystring, ymt=ymt)
        self.df = self.get_data()

    @staticmethod
    def _roc_label_to_yyyymm(label):
        """Turn a "<ROC year>年<month>月" label into a Gregorian "YYYYMM" string."""
        pieces = label.split("年")
        month = int(pieces[-1].strip().replace("月", ""))
        return str(int(pieces[0]) + 1911) + str(month).zfill(2)

    def parse(self, query):
        """Download the page for ``query`` and return its data table.

        Returns:
            DataFrame from the second HTML table on the page, limited to
            columns whose label is longer than five characters and has no "(",
            with "Unnamed: 0" / "Unnamed: 1" renamed to "國家" / "來台目的".

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        # A timeout keeps a stalled server from hanging the notebook forever.
        response = requests.get(self.url, headers=self.headers, params=query, timeout=30)
        response.raise_for_status()
        # read_html wants a file-like object; passing literal HTML is deprecated.
        df = pd.read_html(io.StringIO(response.text))[1]
        keep_columns = [i for i in df.columns if len(i) > 5 and "(" not in i]
        # Rename on a fresh frame rather than in place on a column slice.
        return df[keep_columns].rename(columns={"Unnamed: 0": "國家", "Unnamed: 1": "來台目的"})

    def get_data(self):
        """Download and unpivot the table.

        Returns:
            DataFrame with columns 國家, 來台目的, 人數, 年月日, where 年月日 is
            the first day of each month as a timestamp.
        """
        df = self.parse(self.querystring)
        id_columns = ["國家", "來台目的"]
        df = (
            df.set_index(id_columns)
            .stack()
            .rename_axis(id_columns + ["年月日"])
            .reset_index(name="人數")
        )
        df["年月日"] = pd.to_datetime(df["年月日"].map(self._roc_label_to_yyyymm), format="%Y%m")
        return df[["國家", "來台目的", "人數", "年月日"]]
    

In [None]:
# Query end month = the previous calendar month, as a ROC "YYYMM" string.
# Stepping back from the 1st of the current month handles January correctly
# (it rolls to December of the previous year instead of producing month 0).
previous_month = datetime.date.today().replace(day=1) - datetime.timedelta(days=1)
year = previous_month.year - 1911  # Gregorian -> ROC year
month = previous_month.month
# Zero-pad the month so the value matches the five-character query templates.
ymt = str(year) + str(month).zfill(2)

In [None]:
# Fetch the CarPark and NewCar tables, stack them, and write one workbook.
data = [source(ymt).df for source in (CarPark, NewCar)]
result = CombineData(data).combined_data()
result.to_excel(
    excel_writer=r"D:\kc.hsu\OneDrive - Bridgestone\數據\市場資訊\交通運輸市場數據\車輛市場數據_power_bi.xlsx",
    index=False,
)

In [None]:
# Fetch the RoadPassenger table and write it to Excel without the index column.
RoadPassenger(ymt).df.to_excel(
    excel_writer=r"D:\kc.hsu\OneDrive - Bridgestone\數據\市場資訊\交通運輸市場數據\客運運輸客運量_power_bi.xlsx",
    index=False,
)

In [None]:
# Fetch the RoadShipment table and write it to Excel without the index column.
RoadShipment(ymt).df.to_excel(
    excel_writer=r"D:\kc.hsu\OneDrive - Bridgestone\數據\市場資訊\交通運輸市場數據\公路貨運量_power_bi.xlsx",
    index=False,
)

In [None]:
# Fetch the SeaShipment table and write it to Excel without the index column.
SeaShipment(ymt).df.to_excel(
    excel_writer=r"D:\kc.hsu\OneDrive - Bridgestone\數據\市場資訊\交通運輸市場數據\海運數據_power_bi.xlsx",
    index=False,
)

In [None]:
# Left-join the FreewayData rows onto the FreewayVol rows, then write the
# combined table to Excel without the index column.
highway = FreewayVol(ymt).df.merge(
    FreewayData(ymt).df,
    on=["方向", "國道編號", "車輛類型", "年月日"],
    how="left",
)
highway.to_excel(
    excel_writer=r"D:\kc.hsu\OneDrive - Bridgestone\數據\市場資訊\交通運輸市場數據\高速公路延車公里數據_power_bi.xlsx",
    index=False,
)

In [None]:
# Fetch the Tourism table and write it to Excel without the index column.
Tourism(ymt).df.to_excel(
    excel_writer=r"D:\kc.hsu\OneDrive - Bridgestone\數據\市場資訊\交通運輸市場數據\旅客來台目的人數_power_bi.xlsx",
    index=False,
)

In [None]:
# Petroleum consumption
url = "https://www.esist.org.tw/Database/List"

querystring = {"yearType":"0","PeriodType":"M","Start":"10701","End":"11110","UnitType":"0","EnergySelectedValue":"5_1_6","PageId":"2"}

# NOTE(review): hard-coded session cookie -- it presumably expires; confirm
# whether the endpoint needs it at all.
headers = {"cookie": "ASP.NET_SessionId=j3rhrvpf334dx35ecne1s4rp"}

# A timeout keeps a stalled server from hanging the notebook, and the status
# check stops an error page from being parsed as if it were the result page.
http_response = requests.post(url, params=querystring, headers=headers, timeout=30)
http_response.raise_for_status()
response = http_response.text
parser = HTMLParser(response)

base_url = "https://www.esist.org.tw"
# css_first returns None when nothing matches; fail with a clear message
# instead of an AttributeError on ``None.attributes``.
download_link = parser.css_first("ul.result_table_download > li > a")
if download_link is None:
    raise RuntimeError("No download link found in the response from " + url)
href = download_link.attributes["href"]
file_url = base_url + href

urllib.request.urlretrieve(file_url, r"D:\kc.hsu\OneDrive - Bridgestone\數據\市場資訊\石油消費量.xlsx")

# Second sheet row holds the column labels (header=1).
df = pd.read_excel(r"D:\kc.hsu\OneDrive - Bridgestone\數據\市場資訊\石油消費量.xlsx", header=1)
col_list = df.columns.tolist()
col_list.remove("日期")
df = df[col_list]
# Unpivot: one row per (source row, column label); the source row id is kept
# under the throwaway name "pass" and dropped below.
df = df.stack().reset_index(name="石油消耗量").rename(columns={"level_0": "pass", "level_1": "年月日"})
# Split each "<year>年<month>月" label once and reuse both halves.
label_parts = df["年月日"].str.split("年", expand=True)
df["年"] = label_parts[0]
df["月"] = label_parts[1].str.replace("月", "")
df["年月日"] = pd.to_datetime(df["年"] + df["月"], format="%Y%m").dt.date
df = df[["年月日", "石油消耗量"]]
df.to_excel(r"D:\kc.hsu\OneDrive - Bridgestone\數據\市場資訊\交通運輸市場數據\每月石油消耗量_power_bi.xlsx", index=False)

In [None]:
# Wholesale and retail sales figures
# NOTE(review): this cell reads and overwrites the same workbook, so a second
# run would feed the already reshaped table back in -- confirm this is intended.
sales_path = r"D:\kc.hsu\OneDrive - Bridgestone\數據\市場資訊\交通運輸市場數據\批發零售業銷售額_power_bi.xlsx"
df = pd.read_excel(sales_path, index_col=[0, 1])
# Unpivot the month columns into rows keyed by the two index levels.
df = df.stack().rename_axis(["產業別", "產業明細", "年月日"]).reset_index(name="銷售額")
# "<ROC year>年<month>月" -> Gregorian year (+1911) and a two-digit month.
gregorian_year = (df["年月日"].str.split("年", expand=True)[0].astype(int) + 1911).astype(str)
padded_month = (
    df["年月日"]
    .str.split("年", expand=True)[1]
    .str.replace("月", "")
    .map(lambda m: m if len(m) != 1 else "0" + m)
)
df["年月日"] = pd.to_datetime(gregorian_year + padded_month, format="%Y%m")
df.to_excel(sales_path, index=False)

In [None]:
# National statistics indicators
# NOTE(review): this cell reads and overwrites the same workbook, so a second
# run would feed the already reshaped table back in -- confirm this is intended.
national_path = r"D:\kc.hsu\OneDrive - Bridgestone\數據\市場資訊\交通運輸市場數據\國民相關統計數據_power_bi.xlsx"
df = pd.read_excel(national_path, index_col=0)
# Unpivot the period columns into rows keyed by the indicator name.
df = df.stack().rename_axis(["指標", "年季"]).reset_index(name="統計值")
# Keep only period labels longer than five characters.
is_long_label = df["年季"].str.len() > 5
df = df[is_long_label]
# The first three characters are the ROC year; shift it by 1911 and keep the rest.
df["年季"] = df["年季"].map(lambda label: str(int(label[:3]) + 1911) + label[3:])
df.to_excel(national_path, index=False)

In [None]:
# Consumer price index (year-over-year)
# NOTE(review): this cell reads and overwrites the same workbook, so a second
# run would feed the already reshaped table back in -- confirm this is intended.
cpi_path = r"D:\kc.hsu\OneDrive - Bridgestone\數據\市場資訊\交通運輸市場數據\消費者物價指數YoY_power_bi.xlsx"
df = pd.read_excel(cpi_path, index_col=[0])
# Unpivot: the index is the year and each column label is a month.
df = df.stack().rename_axis(["年", "月"]).reset_index(name="YoY指數")
# ROC year + 1911 and a two-digit month, joined as "YYYYMM".
gregorian_year = (df["年"] + 1911).astype(str)
padded_month = (
    df["月"]
    .str.replace("月", "")
    .map(lambda m: m if len(m) != 1 else "0" + str(m))
    .astype(str)
)
df["年月日"] = pd.to_datetime(gregorian_year + padded_month, format="%Y%m")
df = df[["年月日", "YoY指數"]]
# Scale the index values down by a factor of 100.
df["YoY指數"] = df["YoY指數"] / 100
df.to_excel(cpi_path, index=False)