## Prerequisite: watch [this YouTube video](https://www.youtube.com/watch?v=VkjqViooMtQ) to learn how to set up IAM and the connector to Azure.

In [0]:
# %sql
# DROP TABLE IF EXISTS kenworkspace.tw_stocks_db.bronze_daily;
# DROP TABLE IF EXISTS kenworkspace.tw_stocks_db.bronze_monthly;
# -- Currently disabled. Uncommenting this cell drops both bronze tables if they exist.

## Read the daily and monthly data files from Azure Data Lake Storage and build the corresponding bronze tables.

In [0]:
from functools import reduce

# Years covered by the yearly source files: 2010 through 2024 inclusive.
years = list(range(2010, 2025))
# Each entry pairs a storage folder with the frequency code used in its file names.
folders = [("daily", "D"), ("monthly", "M")]

# Build one bronze Delta table per folder from that folder's yearly parquet files.
# NOTE(review): `spark` is not defined in this cell; presumably it is the session
# object injected by the notebook runtime — confirm.
for folder in folders:
    # Unpack for readability; `folder` itself stays bound exactly as before.
    folder_name, freq_code = folder

    # Full abfss:// path of every yearly file in this folder.
    file_paths = [
        f"abfss://twstocks@kenspractice.dfs.core.windows.net/{folder_name}/2330_{freq_code}_{year}.parquet"
        for year in years
    ]

    # Load each yearly file as its own DataFrame.
    dfs = [spark.read.parquet(file_path) for file_path in file_paths]

    # Union all DataFrames, matching columns by name, then sort on "date".
    df_combined = reduce(
        lambda merged, nxt: merged.unionByName(nxt),
        dfs,
    ).orderBy("date")

    # Debugging aids (left disabled): df_combined.printSchema(), display(df_combined)

    # Overwrite the bronze table that corresponds to this folder.
    table_name = f"kenworkspace.tw_stocks_db.bronze_{folder_name}"
    (
        df_combined.write
        .format("delta")
        .mode("overwrite")
        .saveAsTable(table_name)
    )

## Query the `bronze_daily` table

In [0]:
%sql
-- Show every column of the daily bronze table.
-- An explicit ORDER BY is used because row order is otherwise unspecified.
SELECT *
FROM kenworkspace.tw_stocks_db.bronze_daily
ORDER BY date

## Query the `bronze_monthly` table

In [0]:
%sql
-- Show every column of the monthly bronze table.
-- An explicit ORDER BY is used because row order is otherwise unspecified.
SELECT *
FROM kenworkspace.tw_stocks_db.bronze_monthly
ORDER BY date