# 散戶小台/微台淨未平倉抓取  
* Open API 取得數據（限定當天） 
  https://openapi.taifex.com.tw/#/%E8%B3%87%E6%96%99%E6%9F%A5%E8%A9%A2API/get_MarketDataOfMajorInstitutionalTradersDetailsOfFuturesContractsBytheDate
* 直接網頁查詢 [首頁 > 交易資訊 > 三大法人 > 查詢 > 區分各期貨契約 > 依日期](https://www.taifex.com.tw/cht/3/futContractsDate)
* 多天資料下載 [交易資訊 > 三大法人 > 下載 > 區分各期貨契約 > 依日期](https://www.taifex.com.tw/cht/3/futContractsDateView)  
* 計算方式：將三大法人的「多空未平倉口數淨額」依日期相加，即為本筆記本所記錄的散戶淨未平倉口數

In [1]:
import os
import pandas as pd
import requests
import duckdb

In [2]:
# 引用自建公用模組
from proj_util_pkg.settings import ProjEnvSettings

## 公用參數設定

In [3]:
# Show every column when a DataFrame is displayed (no column truncation).
pd.set_option("display.max_columns", None)

In [4]:
# Request headers carrying a Chrome-style User-Agent, so that requests sent
# with them identify themselves as a desktop browser.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
}

## 外部資料讀取  
### 方法1: 透過期交所 open api  

In [5]:
# Fetch the "major institutional traders, futures contracts by date" data
# from the TAIFEX Open API and decode the JSON body.
# NOTE(review): verify=False turns off TLS certificate verification for this
# call. It is kept unchanged here; confirm whether it is still required.
taifex_openapi_response = requests.get(
    "https://openapi.taifex.com.tw/v1/MarketDataOfMajorInstitutionalTradersDetailsOfFuturesContractsBytheDate",
    verify=False,
    headers=headers,
    timeout=30,  # raise instead of waiting forever on an unresponsive server
)
# Report an HTTP error as such, rather than as a JSON decoding failure below.
taifex_openapi_response.raise_for_status()
major_futures_contracts_by_date = taifex_openapi_response.json()



In [6]:
# Turn the decoded JSON payload into a DataFrame and show its shape.
major_futures_contracts_by_date_df = pd.DataFrame(major_futures_contracts_by_date)
print(major_futures_contracts_by_date_df.shape)

# Keep only the rows whose ContractCode is '小型臺指期貨' and the two columns
# needed below. Selecting rows and columns in a single .loc call and copying
# the result gives an independent frame, so the dtype conversions that follow
# do not write into a view of the full DataFrame.
is_target_contract = major_futures_contracts_by_date_df['ContractCode'] == '小型臺指期貨'
mjfut_df = major_futures_contracts_by_date_df.loc[
    is_target_contract, ["Date", "OpenInterest(Net)"]
].copy()

# Convert OpenInterest(Net) to int and Date to datetime.
mjfut_df["OpenInterest(Net)"] = mjfut_df["OpenInterest(Net)"].astype(int)
mjfut_df["Date"] = pd.to_datetime(mjfut_df["Date"])

# Sum OpenInterest(Net) over all rows that share the same Date.
mjfut_df_grouped = mjfut_df.groupby("Date").agg({"OpenInterest(Net)": "sum"}).reset_index()
mjfut_df_grouped

(45, 15)


Unnamed: 0,Date,OpenInterest(Net)
0,2024-10-01,-4797


In [8]:
# Take the Date of the last row of the grouped result; it is used as the
# query end date for method 2.
last_txn_date = pd.to_datetime(mjfut_df_grouped["Date"].iloc[-1])

last_txn_date

Timestamp('2024-10-01 00:00:00')

### 方法2: 透過爬蟲方式下載期交所網站檔案取得數據

In [9]:
# Shared setting used by the download and read steps of method 2.
FUTURES_DATA_FILENAME = 'futures_data.csv'  # name of the output file
FUTURES_DATA_FILENAME

'futures_data.csv'

In [10]:
def get_futures_data_from_taifex(input_txn_date, commodity_id, timeout=30):
    """Download the TAIFEX "futures contracts by date" CSV for one commodity.

    Posts the query form to the TAIFEX download endpoint and, when the server
    answers with HTTP 200, writes the response body to
    ``FUTURES_DATA_FILENAME``. The queried range runs from the first day of
    the month of ``input_txn_date`` through ``input_txn_date`` itself.

    Args:
        input_txn_date: Last date to query. It must offer ``replace(day=...)``
            and ``strftime`` and accept subtraction of a ``pd.DateOffset``
            (the notebook passes a ``pd.Timestamp``).
        commodity_id: Value sent as the ``commodityId`` form field,
            e.g. ``'MXF'``.
        timeout: Seconds passed to ``requests.post`` as its ``timeout``, so
            an unresponsive server raises instead of blocking forever.

    Returns:
        True when the CSV was written, False when the server answered with a
        status code other than 200 (nothing is written in that case).
    """
    # Download endpoint of the "futures contracts by date" page.
    url = 'https://www.taifex.com.tw/cht/3/futContractsDateDown'

    # Browser-like headers so the request resembles one sent from the web page.
    # NOTE(review): 'br' is advertised in Accept-Encoding; presumably the
    # environment can decode Brotli responses - confirm.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate, br',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
        'Referer': 'https://www.taifex.com.tw/cht/3/futContractsDate',
        'Origin': 'https://www.taifex.com.tw'
    }

    # Form fields, all formatted as YYYY/MM/DD:
    #   queryStartDate / queryEndDate - first day of the month .. input date
    #   firstDate / lastDate          - three years before the input date ..
    #                                   input date, each suffixed with " 00:00"
    query_end_date = input_txn_date.strftime("%Y/%m/%d")
    query_start_date = input_txn_date.replace(day=1).strftime("%Y/%m/%d")
    first_date = (input_txn_date - pd.DateOffset(years=3)).strftime("%Y/%m/%d")
    form_data = {
        'firstDate': f'{first_date} 00:00',
        'lastDate': f'{query_end_date} 00:00',
        'queryStartDate': query_start_date,
        'queryEndDate': query_end_date,
        'commodityId': commodity_id
    }
    print(form_data)

    # Send the POST request with the headers and the form data.
    response = requests.post(url, data=form_data, headers=headers, timeout=timeout)

    # Anything other than 200 is reported and nothing is written.
    if response.status_code != 200:
        print(f'下載失敗，狀態碼: {response.status_code}')
        return False

    # Store the raw response bytes as the local CSV file.
    with open(FUTURES_DATA_FILENAME, 'wb') as file:
        file.write(response.content)
    print(f'CSV檔案已成功下載並儲存為{FUTURES_DATA_FILENAME}')
    return True


In [11]:
def _read_futures_data_from_taifex(commodity_name, csv_path=None):
    """Read the downloaded TAIFEX CSV and return the net open interest per date.

    The CSV is read as Big5 text. Only the columns ``日期`` and
    ``多空未平倉口數淨額`` are used; the latter is converted to int and summed
    over all rows that share the same date.

    NOTE(review): the result is the plain per-date sum of the CSV values.
    Whether the "retail" figure should be the negation of that sum is not
    decided here - confirm.

    Args:
        commodity_name: Name given to the summed column in the result.
        csv_path: Path of the CSV file to read. Defaults to
            ``FUTURES_DATA_FILENAME`` when omitted.

    Returns:
        A DataFrame with a datetime ``Date`` column and one column named
        ``commodity_name``, one row per date.
    """
    if csv_path is None:
        csv_path = FUTURES_DATA_FILENAME

    # Read the CSV file.
    major_futures_contracts_by_date_df = pd.read_csv(csv_path, encoding='big5')

    # Build a fresh two-column frame with the converted values. Constructing
    # new columns (instead of assigning through .loc on a slice of the source
    # frame) guarantees the datetime/int dtypes and avoids writing into a view.
    mjfut_df = pd.DataFrame({
        "Date": pd.to_datetime(major_futures_contracts_by_date_df["日期"]),
        commodity_name: major_futures_contracts_by_date_df["多空未平倉口數淨額"].astype(int),
    })

    # Sum the net open interest of all rows belonging to the same date.
    mjfut_df_grouped = mjfut_df.groupby("Date").agg({commodity_name: "sum"}).reset_index()

    return mjfut_df_grouped


In [12]:
# Contracts to fetch: commodity id sent to TAIFEX -> name of the result column.
tw_futures_target_dict = {
    "MXF": "散戶小台淨未平倉口數",
    "TMF": "散戶微台淨未平倉口數"
}

futures_data_df = pd.DataFrame()
# For every (commodity id, column name) pair: download the CSV, read it into a
# per-date frame, and merge that frame into futures_data_df on Date.
for commodity_id, commodity_name in tw_futures_target_dict.items():
    try:
        get_futures_data_from_taifex(last_txn_date, commodity_id)
        mjfut_df_grouped = _read_futures_data_from_taifex(commodity_name)
    finally:
        # Always remove the temporary CSV, even when the download or the
        # parsing raised, so a leftover file can never be read in place of a
        # later download that failed.
        if os.path.exists(FUTURES_DATA_FILENAME):
            os.remove(FUTURES_DATA_FILENAME)

    # Merge this commodity's column into the combined frame.
    if futures_data_df.empty:
        futures_data_df = mjfut_df_grouped
    else:
        futures_data_df = pd.merge(futures_data_df, mjfut_df_grouped, on='Date', how='outer')

print(futures_data_df)

{'firstDate': '2021/10/01 00:00', 'lastDate': '2024/10/01 00:00', 'queryStartDate': '2024/10/01', 'queryEndDate': '2024/10/01', 'commodityId': 'MXF'}
CSV檔案已成功下載並儲存為futures_data.csv
{'firstDate': '2021/10/01 00:00', 'lastDate': '2024/10/01 00:00', 'queryStartDate': '2024/10/01', 'queryEndDate': '2024/10/01', 'commodityId': 'TMF'}
CSV檔案已成功下載並儲存為futures_data.csv
        Date  散戶小台淨未平倉口數  散戶微台淨未平倉口數
0 2024-10-01       -4797       -9650


## 資料留存至資料庫（DuckDB）

In [13]:
# Build the database file path from the "hist_data_path" environment variable.
TWSTOCK_DATA_ROOT = os.environ.get("hist_data_path")
if not TWSTOCK_DATA_ROOT:
    # Without this check the path would silently become "None/twstock.duckdb".
    raise RuntimeError(
        "Environment variable 'hist_data_path' is not set; cannot locate twstock.duckdb"
    )
twstock_db_path = f"{TWSTOCK_DATA_ROOT}/twstock.duckdb"

In [14]:
# Open a DuckDB connection to the database file at twstock_db_path.
conn_duckdb = duckdb.connect(twstock_db_path)

In [15]:
# Name of the table the rows of futures_data_df are appended to and read from.
table_name = "tw_retail_investors_net_open_interest"

In [16]:
# Append futures_data_df to the table one row at a time, so that a row the
# database rejects does not stop the remaining rows from being written.
# NOTE(review): skipping rows whose Date is already stored presumably relies
# on a uniqueness constraint on the table - confirm.
for row_pos in range(len(futures_data_df)):
    single_row_df = futures_data_df.iloc[row_pos:row_pos + 1]
    try:
        single_row_df.to_sql(table_name, conn_duckdb, if_exists="append", index=False)
    except Exception as e:
        # Best-effort insert: continue with the next row, but report the
        # failure so that real errors are not silently hidden.
        print(f"第 {row_pos} 筆資料寫入 {table_name} 失敗，已略過: {e}")


  futures_data_df.iloc[i:i+1].to_sql(table_name, conn_duckdb, if_exists="append", index=False)


In [17]:
# Read back every row of the table named by table_name, newest Date first.
conn_duckdb.execute(f"SELECT * FROM {table_name} order by Date desc").fetch_df()

Unnamed: 0,Date,散戶小台淨未平倉口數,散戶微台淨未平倉口數
0,2024-10-01,-4797,-9650
1,2024-09-30,-6923,-13272
2,2024-09-27,-6445,-4239
3,2024-09-26,-3299,1647
4,2024-09-25,-755,3499
5,2024-09-24,-489,6152
6,2024-09-23,-7189,3665
7,2024-09-20,-4826,1942
8,2024-09-19,-5187,6236
9,2024-09-18,-7945,-3537


In [18]:
# Close the database connection.
conn_duckdb.close()