In [1]:
import requests
import pandas as pd

from datetime import datetime, timedelta
import time
import json
import re

import sqlite3

In [2]:
def convert_date(date_str):
    """Convert one ROC (Minguo) calendar date to an ISO ``YYYY-MM-DD`` string.

    The ROC year is turned into a Gregorian year by adding 1911, so
    ``"114/08/19"`` becomes ``"2025-08-19"``.

    NOTE: a later cell in this notebook defines another ``convert_date`` that
    takes a date *range*; once that cell has run it shadows this function.

    Args:
        date_str: Text beginning with ``<ROC year><sep><month><sep><day>``,
            where each separator is a single non-digit character (e.g. ``/``).
            The year may have 1-3 digits; month and day may be unpadded.

    Returns:
        The date formatted as ``YYYY-MM-DD`` with zero-padded month and day.

    Raises:
        ValueError: If ``date_str`` does not start with a date in that form.
    """
    # Parse the fields by pattern rather than by fixed character offsets, so
    # two-digit ROC years ("99/01/05") and unpadded fields ("114/8/19") work.
    match = re.match(r"\s*(\d{1,3})\D(\d{1,2})\D(\d{1,2})", date_str)
    if match is None:
        raise ValueError(f"not an ROC date: {date_str!r}")
    roc_year, month, day = (int(part) for part in match.groups())
    return f"{roc_year + 1911}-{month:02d}-{day:02d}"

def save_to_database(df, db_name="stocks.db"):
    """Write ``df`` to the ``stocks`` table of a SQLite database file.

    An existing ``stocks`` table is replaced, and the DataFrame index is not
    written as a column.

    Args:
        df: The pandas DataFrame to persist.
        db_name: Path of the SQLite database file. Defaults to ``"stocks.db"``.
    """
    conn = sqlite3.connect(db_name)
    try:
        df.to_sql("stocks", conn, if_exists="replace", index=False)
    finally:
        # Close the connection even if the write raises, so it is never leaked.
        conn.close()
# Lookup from the Chinese ordinal clause labels "第一款" .. "第十三款" to their
# integer position (1 through 13), in ascending order.
refs = {
    label: position
    for position, label in enumerate(
        (
            "第一款",
            "第二款",
            "第三款",
            "第四款",
            "第五款",
            "第六款",
            "第七款",
            "第八款",
            "第九款",
            "第十款",
            "第十一款",
            "第十二款",
            "第十三款",
        ),
        start=1,
    )
}

In [52]:
def _roc_to_iso(roc_date):
    """Convert one ROC date such as ``"114/08/20"`` to ``"2025-08-20"``.

    Raises ``ValueError`` when ``roc_date`` does not start with
    ``<ROC year><sep><month><sep><day>``.
    """
    # Pattern-based parsing tolerates surrounding whitespace, two-digit ROC
    # years and unpadded month/day, which fixed slicing does not.
    match = re.match(r"\s*(\d{1,3})\D(\d{1,2})\D(\d{1,2})", roc_date)
    if match is None:
        raise ValueError(f"not an ROC date: {roc_date!r}")
    roc_year, month, day = (int(part) for part in match.groups())
    return f"{roc_year + 1911}-{month:02d}-{day:02d}"


def convert_date(date_str):
    """Convert an ROC date range to a Gregorian ISO date range.

    ``"114/08/20～114/09/02"`` becomes ``"2025-08-20～2025-09-02"``. Both the
    full-width ``～`` and the half-width ``~`` are accepted as the separator
    between the two dates; the result always uses the full-width ``～``.

    Args:
        date_str: Text of the form ``<start>～<end>`` where each side is an
            ROC date (``year/month/day``; the ROC year is Gregorian - 1911).

    Returns:
        The converted range as ``YYYY-MM-DD～YYYY-MM-DD``, or ``None`` if
        ``date_str`` cannot be parsed (a message is printed in that case).
    """
    try:
        # Normalise half-width tildes to full-width before splitting.
        start_date, end_date = date_str.replace("~", "～").split("～")
        return f"{_roc_to_iso(start_date)}～{_roc_to_iso(end_date)}"
    except (AttributeError, TypeError, ValueError) as e:
        # Best effort: non-string input (AttributeError/TypeError) or malformed
        # text (ValueError) is reported and mapped to None instead of raising.
        print(f"❌ Error converting: {repr(date_str)} -> {e}")
        return None   # alternatively, the original string could be returned


In [76]:
# Load both raw JSON payloads. `TSE_data` and `OTC_data` are always read, but
# only the payloads listed in `datas` are turned into rows of `data` below.
with open("TSE_punished.json", "r", encoding="utf-8") as f:
    TSE_data = json.load(f)
with open("OTC_punished.json", "r", encoding="utf-8") as f:
    OTC_data = json.load(f)

datas = [TSE_data]
# datas.append(OTC_data)  # OTC is currently excluded from the result

frames = []
for data_source in datas:
    # Tell the payloads apart by object identity; comparing the dicts with
    # `==` would walk both payloads in full on every iteration.
    if data_source is TSE_data:
        frame = (
            pd.DataFrame(data_source["data"], columns=data_source["fields"])
            .drop(columns=['處置措施', '處置內容', '備註'])
            .assign(Source='TSE')
        )
    elif data_source is OTC_data:
        otc_table = data_source['tables'][0]
        frame = (
            pd.DataFrame(otc_table["data"], columns=otc_table["fields"])
            .drop(columns=['收盤價', '本益比', ' ', '處置內容'])
            # Rename the OTC column names to the ones used by the TSE frame.
            .rename(columns={
                "處置原因": "處置條件",
                "處置起訖時間": "處置起迄時間"
            })
            .assign(Source='OTC')
        )
        # `.copy()` so the column assignments below act on an independent
        # frame rather than on a slice (avoids SettingWithCopyWarning).
        frame = frame[['編號', '公布日期', '證券代號', '證券名稱', '累計', '處置條件', '處置起迄時間', 'Source']].copy()
    else:
        raise ValueError("unrecognised payload in `datas`")

    # Normalise half-width "~" to full-width "～", then convert the period
    # text with `convert_date` (defined in an earlier cell).
    frame['處置起迄時間'] = (
        frame['處置起迄時間']
        .str.replace("~", "～", regex=False)
        .apply(convert_date)
    )

    # Split the converted period into separate start and end columns.
    frame[['處置起始時間', '處置結束時間']] = frame['處置起迄時間'].str.split('～', n=1, expand=True)
    frames.append(frame)

# Stack every processed source so that enabling more sources adds rows instead
# of overwriting the previous source's frame.
data = pd.concat(frames, ignore_index=True)
data



Unnamed: 0,編號,公布日期,證券代號,證券名稱,累計,處置條件,處置起迄時間,Source,處置起始時間,處置結束時間
0,1,114/08/19,1717,長興,1,連續三次,2025-08-20～2025-09-02,TSE,2025-08-20,2025-09-02
1,2,114/08/19,1802,台玻,1,連續三次及當日沖銷標準,2025-08-20～2025-09-04,TSE,2025-08-20,2025-09-04
2,3,114/08/21,2380,虹光,1,連續三次,2025-08-22～2025-09-04,TSE,2025-08-22,2025-09-04
3,4,114/08/18,2630,亞航,1,連續五次及當日沖銷標準,2025-08-19～2025-09-03,TSE,2025-08-19,2025-09-03
4,5,114/08/18,3694,海華,1,連續三次及當日沖銷標準,2025-08-19～2025-09-03,TSE,2025-08-19,2025-09-03
5,6,114/08/25,4722,國精化,2,連續三次,2025-08-26～2025-09-08,TSE,2025-08-26,2025-09-08
6,6,114/08/20,4722,國精化,2,連續三次,2025-08-21～2025-09-03,TSE,2025-08-21,2025-09-03
7,7,114/08/18,4989,榮科,1,連續三次,2025-08-19～2025-09-01,TSE,2025-08-19,2025-09-01
8,8,114/08/21,6213,聯茂,1,連續五次,2025-08-22～2025-09-04,TSE,2025-08-22,2025-09-04
9,9,114/08/14,6215,和椿,1,連續五次及當日沖銷標準,2025-08-15～2025-09-01,TSE,2025-08-15,2025-09-01
