In [1]:
import os
from urllib.request import urlretrieve
import zipfile
from datetime import datetime, timedelta
import pandas as pd

# Directory that receives the generated 1-minute bar text files.
download_dir = os.path.join(os.getcwd(), "dailydata","tx_zip")
today = datetime.today()
# Date cursor for the loop below. Despite its name it starts at today and is
# moved back one calendar day at the end of every iteration.
yesterday = today

# The target directory never changes, so create it once instead of once per day.
os.makedirs(download_dir, exist_ok=True)

# Process four consecutive calendar days, newest first.
for daycnt in range(1, 5):
    path = yesterday.strftime('%Y_%m_%d')
    name = 'Daily_' + path + '.zip'
    download_path = 'https://www.taifex.com.tw/file/taifex/Dailydownload/DailydownloadCSV/' + name

    # Reset the temp-file path for this iteration. Without this, a failed
    # download on the first day makes the cleanup in `finally` raise NameError,
    # and on later days the cleanup would inspect the previous day's path.
    filename = None

    try:
        # Download the ZIP archive; with no target path urlretrieve stores it
        # in a temporary file and returns that file's name.
        filename, _ = urlretrieve(download_path)
        print(f"下载完成：{download_path}")

        # Read the first CSV member straight out of the archive.
        with zipfile.ZipFile(filename) as z:
            csv_name = next(f for f in z.namelist() if f.lower().endswith('.csv'))

            with z.open(csv_name) as f:
                df = pd.read_csv(f, encoding='big5', low_memory=False)

            print(f"CSV 文件已读取，共 {len(df)} 笔数据，开始整理数据")

        # Keep only the TX product. .copy() makes df_tx independent of df so the
        # column assignments below do not trigger SettingWithCopyWarning.
        df['商品代號'] = df['商品代號'].str.strip()
        df_tx = df[df['商品代號'] == 'TX'].copy()

        # Normalise the expiry column to stripped strings. Plain column
        # assignment (instead of `.loc[:, col] = ...`) replaces the column
        # outright, so the result is a string column whatever dtype read_csv inferred.
        df_tx['到期月份(週別)'] = df_tx['到期月份(週別)'].astype(str).str.strip()

        # Exclude spread contracts: their expiry value contains '/'.
        df_tx = df_tx[~df_tx['到期月份(週別)'].str.contains('/')]
        # Front month = numerically smallest remaining expiry value.
        front_month = str(pd.to_numeric(df_tx['到期月份(週別)']).min())

        # Keep only the front-month contract; copy again because boolean
        # filtering returns a new slice that is modified below.
        df_tx = df_tx[df_tx['到期月份(週別)'] == front_month].copy()

        # Make sure prices are numeric; unparsable values become NaN.
        df_tx['成交價格'] = pd.to_numeric(df_tx['成交價格'], errors='coerce')

        # Combine the trade date with the trade time (left-padded to HHMMSS).
        df_tx['datetime'] = pd.to_datetime(
            df_tx['成交日期'].astype(str) + ' ' +
            df_tx['成交時間'].astype(str).str.zfill(6),
            format='%Y%m%d %H%M%S'
        )

        # Index by timestamp so the frame can be resampled.
        df_tx.set_index('datetime', inplace=True)

        # Aggregate the ticks into 1-minute open/high/low/close bars.
        ohlc = df_tx.resample('1Min').agg({
            '成交價格': [
                ('開盤價', 'first'),
                ('最高價', 'max'),
                ('最低價', 'min'),
                ('收盤價', 'last')
            ]
        })

        # Drop bars that contain missing values, then restore the timestamp column.
        ohlc = ohlc.dropna()
        ohlc.reset_index(inplace=True)

        # Replace the column labels with five flat names.
        ohlc.columns = ['datetime', '開盤價', '最高價', '最低價', '收盤價']

        # Shift every bar by one minute (original note: 15:01 is the first bar).
        ohlc['datetime'] = ohlc['datetime'] + pd.Timedelta(minutes=1)

        # Separate date and time text columns for the export.
        ohlc['日期'] = ohlc['datetime'].dt.strftime('%Y-%m-%d')
        ohlc['時間'] = ohlc['datetime'].dt.strftime('%H:%M:%S')

        # Build the MultiCharts text mapping: one "date,time,O,H,L,C" line per bar.
        result = ohlc[['日期', '時間', '開盤價', '最高價', '最低價', '收盤價']]
        txt_content = result.apply(
            lambda row: f"{row['日期']},{row['時間']},{row['開盤價']},{row['最高價']},{row['最低價']},{row['收盤價']}",
            axis=1
        ).str.cat(sep='\n')

        # Despite its name, tx_dir is the path of the output text file (YYYYMMDD.txt).
        tx_dir = os.path.join(download_dir, yesterday.strftime('%Y%m%d') + '.txt')

        with open(tx_dir, 'w', encoding='utf-8') as f:
            f.write(txt_content)

        print(f"已完成处理并保存至：{tx_dir}")

    except Exception as e:
        # Best effort: report the problem and continue with the next day.
        print(f"处理过程中发生错误：{e}")

    finally:
        yesterday = yesterday - timedelta(days=1)
        # Remove the temporary download, if this iteration produced one.
        if filename and os.path.exists(filename):
            os.remove(filename)

下载完成：https://www.taifex.com.tw/file/taifex/Dailydownload/DailydownloadCSV/Daily_2024_10_27.zip
处理过程中发生错误：File is not a zip file
下载完成：https://www.taifex.com.tw/file/taifex/Dailydownload/DailydownloadCSV/Daily_2024_10_26.zip
处理过程中发生错误：File is not a zip file
下载完成：https://www.taifex.com.tw/file/taifex/Dailydownload/DailydownloadCSV/Daily_2024_10_25.zip
CSV 文件已读取，共 405067 笔数据，开始整理数据
处理过程中发生错误：[Errno 22] Invalid argument: 'https://www.taifex.com.tw/file/taifex/Dailydownload/DailydownloadCSV/Daily_2024_10_25.zip\\20241025.txt'
下载完成：https://www.taifex.com.tw/file/taifex/Dailydownload/DailydownloadCSV/Daily_2024_10_24.zip
CSV 文件已读取，共 574284 笔数据，开始整理数据
处理过程中发生错误：[Errno 22] Invalid argument: 'https://www.taifex.com.tw/file/taifex/Dailydownload/DailydownloadCSV/Daily_2024_10_24.zip\\20241024.txt'


In [1]:
import pandas as pd
from datetime import datetime,timedelta,date
import os
from urllib.request import urlretrieve
import zipfile
from pathlib import Path

In [None]:
# Reference date; `yesterday` is currently just an alias of today.
today = yesterday = datetime.today()

# The computed date string is immediately replaced by a fixed trading day.
path = yesterday.strftime('%Y_%m_%d')
path = '2024_10_23'
name = 'Daily_' + path

# Remote archive URL and the local directory layout: dailydata/<name>/<name>.zip
download_path = 'https://www.taifex.com.tw/file/taifex/Dailydownload/DailydownloadCSV/' + name + '.zip'
download_dir = os.path.join(os.getcwd(), "dailydata")
save_dir = os.path.join(download_dir, name)
save_path = os.path.join(save_dir, f'{name}.zip')

# Create the per-day directory (and its parents) if it is missing.
os.makedirs(save_dir, exist_ok=True)

print(download_path, save_dir, sep='\n')


In [4]:
# Download the daily ZIP to save_path; keep the returned local file name and response headers.
filename,headers = urlretrieve(download_path, save_path)

In [None]:
# Download the ZIP archive.
print(f"開始下載：{download_path}")

# Store the archive at save_path (inside save_dir). Previously it went to an
# anonymous temp file, so the message below reported a directory that did not
# contain the download and the temp file was never removed.
filename,_ = urlretrieve(download_path, save_path)
print(f"下載完成：{save_dir}")

# Open the archive and read its first CSV member directly, without extracting it.
with zipfile.ZipFile(filename) as z:
    csv_names = [x for x in z.namelist() if x.lower().endswith('.csv')]
    if not csv_names:
        # Fail with a descriptive error instead of a bare IndexError.
        raise FileNotFoundError(f"ZIP 內找不到 CSV 檔案：{filename}")
    csv_name = csv_names[0]
    with z.open(csv_name) as f:
        df = pd.read_csv(f,encoding='big5')

In [None]:
#urlretrieve(download_path, save_dir)

In [None]:
 # 讀取 CSV 檔案（使用 pandas）
print(f"CSV 檔案已讀取，共 {len(df)} 筆資料，開始整理資料")
df['商品代號'] = df['商品代號'].str.strip()
# Keep only 'TX'. .copy() makes df_tx independent of df, so the column
# assignments below do not raise SettingWithCopyWarning.
df_tx = df[df['商品代號'] == 'TX'].copy()

# Exclude spread contracts (their expiry value contains '/').
df_tx['到期月份(週別)'] = df_tx['到期月份(週別)'].astype(str)
df_tx = df_tx[~df_tx['到期月份(週別)'].str.contains('/')]
# Front month = numerically smallest remaining expiry value.
front_month = str(pd.to_numeric(df_tx['到期月份(週別)']).min())
# Keep only the front-month contract; copy again because boolean filtering
# returns a new slice that is modified below.
df_tx = df_tx[df_tx['到期月份(週別)'].str.strip() == front_month].copy()

# Make sure the price is numeric; unparsable values become NaN.
df_tx['成交價格'] = pd.to_numeric(df_tx['成交價格'], errors='coerce')

# Combine the trade date with the trade time (left-padded to HHMMSS).
df_tx['datetime'] = pd.to_datetime(
    df_tx['成交日期'].astype(str) + ' ' +
    df_tx['成交時間'].astype(str).str.zfill(6),
    format='%Y%m%d %H%M%S')
df_tx

In [None]:
# Index the ticks by their timestamp, then show the frame.
df_tx = df_tx.set_index('datetime')
df_tx

In [6]:
# Build 1-minute open/high/low/close bars from the tick prices, drop the bars
# with missing values, and turn the timestamp index back into a column.
ohlc = (
    df_tx.resample('1Min')
    .agg({
        '成交價格': [
            ('開盤價', 'first'),
            ('最高價', 'max'),
            ('最低價', 'min'),
            ('收盤價', 'last'),
        ]
    })
    .dropna()
    .reset_index()
)



In [None]:
# Show the 1-minute bars as the cell output.
ohlc

In [None]:
# Overwrite the column labels with five flat names: the timestamp plus open/high/low/close.
ohlc.columns = ['datetime','開盤價', '最高價', '最低價', '收盤價']
ohlc

In [None]:
# Add 1 minute so that 15:01 is the first bar.
# NOTE(review): the column is updated from its own value, so running this cell
# again shifts the timestamps by another minute.
ohlc['datetime'] = ohlc['datetime'] + pd.Timedelta(minutes =1)
ohlc

In [None]:
# Split the timestamp into text columns: date as YYYY-MM-DD and time as HH:MM:SS.
ohlc['日期'] = ohlc['datetime'].dt.strftime('%Y-%m-%d')
ohlc['時間'] = ohlc['datetime'].dt.strftime('%H:%M:%S')
ohlc

In [None]:
# Keep only the six output columns, in this order: date, time, open, high, low, close.
result= ohlc[['日期','時間','開盤價','最高價','最低價','收盤價']]
result

In [15]:
def _format_row(row):
    """Render one bar as a comma-separated line: date,time,open,high,low,close."""
    return f"{row['日期']},{row['時間']},{row['開盤價']},{row['最高價']},{row['最低價']},{row['收盤價']}"


# Format every row, then join the lines with newlines into one string.
txt_content = result.apply(_format_row, axis=1).str.cat(sep='\n')

In [None]:
# Show the generated text as the cell output.
txt_content

In [18]:
# Write the generated text to TX.txt inside save_dir, encoded as UTF-8.
txt_file = os.path.join(save_dir, 'TX.txt')

with open(txt_file, mode='w', encoding='utf-8') as out:
    out.write(txt_content)

In [None]:
try:
    # Download the ZIP archive into save_path (inside save_dir), so the
    # "download finished" message below names the directory that really holds
    # the file and no anonymous temp file is left behind.
    print(f"開始下載：{download_path}")
    filename,_ = urlretrieve(download_path, save_path)
    print(f"下載完成：{save_dir}")

    # Read the first CSV member directly from the archive.
    with zipfile.ZipFile(filename) as z:
        csv_name = [x for x in z.namelist() if x.lower().endswith('.csv')][0]
        with z.open(csv_name) as f:
            df = pd.read_csv(f,encoding='big5')

    print(f"CSV 檔案已讀取，共 {len(df)} 筆資料，開始整理資料")
    df['商品代號'] = df['商品代號'].str.strip()
    # .copy() so the column assignment below works on an independent frame
    # (avoids SettingWithCopyWarning).
    df_tx = df[df['商品代號'] == 'TX'].copy()
    df_tx['datetime'] = pd.to_datetime(df_tx['成交日期'].astype(str) + ' ' + df_tx['成交時間'].astype(str).str.zfill(6),format='%Y%m%d %H%M%S')

    # Keep only the front-month contract (smallest expiry, compared as stripped strings).
    front_month = df_tx['到期月份(週別)'].str.strip().min()
    df_tx = df_tx[df_tx['到期月份(週別)'].str.strip() == front_month]
    df_tx.set_index('datetime',inplace=True)

    # Aggregate the ticks into 1-minute open/high/low/close bars.
    ohlc = df_tx.resample('1Min').agg({
        '成交價格': [
            ('開盤價', 'first'),
            ('最高價', 'max'),
            ('最低價', 'min'),
            ('收盤價', 'last')
        ]
    })

    # Drop bars with missing values and restore the timestamp column.
    ohlc = ohlc.dropna()
    ohlc.reset_index(inplace=True)
    # Flat column labels, as in the other cells of this notebook.
    ohlc.columns = ['datetime', '開盤價', '最高價', '最低價', '收盤價']

    # Add 1 minute so that 15:01 is the first bar.
    # Fixed: `pd.timedelta(mintues =1)` does not exist and raised AttributeError.
    ohlc['datetime'] = ohlc['datetime'] + pd.Timedelta(minutes=1)
    ohlc['日期'] = ohlc['datetime'].dt.strftime('%Y/%m/%d')
    # Fixed: the format used ';' instead of ':' between hours and minutes.
    ohlc['時間'] = ohlc['datetime'].dt.strftime('%H:%M')

    # TODO: convert to the txt mapping used by MultiCharts.
except Exception as e:
    # Best effort: report the problem instead of stopping the notebook.
    print(f"處理過程中發生錯誤：{e}")

In [3]:
today = datetime.today()
# Today's date string; computed but not used for `name` below.
download_time = today.strftime('%Y_%m_%d')
# The trading day is hard-coded here.
name = 'Daily_'+'2024_10_28'+'.zip'
download_path = 'https://www.taifex.com.tw/file/taifex/Dailydownload/DailydownloadCSV/' + name
current_dir = os.getcwd()
download_dir = os.path.join(current_dir,"dailydata")
# Despite its name, save_dir is the full path of the ZIP file inside download_dir.
save_dir = os.path.join(download_dir,name)

# Make sure the download directory exists. The original cell only carried this
# comment, so saving the archive failed when the directory was missing.
os.makedirs(download_dir, exist_ok=True)



In [None]:
try:
    # Fetch the archive and store it at save_dir.
    print(f"開始下載：{download_path}")
    urlretrieve(download_path, save_dir)
    print(f"下載完成：{save_dir}")

    # Extract the first CSV member into download_dir.
    with zipfile.ZipFile(save_dir, 'r') as zip_ref:
        csv_file = next(filter(lambda member: member.lower().endswith('.csv'), zip_ref.namelist()))
        csv_path = os.path.join(download_dir, csv_file)
        zip_ref.extract(csv_file, download_dir)

    print(f"已解壓縮 CSV 檔案：{csv_path}")

    # Load with pandas: default (UTF-8) decoding first, Big5 as the fallback.
    try:
        df = pd.read_csv(csv_path)
    except UnicodeDecodeError:
        df = pd.read_csv(csv_path, encoding='big5')

    print(f"CSV 檔案已讀取，共 {len(df)} 筆資料")

except Exception as e:
    print(f"處理過程中發生錯誤：{e}")

# 如果你需要對 DataFrame 進行其他處理
# 例如：儲存為新的 CSV 檔案
# df.to_csv(os.path.join(download_dir, 'processed.csv'), index=False)

In [None]:
# Load the extracted Big5 CSV and strip whitespace from the product-code column.
csv_path = os.path.join(download_dir,'Daily_2024_10_28.csv')
df = (
    pd.read_csv(csv_path, encoding='big5')
    .assign(**{'商品代號': lambda frame: frame['商品代號'].str.strip()})
)
df

In [None]:
# Keep only the TX rows. .copy() gives an independent frame, so adding columns
# to df_tx afterwards does not trigger SettingWithCopyWarning.
df_tx = df[df['商品代號']== 'TX'].copy()
df_tx

In [None]:
# Build a timestamp column from the trade date and the trade time, left-padding the time to six digits (HHMMSS).
df_tx['datetime'] = pd.to_datetime(df_tx['成交日期'].astype(str) + ' ' + df_tx['成交時間'].astype(str).str.zfill(6),format='%Y%m%d %H%M%S')
df_tx

In [None]:
# Take the smallest expiry value, compared as whitespace-stripped strings, as the front month.
front_month = df_tx['到期月份(週別)'].str.strip().min()
front_month

In [None]:
# Keep only the rows whose stripped expiry value equals front_month.
df_tx = df_tx[df_tx['到期月份(週別)'].str.strip() == front_month]
df_tx

In [9]:
#df_tx = df_tx[(df_tx['datetime'].dt.time >= start_time) & (df_tx['datetime'].dt.time <= end_time)]

# Use the 'datetime' column as the index, modifying df_tx in place.
# NOTE(review): running this cell a second time fails, because the column has
# already been moved into the index.
df_tx.set_index('datetime',inplace=True)

In [None]:
# Show the indexed tick frame as the cell output.
df_tx

In [None]:
# Ad-hoc check: select the rows whose trade price is exactly 23538.
price_filter = df_tx[df_tx['成交價格'] == 23538]
price_filter

In [11]:
# Aggregate the tick prices into 1-minute bars; each (label, function) pair
# names one output column and the aggregation that fills it.
ohlc = df_tx.resample('1Min').agg({
    '成交價格': list(zip(
        ['開盤價', '最高價', '最低價', '收盤價'],
        ['first', 'max', 'min', 'last'],
    ))
})

In [None]:
# Show the aggregated bars as the cell output.
ohlc

In [None]:
# Drop the bars that contain missing values.
t = ohlc.dropna()
t

In [17]:
# Move the index back into a regular column, modifying t in place.
t.reset_index(inplace=True)

In [None]:
# Shift every timestamp forward by one minute.
# NOTE(review): re-running this cell adds another minute each time.
t['datetime'] = t['datetime'] + pd.Timedelta(minutes=1)
t

In [None]:
# Show the first 653 tick rows.
df_tx.head(653)

In [14]:
# Export the TX ticks to dailydata/TX_tick.csv as UTF-8 with a BOM.
# NOTE(review): index=False leaves the frame's index out of the file — confirm
# that this is intended.
test = os.path.join(download_dir, 'TX_tick.csv')
df_tx.to_csv(test, encoding='utf-8-sig', index=False)

In [None]:
# Group the ticks into 1-minute buckets.
tpd = df_tx.groupby(pd.Grouper(freq='1Min'))

In [None]:
# One column per bar field: apply the named aggregation to the grouped trade price.
tt = pd.DataFrame({
    label: getattr(tpd['成交價格'], how)()
    for label, how in (
        ('開盤價', 'first'),
        ('最高價', 'max'),
        ('最低價', 'min'),
        ('收盤價', 'last'),
    )
})
tt

In [9]:
# Aggregate the tick prices into 1-minute open/high/low/close bars.
ohlc = df_tx.resample('1Min').agg({
    '成交價格': [
        ('開盤價', 'first'),
        ('最高價', 'max'),
        ('最低價', 'min'),
        ('收盤價', 'last')
    ]
})

In [None]:
# Show the aggregated bars as the cell output.
ohlc

In [None]:
# Download the archive to save_dir, then print the local file name and the response headers.
filename,headers = urlretrieve(download_path,save_dir)
print(f"檔案下載到:{filename}")
print(f"檔案資訊:{headers}")

In [3]:
# Read the first .rpt member of the downloaded archive as Big5 text.
with zipfile.ZipFile(save_dir,'r') as zip_ref:
    file_list = zip_ref.namelist()
    rpt_file = next((f for f in file_list if f.lower().endswith('.rpt')),None)

    if not rpt_file:
        print('無檔案')
        # Stop with a clear error. Previously execution fell through to
        # zip_ref.read(None), which failed with an unrelated-looking message.
        raise FileNotFoundError(f"ZIP 內找不到 .rpt 檔案：{save_dir}")

    content = zip_ref.read(rpt_file).decode('big5')


In [None]:
# Print the decoded report text.
print(content)

In [None]:
# Split the report text into lines and each line into comma-separated fields.
rows = [row.strip().split(',') for row in content.strip().split('\n')]
columns = ['日期', '商品代號', '到期月份', '時間', '價格', '數量']
# Skip the first (header) row and keep only the first six fields of each row.
df = pd.DataFrame([row[:6] for row in rows[1:]], columns=columns)
# Keep only TX. .copy() makes the filtered frame independent, so columns can be
# added to it afterwards without SettingWithCopyWarning.
df = df[df['商品代號'].str.strip() == 'TX'].copy()
df

In [None]:
# Convert the price to a number; values that cannot be parsed become NaN.
df['價格'] = pd.to_numeric(df['價格'], errors='coerce')
        
# Combine the date and time text into one timestamp column.
df['完整時間'] = pd.to_datetime(
            df['日期'] + ' ' + df['時間'], 
            format='%Y%m%d %H%M%S'
        )
df

In [None]:
# Floor every timestamp to the start of its minute.
df['分鐘'] = df['完整時間'].dt.floor('1min')
        
# Compute first/max/min/last of the price for each minute bucket.
ohlc = df.groupby('分鐘').agg({
            '價格': ['first', 'max', 'min', 'last']
        }).reset_index()
# Separate date (YYYY/MM/DD) and time (HHMM) text columns.
ohlc['日期'] = ohlc['分鐘'].dt.strftime('%Y/%m/%d')
ohlc['時間'] = ohlc['分鐘'].dt.strftime('%H%M')        
ohlc

In [None]:
# Show the tick frame as the cell output.
df

In [None]:
# Assemble the flat output table: date, time, then open/high/low/close taken
# from the first/max/min/last price aggregates.
result = pd.DataFrame({
            '日期': ohlc['日期'],
            '時間': ohlc['時間'],
            '開盤價': ohlc['價格']['first'],
            '最高價': ohlc['價格']['max'],
            '最低價': ohlc['價格']['min'],
            '收盤價': ohlc['價格']['last']
        })
result

In [None]:
# Show the current working directory.
os.getcwd()

In [None]:
today = datetime.today()
# Date cursor: starts at today and moves back one calendar day per iteration.
yesterday = today

# Request the daily archive for each of the last 60 calendar days, newest first.
for daynum in range(1,61):
    # Build the file name from the moving cursor. Previously it was built from
    # `today`, which never changes, so the same file was requested 60 times.
    path = yesterday.strftime('%Y_%m_%d')
    name = 'Daily_'+path+'.zip'
    print(name)
    try:
        # NOTE(review): no target path is given and the return value is
        # discarded, so the location of the downloaded file is not kept.
        urlretrieve('https://www.taifex.com.tw/file/taifex/Dailydownload/Dailydownload/'+name)
    except IOError as IOE:
        print('fault')

    yesterday = yesterday - timedelta(1)