In [None]:
import os 
import json
import requests
import pandas as pd
from io import StringIO
from time import sleep
from random import randint
from datetime import datetime, timedelta

In [2]:
def toNumeric(x):
    """Convert a quote cell to ``float``, tolerating thousands separators.

    The value is stringified, commas are removed, and the result is parsed
    with ``float``.

    Args:
        x: Any value; typically a string such as ``'1,234.5'``.

    Returns:
        The parsed ``float``, or the sentinel ``-1`` when the text is not a
        number (for example ``'---'``, ``''`` or ``None``).
    """
    try:
        return float(str(x).replace(',', ''))
    except (TypeError, ValueError):
        # Only conversion failures map to the sentinel; KeyboardInterrupt
        # and SystemExit must still be able to stop a long-running apply().
        return -1
    
def strToFloat(pdf, num_cols):
    """Return a copy of ``pdf`` with the given columns parsed by ``toNumeric``.

    Args:
        pdf: Source DataFrame; it is not modified.
        num_cols: Iterable of column labels to convert.

    Returns:
        A new DataFrame in which every column named in ``num_cols`` holds the
        ``toNumeric`` result for each cell (``-1`` where parsing failed).
    """
    tmp = pdf.copy()
    for col in num_cols:
        # Assign the column directly rather than through ``.loc[:, col]``:
        # the latter writes into the existing (object-dtype) column in place
        # on recent pandas, so the result would never become a float column.
        tmp[col] = tmp[col].apply(toNumeric)
    return tmp

def toRocYear(date_str, sep='/'):
    """Rewrite the leading year of a date string as a Republic-of-China year.

    The text before the first ``sep`` is read as an integer year and reduced
    by 1911; everything after it is returned unchanged.

    Args:
        date_str: Date text whose first ``sep``-delimited field is the year,
            e.g. ``'2021/03/05'``.
        sep: Field separator used in ``date_str``.

    Returns:
        ``date_str`` with only its first field replaced, e.g. ``'110/03/05'``.

    Raises:
        ValueError: If the first field is not an integer or ``sep`` is empty.
    """
    YEAR_OFFSET = 1911
    # Rebuild from the split pieces instead of str.replace(): replace() would
    # also rewrite any later occurrence of the same digits in the string.
    ori_year, found_sep, rest = date_str.partition(sep)
    new_year = str(int(ori_year) - YEAR_OFFSET)
    return new_year + found_sep + rest

# --- Run configuration ------------------------------------------------------
# Both bounds default to today, so a plain run fetches only the current day.
now = datetime.now()
start_dt = now.strftime('%Y/%m/%d')
end_dt = now.strftime('%Y/%m/%d')
output_path = "/Users/fang/stock_data/basic_data"
# output_path = "C:\\Users\\Fang\\PycharmProjects\\股票分析\\stock_data\\basic_data"
# output_path = "C:\\Users\\Flora\\Documents\\股票分析\\stock_data\\basic_data"

# exist_ok=True already covers the case where the directory is present.
os.makedirs(output_path, exist_ok=True)

dt = datetime.strptime(start_dt, '%Y/%m/%d')
end_dt = datetime.strptime(end_dt, '%Y/%m/%d')

tpex_url = 'https://www.tpex.org.tw/web/stock/aftertrading/daily_close_quotes/stk_quote_result.php?l=zh-tw&d=%s'
twse_url = 'https://www.twse.com.tw/exchangeReport/MI_INDEX?response=csv&date=%s&type=ALL'
# Column labels applied to the rows in the TPEx response's 'aaData' list.
tpex_header = ['股票代號', '名稱', '收盤價', '漲跌價差', '開盤價', '最高價', '最低價', '均價', '成交股數', '成交金額', 
               '成交筆數', '最後買價', '最後買量', '最後賣價', '最後賣量', '發行股數', '次日參考價', '次日漲停價', '次日跌停價']
# Columns kept from both sources before they are combined.
header_list = ['股票代號', '成交股數', '成交金額', '開盤價', '最高價', '最低價', '收盤價', '漲跌價差', '成交筆數']
# First line written to every newly created per-stock CSV file.
header_str = '股票代號,日期,成交股數,成交金額,開盤價,最高價,最低價,收盤價,漲跌價差,漲跌幅,成交筆數\n'

while dt <= end_dt:
    # weekday() is 0-4 for Monday-Friday; Saturdays and Sundays are skipped.
    if dt.weekday() < 5:
        print(dt.strftime('%Y/%m/%d'))

        # TPEx request: the date is passed through toRocYear and the reply
        # is parsed as JSON.
        roc_dt = toRocYear(dt.strftime('%Y/%m/%d'))
        tpex_resp = requests.get(tpex_url % roc_dt)
        json_obj = json.loads(tpex_resp.text)
        aaData = json_obj['aaData']
        tpex_pdf = pd.DataFrame(aaData, columns=tpex_header)
        # Keep only rows whose code is exactly four characters long.
        tpex_pdf = tpex_pdf[tpex_pdf['股票代號'].str.len() == 4]
        tpex_pdf = tpex_pdf[header_list]

        # TWSE request: the reply is CSV text.
        twse_resp = requests.get(twse_url % dt.strftime('%Y%m%d'))
        if twse_resp.text == '':
            # Empty body: nothing is written for this date.
            dt = dt + timedelta(days=1)
            sleep(randint(3, 5))
            continue

        twse_text = twse_resp.text
        # Locate the line carrying the column names; the -1 offset is kept
        # exactly as the original computed it.
        header_row = ["證券代號" in line for line in twse_text.split("\n")].index(True) - 1
        twse_pdf = pd.read_csv(StringIO(twse_text.replace("=", "")), header=header_row)
        # .copy() so the column assignments below act on an independent frame
        # rather than on a slice of the parsed CSV.
        twse_pdf = twse_pdf[twse_pdf['證券代號'].str.len() == 4].copy()
        # Turn the +/- marker into a sign and apply it to the price change.
        twse_pdf['漲跌(+/-)'] = twse_pdf['漲跌(+/-)'].map(lambda x: 1 if x == '+' else -1)
        twse_pdf['漲跌價差'] = twse_pdf['漲跌價差'] * twse_pdf['漲跌(+/-)']
        twse_pdf = twse_pdf.rename(columns={'證券代號': '股票代號', '證券名稱': '名稱'})
        twse_pdf = twse_pdf[header_list]

        # DataFrame.append was removed in pandas 2.0; pd.concat is the
        # supported way to stack the two frames. The row index is not used
        # afterwards, so it is simply renumbered.
        pdf = pd.concat([tpex_pdf, twse_pdf], ignore_index=True)
        pdf = strToFloat(pdf, ['收盤價', '開盤價', '漲跌價差', '最高價', '最低價', '成交股數', '成交金額', '成交筆數'])
        # Previous close = close - change; change ratio is a percentage of it.
        pdf['前日收盤價'] = pdf['收盤價'] - pdf['漲跌價差']
        pdf['漲跌幅'] = pdf['漲跌價差'] / pdf['前日收盤價'] * 100
        pdf = pdf.round(2)

        date_text = dt.strftime('%Y-%m-%d')
        for _, data in pdf.iterrows():
            stock_no = data['股票代號']
            volume = data['成交股數']
            transaction = data['成交筆數']
            turnover = data['成交金額']
            close_price = data['收盤價']
            open_price = data['開盤價']
            high_price = data['最高價']
            low_price = data['最低價']
            change_price = data['漲跌價差']
            change_ratio = data['漲跌幅']
            line = "{},{},{},{},{},{},{},{},{},{},{}\n".format(
                stock_no, date_text, volume, turnover,
                open_price, high_price, low_price, close_price,
                change_price, change_ratio, transaction)
            file_path = os.path.join(output_path, stock_no + '.csv')

            # One CSV per stock: the header goes in only when the file is
            # first created. The with-block closes the handle even if a
            # write fails.
            # NOTE(review): no encoding is passed, so the platform default is
            # used; pin one only after checking how existing files were written.
            is_new_file = not os.path.exists(file_path)
            with open(file_path, 'a') as file:
                if is_new_file:
                    file.write(header_str)
                file.write(line)
    dt = dt + timedelta(days=1)
    # Random pause between days so requests are spaced out.
    sleep(randint(1, 4))