In [2]:
import os 
import json
import requests
import pandas as pd
from io import StringIO
from time import sleep
from random import randint
from datetime import datetime, timedelta

In [8]:
def toNumeric(x):
    """Convert a value to ``float``, tolerating thousands separators.

    The value is stringified, every ``,`` is removed, and the result is
    parsed with ``float``.

    Args:
        x: Any value; typically a string such as ``'1,234.50'``.

    Returns:
        The parsed ``float``, or the sentinel ``-1`` when the text is not a
        valid number (for example ``'--'`` or ``None``).
    """
    try:
        return float(str(x).replace(',', ''))
    except ValueError:
        # Only parse failures map to the sentinel. A bare ``except`` would
        # also swallow KeyboardInterrupt/SystemExit raised mid-conversion.
        return -1
    
def strToFloat(pdf, num_cols):
    """Return a copy of ``pdf`` with the given columns parsed by ``toNumeric``.

    Args:
        pdf: Source ``pandas.DataFrame``; it is not modified.
        num_cols: Iterable of column names to convert.

    Returns:
        A new DataFrame in which every column in ``num_cols`` holds the
        result of applying ``toNumeric`` to each original value.
    """
    tmp = pdf.copy()
    for col in num_cols:
        # Plain column assignment replaces the whole column, so it takes the
        # numeric dtype of the converted Series. ``tmp.loc[:, col] = ...``
        # writes in place on pandas >= 2.0 and would leave an object column
        # as object dtype.
        tmp[col] = tmp[col].apply(toNumeric)
    return tmp

def toRocYear(date_str, sep='/'):
    """Rewrite the leading year of a date string by subtracting 1911.

    Args:
        date_str: Date text whose first ``sep``-separated field is the year,
            e.g. ``'2020/07/15'``.
        sep: Field separator used in ``date_str``.

    Returns:
        ``date_str`` with only its first field replaced by ``year - 1911``;
        all remaining fields are returned untouched.

    Raises:
        ValueError: If the first field cannot be parsed as an integer.
    """
    YEAR_OFFSET = 1911
    fields = date_str.split(sep)
    # Rebuild from the split fields rather than using ``str.replace`` on the
    # whole string, which would also rewrite any later occurrence of the
    # same digits.
    fields[0] = str(int(fields[0]) - YEAR_OFFSET)
    return sep.join(fields)

# Download daily closing quotes from TPEx and TWSE for every weekday in
# [start_dt, end_dt] and append one line per stock to
# <output_path>/<stock code>.csv.
# NOTE(review): re-running this cell for a date that was already downloaded
# appends duplicate rows; nothing here de-duplicates.
start_dt = '2020/07/15'
end_dt = '2020/07/15'
output_path = "/Users/fang/stock_data/basic_data"
# output_path = "C:\\Users\\Fang\\PycharmProjects\\股票分析\\stock_data\\basic_data"
# output_path = "C:\\Users\\Flora\\Documents\\股票分析\\stock_data\\basic_data"

# exist_ok=True already covers the "directory exists" case.
os.makedirs(output_path, exist_ok=True)

dt = datetime.strptime(start_dt, '%Y/%m/%d')
end_dt = datetime.strptime(end_dt, '%Y/%m/%d')

tpex_url = 'https://www.tpex.org.tw/web/stock/aftertrading/daily_close_quotes/stk_quote_result.php?l=zh-tw&d=%s'
twse_url = 'https://www.twse.com.tw/exchangeReport/MI_INDEX?response=csv&date=%s&type=ALL'
# Column names assigned, in order, to each row of the TPEx JSON `aaData` list.
tpex_header = ['股票代號', '名稱', '收盤價', '漲跌價差', '開盤價', '最高價', '最低價', '均價', '成交股數', '成交金額', 
               '成交筆數', '最後買價', '最後買量', '最後賣價', '最後賣量', '發行股數', '次日參考價', '次日漲停價', '次日跌停價']
# Columns kept from both exchanges before the two frames are combined.
header_list = ['股票代號', '成交股數', '成交金額', '開盤價', '最高價', '最低價', '收盤價', '漲跌價差', '成交筆數']
# Header line written once at the top of each newly created per-stock CSV.
header_str = '股票代號,日期,成交股數,成交金額,開盤價,最高價,最低價,收盤價,漲跌價差,漲跌幅,成交筆數\n'
numeric_cols = ['收盤價', '開盤價', '漲跌價差', '最高價', '最低價', '成交股數', '成交金額', '成交筆數']
# Seconds to wait for each HTTP response; without it a stalled server blocks forever.
request_timeout = 30

while dt <= end_dt:
    # weekday() is 0-4 for Monday-Friday; weekends are skipped without any request.
    if dt.weekday() < 5:
        print(dt.strftime('%Y/%m/%d'))

        # The TPEx URL is queried with the year offset by 1911 (see toRocYear).
        roc_dt = toRocYear(dt.strftime('%Y/%m/%d'))
        tpex_resp = requests.get(tpex_url % roc_dt, timeout=request_timeout)
        json_obj = json.loads(tpex_resp.text)
        aaData = json_obj['aaData']
        tpex_pdf = pd.DataFrame(aaData, columns=tpex_header)
        # Keep only rows whose code is exactly four characters long.
        tpex_pdf = tpex_pdf[tpex_pdf['股票代號'].str.len() == 4]
        tpex_pdf = tpex_pdf[header_list]

        twse_resp = requests.get(twse_url % dt.strftime('%Y%m%d'), timeout=request_timeout)
        if twse_resp.text == '':
            # Empty TWSE body: nothing is written for this day.
            dt = dt + timedelta(days=1)
            sleep(randint(3, 5))
            continue

        # Locate the CSV line containing the quote-table header. The -1 offset
        # is kept from the original code.
        # NOTE(review): presumably compensates for a line pandas skips - confirm.
        twse_lines = twse_resp.text.split("\n")
        twse_header_row = ["證券代號" in line for line in twse_lines].index(True) - 1
        twse_pdf = pd.read_csv(StringIO(twse_resp.text.replace("=", "")), header=twse_header_row)
        # .copy() so the column assignments below act on an independent frame
        # instead of a filtered view (avoids SettingWithCopyWarning).
        twse_pdf = twse_pdf[twse_pdf['證券代號'].str.len() == 4].copy()
        # Fold the separate sign column into the price difference.
        twse_pdf['漲跌(+/-)'] = twse_pdf['漲跌(+/-)'].map(lambda x: 1 if x=='+' else -1)
        twse_pdf['漲跌價差'] = twse_pdf['漲跌價差']*twse_pdf['漲跌(+/-)']
        twse_pdf = twse_pdf.rename(columns={'證券代號':'股票代號', '證券名稱':'名稱'})
        twse_pdf = twse_pdf[header_list]

        # DataFrame.append was removed in pandas 2.0; pd.concat is the supported
        # equivalent. The row index is not used below, so it is renumbered.
        pdf = pd.concat([tpex_pdf, twse_pdf], ignore_index=True)
        pdf = strToFloat(pdf, numeric_cols)
        # Force float dtype so the arithmetic and round() below are numeric
        # regardless of the dtype strToFloat leaves behind.
        pdf = pdf.astype({col: float for col in numeric_cols})
        pdf['前日收盤價'] = pdf['收盤價']-pdf['漲跌價差']
        pdf['漲跌幅'] = pdf['漲跌價差']/pdf['前日收盤價']*100
        pdf = pdf.round(2)

        date_label = dt.strftime('%Y-%m-%d')
        for _, data in pdf.iterrows():
            stock_no = data['股票代號']
            # Field order matches header_str.
            csv_line = "{},{},{},{},{},{},{},{},{},{},{}\n".format(
                stock_no, date_label, data['成交股數'], data['成交金額'],
                data['開盤價'], data['最高價'], data['最低價'], data['收盤價'],
                data['漲跌價差'], data['漲跌幅'], data['成交筆數'])
            file_path = os.path.join(output_path, stock_no + '.csv')

            is_new_file = not os.path.exists(file_path)
            # NOTE(review): no explicit encoding is passed, so the non-ASCII
            # header is written in the platform default encoding.
            with open(file_path, 'a') as file:
                if is_new_file:
                    file.write(header_str)
                file.write(csv_line)
    dt = dt + timedelta(days=1)
    # Random pause between days to space out requests.
    sleep(randint(1, 4))

2020/07/15


In [24]:
# Request headers carrying only a desktop Chrome User-Agent string.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/83.0.4103.116 Safari/537.36'
    ),
}


In [99]:
# Browser-like headers for a GET of the wantgoo.com stock-class page.
# The cookie belongs to one browser session and acts as a credential, so it is
# read from the WANTGOO_COOKIE environment variable instead of being stored in
# the notebook. When the variable is unset, no cookie header is sent.
headers = {'cache-control': 'max-age=0',
           'sec-fetch-dest': 'document',
           'sec-fetch-mode': 'navigate',
           'sec-fetch-site': 'none',
           'sec-fetch-user': '?1',
           'upgrade-insecure-requests': '1',
           'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36',
           'Content-Type':'application/json;charset=UTF-8'}
wantgoo_cookie = os.environ.get('WANTGOO_COOKIE')
if wantgoo_cookie:
    headers['cookie'] = wantgoo_cookie
session = requests.Session()
# timeout keeps a stalled connection from blocking the kernel indefinitely.
resp = session.get("https://www.wantgoo.com/stock/twstock/class?id=2", headers=headers, timeout=30)

In [105]:
# Start a fresh requests session, replacing any `session` created by an earlier cell.
session = requests.Session()

In [108]:
# XHR-style headers for a POST to the wantgoo.com `agentsataapi` endpoint.
# The cookie belongs to one browser session and acts as a credential, so it is
# read from the WANTGOO_COOKIE environment variable instead of being stored in
# the notebook. When the variable is unset, no cookie header is sent.
headers = {'cache-control': 'max-age=0',
           'sec-fetch-dest': 'empty',
           'sec-fetch-mode': 'cors',
           'sec-fetch-site': 'same-origin',
           'upgrade-insecure-requests': '1',
           'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36',
           'x-requested-with':'XMLHttpRequest'}
wantgoo_cookie = os.environ.get('WANTGOO_COOKIE')
if wantgoo_cookie:
    headers['cookie'] = wantgoo_cookie
# Form fields sent with the POST.
# NOTE(review): presumably stock 2330 at broker 9800 for a single day - confirm
# the meaning of `types` against the site.
payload = {'types':'8', 
           'stockno': 2330, 
           'brokerid':9800,
           'dtStart': '20200715',
           'dtEnd': '20200715'}
# Uses the `session` object created by an earlier cell; timeout keeps a stalled
# connection from blocking the kernel indefinitely.
r = session.post('https://www.wantgoo.com/stock/adl/agentsataapi', headers=headers, data=payload, timeout=30)
r

<Response [200]>

In [109]:
# Show the raw body text of the response object `r`.
r.text

'[{"券商名稱":0,"buycount":148,"buyprice":369,"sellcount":48,"sellprice":369,"css":"up","balance":"+100","avgprice":369},{"券商名稱":0,"buycount":138,"buyprice":368.5,"sellcount":55,"sellprice":368.5,"css":"up","balance":"+83","avgprice":368.5},{"券商名稱":0,"buycount":55,"buyprice":362,"sellcount":30,"sellprice":362,"css":"up","balance":"+25","avgprice":362},{"券商名稱":0,"buycount":24,"buyprice":366.5,"sellcount":2,"sellprice":366.5,"css":"up","balance":"+22","avgprice":366.5},{"券商名稱":0,"buycount":28,"buyprice":365.5,"sellcount":16,"sellprice":365.5,"css":"up","balance":"+12","avgprice":365.5},{"券商名稱":0,"buycount":1,"buyprice":361.5,"sellcount":1,"sellprice":361.5,"css":"up","balance":"+0","avgprice":361.5},{"券商名稱":0,"buycount":92,"buyprice":368,"sellcount":97,"sellprice":368,"css":"dn","balance":"-5","avgprice":368},{"券商名稱":0,"buycount":16,"buyprice":370,"sellcount":26,"sellprice":370,"css":"dn","balance":"-10","avgprice":370},{"券商名稱":0,"buycount":40,"buyprice":367.5,"sellcount":62,"sellprice":367.

In [89]:
# NOTE(review): `a` is not assigned in any cell visible here, and this cell's
# execution count (89) is lower than the cells above it. On a fresh kernel this
# would presumably raise NameError - confirm where `a` came from or remove the cell.
a

'[{"券商名稱":0,"buycount":1084,"buyprice":368,"sellcount":605,"sellprice":368,"css":"up","balance":"+479","avgprice":368},{"券商名稱":0,"buycount":777,"buyprice":355,"sellcount":348,"sellprice":355,"css":"up","balance":"+429","avgprice":355},{"券商名稱":0,"buycount":620,"buyprice":368.5,"sellcount":307,"sellprice":368.5,"css":"up","balance":"+313","avgprice":368.5},{"券商名稱":0,"buycount":600,"buyprice":367.5,"sellcount":320,"sellprice":367.5,"css":"up","balance":"+280","avgprice":367.5},{"券商名稱":0,"buycount":479,"buyprice":356.5,"sellcount":285,"sellprice":356.5,"css":"up","balance":"+194","avgprice":356.5},{"券商名稱":0,"buycount":642,"buyprice":369.5,"sellcount":490,"sellprice":369.5,"css":"up","balance":"+152","avgprice":369.5},{"券商名稱":0,"buycount":682,"buyprice":369,"sellcount":532,"sellprice":369,"css":"up","balance":"+150","avgprice":369},{"券商名稱":0,"buycount":693,"buyprice":357,"sellcount":562,"sellprice":357,"css":"up","balance":"+131","avgprice":357},{"券商名稱":0,"buycount":170,"buyprice":365.5,"se

'[{"券商名稱":0,"buycount":1084,"buyprice":368,"sellcount":605,"sellprice":368,"css":"up","balance":"+479","avgprice":368},{"券商名稱":0,"buycount":777,"buyprice":355,"sellcount":348,"sellprice":355,"css":"up","balance":"+429","avgprice":355},{"券商名稱":0,"buycount":620,"buyprice":368.5,"sellcount":307,"sellprice":368.5,"css":"up","balance":"+313","avgprice":368.5},{"券商名稱":0,"buycount":600,"buyprice":367.5,"sellcount":320,"sellprice":367.5,"css":"up","balance":"+280","avgprice":367.5},{"券商名稱":0,"buycount":479,"buyprice":356.5,"sellcount":285,"sellprice":356.5,"css":"up","balance":"+194","avgprice":356.5},{"券商名稱":0,"buycount":642,"buyprice":369.5,"sellcount":490,"sellprice":369.5,"css":"up","balance":"+152","avgprice":369.5},{"券商名稱":0,"buycount":682,"buyprice":369,"sellcount":532,"sellprice":369,"css":"up","balance":"+150","avgprice":369},{"券商名稱":0,"buycount":693,"buyprice":357,"sellcount":562,"sellprice":357,"css":"up","balance":"+131","avgprice":357},{"券商名稱":0,"buycount":170,"buyprice":365.5,"se