In [1]:
import requests
from bs4 import BeautifulSoup
from collections import defaultdict
import yfinance as yf
import os
from datetime import datetime as DT


# HTTP headers sent with every sector-page request.
HEADER = {"USER-AGENT": "Mozilla/5.0"}

# URL template of a sector listing page; ``%d`` receives the numeric sector id.
FMT = "https://tw.stock.yahoo.com/class-quote?sectorId=%d&exchange=TAI"

# Sector id -> sector name. The name is used as the key of the scraped result
# and as the per-sector output sub-directory. Insertion order is the order in
# which the sectors are requested and reported, so it is kept as written.
SECTORS = {
    1: "cement",
    2: "food",
    3: "plastic",
    4: "textile",
    6: "electric",
    7: "electricalcable",
    37: "chemistry",
    38: "biotech",
    9: "glass",
    10: "paper",
    11: "steel",
    12: "rubber",
    13: "motor",
    40: "semiconductor",
    41: "computer",
    42: "photoelectric",
    43: "communication",
    44: "electronicparts",
    45: "electricappliance",
    46: "itservice",
    47: "otherelectric",
    19: "construction",
    20: "shipping",
    21: "sightseeing",
    22: "finance",
    24: "departmentstore",
    39: "gasoline",
}

# Root output directory (under the current working directory), the name of the
# index-map file written inside it, and the file listing the extra symbols.
DIR = f"{os.getcwd()}/stock"
INDEX_MAP_FILE = "index_map_tw.txt"
FEAT_INDEX_FILE = "quotes.txt"

In [2]:
# Build quotes.txt: keep the lines of index.txt that contain '@' and write the
# second space-separated field of each one to quotes.txt (overwriting it).
!grep @ index.txt | awk -F ' ' '{print $2}' > quotes.txt

In [3]:
def log(fmt):
    """Write ``fmt`` to standard output behind an ``[INFO]`` tag.

    The tag and the message are separated by a single space; ``fmt`` may be
    any object that ``print`` can render.
    """
    prefix = "[INFO]"
    print(prefix, fmt)


In [4]:
def get_yh_text(sector, timeout=30):
    """Download the class-quote page of one sector.

    Args:
        sector: Numeric sector id substituted into ``FMT``.
        timeout: Seconds handed to ``requests.get``. Without a timeout a
            stalled connection would block the whole scrape indefinitely.

    Returns:
        The response body as text when the status code is OK, otherwise
        ``None`` (the failing status is logged).

    Raises:
        requests.exceptions.RequestException: Propagated unchanged when the
            request itself fails (connection error, timeout, ...).
    """
    queryStr = FMT % sector
    log("GET :" + queryStr)
    r = requests.get(queryStr, headers=HEADER, timeout=timeout)
    if r.status_code == requests.codes.ok:
        return r.text
    # Make the failure visible instead of silently falling through to None.
    log(f"GET failed with HTTP status {r.status_code}: {queryStr}")
    return None

In [5]:
def get_indexes_from_sector(html):
    """Extract the ``(name, symbol)`` pairs listed on a sector page.

    Args:
        html: Page markup as returned by ``get_yh_text``; may be ``None`` or
            empty when the download failed.

    Returns:
        A list of ``(name_text, symbol_text)`` tuples in page order. The list
        is empty when there is no markup or the quotes table is not found.
    """
    if not html:
        log("No HTML to parse; returning an empty listing")
        return []

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(html, "html.parser")
    tbl = soup.find('div', class_='Pos(r) Ov(h) ClassQuotesTable')
    if tbl is None:
        # The lookup relies on exact CSS class strings; if the page layout
        # changes, report it rather than failing on ``None.find_all``.
        log("Quotes table not found in page; returning an empty listing")
        return []

    names = tbl.find_all('div', class_='Lh(20px) Fw(600) Fz(16px) Ell')
    symbols = tbl.find_all('span', class_='Fz(14px) C(#979ba7) Ell')
    # zip() pairs the n-th name element with the n-th symbol element.
    return [(name.text, symbol.text) for name, symbol in zip(names, symbols)]

In [6]:
def get_indexes_from_yh():
    """Scrape every sector in ``SECTORS``.

    Returns:
        A dict keyed by sector name whose values are the lists produced by
        ``get_indexes_from_sector`` for that sector's page.
    """
    return {
        sector_name: get_indexes_from_sector(get_yh_text(sector_id))
        for sector_id, sector_name in SECTORS.items()
    }
        

In [7]:
def print_indexes_statics(indexes, silent=False):
    """Report how many entries each category holds.

    Args:
        indexes: Mapping of category name to a sized collection of entries.
        silent: When true, nothing is printed.

    Returns:
        The total number of entries across all categories. Unless ``silent``,
        one ``<position> <category> <count>`` line is printed per category,
        with positions starting at 1.
    """
    sizes = {category: len(members) for category, members in indexes.items()}
    if not silent:
        for position, (category, size) in enumerate(sizes.items(), start=1):
            print(position, category, size)
    return sum(sizes.values())

In [18]:
def query(symbol, period="99y"):
    """Fetch the price history of ``symbol`` through yfinance.

    Args:
        symbol: Ticker symbol handed to ``yf.Ticker``.
        period: Value forwarded as the ``period`` argument of ``history``.

    Returns:
        Whatever ``Ticker.history`` returns for that period.
    """
    return yf.Ticker(symbol).history(period=period)

def dump(dfData, path):
    """Persist ``dfData`` at ``path`` by calling its ``to_pickle`` method."""
    dfData.to_pickle(path=path)


In [9]:
def collect_data_tw(indexes):
    """Download and pickle the history of every scraped symbol.

    Writes ``DIR/INDEX_MAP_FILE`` with one ``<category>=<count>`` header per
    category followed by one ``<name>,<symbol>`` line per entry, and stores
    each non-empty history at ``DIR/<category>/<symbol>.pickle``.

    Args:
        indexes: Mapping of category name to a list of ``(name, symbol)``
            pairs, as produced by ``get_indexes_from_yh``.

    Returns:
        None. Progress is reported through ``log``.
    """
    summ = print_indexes_statics(indexes, True)
    indexFName = DIR + "/" + INDEX_MAP_FILE
    os.makedirs(DIR, exist_ok=True)

    totalCnt = 0
    # The context manager closes the map file even when a download raises.
    with open(indexFName, "w", encoding='utf8') as fp:
        log('Start ' + str(DT.now()))
        for cate, vec in indexes.items():
            fp.write(f"{cate}={len(vec)}\n")

            cateDir = DIR + "/" + cate
            os.makedirs(cateDir, exist_ok=True)

            for chn, symbol in vec:
                totalCnt += 1
                dest = cateDir + "/" + symbol + ".pickle"
                df = query(symbol)
                fp.write(f"{chn},{symbol}\n")
                if len(df.index) == 0:
                    # An empty history has no first row; skip it instead of
                    # letting ``df.index[0]`` abort the whole run.
                    log(f"{totalCnt}/{summ}: No data returned for {chn}:{symbol}, skipping")
                    continue
                log(f"Succeed to collect {chn}:{symbol}, data since {df.index[0]}")
                log(f"{totalCnt}/{summ}: Dumping {chn} {symbol} to {dest}")
                dump(df, dest)
    log('End ' + str(DT.now()))
            
        

In [10]:
# Scrape the (name, symbol) listing of every sector in SECTORS (one HTTP
# request per sector) and keep the result for the cells below.
indexes = get_indexes_from_yh()

[INFO] GET :https://tw.stock.yahoo.com/class-quote?sectorId=1&exchange=TAI
[INFO] GET :https://tw.stock.yahoo.com/class-quote?sectorId=2&exchange=TAI
[INFO] GET :https://tw.stock.yahoo.com/class-quote?sectorId=3&exchange=TAI
[INFO] GET :https://tw.stock.yahoo.com/class-quote?sectorId=4&exchange=TAI
[INFO] GET :https://tw.stock.yahoo.com/class-quote?sectorId=6&exchange=TAI
[INFO] GET :https://tw.stock.yahoo.com/class-quote?sectorId=7&exchange=TAI
[INFO] GET :https://tw.stock.yahoo.com/class-quote?sectorId=37&exchange=TAI
[INFO] GET :https://tw.stock.yahoo.com/class-quote?sectorId=38&exchange=TAI
[INFO] GET :https://tw.stock.yahoo.com/class-quote?sectorId=9&exchange=TAI
[INFO] GET :https://tw.stock.yahoo.com/class-quote?sectorId=10&exchange=TAI
[INFO] GET :https://tw.stock.yahoo.com/class-quote?sectorId=11&exchange=TAI
[INFO] GET :https://tw.stock.yahoo.com/class-quote?sectorId=12&exchange=TAI
[INFO] GET :https://tw.stock.yahoo.com/class-quote?sectorId=13&exchange=TAI
[INFO] GET :https:/

In [11]:
# Print the per-sector entry counts; the cell's value is the overall total.
print_indexes_statics(indexes)

1 cement 8
2 food 23
3 plastic 22
4 textile 30
5 electric 30
6 electricalcable 15
7 chemistry 28
8 biotech 30
9 glass 5
10 paper 7
11 steel 30
12 rubber 11
13 motor 30
14 semiconductor 30
15 computer 30
16 photoelectric 30
17 communication 30
18 electronicparts 30
19 electricappliance 23
20 itservice 11
21 otherelectric 30
22 construction 30
23 shipping 25
24 sightseeing 17
25 finance 30
26 departmentstore 19
27 gasoline 8


612

In [12]:
# Manual wall-clock timestamp, shown as the cell's value.
# NOTE(review): the first cell already has `from datetime import datetime as DT`,
# so this extra import looks redundant; `str(DT.now())` would give the same string.
import datetime
str(datetime.datetime.now())

'2023-07-05 00:41:02.427002'

In [13]:
# Download and pickle the history of every scraped symbol. This issues one
# query() call per symbol, so the cell is long-running.
collect_data_tw(indexes)

[INFO] Start 2023-07-05 00:41:05.012064
[INFO] Succeed to collect 台泥:1101.TW, data since 2000-01-04 00:00:00+08:00
[INFO] 1/612: Dumping 台泥 1101.TW to C:\Users\Public\Choi\workspace\data\stock/stock/cement/1101.TW.pickle
[INFO] Succeed to collect 亞泥:1102.TW, data since 2000-01-04 00:00:00+08:00
[INFO] 2/612: Dumping 亞泥 1102.TW to C:\Users\Public\Choi\workspace\data\stock/stock/cement/1102.TW.pickle
[INFO] Succeed to collect 嘉泥:1103.TW, data since 2000-01-04 00:00:00+08:00
[INFO] 3/612: Dumping 嘉泥 1103.TW to C:\Users\Public\Choi\workspace\data\stock/stock/cement/1103.TW.pickle
[INFO] Succeed to collect 環泥:1104.TW, data since 2000-01-04 00:00:00+08:00
[INFO] 4/612: Dumping 環泥 1104.TW to C:\Users\Public\Choi\workspace\data\stock/stock/cement/1104.TW.pickle
[INFO] Succeed to collect 幸福:1108.TW, data since 2000-01-04 00:00:00+08:00
[INFO] 5/612: Dumping 幸福 1108.TW to C:\Users\Public\Choi\workspace\data\stock/stock/cement/1108.TW.pickle
[INFO] Succeed to collect 信大:1109.TW, data since 2000-0

[INFO] Succeed to collect 地球:1324.TW, data since 2000-01-04 00:00:00+08:00
[INFO] 46/612: Dumping 地球 1324.TW to C:\Users\Public\Choi\workspace\data\stock/stock/plastic/1324.TW.pickle
[INFO] Succeed to collect 恆大:1325.TW, data since 2000-01-04 00:00:00+08:00
[INFO] 47/612: Dumping 恆大 1325.TW to C:\Users\Public\Choi\workspace\data\stock/stock/plastic/1325.TW.pickle
[INFO] Succeed to collect 台化:1326.TW, data since 2000-01-04 00:00:00+08:00
[INFO] 48/612: Dumping 台化 1326.TW to C:\Users\Public\Choi\workspace\data\stock/stock/plastic/1326.TW.pickle
[INFO] Succeed to collect 再生-KY:1337.TW, data since 2011-08-17 00:00:00+08:00
[INFO] 49/612: Dumping 再生-KY 1337.TW to C:\Users\Public\Choi\workspace\data\stock/stock/plastic/1337.TW.pickle
[INFO] Succeed to collect 勝悅-KY:1340.TW, data since 2013-12-06 00:00:00+08:00
[INFO] 50/612: Dumping 勝悅-KY 1340.TW to C:\Users\Public\Choi\workspace\data\stock/stock/plastic/1340.TW.pickle
[INFO] Succeed to collect 富林-KY:1341.TW, data since 2018-12-24 00:00:00+0

[INFO] Succeed to collect 華城:1519.TW, data since 2000-01-04 00:00:00+08:00
[INFO] 91/612: Dumping 華城 1519.TW to C:\Users\Public\Choi\workspace\data\stock/stock/electric/1519.TW.pickle
[INFO] Succeed to collect 日馳:1526.TW, data since 2000-03-28 00:00:00+08:00
[INFO] 92/612: Dumping 日馳 1526.TW to C:\Users\Public\Choi\workspace\data\stock/stock/electric/1526.TW.pickle
[INFO] Succeed to collect 鑽全:1527.TW, data since 2000-01-04 00:00:00+08:00
[INFO] 93/612: Dumping 鑽全 1527.TW to C:\Users\Public\Choi\workspace\data\stock/stock/electric/1527.TW.pickle
[INFO] Succeed to collect 恩德:1528.TW, data since 2000-01-04 00:00:00+08:00
[INFO] 94/612: Dumping 恩德 1528.TW to C:\Users\Public\Choi\workspace\data\stock/stock/electric/1528.TW.pickle
[INFO] Succeed to collect 樂事綠能:1529.TW, data since 2000-01-04 00:00:00+08:00
[INFO] 95/612: Dumping 樂事綠能 1529.TW to C:\Users\Public\Choi\workspace\data\stock/stock/electric/1529.TW.pickle
[INFO] Succeed to collect 亞崴:1530.TW, data since 2000-01-04 00:00:00+08:00
[

[INFO] Succeed to collect 和桐:1714.TW, data since 2000-01-04 00:00:00+08:00
[INFO] 135/612: Dumping 和桐 1714.TW to C:\Users\Public\Choi\workspace\data\stock/stock/chemistry/1714.TW.pickle
[INFO] Succeed to collect 長興:1717.TW, data since 2000-01-04 00:00:00+08:00
[INFO] 136/612: Dumping 長興 1717.TW to C:\Users\Public\Choi\workspace\data\stock/stock/chemistry/1717.TW.pickle
[INFO] Succeed to collect 中纖:1718.TW, data since 2000-01-04 00:00:00+08:00
[INFO] 137/612: Dumping 中纖 1718.TW to C:\Users\Public\Choi\workspace\data\stock/stock/chemistry/1718.TW.pickle
[INFO] Succeed to collect 三晃:1721.TW, data since 2000-01-04 00:00:00+08:00
[INFO] 138/612: Dumping 三晃 1721.TW to C:\Users\Public\Choi\workspace\data\stock/stock/chemistry/1721.TW.pickle
[INFO] Succeed to collect 台肥:1722.TW, data since 2000-01-04 00:00:00+08:00
[INFO] 139/612: Dumping 台肥 1722.TW to C:\Users\Public\Choi\workspace\data\stock/stock/chemistry/1722.TW.pickle
[INFO] Succeed to collect 中碳:1723.TW, data since 2000-01-04 00:00:00+0

[INFO] Succeed to collect 訊映:4155.TW, data since 2012-06-20 00:00:00+08:00
[INFO] 180/612: Dumping 訊映 4155.TW to C:\Users\Public\Choi\workspace\data\stock/stock/biotech/4155.TW.pickle
[INFO] Succeed to collect 承業醫:4164.TW, data since 2012-10-03 00:00:00+08:00
[INFO] 181/612: Dumping 承業醫 4164.TW to C:\Users\Public\Choi\workspace\data\stock/stock/biotech/4164.TW.pickle
[INFO] Succeed to collect 佐登-KY:4190.TW, data since 2015-10-21 00:00:00+08:00
[INFO] 182/612: Dumping 佐登-KY 4190.TW to C:\Users\Public\Choi\workspace\data\stock/stock/biotech/4190.TW.pickle
[INFO] Succeed to collect 華廣:4737.TW, data since 2010-12-23 00:00:00+08:00
[INFO] 183/612: Dumping 華廣 4737.TW to C:\Users\Public\Choi\workspace\data\stock/stock/biotech/4737.TW.pickle
[INFO] Succeed to collect 台耀:4746.TW, data since 2011-03-01 00:00:00+08:00
[INFO] 184/612: Dumping 台耀 4746.TW to C:\Users\Public\Choi\workspace\data\stock/stock/biotech/4746.TW.pickle
[INFO] Succeed to collect 光麗-KY:6431.TW, data since 2014-12-04 00:00:00+

[INFO] Succeed to collect 長榮鋼:2211.TW, data since 2020-01-13 00:00:00+08:00
[INFO] 225/612: Dumping 長榮鋼 2211.TW to C:\Users\Public\Choi\workspace\data\stock/stock/steel/2211.TW.pickle
[INFO] Succeed to collect 豐達科:3004.TW, data since 2002-02-26 00:00:00+08:00
[INFO] 226/612: Dumping 豐達科 3004.TW to C:\Users\Public\Choi\workspace\data\stock/stock/steel/3004.TW.pickle
[INFO] Succeed to collect 三星:5007.TW, data since 2007-12-31 00:00:00+08:00
[INFO] 227/612: Dumping 三星 5007.TW to C:\Users\Public\Choi\workspace\data\stock/stock/steel/5007.TW.pickle
[INFO] Succeed to collect 東明-KY:5538.TW, data since 2013-12-16 00:00:00+08:00
[INFO] 228/612: Dumping 東明-KY 5538.TW to C:\Users\Public\Choi\workspace\data\stock/stock/steel/5538.TW.pickle
[INFO] Succeed to collect 南港:2101.TW, data since 2000-01-04 00:00:00+08:00
[INFO] 229/612: Dumping 南港 2101.TW to C:\Users\Public\Choi\workspace\data\stock/stock/rubber/2101.TW.pickle
[INFO] Succeed to collect 泰豐:2102.TW, data since 2000-01-04 00:00:00+08:00
[INF

[INFO] Succeed to collect 麗正:2302.TW, data since 2000-01-04 00:00:00+08:00
[INFO] 270/612: Dumping 麗正 2302.TW to C:\Users\Public\Choi\workspace\data\stock/stock/semiconductor/2302.TW.pickle
[INFO] Succeed to collect 聯電:2303.TW, data since 2000-01-04 00:00:00+08:00
[INFO] 271/612: Dumping 聯電 2303.TW to C:\Users\Public\Choi\workspace\data\stock/stock/semiconductor/2303.TW.pickle
[INFO] Succeed to collect 華泰:2329.TW, data since 2000-01-04 00:00:00+08:00
[INFO] 272/612: Dumping 華泰 2329.TW to C:\Users\Public\Choi\workspace\data\stock/stock/semiconductor/2329.TW.pickle
[INFO] Succeed to collect 台積電:2330.TW, data since 2000-01-04 00:00:00+08:00
[INFO] 273/612: Dumping 台積電 2330.TW to C:\Users\Public\Choi\workspace\data\stock/stock/semiconductor/2330.TW.pickle
[INFO] Succeed to collect 旺宏:2337.TW, data since 2000-01-04 00:00:00+08:00
[INFO] 274/612: Dumping 旺宏 2337.TW to C:\Users\Public\Choi\workspace\data\stock/stock/semiconductor/2337.TW.pickle
[INFO] Succeed to collect 光罩:2338.TW, data since

[INFO] Succeed to collect 廣達:2382.TW, data since 2000-01-04 00:00:00+08:00
[INFO] 314/612: Dumping 廣達 2382.TW to C:\Users\Public\Choi\workspace\data\stock/stock/computer/2382.TW.pickle
[INFO] Succeed to collect 精元:2387.TW, data since 2000-01-04 00:00:00+08:00
[INFO] 315/612: Dumping 精元 2387.TW to C:\Users\Public\Choi\workspace\data\stock/stock/computer/2387.TW.pickle
[INFO] Succeed to collect 研華:2395.TW, data since 2000-01-04 00:00:00+08:00
[INFO] 316/612: Dumping 研華 2395.TW to C:\Users\Public\Choi\workspace\data\stock/stock/computer/2395.TW.pickle
[INFO] Succeed to collect 友通:2397.TW, data since 2000-01-17 00:00:00+08:00
[INFO] 317/612: Dumping 友通 2397.TW to C:\Users\Public\Choi\workspace\data\stock/stock/computer/2397.TW.pickle
[INFO] Succeed to collect 映泰:2399.TW, data since 2000-01-04 00:00:00+08:00
[INFO] 318/612: Dumping 映泰 2399.TW to C:\Users\Public\Choi\workspace\data\stock/stock/computer/2399.TW.pickle
[INFO] Succeed to collect 輔信:2405.TW, data since 2000-01-04 00:00:00+08:00


[INFO] Succeed to collect 揚明光:3504.TW, data since 2007-01-26 00:00:00+08:00
[INFO] 358/612: Dumping 揚明光 3504.TW to C:\Users\Public\Choi\workspace\data\stock/stock/photoelectric/3504.TW.pickle
[INFO] Succeed to collect 晶彩科:3535.TW, data since 2008-01-31 00:00:00+08:00
[INFO] 359/612: Dumping 晶彩科 3535.TW to C:\Users\Public\Choi\workspace\data\stock/stock/photoelectric/3535.TW.pickle
[INFO] Succeed to collect 台揚:2314.TW, data since 2000-01-04 00:00:00+08:00
[INFO] 360/612: Dumping 台揚 2314.TW to C:\Users\Public\Choi\workspace\data\stock/stock/communication/2314.TW.pickle
[INFO] Succeed to collect 東訊:2321.TW, data since 2000-01-04 00:00:00+08:00
[INFO] 361/612: Dumping 東訊 2321.TW to C:\Users\Public\Choi\workspace\data\stock/stock/communication/2321.TW.pickle
[INFO] Succeed to collect 友訊:2332.TW, data since 2000-01-04 00:00:00+08:00
[INFO] 362/612: Dumping 友訊 2332.TW to C:\Users\Public\Choi\workspace\data\stock/stock/communication/2332.TW.pickle
[INFO] Succeed to collect 智邦:2345.TW, data sin

[INFO] Succeed to collect 凱美:2375.TW, data since 2000-01-04 00:00:00+08:00
[INFO] 401/612: Dumping 凱美 2375.TW to C:\Users\Public\Choi\workspace\data\stock/stock/electronicparts/2375.TW.pickle
[INFO] Succeed to collect 台光電:2383.TW, data since 2000-01-04 00:00:00+08:00
[INFO] 402/612: Dumping 台光電 2383.TW to C:\Users\Public\Choi\workspace\data\stock/stock/electronicparts/2383.TW.pickle
[INFO] Succeed to collect 群光:2385.TW, data since 2000-01-04 00:00:00+08:00
[INFO] 403/612: Dumping 群光 2385.TW to C:\Users\Public\Choi\workspace\data\stock/stock/electronicparts/2385.TW.pickle
[INFO] Succeed to collect 正崴:2392.TW, data since 2000-01-04 00:00:00+08:00
[INFO] 404/612: Dumping 正崴 2392.TW to C:\Users\Public\Choi\workspace\data\stock/stock/electronicparts/2392.TW.pickle
[INFO] Succeed to collect 毅嘉:2402.TW, data since 2000-01-17 00:00:00+08:00
[INFO] 405/612: Dumping 毅嘉 2402.TW to C:\Users\Public\Choi\workspace\data\stock/stock/electronicparts/2402.TW.pickle
[INFO] Succeed to collect 環科:2413.TW, 

[INFO] Succeed to collect 凌群:2453.TW, data since 2001-05-22 00:00:00+08:00
[INFO] 444/612: Dumping 凌群 2453.TW to C:\Users\Public\Choi\workspace\data\stock/stock/itservice/2453.TW.pickle
[INFO] Succeed to collect 華經:2468.TW, data since 2000-01-04 00:00:00+08:00
[INFO] 445/612: Dumping 華經 2468.TW to C:\Users\Public\Choi\workspace\data\stock/stock/itservice/2468.TW.pickle
[INFO] Succeed to collect 資通:2471.TW, data since 2000-01-04 00:00:00+08:00
[INFO] 446/612: Dumping 資通 2471.TW to C:\Users\Public\Choi\workspace\data\stock/stock/itservice/2471.TW.pickle
[INFO] Succeed to collect 敦陽科:2480.TW, data since 2000-01-05 00:00:00+08:00
[INFO] 447/612: Dumping 敦陽科 2480.TW to C:\Users\Public\Choi\workspace\data\stock/stock/itservice/2480.TW.pickle
[INFO] Succeed to collect 零壹:3029.TW, data since 2000-01-24 00:00:00+08:00
[INFO] 448/612: Dumping 零壹 3029.TW to C:\Users\Public\Choi\workspace\data\stock/stock/itservice/3029.TW.pickle
[INFO] Succeed to collect 傳奇:4994.TW, data since 2011-12-13 00:00:00

[INFO] Succeed to collect 名軒:1442.TW, data since 2000-01-04 00:00:00+08:00
[INFO] 488/612: Dumping 名軒 1442.TW to C:\Users\Public\Choi\workspace\data\stock/stock/construction/1442.TW.pickle
[INFO] Succeed to collect 大將:1453.TW, data since 2000-01-04 00:00:00+08:00
[INFO] 489/612: Dumping 大將 1453.TW to C:\Users\Public\Choi\workspace\data\stock/stock/construction/1453.TW.pickle
[INFO] Succeed to collect 怡華:1456.TW, data since 2000-01-04 00:00:00+08:00
[INFO] 490/612: Dumping 怡華 1456.TW to C:\Users\Public\Choi\workspace\data\stock/stock/construction/1456.TW.pickle
[INFO] Succeed to collect 三洋實業:1472.TW, data since 2000-01-04 00:00:00+08:00
[INFO] 491/612: Dumping 三洋實業 1472.TW to C:\Users\Public\Choi\workspace\data\stock/stock/construction/1472.TW.pickle
[INFO] Succeed to collect 寶徠:1805.TW, data since 2000-01-04 00:00:00+08:00
[INFO] 492/612: Dumping 寶徠 1805.TW to C:\Users\Public\Choi\workspace\data\stock/stock/construction/1805.TW.pickle
[INFO] Succeed to collect 潤隆:1808.TW, data since 20

[INFO] Succeed to collect 慧洋-KY:2637.TW, data since 2010-12-01 00:00:00+08:00
[INFO] 532/612: Dumping 慧洋-KY 2637.TW to C:\Users\Public\Choi\workspace\data\stock/stock/shipping/2637.TW.pickle
[INFO] Succeed to collect 宅配通:2642.TW, data since 2013-03-12 00:00:00+08:00
[INFO] 533/612: Dumping 宅配通 2642.TW to C:\Users\Public\Choi\workspace\data\stock/stock/shipping/2642.TW.pickle
[INFO] Succeed to collect 長榮航太:2645.TW, data since 2022-02-18 00:00:00+08:00
[INFO] 534/612: Dumping 長榮航太 2645.TW to C:\Users\Public\Choi\workspace\data\stock/stock/shipping/2645.TW.pickle
[INFO] Succeed to collect 遠雄港:5607.TW, data since 2000-01-04 00:00:00+08:00
[INFO] 535/612: Dumping 遠雄港 5607.TW to C:\Users\Public\Choi\workspace\data\stock/stock/shipping/5607.TW.pickle
[INFO] Succeed to collect 四維航:5608.TW, data since 2003-08-25 00:00:00+08:00
[INFO] 536/612: Dumping 四維航 5608.TW to C:\Users\Public\Choi\workspace\data\stock/stock/shipping/5608.TW.pickle
[INFO] Succeed to collect 龍德造船:6753.TW, data since 2021-01-

[INFO] Succeed to collect 玉山金:2884.TW, data since 2002-01-29 00:00:00+08:00
[INFO] 576/612: Dumping 玉山金 2884.TW to C:\Users\Public\Choi\workspace\data\stock/stock/finance/2884.TW.pickle
[INFO] Succeed to collect 元大金:2885.TW, data since 2000-01-04 00:00:00+08:00
[INFO] 577/612: Dumping 元大金 2885.TW to C:\Users\Public\Choi\workspace\data\stock/stock/finance/2885.TW.pickle
[INFO] Succeed to collect 兆豐金:2886.TW, data since 2000-01-04 00:00:00+08:00
[INFO] 578/612: Dumping 兆豐金 2886.TW to C:\Users\Public\Choi\workspace\data\stock/stock/finance/2886.TW.pickle
[INFO] Succeed to collect 台新金:2887.TW, data since 2002-02-19 00:00:00+08:00
[INFO] 579/612: Dumping 台新金 2887.TW to C:\Users\Public\Choi\workspace\data\stock/stock/finance/2887.TW.pickle
[INFO] Succeed to collect 新光金:2888.TW, data since 2002-02-20 00:00:00+08:00
[INFO] 580/612: Dumping 新光金 2888.TW to C:\Users\Public\Choi\workspace\data\stock/stock/finance/2888.TW.pickle
[INFO] Succeed to collect 國票金:2889.TW, data since 2002-03-26 00:00:00+

In [14]:
def get_index_from_file(filename):
    """Read one symbol per line from ``filename``.

    Args:
        filename: Path of a text file holding one symbol per line.

    Returns:
        The symbols in file order with surrounding whitespace removed. Blank
        and whitespace-only lines are skipped so that no empty symbol is
        handed on to the download step.
    """
    # ``with`` guarantees the handle is closed even if reading fails.
    with open(filename, "r") as fp:
        stripped = (line.strip() for line in fp)
        return [symbol for symbol in stripped if symbol]

def checkValid(indexList, start="2023-05-24", end="2023-05-26"):
    """Probe each symbol with a short history request and print the outcome.

    Args:
        indexList: Symbols to probe.
        start: Start date of the probe window, forwarded to ``history``.
        end: End date of the probe window, forwarded to ``history``.

    Returns:
        ``True`` when every symbol returned at least one row, ``False`` when
        at least one did not. One ``OK``/``FAIL`` line is printed per symbol.
    """
    cnt = len(indexList)
    allValid = True
    for i, symbol in enumerate(indexList, start=1):
        ticker = yf.Ticker(symbol)
        # NOTE(review): ``period`` is passed together with start/end exactly as
        # before; confirm the installed yfinance accepts that combination.
        hist = ticker.history(period="1d", start=start, end=end)
        # Test for rows explicitly rather than relying on a bare ``except``
        # around ``hist.index[0]``.
        if hist is not None and len(hist.index) > 0:
            print(f"{i}/{cnt} OK {symbol} {hist.index[0]}")
        else:
            allValid = False
            print(f"{i}/{cnt} FAIL {symbol} {ticker.info} {hist}")
    return allValid

def collect_data_deps(checkValid=False):
    """Download and pickle the history of every symbol in ``FEAT_INDEX_FILE``.

    Each non-empty history is stored at ``DIR/deps/<symbol>.pickle``.

    Args:
        checkValid: When true, run the module-level ``checkValid`` function on
            the symbol list first. Only an explicit ``False`` from it aborts
            the run; any other result (including ``None``) lets the
            collection proceed.

    Returns:
        None. Progress is reported through ``log``.
    """
    fName = FEAT_INDEX_FILE
    indexes = get_index_from_file(fName)
    log('Start ' + str(DT.now()))
    if checkValid:
        # Inside this function the name ``checkValid`` is the boolean
        # parameter, so calling it directly raises TypeError. Fetch the
        # module-level function of the same name explicitly.
        validator = globals()["checkValid"]
        if validator(indexes) is False:
            log("Validation failed; nothing collected")
            return
    total = len(indexes)
    cateDir = DIR + "/deps"
    os.makedirs(cateDir, exist_ok=True)

    for cnt, symbol in enumerate(indexes, start=1):
        dest = cateDir + "/" + symbol + ".pickle"
        df = query(symbol)
        if len(df.index) == 0:
            # An empty history has no first row; skip it instead of letting
            # ``df.index[0]`` abort the remaining downloads.
            log(f"{cnt}/{total}: No data returned for {symbol}, skipping")
            continue

        log(f"Succeed to collect {symbol}, data since {df.index[0]}")
        log(f"{cnt}/{total}: Dumping {symbol} to {dest}")
        dump(df, dest)

    log('End ' + str(DT.now()))

In [15]:
# Collect the symbols listed in FEAT_INDEX_FILE into DIR/deps, with the
# optional validation pass left off (checkValid defaults to False).
collect_data_deps()

[INFO] Start 2023-07-05 00:44:56.680203
[INFO] Succeed to collect ^DJI, data since 1992-01-02 00:00:00-05:00
[INFO] 1/219: Dumping ^DJI to C:\Users\Public\Choi\workspace\data\stock/stock/deps/^DJI.pickle
[INFO] Succeed to collect ^GSPC, data since 1927-12-30 00:00:00-05:00
[INFO] 2/219: Dumping ^GSPC to C:\Users\Public\Choi\workspace\data\stock/stock/deps/^GSPC.pickle
[INFO] Succeed to collect ^IXIC, data since 1971-02-05 00:00:00-05:00
[INFO] 3/219: Dumping ^IXIC to C:\Users\Public\Choi\workspace\data\stock/stock/deps/^IXIC.pickle
[INFO] Succeed to collect ^VIX, data since 1990-01-02 00:00:00-06:00
[INFO] 4/219: Dumping ^VIX to C:\Users\Public\Choi\workspace\data\stock/stock/deps/^VIX.pickle
[INFO] Succeed to collect ^SOX, data since 1994-05-04 00:00:00-04:00
[INFO] 5/219: Dumping ^SOX to C:\Users\Public\Choi\workspace\data\stock/stock/deps/^SOX.pickle
[INFO] Succeed to collect ^N225, data since 1965-01-05 00:00:00+09:00
[INFO] 6/219: Dumping ^N225 to C:\Users\Public\Choi\workspace\da

[INFO] Succeed to collect ^NDX, data since 1985-10-01 00:00:00-04:00
[INFO] 50/219: Dumping ^NDX to C:\Users\Public\Choi\workspace\data\stock/stock/deps/^NDX.pickle
[INFO] Succeed to collect ^SP100, data since 1982-08-02 00:00:00-04:00
[INFO] 51/219: Dumping ^SP100 to C:\Users\Public\Choi\workspace\data\stock/stock/deps/^SP100.pickle
[INFO] Succeed to collect ^HUI, data since 1996-06-04 00:00:00-04:00
[INFO] 52/219: Dumping ^HUI to C:\Users\Public\Choi\workspace\data\stock/stock/deps/^HUI.pickle
[INFO] Succeed to collect ^XAU, data since 1983-12-19 00:00:00-05:00
[INFO] 53/219: Dumping ^XAU to C:\Users\Public\Choi\workspace\data\stock/stock/deps/^XAU.pickle
[INFO] Succeed to collect ^DJGSP, data since 2005-11-01 00:00:00-05:00
[INFO] 54/219: Dumping ^DJGSP to C:\Users\Public\Choi\workspace\data\stock/stock/deps/^DJGSP.pickle
[INFO] Succeed to collect ^GSPTSE, data since 1979-06-29 00:00:00-04:00
[INFO] 55/219: Dumping ^GSPTSE to C:\Users\Public\Choi\workspace\data\stock/stock/deps/^GSP

[INFO] Succeed to collect LE=F, data since 2002-03-04 00:00:00-05:00
[INFO] 98/219: Dumping LE=F to C:\Users\Public\Choi\workspace\data\stock/stock/deps/LE=F.pickle
[INFO] Succeed to collect CC=F, data since 2000-01-03 00:00:00-05:00
[INFO] 99/219: Dumping CC=F to C:\Users\Public\Choi\workspace\data\stock/stock/deps/CC=F.pickle
[INFO] Succeed to collect KC=F, data since 2000-01-03 00:00:00-05:00
[INFO] 100/219: Dumping KC=F to C:\Users\Public\Choi\workspace\data\stock/stock/deps/KC=F.pickle
[INFO] Succeed to collect CT=F, data since 2000-01-03 00:00:00-05:00
[INFO] 101/219: Dumping CT=F to C:\Users\Public\Choi\workspace\data\stock/stock/deps/CT=F.pickle
[INFO] Succeed to collect OJ=F, data since 2021-09-07 00:00:00-04:00
[INFO] 102/219: Dumping OJ=F to C:\Users\Public\Choi\workspace\data\stock/stock/deps/OJ=F.pickle
[INFO] Succeed to collect SB=F, data since 2000-03-01 00:00:00-05:00
[INFO] 103/219: Dumping SB=F to C:\Users\Public\Choi\workspace\data\stock/stock/deps/SB=F.pickle
[INFO]

[INFO] Succeed to collect MRK, data since 1962-01-02 00:00:00-05:00
[INFO] 148/219: Dumping MRK to C:\Users\Public\Choi\workspace\data\stock/stock/deps/MRK.pickle
[INFO] Succeed to collect PFE, data since 1972-06-01 00:00:00-04:00
[INFO] 149/219: Dumping PFE to C:\Users\Public\Choi\workspace\data\stock/stock/deps/PFE.pickle
[INFO] Succeed to collect WBA, data since 1980-03-17 00:00:00-05:00
[INFO] 150/219: Dumping WBA to C:\Users\Public\Choi\workspace\data\stock/stock/deps/WBA.pickle
[INFO] Succeed to collect MMM, data since 1962-01-02 00:00:00-05:00
[INFO] 151/219: Dumping MMM to C:\Users\Public\Choi\workspace\data\stock/stock/deps/MMM.pickle
[INFO] Succeed to collect WMT, data since 1972-08-25 00:00:00-04:00
[INFO] 152/219: Dumping WMT to C:\Users\Public\Choi\workspace\data\stock/stock/deps/WMT.pickle
[INFO] Succeed to collect KO, data since 1962-01-02 00:00:00-05:00
[INFO] 153/219: Dumping KO to C:\Users\Public\Choi\workspace\data\stock/stock/deps/KO.pickle
[INFO] Succeed to collect

[INFO] Succeed to collect 2610.TW, data since 2000-01-04 00:00:00+08:00
[INFO] 197/219: Dumping 2610.TW to C:\Users\Public\Choi\workspace\data\stock/stock/deps/2610.TW.pickle
[INFO] Succeed to collect 2633.TW, data since 2007-12-31 00:00:00+08:00
[INFO] 198/219: Dumping 2633.TW to C:\Users\Public\Choi\workspace\data\stock/stock/deps/2633.TW.pickle
[INFO] Succeed to collect 5706.TW, data since 2007-12-31 00:00:00+08:00
[INFO] 199/219: Dumping 5706.TW to C:\Users\Public\Choi\workspace\data\stock/stock/deps/5706.TW.pickle
[INFO] Succeed to collect 2748.TW, data since 2015-11-02 00:00:00+08:00
[INFO] 200/219: Dumping 2748.TW to C:\Users\Public\Choi\workspace\data\stock/stock/deps/2748.TW.pickle
[INFO] Succeed to collect 2912.TW, data since 2000-01-04 00:00:00+08:00
[INFO] 201/219: Dumping 2912.TW to C:\Users\Public\Choi\workspace\data\stock/stock/deps/2912.TW.pickle
[INFO] Succeed to collect 2206.TW, data since 2000-01-04 00:00:00+08:00
[INFO] 202/219: Dumping 2206.TW to C:\Users\Public\Ch

In [19]:
## partial fix: re-download a single symbol with period "max" and overwrite
## its pickle.
cat = "electricappliance"
symbol = "3312.TW"
# Relative to the current working directory.
# NOTE(review): this matches DIR only while the working directory is unchanged.
dest = "stock/" + cat + "/" + symbol +".pickle"

# Make sure the category directory exists so the dump also works when the
# bulk collection has not created it yet.
os.makedirs(os.path.dirname(dest), exist_ok=True)

df = query(symbol, "max")
log(f"Succeed to collect {cat}:{symbol}, data since {df.index[0]} {df.index[-1]}")
log(f"Dumping {cat} {symbol} to {dest}")
dump(df, dest)

[INFO] Succeed to collect electricappliance:3312.TW, data since 2011-12-29 00:00:00+08:00 2023-07-04 00:00:00+08:00
[INFO] Dumping electricappliance 3312.TW to stock/electricappliance/3312.TW.pickle
