In [115]:
import crawlers as cs
import crawler_decorator as cd

In [116]:
import numpy as np
import pandas as pd

@cd.try_loop_decorator(times=5, sleep_time=2)
@cd.record_error_decorator('stock_crawler_error.log')
def stock_crawler(stock_id, year, month):
    """Fetch one month of price rows for one stock and return them as a DataFrame.

    The response of ``cs.get_stock_month_price`` is expected to be a mapping
    with a ``'data'`` entry (the rows) and a ``'fields'`` entry (the column
    names). A ``'股票代號'`` column holding ``stock_id`` is added and the
    ``'漲跌價差'`` column is removed.

    NOTE(review): the two decorators live in ``crawler_decorator`` and their
    behaviour is not visible here; their names suggest retrying and error
    logging. Callers in this notebook check the result with ``isinstance``,
    so the decorated call may not always return a DataFrame -- confirm.

    Args:
        stock_id: Stock identifier, forwarded to the crawler and stored in
            the ``'股票代號'`` column.
        year: Year, forwarded unchanged to ``cs.get_stock_month_price``.
        month: Month, forwarded unchanged to ``cs.get_stock_month_price``.

    Returns:
        pandas.DataFrame: one row per entry of ``res_dict['data']``.

    Raises:
        ValueError: if the response has no ``'data'`` entry or that entry is
            empty.
        KeyError: if ``'fields'`` is missing from the response or the
            ``'漲跌價差'`` column is absent.
    """
    res_dict = cs.get_stock_month_price(stock_id, year, month)

    # The request did not fail, but nothing was fetched. An empty 'data' list
    # is rejected here as well; otherwise it would only fail further down in
    # the DataFrame constructor with a shape-mismatch ValueError.
    if 'data' not in res_dict or not res_dict['data']:
        raise ValueError('size=0')

    # Kept as a numpy round-trip so cell values are coerced exactly as before.
    values_np = np.array(res_dict['data'])
    stock_df = pd.DataFrame(values_np, columns=res_dict['fields'])
    stock_df['股票代號'] = stock_id
    return stock_df.drop(columns="漲跌價差")

In [117]:
from copy import deepcopy

@cd.record_error_decorator('format_error.log')
def stock_df_format(stock_df):
    """Return a cleaned copy of a crawled price frame.

    The input is left untouched. On the copy:

    * the Chinese column names are replaced by English ones,
    * ``date`` strings such as ``'110/01/04'`` become ``'2021-01-04'``
      (1911 is added to the year part),
    * thousands separators are stripped from the numeric columns, which are
      then converted to ``int64`` (``volume``, ``value``, ``record``) or
      ``float64`` (``open``, ``high``, ``low``, ``close``).

    Args:
        stock_df: DataFrame whose columns are all keys of the mapping below
            and whose date/numeric cells are strings.

    Returns:
        pandas.DataFrame: a new frame with the same rows and column order.

    Raises:
        KeyError: if ``stock_df`` has a column that is not in the mapping, or
            lacks one of the columns that get converted.
        ValueError: if a numeric cell is not a valid number once the commas
            are removed.
    """
    # DataFrame.copy() is deep by default, so the caller's frame is not modified.
    new_df = stock_df.copy()

    # Translate the column names to English.
    replace_dict = {
        # date & traded amounts
        '日期': 'date', '成交股數': 'volume', '成交金額': 'value',
        # open / high / low / close
        '開盤價': 'open', '最高價': 'high', '最低價': 'low', '收盤價': 'close',
        # others
        '成交筆數': 'record', '股票代號': 'stock_id'
    }
    # Direct dict indexing (rather than DataFrame.rename) keeps the lookup
    # strict: an unexpected column raises KeyError instead of slipping through.
    new_df.columns = [replace_dict[name] for name in new_df.columns]

    # Rewrite the date column in MySQL date format.
    def change_date_format(old_date):
        # '110/01/04' -> '2021-01-04'
        parts = old_date.split('/')
        return f"{int(parts[0]) + 1911}-{parts[1]}-{parts[2]}"

    new_df['date'] = new_df['date'].apply(change_date_format)

    # Strip the ',' thousands separators and convert to numbers.
    # NOTE(review): the conversion is strict -- one non-numeric cell raises
    # ValueError for the whole frame; confirm that this is the desired
    # handling for placeholder values in the feed.
    int_columns = ('volume', 'value', 'record')
    float_columns = ('open', 'high', 'low', 'close')
    for column in int_columns:
        # Explicit 64-bit width: the traded value can exceed the 32-bit range.
        new_df[column] = new_df[column].str.replace(',', '', regex=False).astype('int64')
    for column in float_columns:
        new_df[column] = new_df[column].str.replace(',', '', regex=False).astype('float64')

    return new_df

In [118]:
import requests

def get_0050_list(timeout=10):
    """Return the ``CommKey`` of every entry in the 0050 shareholding details.

    Sends a POST request (``action=GetShareholdingDetails``, ``stockId=0050``)
    to the cmoney endpoint and reads the ``'Data'`` list of the JSON reply.

    Args:
        timeout: Seconds handed to ``requests.post`` as its ``timeout``
            argument. Without one the call could block indefinitely on an
            unresponsive server.

    Returns:
        list: the ``'CommKey'`` value of each item in ``'Data'``, in response
        order. Presumably these are the constituent stock ids -- the loop in
        this notebook passes them on as ``stock_id``.

    Raises:
        requests.RequestException: on connection problems, a timeout, or an
            HTTP error status.
        KeyError: if the reply has no ``'Data'`` entry or an item has no
            ``'CommKey'``.
    """
    payload = {
        'action': 'GetShareholdingDetails',
        'stockId': '0050'
    }

    response = requests.post(
        "https://www.cmoney.tw/etf/ashx/e210.ashx",
        data=payload,
        timeout=timeout,
    )
    # Fail on 4xx/5xx here instead of while decoding an error page as JSON.
    response.raise_for_status()

    holdings = response.json()['Data']
    return [holding['CommKey'] for holding in holdings]

In [None]:
# Crawl the selected stocks / years / months and accumulate the formatted
# rows in ``total_df``; the CSV is rewritten after every stock.
total_df = pd.DataFrame()
# Formatted monthly frames are gathered here and concatenated once per stock,
# instead of growing ``total_df`` with a concat on every iteration (which
# re-copies all rows each time and starts by concatenating an empty frame).
monthly_frames = []

for stock_id in get_0050_list()[:5]:
    for year in range(2021, 2023):
        for month in range(1, 3):
            year_str = str(year)
            month_str = f"{month:02d}"  # zero-padded, e.g. 1 -> '01'

            print(stock_id, year_str, month_str)

            raw_df = stock_crawler(stock_id=stock_id, year=year_str, month=month_str)
            # Either step may hand back something other than a DataFrame
            # (hence the isinstance checks); such months are skipped.
            if not isinstance(raw_df, pd.DataFrame):
                continue
            formatted_df = stock_df_format(raw_df)
            if isinstance(formatted_df, pd.DataFrame):
                monthly_frames.append(formatted_df)

    # Written once per stock, so the file acts as a running checkpoint.
    if monthly_frames:
        total_df = pd.concat(monthly_frames, axis=0, ignore_index=True)
    total_df.to_csv('list0050.csv', index=False)