In [3]:
import sys,os
# Add the parent directory (relative to the current working directory) to the
# module search path. This must happen before the imports below that depend on it.
sys.path.append("..")
import django
# Initialise Django before anything else touches it.
# NOTE(review): this presumably relies on DJANGO_SETTINGS_MODULE (or equivalent
# settings configuration) being set outside this file -- confirm.
django.setup()
import pandas as pd
from io import StringIO
import requests
import datetime

In [69]:
class CrawlStockIndexVolTW:
    """Crawl one day's trading volume/value statistics for Taiwan stock markets.

    Three sources are combined into one DataFrame with the columns
    ``stock_id``, ``turnover_vol``, ``turnover_price``, ``turnover_num`` and
    ``date``:

    * ``sii_vol``       -- TWSE ``BFIAMU`` report (one row per category index).
    * ``sii_statistic`` -- the "成交統計" table of the TWSE ``MI_INDEX`` report.
    * ``otc_statistic`` -- the "成交統計" table of the TPEx market statistics page.

    ``stock_id`` is the source row label prefixed with ``上市`` (listed, TWSE
    rows) or ``上櫃`` (OTC, TPEx rows).

    Args:
        date: ``datetime.date``/``datetime.datetime`` of the trading day.
        timeout: Seconds to wait for each HTTP request. Without a timeout a
            stalled server would block the crawler forever.
    """

    # Columns that hold numbers in every result frame.
    _NUMERIC_COLUMNS = ['turnover_vol', 'turnover_price', 'turnover_num']

    # Source header -> output column name, shared by both "成交統計" tables.
    _STATISTIC_COLUMNS = {
        '成交統計': 'stock_id',
        '成交金額(元)': 'turnover_price',
        '成交股數(股)': 'turnover_vol',
        '成交筆數': 'turnover_num',
    }

    # Class-level fallback so instances built without __init__ still have one.
    timeout = 30

    def __init__(self, date, timeout=30):
        self.date = date
        self.date_str = date.strftime("%Y%m%d")
        self.target_name = "台股指數成交量資訊"
        self.sub_market = ["sii", "otc"]
        self.timeout = timeout

    @staticmethod
    def _strip_thousands(df):
        """Return *df* with every cell as ``str`` and all commas removed."""
        return df.astype(str).apply(
            lambda col: col.str.replace(',', '', regex=False))

    @classmethod
    def _to_numeric(cls, df):
        """Convert the turnover columns of *df* to numbers (bad cells -> NaN).

        Whole columns are assigned by name rather than through
        ``df.loc[:, cols] = ...`` / ``df.iloc[:, 1:4] = ...``: on pandas >= 2.0
        those forms write in place and leave the columns with ``object`` dtype,
        and the positional form additionally depends on column order.
        """
        cols = cls._NUMERIC_COLUMNS
        df[cols] = df[cols].apply(
            lambda col: pd.to_numeric(col, errors='coerce'))
        return df

    @staticmethod
    def _statistic_label(prefix, name):
        """Drop a leading ``"<n>."`` ordinal from *name* and add *prefix*."""
        return prefix + name.split(".", 1)[-1]

    def sii_vol(self, text=None):
        """Return the TWSE per-category turnover table for ``self.date``.

        Args:
            text: Raw CSV response to parse. When ``None`` (the default) the
                report is downloaded from TWSE.

        Returns:
            A DataFrame, or ``None`` when the response has no table rows.
        """
        if text is None:
            text = requests.post(
                'http://www.twse.com.tw/exchangeReport/BFIAMU?response=csv&date=' + self.date_str,
                timeout=self.timeout,
            ).text

        # '=' is removed so cells written as ="..." become plain quoted cells.
        content = text.replace('=', '')

        # Only the header and data rows have five or more quoted cells; titles,
        # notes and blank lines are discarded.
        rows = [line for line in content.split('\n')
                if len(line.split('",')) > 4]
        if not rows:
            return None

        df = pd.read_csv(StringIO("\n".join(rows)))
        df = self._strip_thousands(df)
        df = df.rename(columns={'分類指數名稱': 'stock_id',
                                '成交股數': 'turnover_vol',
                                '成交金額': 'turnover_price',
                                '成交筆數': 'turnover_num'})
        df['date'] = pd.to_datetime(self.date)
        df = self._to_numeric(df)
        # 'Unnamed: 5' only exists because each source line ends with a comma;
        # tolerate its absence instead of raising KeyError.
        df = df.drop(columns=['漲跌指數', 'Unnamed: 5'], errors='ignore')
        df['stock_id'] = df['stock_id'].apply(lambda s: '上市' + s)
        return df

    def sii_statistic(self, text=None):
        """Return the TWSE "成交統計" summary table for ``self.date``.

        Args:
            text: Raw CSV response to parse. When ``None`` (the default) the
                report is downloaded from TWSE.

        Returns:
            A DataFrame, or ``None`` when nothing is left after filtering.
        """
        if text is None:
            text = requests.post(
                'http://www.twse.com.tw/exchangeReport/MI_INDEX?response=csv&date=' + self.date_str + '&type=MS',
                timeout=self.timeout,
            ).text

        content = text.replace('=', '')

        # Keep only the narrow lines (fewer than six '",'-separated parts),
        # then skip the first remaining line, which precedes the header row.
        rows = [line for line in content.split('\n')
                if len(line.split('",')) < 6][1:]
        content = "\n".join(rows)
        # Whitespace-only payloads (e.g. stray '\r') carry no table either.
        if not content.strip():
            return None

        df = pd.read_csv(StringIO(content))
        df = self._strip_thousands(df)
        df = df.rename(columns=self._STATISTIC_COLUMNS)
        # Trailing-comma artefact, see sii_vol.
        df = df.drop(columns=['Unnamed: 4'], errors='ignore')
        df['date'] = pd.to_datetime(self.date)
        df = self._to_numeric(df)
        # Rows from other narrow tables/notes have no numeric cells -> dropped.
        df = df.dropna()
        df['stock_id'] = df['stock_id'].apply(
            lambda s: self._statistic_label('上市', s))
        return df

    def otc_statistic(self, text=None):
        """Return the TPEx "成交統計" summary table for ``self.date``.

        Args:
            text: Raw HTML response to parse. When ``None`` (the default) the
                page is downloaded from TPEx.

        Returns:
            A DataFrame, or ``None`` when the response has fewer than 35 lines
            (treated as "no data for this day").
        """
        if text is None:
            # The URL takes the year as (Gregorian year - 1911).
            y = str(self.date.year - 1911)
            date_str = y + "/" + self.date.strftime("%m") + "/" + self.date.strftime("%d")
            link = 'https://www.tpex.org.tw/web/stock/aftertrading/market_statistics/statistics_result.php?l=zh-tw&t=D&o=htm&d=' + date_str
            text = requests.get(link, timeout=self.timeout).text

        lines = text.replace("\r", "").split("\n")
        if len(lines) < 35:
            return None

        # The first three lines are skipped before handing the HTML to pandas.
        df = pd.read_html(StringIO("\n".join(lines[3:])), header=None)[0]
        # The table has two header rows; the second holds the column names.
        df.columns = df.columns.get_level_values(1)
        df = self._strip_thousands(df)
        df = df.rename(columns=self._STATISTIC_COLUMNS)
        df = df.loc[:, ['stock_id', 'turnover_vol', 'turnover_price', 'turnover_num']]
        df['date'] = pd.to_datetime(self.date)
        df = self._to_numeric(df)
        df = df.dropna()
        df['stock_id'] = df['stock_id'].apply(
            lambda s: self._statistic_label('上櫃', s))
        return df

    def crawl_main(self):
        """Crawl all three sources and return them as one DataFrame.

        Sources that return ``None`` are skipped by ``pd.concat``.

        Returns:
            The concatenated DataFrame, or ``None`` when a ``ValueError`` is
            raised -- which is what ``pd.concat`` does when every source
            returned ``None``, and also covers ``ValueError``s raised by the
            pandas parsers inside the individual crawlers.
        """
        try:
            df = pd.concat(
                [self.sii_vol(), self.sii_statistic(), self.otc_statistic()],
                sort=False)
        except ValueError:
            return None
        return df
    
# Example run: crawl all sources for 2005-04-25. `z` is the combined DataFrame,
# or None when crawl_main() caught a ValueError (e.g. no source returned data).
z=CrawlStockIndexVolTW(datetime.datetime(2005,4,25)).crawl_main()   
# Bare expression as the last statement so the notebook cell displays the result.
z

Unnamed: 0,stock_id,turnover_vol,turnover_price,turnover_num,date
0,上市水泥類,9335000.0,141027000.0,2281.0,2005-04-25
1,上市食品類,8592000.0,84301000.0,1910.0,2005-04-25
2,上市塑膠類,97436000.0,2409965000.0,16202.0,2005-04-25
3,上市紡織纖維類,35967000.0,446891000.0,6764.0,2005-04-25
4,上市電機機械類,18422000.0,355481000.0,5289.0,2005-04-25
5,上市電器電纜類,21221000.0,241379000.0,3178.0,2005-04-25
6,上市化學工業類,18905000.0,418448000.0,4880.0,2005-04-25
7,上市玻璃陶瓷類,4187000.0,41002000.0,886.0,2005-04-25
8,上市造紙類,8800000.0,94430000.0,1892.0,2005-04-25
9,上市鋼鐵類,46750000.0,1077050000.0,9983.0,2005-04-25
