In [1]:
import sys,os
sys.path.append("..")
import django
django.setup()
from io import StringIO
import requests
import datetime
from crawlers.models import *
from crawlers.finlab.import_tools import *


In [13]:
class CrawlStockTiiMarketReportTW:
    """Crawl the market-wide daily buy/sell totals of Taiwan's institutional investors.

    Two sources are queried for a single day: TWSE (``crawl_sii``, rows
    labelled '上市') and TPEx (``crawl_otc``, rows labelled '上櫃').  In both
    tables the source column '單位名稱' is renamed to ``stock_id`` and the
    amount columns to ``buy_price``, ``sell_price`` and ``net``; ``date`` and
    ``market`` columns are appended.

    Args:
        date: The day to crawl.  ``.strftime()``, ``.year`` and ``.date()``
            are used on it, so a ``datetime.datetime`` is expected.
    """

    # Seconds allowed for each HTTP request.  Without a timeout ``requests``
    # waits indefinitely on a stalled connection.  The value itself is a
    # judgement call; override it on the class or instance if needed.
    REQUEST_TIMEOUT = 30

    def __init__(self, date):
        self.date = date
        self.date_str = date.strftime("%Y%m%d")
        self.target_name = "台股三大法人全市場日報資訊"
        self.sub_market = ["sii", "otc"]
        self.format = "time_series"

    def _fetch_text(self, url):
        """Download ``url`` and return the response body as text.

        Network errors (including ``requests`` timeouts) propagate to the caller.
        """
        return requests.get(url, timeout=self.REQUEST_TIMEOUT).text

    @staticmethod
    def _read_table(text):
        """Parse CSV ``text`` whose column names sit on its second line.

        Returns:
            The parsed DataFrame, or ``None`` when pandas reports the text as
            empty or unparseable.  Both crawlers share this so that one
            market's bad response cannot surface as an exception that makes
            ``crawl_main`` lose the other market's data.
        """
        try:
            return pd.read_csv(StringIO(text), header=1)
        except (pd.errors.EmptyDataError, pd.errors.ParserError):
            return None

    @staticmethod
    def _clean_numbers(df):
        """Return a copy of ``df`` with every column but the first made numeric.

        All cells are stringified and their thousands separators removed; then
        each column after the first is converted with ``pd.to_numeric``, with
        unparseable cells becoming NaN.
        """
        df = df.astype(str).apply(lambda col: col.str.replace(',', '', regex=False))
        # Assign column by column so each column takes the dtype returned by
        # ``to_numeric`` instead of being written into the existing string
        # columns.
        for name in df.columns[1:]:
            df[name] = pd.to_numeric(df[name], errors='coerce')
        return df

    def crawl_sii(self):
        """Fetch the TWSE table for ``self.date``.

        Returns:
            A DataFrame with a fresh 0..n-1 index and ``market`` set to '上市',
            or ``None`` when the response cannot be parsed as a table.

        Raises:
            KeyError: If the parsed table has no '單位名稱' column.
        """
        url = ('http://www.twse.com.tw/fund/BFI82U?response=csv&dayDate='
               + self.date_str + '&type=day')
        table = self._read_table(self._fetch_text(url))
        if table is None:
            return None

        # Discard columns that are entirely empty, then rows with any gap.
        table = table.dropna(how='all', axis=1).dropna(how='any')
        table = self._clean_numbers(table)
        table = table.rename(
            columns={'單位名稱': 'stock_id', '買進金額': 'buy_price',
                     '賣出金額': 'sell_price', '買賣差額': 'net'})
        if 'stock_id' not in table.columns:
            # The response parsed as CSV but is not the expected table.
            raise KeyError("column '單位名稱' not found in TWSE response")

        # Rows were dropped above, so renumber them from zero.
        table = table.reset_index(drop=True)
        table["date"] = self.date.date()
        table['market'] = '上市'
        return table

    def crawl_otc(self):
        """Fetch the TPEx table for ``self.date``.

        The request date uses the ROC calendar year (Gregorian year - 1911).

        Returns:
            A DataFrame with ``market`` set to '上櫃', or ``None`` when the
            response cannot be parsed or has fewer than three rows.

        Raises:
            KeyError: If the parsed table has no '單位名稱' column.
        """
        roc_date = str(self.date.year - 1911) + self.date.strftime("/%m/%d")
        url = ('http://www.tpex.org.tw/web/stock/3insti/3insti_summary/3itrdsum_result.php?l=zh-tw&o=csv&se=EW&t=D&p=0&d='
               + roc_date + '&s=0,asc')
        table = self._read_table(self._fetch_text(url))
        if table is None or len(table) < 3:
            return None

        table = self._clean_numbers(table)
        table = table.rename(
            columns={'單位名稱': 'stock_id', '買進金額(元)': 'buy_price',
                     '賣出金額(元)': 'sell_price', '買賣超(元)': 'net'})
        table["date"] = self.date.date()
        # Names arrive padded with full-width spaces (U+3000); remove them.
        table['stock_id'] = table['stock_id'].str.replace('\u3000', '', regex=False)
        table['market'] = '上櫃'
        # Keep only rows that have at least four non-missing cells.
        return table.dropna(thresh=4)

    def crawl_main(self):
        """Crawl both markets and stack whatever was returned.

        Returns:
            The concatenation of the available frames (each keeps its own row
            index), or ``None`` when neither market returned a table.
        """
        frames = [frame for frame in (self.crawl_sii(), self.crawl_otc())
                  if frame is not None]
        if not frames:
            return None
        return pd.concat(frames, sort=False)
    

# Smoke test: crawl both markets for 2009-01-05 and display the combined
# frame as the cell output.
import datetime
df = CrawlStockTiiMarketReportTW(datetime.datetime(2009, 1, 5)).crawl_main()
df

Unnamed: 0,stock_id,buy_price,sell_price,net,date,market
0,自營商,2411797740,960463992,1451333748,2009-01-05,上市
1,投信,2447486420,2088292973,359193447,2009-01-05,上市
2,外資,13356112384,9214738283,4141374101,2009-01-05,上市
3,合計,18215396544,12263495248,5951901296,2009-01-05,上市
0,合計,518319285,394890206,123429079,2009-01-05,上櫃
1,外資,126933645,88122573,38811072,2009-01-05,上櫃
2,投信,240822330,205354551,35467779,2009-01-05,上櫃
3,自營商,150563310,101413082,49150228,2009-01-05,上櫃


In [15]:
# Check the OTC crawler on its own for 2009-01-12; `df` is displayed as the
# cell output (crawl_otc may return None, in which case nothing is shown).
df = CrawlStockTiiMarketReportTW(datetime.datetime(2009, 1, 12)).crawl_otc()
df

In [None]:
# Crawl both markets for 2020-04-29 and hand the result to add_to_sql together
# with the StockTiiMarketReportTW model (both names presumably come from the
# star imports in the first cell).
# NOTE(review): crawl_main() can return None - confirm add_to_sql copes with that.
df = CrawlStockTiiMarketReportTW(datetime.datetime(2020, 4, 29)).crawl_main()
add_to_sql(StockTiiMarketReportTW, df)