In [1]:
import sys,os
sys.path.append("..")
import django
django.setup()
import pandas as pd
from io import StringIO
import requests
import datetime
from crawlers.models import *
from crawlers.finlab.import_tools import *

In [2]:
class CrawlStockTiiMarketReportTW:
    def __init__(self, date):
        self.date = date
        self.date_str = date.strftime("%Y%m%d")
        self.target_name = "台股三大法人全市場日報資訊"
        self.sub_market = ["sii", "otc"]

    def crawl_sii(self):
        r= requests.get('http://www.twse.com.tw/fund/BFI82U?response=csv&dayDate='+self.date_str+'&type=day')
        try:
            df = pd.read_csv(StringIO(r.text), header=1).dropna(how='all', axis=1).dropna(how='any')
        except pd.errors.EmptyDataError:
            return None

        df = df.astype(str).apply(lambda s: s.str.replace(',',''))
        df.iloc[:,1:]=df.iloc[:,1:].apply(lambda s: pd.to_numeric(s, errors='coerce')).dropna(how='all', axis=1)
        df = df.rename(
                       columns={'單位名稱':'stock_id','買進金額':'buy_price',
                                '賣出金額':'sell_price','買賣差額':'net'})

        df = df.set_index(['stock_id'])
        df = df.rename(index={
                          '自營商(自行買賣)':'上市自營商_自行買賣','自營商(避險)':'上市自營商_避險',
                          '投信':'上市投信','外資及陸資(不含外資自營商)':'上市外資及陸資_不含外資自營商',
                          '外資自營商':'上市外資自營商','合計':'上市三大法人合計'})
        df=df.T
        df['上市外資及陸資合計']= df['上市外資及陸資_不含外資自營商']+ df['上市外資自營商']
        df=df.T.reset_index()
        df["date"] = pd.to_datetime(self.date)
        return df
    
    def crawl_otc(self):

        y = str(int(self.date.strftime("%Y")) - 1911)
        date_str = y + "/" + self.date.strftime("%m") + "/" + self.date.strftime("%d")
        r = requests.get('http://www.tpex.org.tw/web/stock/3insti/3insti_summary/3itrdsum_result.php?l=zh-tw&o=csv&se=EW&t=D&d='+date_str+'&s=0,asc')
        try:
            df = pd.read_csv(StringIO(r.text), header=1).dropna(how='all', axis=1).dropna(how='any')
        except pd.errors.ParserError:
            return None  
        df = df.astype(str).apply(lambda s: s.str.replace(',','').str.replace('',''))
        df.iloc[:,1:]=df.iloc[:,1:].apply(lambda s: pd.to_numeric(s, errors='coerce')).dropna(how='all', axis=1)
        df = df.rename(
                       columns={'單位名稱':'stock_id','買進金額(元)':'buy_price',
                                '賣出金額(元)':'sell_price','買賣超(元)':'net'})
        df["date"] = pd.to_datetime(self.date)
        df = df.set_index(['stock_id'])
        df = df.rename(index={'　外資及陸資(不含自營商)':'  上櫃外資及陸資＿不含自營商','　外資自營商':'上櫃外資自營商',
                              '　自營商(自行買賣)':'上櫃自營商_自行買賣','　自營商(避險)':'自營商_避險',
                              '三大法人合計*':'上櫃三大法人合計','外資及陸資合計':'上櫃外資及陸資合計',
                              '投信':'上櫃投信','自營商合計':'上櫃自營商合計'
                             
                             })
        df=df.reset_index()        

        return df

    def crawl_main(self):
        try:
            df = pd.concat([self.crawl_sii(),self.crawl_otc()],sort=False)
        except ValueError:
            return None
        return df
    

import datetime
df=CrawlStockTiiMarketReportTW(datetime.datetime(2020,4,29)).crawl_main()
df

Unnamed: 0,stock_id,buy_price,sell_price,net,date
0,上市自營商_自行買賣,1822489340,2738197405,-915708065,2020-04-29
1,上市自營商_避險,4766342425,5630686084,-864343659,2020-04-29
2,上市投信,2542950420,1442640872,1100309548,2020-04-29
3,上市外資及陸資_不含外資自營商,58331207463,40304383994,18026823469,2020-04-29
4,上市外資自營商,3859320,3390670,468650,2020-04-29
5,上市三大法人合計,67462989648,50115908355,17347081293,2020-04-29
6,上市外資及陸資合計,58335066783,40307774664,18027292119,2020-04-29
0,上櫃外資及陸資＿不含自營商,4425807269,4290301968,135505301,2020-04-29
1,上櫃外資自營商,0,0,0,2020-04-29
2,上櫃自營商_自行買賣,452083260,279586370,172496890,2020-04-29


In [4]:
df=CrawlStockTiiMarketReportTW(datetime.datetime(2020,4,29)).crawl_main()
add_to_sql(StockTiiMarketReportTW,df)

create  Stock_id:上市自營商_自行買賣
create  Stock_id:上市自營商_避險
create  Stock_id:上市投信
create  Stock_id:上市外資及陸資_不含外資自營商
create  Stock_id:上市外資自營商
create  Stock_id:上市三大法人合計
create  Stock_id:上市外資及陸資合計
create  Stock_id:  上櫃外資及陸資＿不含自營商
create  Stock_id:上櫃外資自營商
create  Stock_id:上櫃自營商_自行買賣
create  Stock_id:自營商_避險
create  Stock_id:上櫃三大法人合計
create  Stock_id:上櫃外資及陸資合計
create  Stock_id:上櫃投信
create  Stock_id:上櫃自營商合計
Finish <class 'crawlers.models.StockTiiMarketReportTW'>date:2020-04-29T00:00:00.000000000 bulk_create:15
Finish <class 'crawlers.models.StockTiiMarketReportTW'>date:2020-04-29T00:00:00.000000000 bulk_update:0


  result = self._query(query)
