In [None]:
import sys,os
sys.path.append("..")
import django
django.setup()
import pandas as pd
from io import StringIO
import requests
import datetime
from crawlers.models import *
from crawlers.finlab.import_tools import *

In [None]:
class CrawlStockTiiTW:
    """Crawl per-stock institutional net buy/sell figures for one date.

    Three CSV endpoints are queried, one per entry in ``sub_market``:
    ``crawl_sii`` (tse.com.tw), ``crawl_otc`` and ``crawl_rotc``
    (both tpex.org.tw).  Each returns a DataFrame whose source column
    headers have been renamed to short English identifiers, or ``None``
    when the downloaded text cannot be parsed as CSV.

    Args:
        date: Object providing ``strftime`` and ``year`` (e.g. a
            ``datetime.datetime``) identifying the day to crawl.
    """

    # Seconds handed to ``requests.get(timeout=...)`` so that a stalled
    # server raises instead of blocking the crawler forever.
    REQUEST_TIMEOUT = 30

    # Source header -> output column name, listed (TWSE) stocks.
    _SII_COLUMNS = {
        '證券代號': 'stock_id', '證券名稱': 'stock_name',
        '外陸資買進股數(不含外資自營商)': 'fm_buy',
        '外陸資賣出股數(不含外資自營商)': 'fm_sell',
        '外陸資買賣超股數(不含外資自營商)': 'fm_net',
        '外資自營商買進股數': 'fd_buy',
        '外資自營商賣出股數': 'fd_sell',
        '外資自營商買賣超股數': 'fd_net',
        '投信買進股數': 'itc_buy', '投信賣出股數': 'itc_sell',
        '投信買賣超股數': 'itc_net', '自營商買賣超股數': 'dealer_net',
        '自營商買進股數(自行買賣)': 'dealer_ppt_buy',
        '自營商賣出股數(自行買賣)': 'dealer_ppt_sell',
        '自營商買賣超股數(自行買賣)': 'dealer_ppt_net',
        '自營商買進股數(避險)': 'dealer_hedge_buy',
        '自營商賣出股數(避險)': 'dealer_hedge_sell',
        '自營商買賣超股數(避險)': 'dealer_hedge_net',
        '三大法人買賣超股數': 'tii_net',
    }

    # Source header -> output column name, OTC (TPEx) stocks.
    _OTC_COLUMNS = {
        '代號': 'stock_id', '名稱': 'stock_name',
        '外資及陸資(不含外資自營商)-買進股數': 'fm_buy',
        '外資及陸資(不含外資自營商)-賣出股數': 'fm_sell',
        '外資及陸資(不含外資自營商)-買賣超股數': 'fm_net',
        '外資自營商-買進股數': 'fd_buy',
        '外資自營商-賣出股數': 'fd_sell',
        '外資自營商-買賣超股數': 'fd_net',
        '投信-買進股數': 'itc_buy', '投信-賣出股數': 'itc_sell',
        '投信-買賣超股數': 'itc_net', '自營商-買賣超股數': 'dealer_net',
        '自營商(自行買賣)-買進股數': 'dealer_ppt_buy',
        '自營商(自行買賣)-賣出股數': 'dealer_ppt_sell',
        '自營商(自行買賣)-買賣超股數': 'dealer_ppt_net',
        '自營商(避險)-買進股數': 'dealer_hedge_buy',
        '自營商(避險)-賣出股數': 'dealer_hedge_sell',
        '自營商(避險)-買賣超股數': 'dealer_hedge_net',
        '三大法人買賣超股數合計': 'tii_net',
    }

    # OTC aggregate columns discarded once ``ft_net`` has been derived.
    _OTC_DROPPED = ['外資及陸資-買進股數', '外資及陸資-賣出股數',
                    '外資及陸資-買賣超股數', '自營商-買進股數',
                    '自營商-賣出股數']

    # Source header -> output column name, emerging-market stocks.
    _ROTC_COLUMNS = {
        '證券代號': 'stock_id', '證券名稱': 'stock_name',
        '外資(股數)': 'ft_net', '投信(股數)': 'itc_net',
        '自營商(股數)': 'dealer_net', '合計買賣超(股數)': 'tii_net',
    }

    def __init__(self, date):
        self.date = date
        self.date_str = date.strftime("%Y%m%d")
        self.target_name = "台股三大法人個股買賣超資訊"
        self.sub_market = ["sii", "otc", "rotc"]

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------
    def _download_frame(self, url, header):
        """GET *url* and parse the body as CSV; ``None`` if unparseable."""
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        return self._parse_csv(response.text, header)

    @staticmethod
    def _parse_csv(text, header):
        """Parse CSV *text*, dropping all-empty columns and incomplete rows.

        Both ``EmptyDataError`` and ``ParserError`` yield ``None`` so that
        every market reports an unparseable response the same way.
        """
        try:
            frame = pd.read_csv(StringIO(text), header=header)
        except (pd.errors.EmptyDataError, pd.errors.ParserError):
            return None
        return frame.dropna(how='all', axis=1).dropna(how='any')

    @staticmethod
    def _strip_thousands_separators(frame):
        """Return *frame* as strings with every ``,`` removed."""
        return frame.astype(str).apply(lambda col: col.str.replace(',', ''))

    @staticmethod
    def _coerce_numeric_columns(frame):
        """Convert every column from the third onward to numbers, in place.

        Unparseable cells become NaN (``errors='coerce'``).
        NOTE(review): the trailing ``dropna`` on the right-hand side is
        kept from the original code; if it ever removed a column the two
        sides would no longer have the same shape -- confirm intent.
        """
        frame.iloc[:, 2:] = frame.iloc[:, 2:].apply(
            lambda col: pd.to_numeric(col, errors='coerce')
        ).dropna(how='all', axis=1)
        return frame

    @staticmethod
    def _truncate_at_space(text):
        """Return *text* up to (not including) its first space, if any."""
        return text.partition(' ')[0]

    def _roc_date_str(self):
        """Return the date as ``<year - 1911>/MM/DD`` for the OTC query."""
        return str(self.date.year - 1911) + self.date.strftime("/%m/%d")

    def _crawl_date(self):
        """Return ``self.date`` as a plain ``datetime.date``."""
        return pd.to_datetime(self.date).date()

    # ------------------------------------------------------------------
    # Per-market crawlers
    # ------------------------------------------------------------------
    def crawl_sii(self):
        """Fetch listed-stock data; return a DataFrame or ``None``."""
        df = self._download_frame(
            'http://www.tse.com.tw/fund/T86?response=csv&date='
            + self.date_str + '&selectType=ALLBUT0999',
            header=1,
        )
        if df is None:
            return None

        df = self._strip_thousands_separators(df)
        # Remove the ="..." wrapper characters from the stock code.
        df['證券代號'] = df['證券代號'].str.replace('=', '').str.replace('"', '')
        df = self._coerce_numeric_columns(df)
        df = df.rename(columns=self._SII_COLUMNS)
        df['ft_net'] = df['fm_net'] + df['fd_net']
        df["date"] = self._crawl_date()
        return df

    def crawl_otc(self):
        """Fetch OTC-stock data; return a DataFrame or ``None``."""
        df = self._download_frame(
            'http://www.tpex.org.tw/web/stock/3insti/daily_trade/'
            '3itrade_hedge_result.php?l=zh-tw&o=csv&se=EW&t=D&d='
            + self._roc_date_str() + '&s=0,asc',
            header=1,
        )
        if df is None:
            return None

        df = self._strip_thousands_separators(df)
        df['代號'] = df['代號'].str.replace('=', '').str.replace('"', '')
        df = self._coerce_numeric_columns(df)
        df = df.rename(columns=self._OTC_COLUMNS)
        df['ft_net'] = df['fm_net'] + df['fd_net']
        df = df.drop(columns=self._OTC_DROPPED)
        df["date"] = self._crawl_date()
        return df

    def crawl_rotc(self):
        """Fetch emerging-stock data; return a DataFrame or ``None``.

        ``stock_id`` and ``stock_name`` are cut at their first space.  The
        previous guard tested for the literal text ``" "`` (quote, space,
        quote), so the truncation was effectively never applied.
        """
        df = self._download_frame(
            'https://www.tpex.org.tw/web/emergingstock/historical/daily/'
            'EMDaily_dl.php?l=zh-tw&f=EMdss006.' + self.date_str + '-C.csv',
            header=3,
        )
        if df is None:
            return None

        df = df.drop(columns=['HEADER'])
        df = self._strip_thousands_separators(df)
        # Only the first four characters of the code are kept.
        df['證券代號'] = df['證券代號'].apply(lambda code: code[:4])
        df = self._coerce_numeric_columns(df)
        df = df.rename(columns=self._ROTC_COLUMNS)
        df["date"] = self._crawl_date()
        df['stock_id'] = df['stock_id'].apply(self._truncate_at_space)
        df['stock_name'] = df['stock_name'].apply(self._truncate_at_space)
        return df

    def crawl_main(self):
        """Concatenate the three market frames into one DataFrame.

        Returns ``None`` when ``pd.concat`` raises ``ValueError``
        (presumably when every market returned ``None``).
        """
        try:
            return pd.concat(
                [self.crawl_sii(), self.crawl_otc(), self.crawl_rotc()],
                sort=False,
            )
        except ValueError:
            return None
    

import datetime

# Example run: crawl every market for 2020-04-29.  The bare ``df`` on the
# last line is there so an interactive/notebook session displays the result.
df = CrawlStockTiiTW(
    datetime.datetime(year=2020, month=4, day=29)
).crawl_main()
df