In [1]:
import sys,os
sys.path.append("..")
import django
django.setup()
from io import StringIO
import requests
import datetime
import pandas as pd
import swifter
import time

# from crawlers.models import *
# from crawlers.finlab.import_tools import *

In [15]:
class CrawlStockTiiTW:
    """Crawl per-stock institutional-investor net buy/sell tables for one trading day.

    Three CSV endpoints are queried (``sii``, ``otc`` and ``rotc``, see
    ``sub_market``). Each ``crawl_*`` method downloads one CSV, renames the
    source columns to a shared English schema (``stock_id``, ``stock_name``,
    ``ft_net``, ``itc_net``, ``dealer_net``, ``tii_net``, ...) and adds a
    ``date`` column. ``crawl_main`` concatenates whatever the three crawlers
    returned.

    Parameters
    ----------
    date : datetime.datetime
        Day to crawl. It is compared against ``datetime.datetime`` cut-off
        values, so a plain ``datetime.date`` is not supported.
    """

    # Seconds to wait for each HTTP response. Without a timeout, requests
    # waits for as long as the server keeps the connection open.
    REQUEST_TIMEOUT = 30

    # Parser outcomes that are treated as "no usable table for this day".
    _NO_DATA_ERRORS = (pd.errors.EmptyDataError, pd.errors.ParserError)

    # Dates strictly after these cut-offs use the column layout that reports
    # foreign dealers (fd_*) separately.
    _SII_NEW_FORMAT_AFTER = datetime.datetime(2017, 12, 17)
    _OTC_NEW_FORMAT_AFTER = datetime.datetime(2018, 1, 14)

    _SII_COLUMNS_NEW = {
        '證券代號': 'stock_id', '證券名稱': 'stock_name',
        '外陸資買進股數(不含外資自營商)': 'fm_buy', '外陸資賣出股數(不含外資自營商)': 'fm_sell',
        '外陸資買賣超股數(不含外資自營商)': 'fm_net', '外資自營商買進股數': 'fd_buy',
        '外資自營商賣出股數': 'fd_sell', '外資自營商買賣超股數': 'fd_net',
        '投信買進股數': 'itc_buy', '投信賣出股數': 'itc_sell',
        '投信買賣超股數': 'itc_net', '自營商買賣超股數': 'dealer_net',
        '自營商買進股數(自行買賣)': 'dealer_ppt_buy', '自營商賣出股數(自行買賣)': 'dealer_ppt_sell',
        '自營商買賣超股數(自行買賣)': 'dealer_ppt_net', '自營商買進股數(避險)': 'dealer_hedge_buy',
        '自營商賣出股數(避險)': 'dealer_hedge_sell', '自營商買賣超股數(避險)': 'dealer_hedge_net',
        '三大法人買賣超股數': 'tii_net',
    }

    _SII_COLUMNS_OLD = {
        '證券代號': 'stock_id', '證券名稱': 'stock_name',
        '外資買進股數': 'fm_buy', '外資賣出股數': 'fm_sell',
        '外資買賣超股數': 'fm_net',
        '投信買進股數': 'itc_buy', '投信賣出股數': 'itc_sell',
        '投信買賣超股數': 'itc_net', '自營商買賣超股數': 'dealer_net',
        '自營商買進股數(自行買賣)': 'dealer_ppt_buy', '自營商賣出股數(自行買賣)': 'dealer_ppt_sell',
        '自營商買賣超股數(自行買賣)': 'dealer_ppt_net', '自營商買進股數(避險)': 'dealer_hedge_buy',
        '自營商賣出股數(避險)': 'dealer_hedge_sell', '自營商買賣超股數(避險)': 'dealer_hedge_net',
        '三大法人買賣超股數': 'tii_net',
    }

    _OTC_COLUMNS_NEW = {
        '代號': 'stock_id', '名稱': 'stock_name',
        '外資及陸資(不含外資自營商)-買進股數': 'fm_buy', '外資及陸資(不含外資自營商)-賣出股數': 'fm_sell',
        '外資及陸資(不含外資自營商)-買賣超股數': 'fm_net', '外資自營商-買進股數': 'fd_buy',
        '外資自營商-賣出股數': 'fd_sell', '外資自營商-買賣超股數': 'fd_net',
        '投信-買進股數': 'itc_buy', '投信-賣出股數': 'itc_sell',
        '投信-買賣超股數': 'itc_net', '自營商-買賣超股數': 'dealer_net',
        '自營商(自行買賣)-買進股數': 'dealer_ppt_buy', '自營商(自行買賣)-賣出股數': 'dealer_ppt_sell',
        '自營商(自行買賣)-買賣超股數': 'dealer_ppt_net', '自營商(避險)-買進股數': 'dealer_hedge_buy',
        '自營商(避險)-賣出股數': 'dealer_hedge_sell', '自營商(避險)-買賣超股數': 'dealer_hedge_net',
        '三大法人買賣超股數合計': 'tii_net',
    }

    # Aggregate columns of the new OTC layout that have no counterpart in the
    # shared schema and are discarded after renaming.
    _OTC_UNUSED_COLUMNS_NEW = ['外資及陸資-買進股數', '外資及陸資-賣出股數', '外資及陸資-買賣超股數',
                               '自營商-買進股數', '自營商-賣出股數']

    _OTC_COLUMNS_OLD = {
        '代號': 'stock_id', '名稱': 'stock_name',
        '外資及陸資買股數': 'fm_buy', '外資及陸資賣股數': 'fm_sell',
        '外資及陸資淨買股數': 'fm_net',
        '投信買進股數': 'itc_buy', '投信賣股數': 'itc_sell',
        '投信淨買股數': 'itc_net', '自營淨買股數': 'dealer_net',
        '自營商(自行買賣)買股數': 'dealer_ppt_buy', '自營商(自行買賣)賣股數': 'dealer_ppt_sell',
        '自營商(自行買賣)淨買股數': 'dealer_ppt_net', '自營商(避險)買股數': 'dealer_hedge_buy',
        '自營商(避險)賣股數': 'dealer_hedge_sell', '自營商(避險)淨買股數': 'dealer_hedge_net',
        '三大法人買賣超股數': 'tii_net',
    }

    _ROTC_COLUMNS_A = {
        '證券代號': 'stock_id', '證券名稱': 'stock_name',
        '外資(股數)': 'ft_net', '投信(股數)': 'itc_net',
        '自營商(股數)': 'dealer_net', '合計買賣超(股數)': 'tii_net',
    }

    _ROTC_COLUMNS_B = {
        '股票代號': 'stock_id', '名稱': 'stock_name',
        '外資': 'ft_net', '投信': 'itc_net',
        '自營商': 'dealer_net', '合計買賣超': 'tii_net',
    }

    def __init__(self, date):
        self.date = date
        self.date_str = date.strftime("%Y%m%d")
        self.target_name = "台股三大法人個股買賣超資訊"
        self.sub_market = ["sii", "otc", "rotc"]

    # ------------------------------------------------------------------ helpers

    def _read_table(self, url, header):
        """Download ``url`` and parse it as CSV whose header is on row ``header``.

        Returns the frame with all-empty columns and incomplete rows removed,
        or ``None`` when pandas cannot find a table in the response (empty
        body or malformed CSV).
        """
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        try:
            table = pd.read_csv(StringIO(response.text), header=header)
        except self._NO_DATA_ERRORS:
            return None
        return table.dropna(how='all', axis=1).dropna(how='any')

    @staticmethod
    def _strip_commas(df):
        """Return ``df`` cast to strings with thousands separators removed."""
        return df.astype(str).apply(lambda s: s.str.replace(',', '', regex=False))

    @staticmethod
    def _coerce_numeric(df):
        """Convert every column after the first two to numbers (bad values -> NaN).

        The column set is left untouched on purpose: dropping all-NaN columns
        from the right-hand side makes its width differ from the key, which
        pandas rejects with "Columns must be same length as key".
        """
        numeric_cols = df.columns[2:]
        df[numeric_cols] = df[numeric_cols].apply(lambda s: pd.to_numeric(s, errors='coerce'))
        return df

    def _trade_date(self):
        """Return the crawl day as a ``datetime.date`` for the ``date`` column."""
        return pd.to_datetime(self.date).date()

    # ----------------------------------------------------------------- crawlers

    def crawl_sii(self):
        """Crawl the ``sii`` table; return a DataFrame or ``None`` if no table was served."""
        df = self._read_table(
            'http://www.tse.com.tw/fund/T86?response=csv&date=' + self.date_str + '&selectType=ALLBUT0999',
            header=1)
        if df is None:
            return None
        df = self._strip_commas(df)
        # Stock ids may be wrapped as ="...."; strip that wrapper.
        df['證券代號'] = df['證券代號'].str.replace('=', '', regex=False).str.replace('"', '', regex=False)
        df = self._coerce_numeric(df)
        if self.date > self._SII_NEW_FORMAT_AFTER:
            df = df.rename(columns=self._SII_COLUMNS_NEW)
            # Total foreign net = foreign (excl. foreign dealers) + foreign dealers.
            df['ft_net'] = df['fm_net'] + df['fd_net']
        else:
            df = df.rename(columns=self._SII_COLUMNS_OLD)
            df['ft_net'] = df['fm_net']
        df["date"] = self._trade_date()
        return df

    def crawl_otc(self):
        """Crawl the ``otc`` table; return a DataFrame or ``None`` if no table was served."""
        # The endpoint takes the date as <Gregorian year - 1911>/MM/DD.
        roc_year = str(int(self.date.strftime("%Y")) - 1911)
        date_str = roc_year + "/" + self.date.strftime("%m") + "/" + self.date.strftime("%d")
        df = self._read_table(
            'http://www.tpex.org.tw/web/stock/3insti/daily_trade/3itrade_hedge_result.php?l=zh-tw&o=csv&se=EW&t=D&d='
            + date_str + '&s=0,asc',
            header=1)
        if df is None:
            return None
        df = self._strip_commas(df)
        df['代號'] = df['代號'].str.replace('=', '', regex=False).str.replace('"', '', regex=False)
        df = self._coerce_numeric(df)
        if self.date > self._OTC_NEW_FORMAT_AFTER:
            df = df.rename(columns=self._OTC_COLUMNS_NEW)
            df['ft_net'] = df['fm_net'] + df['fd_net']
            # errors='ignore': a missing helper column should not abort the crawl.
            df = df.drop(columns=self._OTC_UNUSED_COLUMNS_NEW, errors='ignore')
        else:
            df = df.rename(columns=self._OTC_COLUMNS_OLD)
            df['ft_net'] = df['fm_net']
        df["date"] = self._trade_date()
        return df

    def crawl_rotc(self):
        """Crawl the ``rotc`` table; return a DataFrame or ``None`` if no table was served."""
        df = self._read_table(
            'https://www.tpex.org.tw/web/emergingstock/historical/daily/EMDaily_dl.php?l=zh-tw&f=EMdss006.'
            + self.date_str + '-C.csv',
            header=3)
        if df is None:
            return None
        df = df.drop(columns=['HEADER'])
        df = self._strip_commas(df)
        # Two header layouts are handled; pick the one whose id column is present.
        if '證券代號' in df.columns:
            id_column, column_map = '證券代號', self._ROTC_COLUMNS_A
        else:
            id_column, column_map = '股票代號', self._ROTC_COLUMNS_B
        # Only the first four characters of the id field are kept.
        df[id_column] = df[id_column].apply(lambda s: s[:4])
        df = df.rename(columns=column_map)
        df = self._coerce_numeric(df)
        df["date"] = self._trade_date()
        # NOTE(review): the condition tests for the 3-character text '" "'
        # (quote, space, quote), not for a plain space. It looks like a typo
        # for `' ' in s`, but the intent cannot be confirmed from this file,
        # so the original behaviour is kept.
        df['stock_id'] = df['stock_id'].apply(lambda s: s[:s.index(' ')] if '" "' in s else s)
        df['stock_name'] = df['stock_name'].apply(lambda s: s[:s.index(' ')] if '" "' in s else s)
        return df

    def crawl_main(self):
        """Run all three crawlers and concatenate their results.

        Returns ``None`` when no crawler produced a table. Errors raised inside
        a crawler propagate to the caller instead of being reported as
        "no data".
        """
        frames = [frame
                  for frame in (self.crawl_sii(), self.crawl_otc(), self.crawl_rotc())
                  if frame is not None]
        if not frames:
            return None
        return pd.concat(frames, sort=False)
    

import datetime

# Example run: crawl every sub-market for 2017-12-18 and display the combined frame.
df = CrawlStockTiiTW(
    date=datetime.datetime(year=2017, month=12, day=18),
).crawl_main()
df

Unnamed: 0,stock_id,stock_name,fm_buy,fm_sell,fm_net,fd_buy,fd_sell,fd_net,itc_buy,itc_sell,...,dealer_net,dealer_ppt_buy,dealer_ppt_sell,dealer_ppt_net,dealer_hedge_buy,dealer_hedge_sell,dealer_hedge_net,tii_net,ft_net,date
0,00677U,富邦VIX,7138000.0,70000.0,7068000.0,0.0,0.0,0.0,0.0,0.0,...,6197000.0,0.0,0.0,0.0,6538000.0,341000.0,6197000.0,13265000,7068000.0,2017-12-18
1,2356,英業達,11357000.0,1193000.0,10164000.0,0.0,0.0,0.0,302000.0,0.0,...,1244000.0,325000.0,5000.0,320000.0,1007000.0,83000.0,924000.0,11710000,10164000.0,2017-12-18
2,2891,中信金,10081000.0,5232000.0,4849000.0,0.0,0.0,0.0,4422000.0,0.0,...,11000.0,75000.0,23000.0,52000.0,55000.0,96000.0,-41000.0,9282000,4849000.0,2017-12-18
3,2888,新光金,14463000.0,6025000.0,8438000.0,0.0,0.0,0.0,0.0,0.0,...,-87000.0,390000.0,261000.0,129000.0,382000.0,598000.0,-216000.0,8351000,8438000.0,2017-12-18
4,2498,宏達電,6894000.0,1535000.0,5359000.0,0.0,0.0,0.0,0.0,0.0,...,2359000.0,892000.0,297000.0,595000.0,1907000.0,143000.0,1764000.0,7718000,5359000.0,2017-12-18
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12,6575,心悅,,,,,,,,,...,,,,,,,,-38000,-38000.0,2017-12-18
13,6591,動力-KY,,,,,,,,,...,,,,,,,,-16000,-16000.0,2017-12-18
14,6617,共信-KY,,,,,,,,,...,,,,,,,,-80000,-80000.0,2017-12-18
15,6622,百聿數碼,,,,,,,,,...,,,,,,,,3000,3000.0,2017-12-18
