In [None]:
import sys,os
sys.path.append("..")
import django
django.setup()
import pandas as pd
from io import StringIO
import requests
import datetime
from crawlers.models import *
from crawlers.finlab.import_tools import *

In [2]:
class CrawlStockTiiTW:
    """Download per-stock institutional-investor trading data for one date.

    One CSV is fetched for each of the three sources named in
    ``sub_market`` ("sii", "otc", "rotc"). Each crawler renames the Chinese
    column headers to a shared English schema and adds a ``date`` column;
    :meth:`crawl_main` stacks whatever the individual crawlers returned.

    Args:
        date: Day to download. It must provide ``strftime`` and be accepted
            by ``pandas.to_datetime`` (the notebook passes a
            ``datetime.datetime``).
    """

    # Seconds handed to ``requests.get`` as ``timeout`` so an unresponsive
    # server raises instead of blocking the crawl forever.
    request_timeout = 30

    def __init__(self, date):
        self.date = date
        self.date_str = date.strftime("%Y%m%d")
        self.target_name = "台股三大法人個股買賣超資訊"
        self.sub_market = ["sii", "otc", "rotc"]

    def _download(self, url):
        """Return the body of ``url`` as text (network errors propagate)."""
        return requests.get(url, timeout=self.request_timeout).text

    @staticmethod
    def _read_table(text, header):
        """Parse CSV ``text`` whose column names sit on row ``header``.

        Returns:
            The parsed frame with all-empty columns and incomplete rows
            removed, or ``None`` when pandas finds nothing to parse or
            cannot tokenize the payload.
        """
        try:
            table = pd.read_csv(StringIO(text), header=header)
        except (pd.errors.EmptyDataError, pd.errors.ParserError):
            # Every crawler treats both failure modes the same way: "no
            # usable data for this market on this date".
            return None
        return table.dropna(how='all', axis=1).dropna(how='any')

    @staticmethod
    def _strip_commas(df):
        """Return ``df`` as strings with thousands separators removed."""
        return df.astype(str).apply(lambda s: s.str.replace(',', ''))

    @staticmethod
    def _coerce_numeric(df):
        """Convert every column after the first two to numbers, in place.

        Values that cannot be parsed become NaN (``errors='coerce'``).
        """
        df.iloc[:, 2:] = df.iloc[:, 2:].apply(
            lambda s: pd.to_numeric(s, errors='coerce')).dropna(how='all', axis=1)
        return df

    def crawl_sii(self):
        """Fetch the T86 table from www.tse.com.tw for ``self.date``.

        Returns:
            A DataFrame using the shared English column names plus
            ``ft_net`` (``fm_net + fd_net``) and ``date``, or ``None`` when
            the response contains no parseable table.
        """
        url = ('http://www.tse.com.tw/fund/T86?response=csv&date='
               + self.date_str + '&selectType=ALLBUT0999')
        df = self._read_table(self._download(url), header=1)
        if df is None:
            return None

        df = self._strip_commas(df)
        # Stock ids arrive wrapped as ="2330"; keep only the id itself.
        df['證券代號'] = df['證券代號'].str.replace('=', '').str.replace('"', '')
        df = self._coerce_numeric(df)
        df = df.rename(columns={
            '證券代號': 'stock_id', '證券名稱': 'stock_name',
            '外陸資買進股數(不含外資自營商)': 'fm_buy', '外陸資賣出股數(不含外資自營商)': 'fm_sell',
            '外陸資買賣超股數(不含外資自營商)': 'fm_net', '外資自營商買進股數': 'fd_buy',
            '外資自營商賣出股數': 'fd_sell', '外資自營商買賣超股數': 'fd_net',
            '投信買進股數': 'itc_buy', '投信賣出股數': 'itc_sell',
            '投信買賣超股數': 'itc_net', '自營商買賣超股數': 'dealer_net',
            '自營商買進股數(自行買賣)': 'dealer_ppt_buy', '自營商賣出股數(自行買賣)': 'dealer_ppt_sell',
            '自營商買賣超股數(自行買賣)': 'dealer_ppt_net', '自營商買進股數(避險)': 'dealer_hedge_buy',
            '自營商賣出股數(避險)': 'dealer_hedge_sell', '自營商買賣超股數(避險)': 'dealer_hedge_net',
            '三大法人買賣超股數': 'tii_net',
        })
        df['ft_net'] = df['fm_net'] + df['fd_net']
        df["date"] = pd.to_datetime(self.date)
        return df

    def crawl_otc(self):
        """Fetch the 3itrade_hedge_result table from www.tpex.org.tw.

        Returns:
            A DataFrame in the same schema as :meth:`crawl_sii`, or ``None``
            when the response contains no parseable table.
        """
        # The query date is written as (year - 1911)/MM/DD.
        roc_year = int(self.date.strftime("%Y")) - 1911
        date_str = str(roc_year) + "/" + self.date.strftime("%m/%d")
        url = ('http://www.tpex.org.tw/web/stock/3insti/daily_trade/'
               '3itrade_hedge_result.php?l=zh-tw&o=csv&se=EW&t=D&d='
               + date_str + '&s=0,asc')
        df = self._read_table(self._download(url), header=1)
        if df is None:
            return None

        df = self._strip_commas(df)
        df['代號'] = df['代號'].str.replace('=', '').str.replace('"', '')
        df = self._coerce_numeric(df)
        df = df.rename(columns={
            '代號': 'stock_id', '名稱': 'stock_name',
            '外資及陸資(不含外資自營商)-買進股數': 'fm_buy', '外資及陸資(不含外資自營商)-賣出股數': 'fm_sell',
            '外資及陸資(不含外資自營商)-買賣超股數': 'fm_net', '外資自營商-買進股數': 'fd_buy',
            '外資自營商-賣出股數': 'fd_sell', '外資自營商-買賣超股數': 'fd_net',
            '投信-買進股數': 'itc_buy', '投信-賣出股數': 'itc_sell',
            '投信-買賣超股數': 'itc_net', '自營商-買賣超股數': 'dealer_net',
            '自營商(自行買賣)-買進股數': 'dealer_ppt_buy', '自營商(自行買賣)-賣出股數': 'dealer_ppt_sell',
            '自營商(自行買賣)-買賣超股數': 'dealer_ppt_net', '自營商(避險)-買進股數': 'dealer_hedge_buy',
            '自營商(避險)-賣出股數': 'dealer_hedge_sell', '自營商(避險)-買賣超股數': 'dealer_hedge_net',
            '三大法人買賣超股數合計': 'tii_net',
        })
        df['ft_net'] = df['fm_net'] + df['fd_net']
        # These source columns have no counterpart in the shared schema.
        df = df.drop(columns=['外資及陸資-買進股數', '外資及陸資-賣出股數', '外資及陸資-買賣超股數',
                              '自營商-買進股數', '自營商-賣出股數'])
        df["date"] = pd.to_datetime(self.date)
        return df

    def crawl_rotc(self):
        """Fetch the EMdss006 daily file from www.tpex.org.tw/web/emergingstock.

        This source only provides net figures, so the result carries
        ``ft_net``, ``itc_net``, ``dealer_net`` and ``tii_net`` but none of
        the buy/sell columns.

        Returns:
            The renamed DataFrame with a ``date`` column, or ``None`` when
            the response contains no parseable table.
        """
        url = ('https://www.tpex.org.tw/web/emergingstock/historical/daily/'
               'EMDaily_dl.php?l=zh-tw&f=EMdss006.' + self.date_str + '-C.csv')
        df = self._read_table(self._download(url), header=3)
        if df is None:
            return None

        df = df.drop(columns=['HEADER'])
        df = self._strip_commas(df)
        # Only the first four characters of the id field are kept.
        df['證券代號'] = df['證券代號'].apply(lambda s: s[:4])
        df = self._coerce_numeric(df)
        df = df.rename(columns={
            '證券代號': 'stock_id', '證券名稱': 'stock_name',
            '外資(股數)': 'ft_net', '投信(股數)': 'itc_net',
            '自營商(股數)': 'dealer_net', '合計買賣超(股數)': 'tii_net',
        })
        df["date"] = pd.to_datetime(self.date)
        # NOTE(review): the guard looks for the three characters quote-space-
        # quote, not a plain space, so values are cut at their first space
        # only when that literal sequence occurs. It may be a typo for
        # `' ' in s`; left unchanged because fixing it would alter the stored
        # ids/names -- confirm against real data before changing.
        df['stock_id'] = df['stock_id'].apply(lambda s: s[:s.index(' ')] if '" "' in s else s)
        df['stock_name'] = df['stock_name'].apply(lambda s: s[:s.index(' ')] if '" "' in s else s)
        return df

    def crawl_main(self):
        """Crawl all three sources and stack the results.

        Returns:
            The row-wise concatenation of every crawler result that is not
            ``None`` (original row labels are kept), or ``None`` when no
            source produced data or a ``ValueError`` was raised on the way.
        """
        try:
            frames = [self.crawl_sii(), self.crawl_otc(), self.crawl_rotc()]
            frames = [frame for frame in frames if frame is not None]
            if not frames:
                return None
            return pd.concat(frames, sort=False)
        except ValueError:
            # Best-effort guard kept from the original implementation: any
            # ValueError while crawling or concatenating yields None.
            return None
    

import datetime

# Run every crawler for 2020-04-29 and keep the combined frame in `df`;
# the bare name on the last line makes the notebook display it.
df = CrawlStockTiiTW(
    datetime.datetime(year=2020, month=4, day=29)
).crawl_main()
df

Unnamed: 0,stock_id,stock_name,fm_buy,fm_sell,fm_net,fd_buy,fd_sell,fd_net,itc_buy,itc_sell,...,dealer_net,dealer_ppt_buy,dealer_ppt_sell,dealer_ppt_net,dealer_hedge_buy,dealer_hedge_sell,dealer_hedge_net,tii_net,ft_net,date
0,2303,聯電,6.21595e+07,1.34866e+07,4.86728e+07,0,0,0,1.821e+06,3.361e+06,...,-4.557e+06,3000,1.698e+06,-1.695e+06,217000,3.079e+06,-2.862e+06,4.25758e+07,4.86728e+07,2020-04-29
1,2888,新光金,2.60063e+07,7.26003e+06,1.87463e+07,0,0,0,80000,0,...,250000,192000,86000,106000,496000,352000,144000,1.90763e+07,1.87463e+07,2020-04-29
2,2454,聯發科,2.15998e+07,4.62757e+06,1.69722e+07,0,0,0,310000,0,...,-341000,357000,599000,-242000,47000,146000,-99000,1.69412e+07,1.69722e+07,2020-04-29
3,2330,台積電,3.38358e+07,1.83106e+07,1.55252e+07,0,0,0,189000,20000,...,-1.392e+06,73000,1.154e+06,-1.081e+06,344000,655000,-311000,1.43022e+07,1.55252e+07,2020-04-29
4,3481,群創,3.1403e+07,1.74071e+07,1.39959e+07,0,0,0,0,0,...,-652000,553000,603000,-50000,22000,624000,-602000,1.33439e+07,1.39959e+07,2020-04-29
5,2883,開發金,1.4206e+07,4.2456e+06,9.9604e+06,0,0,0,1.645e+06,0,...,-320000,7000,708000,-701000,659000,278000,381000,1.12854e+07,9.9604e+06,2020-04-29
6,2884,玉山金,1.29602e+07,4.68761e+06,8.27258e+06,0,0,0,75000,0,...,794000,1.136e+06,393000,743000,51000,0,51000,9.14158e+06,8.27258e+06,2020-04-29
7,0056,元大高股息,35000,12000,23000,0,0,0,49000,0,...,8.514e+06,130000,0,130000,9.278e+06,894000,8.384e+06,8.586e+06,23000,2020-04-29
8,2882,國泰金,1.4607e+07,5.99154e+06,8.61546e+06,0,0,0,56000,0,...,-504000,20000,408000,-388000,75000,191000,-116000,8.16746e+06,8.61546e+06,2020-04-29
9,2881,富邦金,1.0222e+07,2.146e+06,8.076e+06,0,0,0,50000,0,...,-173645,58000,309000,-251000,88000,10645,77355,7.95236e+06,8.076e+06,2020-04-29


In [3]:

# Pass the crawled frame and the StockTiiTW model to the project's add_to_sql
# helper (both names presumably come from the star-imports at the top of the
# notebook). The captured output reports bulk_create / bulk_update counts, so
# this presumably writes the rows to the database -- verify in import_tools.
# NOTE(review): crawl_main() may return None; confirm add_to_sql handles that.
add_to_sql(StockTiiTW,df)

  result = self._query(query)


create  Stock_id:2303
create  Stock_id:2888
create  Stock_id:2454
create  Stock_id:2330
create  Stock_id:3481
create  Stock_id:2883
create  Stock_id:2884
create  Stock_id:0056
create  Stock_id:2882
create  Stock_id:2881
create  Stock_id:2603
create  Stock_id:2834
create  Stock_id:2002
create  Stock_id:2324
create  Stock_id:5880
create  Stock_id:2317
create  Stock_id:2618
create  Stock_id:2892
create  Stock_id:2344
create  Stock_id:0050
create  Stock_id:3037
create  Stock_id:2368
create  Stock_id:2890
create  Stock_id:9904
create  Stock_id:2823
create  Stock_id:3231
create  Stock_id:1303
create  Stock_id:1101
create  Stock_id:2313
create  Stock_id:2337
create  Stock_id:00852L
create  Stock_id:1314
create  Stock_id:4958
create  Stock_id:1216
create  Stock_id:2345
create  Stock_id:2353
create  Stock_id:1301
create  Stock_id:1326
create  Stock_id:1402
create  Stock_id:3189
create  Stock_id:00631L
create  Stock_id:2363
create  Stock_id:2867
create  Stock_id:3006
create  Stock_id:6505
create

create  Stock_id:6671
create  Stock_id:3591
create  Stock_id:00683L
create  Stock_id:2722
create  Stock_id:3383
create  Stock_id:2901
create  Stock_id:6591
create  Stock_id:1587
create  Stock_id:4999
create  Stock_id:6243
create  Stock_id:1539
create  Stock_id:6442
create  Stock_id:3432
create  Stock_id:8443
create  Stock_id:3593
create  Stock_id:6405
create  Stock_id:9912
create  Stock_id:4557
create  Stock_id:2468
create  Stock_id:1468
create  Stock_id:6573
create  Stock_id:2904
create  Stock_id:3054
create  Stock_id:6655
create  Stock_id:3229
create  Stock_id:2936
create  Stock_id:4148
create  Stock_id:6698
create  Stock_id:1589
create  Stock_id:9902
create  Stock_id:8103
create  Stock_id:2024
create  Stock_id:8442
create  Stock_id:1735
create  Stock_id:4755
create  Stock_id:2415
create  Stock_id:1410
create  Stock_id:6431
create  Stock_id:2881A
create  Stock_id:00742
create  Stock_id:2436
create  Stock_id:1541
create  Stock_id:1776
create  Stock_id:1312A
create  Stock_id:00688L
cre

create  Stock_id:1815
create  Stock_id:2035
create  Stock_id:2061
create  Stock_id:2065
create  Stock_id:2067
create  Stock_id:2070
create  Stock_id:2221
create  Stock_id:2596
create  Stock_id:2640
create  Stock_id:2641
create  Stock_id:2643
create  Stock_id:2726
create  Stock_id:2729
create  Stock_id:2732
create  Stock_id:2736
create  Stock_id:2743
create  Stock_id:2928
create  Stock_id:3067
create  Stock_id:3071
create  Stock_id:3078
create  Stock_id:3081
create  Stock_id:3083
create  Stock_id:3088
create  Stock_id:3092
create  Stock_id:3105
create  Stock_id:3118
create  Stock_id:3122
create  Stock_id:3131
create  Stock_id:3141
create  Stock_id:3144
create  Stock_id:3147
create  Stock_id:3152
create  Stock_id:3163
create  Stock_id:3169
create  Stock_id:3171
create  Stock_id:3176
create  Stock_id:3178
create  Stock_id:3191
create  Stock_id:3202
create  Stock_id:3205
create  Stock_id:3206
create  Stock_id:3211
create  Stock_id:3213
create  Stock_id:3217
create  Stock_id:3218
create  St

create  Stock_id:9949
create  Stock_id:9951
create  Stock_id:020001
create  Stock_id:020003
create  Stock_id:020010
create  Stock_id:020013
create  Stock_id:020014
create  Stock_id:1563
create  Stock_id:2246
create  Stock_id:3633
create  Stock_id:4151
create  Stock_id:4195
create  Stock_id:4431
create  Stock_id:4565
create  Stock_id:5222
create  Stock_id:6595
create  Stock_id:6598
create  Stock_id:6634
create  Stock_id:6645
create  Stock_id:6665
create  Stock_id:6692
create  Stock_id:6732
create  Stock_id:6736
create  Stock_id:8119
create  Stock_id:8179
create  Stock_id:8438
Finish <class 'crawlers.models.StockTiiTW'>date:2020-04-29T00:00:00.000000000 bulk_create:1716
Finish <class 'crawlers.models.StockTiiTW'>date:2020-04-29T00:00:00.000000000 bulk_update:0
