In [1]:
import sys,os
sys.path.append("..")
import django
django.setup()
import pandas as pd
from io import StringIO
import requests
import datetime
from crawlers.models import *
from crawlers.finlab.pioneers import *
from crawlers.finlab.import_tools import *

In [None]:
class CrawlBrokerInfoTW:
    """Crawl securities-broker information (head offices and branches) from TWSE.

    ``crawl_main`` is the entry point: it downloads the head-office table,
    downloads the branch table of every head office, stacks both and renames
    the Chinese column headers to the English field names used downstream.
    """

    # Seconds to wait for the TWSE server; without a timeout ``requests``
    # can block forever on a stalled connection.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        # Descriptive label and format tag of this crawler; their consumers
        # are not visible in this notebook.
        self.target_name = "台股券商資訊"
        self.format = "non_time_series"

    @staticmethod
    def headquarter_info():
        """Return the head-office table as a DataFrame.

        The first HTML table of the broker-audit page is used, a
        ``department`` column set to '總公司' is added and the '分公司'
        column is dropped.
        """
        r = requests.get('https://www.twse.com.tw/zh/brokerService/brokerServiceAudit',
                         timeout=CrawlBrokerInfoTW.REQUEST_TIMEOUT)
        html_df = pd.read_html(StringIO(r.text))
        df = pd.DataFrame(html_df[0])
        df['department'] = '總公司'
        df = df.drop(columns='分公司')
        return df

    @staticmethod
    def branch_info(broker_hq_id):
        """Return the branch table (fourth HTML table) of one head office.

        Args:
            broker_hq_id: Broker code of the head office. It is converted with
                ``str`` because ``pd.read_html`` may deliver all-digit codes as
                numbers, which cannot be concatenated into the URL directly.
        """
        url = ('https://www.twse.com.tw/brokerService/brokerServiceAudit?showType=list&stkNo='
               + str(broker_hq_id) + '&focus=6')
        r = requests.get(url, timeout=CrawlBrokerInfoTW.REQUEST_TIMEOUT)
        html_df = pd.read_html(StringIO(r.text))
        df = pd.DataFrame(html_df[3])
        return df

    def crawl_main(self):
        """Crawl head offices and all of their branches into one DataFrame.

        Returns:
            DataFrame with the head-office rows followed by the branch rows,
            columns renamed to ``stock_id``, ``broker_name``,
            ``date_of_establishment``, ``address`` and ``phone``, plus the
            ``department`` marker. Rows whose ``stock_id`` is the placeholder
            '查無資料' are removed.
        """
        broker_hq = self.headquarter_info()
        # Always hand the code over as text, whatever dtype pandas inferred.
        branch_frames = [self.branch_info(str(code)) for code in broker_hq['證券商代號'].values]
        if branch_frames:
            branch_data = pd.concat(branch_frames)
            branch_data['department'] = '分公司'
            df_all = pd.concat([broker_hq, branch_data])
        else:
            # pd.concat([]) raises ValueError; with no head office there is
            # simply nothing to append.
            df_all = broker_hq
        df_all = df_all.rename(columns={'證券商代號': 'stock_id', '證券商名稱': 'broker_name',
                                        '開業日': 'date_of_establishment', '地址': 'address',
                                        '電話': 'phone'
                                        })
        df_all = df_all[df_all['stock_id'] != '查無資料']
        return df_all
    
# Run the broker-information crawler and display the result.
# The class exposes `crawl_main`; it has no `broker_all_info` method, so the
# previous call raised AttributeError.
a=CrawlBrokerInfoTW()
df=a.crawl_main()
df
    
            
    

In [2]:




# Sample inputs for trying out the broker-trade crawler below.
stock_id = '2330'
start_time = datetime.datetime(year=2020, month=4, day=25)


def broker_trade(stock_id,start_time,end_time=None):
    """Download the per-broker buy/sell ranking of one stock from Fubon e-broker.

    Args:
        stock_id: Stock code as a string, e.g. '2330'.
        start_time: First day of the query window (anything with ``strftime``,
            e.g. ``datetime.datetime``).
        end_time: Last day of the query window. ``None`` (default) queries the
            single day ``start_time``. A datetime/date is formatted as
            ``YYYY-MM-DD``; a string is sent as given.

    Returns:
        ``None`` when the page holds fewer than 9 rows (treated as a holiday /
        no trading data). Otherwise a DataFrame with the buy-side brokers
        followed by the sell-side brokers and the columns ``broker_name``,
        ``buy_num``, ``sell_num``, ``net_bs`` (buy minus sell),
        ``transactions_pt``, ``stock_id`` ('<stock>-<raw broker name>') and
        ``date`` (the start day).
    """
    start_time = start_time.strftime("%Y-%m-%d")
    if end_time is None:
        end_time = start_time
    elif hasattr(end_time, "strftime"):
        end_time = end_time.strftime("%Y-%m-%d")
    # `e` carries the start day and `f` the end day; previously `f` was always
    # the start day, so a caller-supplied end_time was silently ignored.
    url = ('https://fubon-ebrokerdj.fbs.com.tw/z/zc/zco/zco.djhtm?a=' + stock_id
           + '&e=' + start_time + '&f=' + end_time)
    # A timeout keeps a stalled connection from blocking the notebook forever.
    r = requests.post(url, timeout=30)
    html_df = pd.read_html(StringIO(r.text))
    df = pd.DataFrame(html_df[2])
    #holiday
    if len(df)<9:
        return  None
    # Row 5 holds the column captions; the data rows follow and the last
    # three rows are footer lines.
    df.columns=df.iloc[5]
    df=df.iloc[6:-3]
    # The left five columns rank net buyers, the right five rank net sellers.
    buy_side=df.iloc[:,:5]
    buy_side=buy_side.rename(columns={'買超券商':'broker_name','買進':'buy_num',
                                      '賣出':'sell_num','買超':'net_bs',
                                      '佔成交比重':'transactions_pt'})
    sell_side=df.iloc[:,5:]
    sell_side=sell_side.rename(columns={'賣超券商':'broker_name','買進':'buy_num',
                                        '賣出':'sell_num','賣超':'net_bs',
                                        '佔成交比重':'transactions_pt'})

    df_all=pd.concat([buy_side,sell_side],sort=False).dropna()
    # Convert every column after the broker name to numbers. Assigning column
    # by column gives real numeric dtypes (an iloc block assignment may leave
    # them as object), and astype(str) keeps the cleanup working even if a
    # cell is not a string. Thousands separators are stripped in case the
    # page uses them.
    for col in list(df_all.columns[1:]):
        cleaned = (df_all[col].astype(str)
                   .str.replace('%', '', regex=False)
                   .str.replace(',', '', regex=False))
        df_all[col] = pd.to_numeric(cleaned, errors="coerce")
    # The sell side reports its net amount as a positive number; recompute so
    # that net sellers end up negative.
    df_all['net_bs']=df_all['buy_num']-df_all['sell_num']
    # NOTE(review): the key is built from the raw name, before '證券' is
    # stripped below, so some keys keep the suffix. Kept as is so the keys
    # already produced do not change - confirm whether this is intended.
    df_all['stock_id']=df_all['broker_name'].apply(lambda s:stock_id+'-'+s)
    df_all["date"] = pd.to_datetime(start_time)
    df_all['broker_name']=df_all['broker_name'].apply(lambda s:s.replace('證券',''))

    return df_all
    


# Fetch the single-day broker ranking of the sample stock and display it.
df = broker_trade(stock_id, datetime.datetime(2020, 4, 23))
df

5,broker_name,buy_num,sell_num,net_bs,transactions_pt,stock_id,date
6,港商麥格理,3275,568,2707,6.86,2330-港商麥格理,2020-04-23
7,摩根大通,5015,2417,2598,6.58,2330-摩根大通,2020-04-23
8,台灣摩根士丹利,2978,1071,1907,4.83,2330-台灣摩根士丹利,2020-04-23
9,新加坡商瑞銀,1818,485,1333,3.38,2330-新加坡商瑞銀,2020-04-23
10,瑞士信貸,1481,316,1165,2.95,2330-瑞士信貸,2020-04-23
11,臺銀,743,7,736,1.86,2330-臺銀,2020-04-23
12,大和國泰,589,50,539,1.37,2330-大和國泰,2020-04-23
13,永全-南崁,347,2,345,0.87,2330-永全-南崁,2020-04-23
14,永豐金-中正,322,16,306,0.78,2330-永豐金-中正,2020-04-23
15,富邦-虎尾,207,25,182,0.46,2330-富邦-虎尾,2020-04-23


In [7]:
# Write the scraped broker-trade rows to the BrokerTradeTW table through the
# project's add_to_sql helper.
# NOTE(review): the third argument presumably names the foreign-key lookup
# column - confirm against crawlers.finlab.import_tools.
add_to_sql(BrokerTradeTW, df, 'broker_name')

update  Stock_id:2330-港商麥格理
update  Stock_id:2330-摩根大通
update  Stock_id:2330-台灣摩根士丹利
update  Stock_id:2330-新加坡商瑞銀
update  Stock_id:2330-瑞士信貸
update  Stock_id:2330-臺銀
update  Stock_id:2330-大和國泰
update  Stock_id:2330-永全-南崁
update  Stock_id:2330-永豐金-中正
update  Stock_id:2330-富邦-虎尾
update  Stock_id:2330-凱基-台北
update  Stock_id:2330-台灣匯立證券
update  Stock_id:2330-日盛-永康
update  Stock_id:2330-台新-高雄
update  Stock_id:2330-新光
update  Stock_id:2330-宏遠證券
update  Stock_id:2330-美林
update  Stock_id:2330-元富
update  Stock_id:2330-港商法國興業
update  Stock_id:2330-富邦證券
update  Stock_id:2330-花旗環球
update  Stock_id:2330-美商高盛
update  Stock_id:2330-康和
update  Stock_id:2330-凱基
update  Stock_id:2330-兆豐
update  Stock_id:2330-永豐金-萬盛
update  Stock_id:2330-國泰證券
update  Stock_id:2330-兆豐-東門
update  Stock_id:2330-國票
update  Stock_id:2330-統一
Finish <class 'crawlers.models.BrokerTradeTW'>date:2020-04-23T00:00:00.000000000 bulk_create:0
Finish <class 'crawlers.models.BrokerTradeTW'>date:2020-04-23T00:00:00.000000000 bulk_update:

In [6]:
# Check whether a broker named '臺銀' exists in BrokerInfoTW (the recorded
# output of this cell is an empty QuerySet).
BrokerInfoTW.objects.filter(broker_name='臺銀')

<QuerySet []>

# 匯入 foreign key(外鍵)關聯資料:測試

In [None]:
# Model whose foreign keys are inspected.
model_name = BrokerTradeTW

# Names of all fields on the model that are ForeignKey instances.
fk_field_names = [
    fk.name
    for fk in model_name._meta.fields
    if isinstance(fk, models.ForeignKey)
]

fk_field_names

In [None]:
# For every foreign-key field, the model class it points to.
fk_remote = [
    model_name._meta.get_field(fk_name).remote_field.model
    for fk_name in fk_field_names
]
fk_remote

In [None]:
# Inputs for the fk_import trial: target model and the lookup column.
model_name = BrokerTradeTW
fk_columns = 'broker_name'

def fk_import(model_name,item,*fk_columns):
    """Look up the related objects of one row and key them by FK field name.

    The model's ForeignKey fields are collected in declaration order and
    paired positionally with ``fk_columns``; for each pair the related model
    is queried with ``objects.get(<column>=item[<column>])``. Pairing stops at
    the shorter of the two sequences.

    Args:
        model_name: Model class whose ``_meta.fields`` are inspected.
        item: Row-like object indexable by column name.
        *fk_columns: Lookup column names, one per foreign key.

    Returns:
        dict mapping each foreign-key field name to the object returned by
        the lookup.
    """
    fk_field_names = []
    for candidate in model_name._meta.fields:
        if isinstance(candidate, models.ForeignKey):
            fk_field_names.append(candidate.name)

    related_models = []
    for name in fk_field_names:
        related_models.append(model_name._meta.get_field(name).remote_field.model)

    resolved = []
    for related, column in zip(related_models, fk_columns):
        lookup = {column: item[column]}
        resolved.append(related.objects.get(**lookup))

    return dict(zip(fk_field_names, resolved))


# Try the lookup with the first scraped row.
fk_import(model_name, df.iloc[0], fk_columns)

In [None]:
# Inputs for the fk_update trial: target model and the lookup column.
model_name = BrokerTradeTW
fk_columns = 'broker_name'

def fk_update(model_name,item,*fk_columns):
    """Look up the related objects of one row and return them as a list.

    The model's ForeignKey fields are collected in declaration order and
    their related models are paired positionally with ``fk_columns``; each
    pair is resolved with ``objects.get(<column>=item[<column>])``. Pairing
    stops at the shorter of the two sequences.

    Args:
        model_name: Model class whose ``_meta.fields`` are inspected.
        item: Row-like object indexable by column name.
        *fk_columns: Lookup column names, one per foreign key.

    Returns:
        list of the looked-up objects, in foreign-key field order.
    """
    fk_field_names = []
    for candidate in model_name._meta.fields:
        if isinstance(candidate, models.ForeignKey):
            fk_field_names.append(candidate.name)

    related_models = []
    for name in fk_field_names:
        related_models.append(model_name._meta.get_field(name).remote_field.model)

    resolved = []
    for related, column in zip(related_models, fk_columns):
        lookup = {column: item[column]}
        resolved.append(related.objects.get(**lookup))

    return resolved


# Try the lookup with the first scraped row.
fk_update(model_name, df.iloc[0], fk_columns)