In [1]:
import sys,os
sys.path.append("..")
import django
django.setup()
import datetime
import pandas as pd
import requests
from crawlers.finlab.data_process_tools import year_transfer

In [7]:
class CrawlCompanyBasicInfoTW:
    def __init__(self):
        self.target_name = "台股企業基本資訊"
        self.sub_market = ["sii", "otc", "rotc"]



    def crawl_main(self):
        data=[]
        market_category=self.sub_market
        for market in market_category:
            url = "https://mops.twse.com.tw/mops/web/ajax_t51sb01"
            form_data = {
                "encodeURIComponent": "1",
                "step": "1",
                "firstin": "1",
                "TYPEK": market
            }

            res = requests.post(url,data = form_data)
            res.encoding = "utf-8"
            df=pd.read_html(res.text)
            df=pd.DataFrame(df[0])
            data.append(df)

        df2=pd.concat(data)
        df2 = df2.astype(str)
        df2 = df2.apply(lambda s: s.str.replace(",", ""))
        df3=df2.loc[:,["公司代號","update_time","公司名稱","公司簡稱","產業類別","外國企業註冊地國","住址",
                       "董事長","總經理","發言人","發言人職稱","總機電話",
                       "成立日期","上市日期","上櫃日期","興櫃日期","實收資本額(元)","已發行普通股數或TDR原發行股數",
                       "私募普通股(股)","特別股(股)","普通股盈餘分派或虧損撥補頻率","股票過戶機構","簽證會計師事務所",
                       "公司網址","投資人關係聯絡電話","投資人關係聯絡電子郵件","英文簡稱"]]


        df3 = df3.rename(columns={
                                    "公司代號":"stock_id","公司名稱":"name",
                                    "公司簡稱":"short_name","產業類別":"category",
                                    "外國企業註冊地國":"registered_country","住址":"address",
                                    "董事長":"chairman","總經理":"ceo",
                                    "發言人":"spokesman","發言人職稱":"spokesman_title",
                                    "總機電話":"phone","成立日期":"establishment_date",
                                    "上市日期":"sii_date","上櫃日期":"otc_date",
                                    "興櫃日期":"rotc_date","已發行普通股數或TDR原發行股數":"shares_issued",
                                    "私募普通股(股)":"private_shares","特別股(股)":"special_shares",
                                    "普通股盈餘分派或虧損撥補頻率":"dividend_frequency","股票過戶機構":"stock_transfer_institution",
                                    "簽證會計師事務所":"visa_accounting_firm","公司網址":"website",
                                    "投資人關係聯絡電話":"investor_relations_contact","投資人關係聯絡電子郵件":"investor_relations_email",
                                    "英文簡稱":"english_abbreviation","實收資本額(元)":"capital"

                                    })
        #Data format Process
        df3=df3[df3["stock_id"]!="公司代號"]
        df3["registered_country"]=df3["registered_country"].apply(lambda s: s.replace("－", "台灣"))

        for share_column in ["capital","shares_issued","private_shares","special_shares"]:
            df3[share_column]=df3[share_column].apply(lambda s:pd.to_numeric(s, errors="coerce"))

        for date_column in ["establishment_date","sii_date","otc_date","rotc_date"]:
            df3[date_column]=df3[date_column].apply(lambda t:year_transfer(t))

        df3["update_time"]=datetime.datetime.now()
        df3 = df3.fillna('')
        return df3

In [8]:
C=CrawlCompanyBasicInfoTW()
C.crawl_main()

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  return self._getitem_tuple(key)


Unnamed: 0,stock_id,update_time,name,short_name,category,registered_country,address,chairman,ceo,spokesman,...,shares_issued,private_shares,special_shares,dividend_frequency,stock_transfer_institution,visa_accounting_firm,website,investor_relations_contact,investor_relations_email,english_abbreviation
0,1101,2020-04-16 14:04:48.591320,台灣水泥股份有限公司,台泥,水泥工業,台灣,台北市中山北路2段113號,張安平,李鐘培,黃健強,...,5465619204,0,200000000,每年,中國信託商業銀行代理部,勤業眾信聯合會計師事務所,http://www.taiwancement.com,02-25317099分機20358,ir@taiwancement.com,TCC
1,1102,2020-04-16 14:04:48.591320,亞洲水泥股份有限公司,亞泥,水泥工業,台灣,台北市大安區敦化南路2段207號30、31樓,徐旭東,李坤炎,周維崑,...,3361447198,0,0,每年,亞東證券股份有限公司,勤業眾信聯合會計師事務所,www.acc.com.tw,02 27338000 ext.8336,ir@acc.com.tw,ACC
2,1103,2020-04-16 14:04:48.591320,嘉新水泥股份有限公司,嘉泥,水泥工業,台灣,台北市中山北路2段96號,張剛綸,祁士鉅,王立心,...,774780548,0,0,每年,群益金鼎證券股份有限公司股代部,勤業眾信聯合會計師事務所,www.chcgroup.com.tw,(02)2551-5211#243,ir@chcgroup.com.tw,CHC
3,1104,2020-04-16 14:04:48.591320,環球水泥股份有限公司,環泥,水泥工業,台灣,台北市南京東路二段125號10樓,侯博義,侯博義,楊宗仁,...,653609192,0,0,每年,華南永昌綜合證券(股)股務代理部,勤業眾信聯合會計師事務所,www.ucctw.com,02-2507-7801,p1590@ucctw.com,UCC
4,1108,2020-04-16 14:04:48.591320,幸福水泥股份有限公司,幸福,水泥工業,台灣,台北市松江路237號15樓,陳兩傳,陳兩傳,溫秀菊,...,404738049,0,0,每半會計年度,永豐金證券股務代理,勤業眾信聯合會計師事務所,www.luckygrp.com.tw,25092188,wendy@luckygrp.com.tw,LUCKY CEMENT
5,1109,2020-04-16 14:04:48.591320,信大水泥股份有限公司,信大,水泥工業,台灣,台北市寶慶路37號7樓,楊智雄,楊大寬,吳連富,...,341957868,0,0,每年,中國信託商業銀行代理部,資誠聯合會計師事務所,www.hsingta.com.tw,02-23816731#160,june@hsingta.com.tw,HsingTa
6,1110,2020-04-16 14:04:48.591320,東南水泥股份有限公司,東泥,水泥工業,台灣,高雄市前金區五福三路21號4樓之1,陳敏斷,吳長直,黃薪翰,...,572000797,0,0,每年,元大證券股份有限公司,國富浩華聯合會計師事務所,www.southeastcement.com.tw,07-2711121,vance.huang@secement.com,TUNA CEMENT
7,1201,2020-04-16 14:04:48.591320,味全食品工業股份有限公司,味全,食品工業,台灣,台北市松江路125號,陳宏裕,張教華,黃國禎,...,506062914,0,0,每年,元大證券股務代理部,資誠聯合會計師事務所,http://www.weichuan.com.tw,(02)25065020,KC_Huang@weichuan.com.tw,WEI CHUAN
8,1203,2020-04-16 14:04:48.591320,味王股份有限公司,味王,食品工業,台灣,台北市中山北路二段79號5樓,陳清福,陳恭平,魏璟雄,...,240000000,0,0,每年,群益金鼎證券股份有限公司,大中國際聯合會計師事務所,http://www.vewong.com,(02)25717271-610,wei@vewong.com.tw,VE WONG
9,1210,2020-04-16 14:04:48.591320,大成長城企業股份有限公司,大成,食品工業,台灣,台南市永康區蔦松二街三號,韓家宇,莊坤炎,周叔恆,...,827339086,0,0,每年,中國信託商業銀行代理部,安侯建業會計師事務所,http://www.dachan.com,02-26577111,john@greatwall-group.com,GREATWALL


In [None]:
#pycharm 測試
import sys,os
sys.path.append("..")
import django
django.setup()
from crawlers.models import *
# from crawlers.finlab.pioneers import *
# from crawlers.finlab.import_tools import *

In [None]:
add_to_sql(CompanyBasicInfoTW, df)