In [10]:
from datetime import datetime
import pandas as pd
from bs4 import BeautifulSoup
import requests
import json
import time
from io import StringIO
import random

import pandas_datareader.data as web
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import pandas_profiling
import copy

sns.set()

## 取得財報 ##
* [公開資訊觀測站：單一公司案例文件查詢及下載](https://mops.twse.com.tw/mops/web/t203sb01)

In [96]:
# Row labels of the yearly summary, in output column order.
_INCOME_ITEMS = [
    '營業收入合計', '營業毛利（毛損）', '營業利益（損失）',
    '繼續營業單位稅前淨利（淨損）', '營業費用合計', '本期淨利（淨損）', '基本每股盈餘合計',
]
_CASH_ITEM = '本期現金及約當現金增加（減少）數'
_EQUITY_ITEM = '股東權益總額'
_ASSETS_ITEM = '資產總額'
_SHEET_ITEMS = _INCOME_ITEMS + [_CASH_ITEM, _EQUITY_ITEM, _ASSETS_ITEM]
_DERIVED_ITEMS = ['營業毛利率(%)', '營業利益率(%)', '純益率(%)', '股東權益率(%)',
                  '總資產報酬率(%)', '營收成長率', '淨利成長率']

# Account title used by the older (t147sb02) report -> title used in the summary.
_LEGACY_ITEMS = {
    '營業收入合計': '營業收入合計',
    '營業毛利(毛損)': '營業毛利（毛損）',
    '營業淨利(淨損)': '營業利益（損失）',
    '繼續營業單位稅前淨利(淨損)': '繼續營業單位稅前淨利（淨損）',
    '營業費用合計': '營業費用合計',
    '合併總損益': '本期淨利（淨損）',
    '基本每股盈餘淨額': '基本每股盈餘合計',
    '本期現金及約當現金淨增減數': '本期現金及約當現金增加（減少）數',
    '股東權益總額': '股東權益總額',
    '資產總額': '資產總額',
}

# Fiscal years after 2012 are served by t164sb01, earlier ones by t147sb02.
# NOTE(review): presumably this split is the IFRS adoption boundary - confirm.
_LAST_LEGACY_YEAR = 2012
_MODERN_URL = 'https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=%s&SYEAR=%d&SSEASON=4&REPORT_ID=C'
_LEGACY_URL = 'https://mops.twse.com.tw/server-java/t147sb02?t203sb01Form=t203sb01Form&step=0&comp_id=%s&YEAR1=%d&SEASON1=4&R_TYPE1=B'


def _numeric_by_title(table):
    """Index `table` by its first column (account title) and coerce the rest to numbers."""
    titled = table.set_index(table.columns[0])
    return titled.apply(pd.to_numeric, errors='coerce')


def _first_value(column, *titles):
    """Return the value stored under the first of `titles` present in `column`."""
    for title in titles:
        if title in column.index:
            value = column.loc[title]
            # A repeated account title yields a Series; keep its first occurrence.
            return value.iloc[0] if isinstance(value, pd.Series) else value
    raise KeyError('none of %s found in statement' % (titles,))


def _parse_modern_report(tables):
    """Extract the current-period summary items from a t164sb01 report."""
    # Table 1: balance sheet, 2: income statement, 3: cash-flow statement.
    # The first data row is skipped (as the original code did), rows with any
    # non-numeric cell are dropped, and only the first (current) period is kept.
    balance, income, cash = [
        _numeric_by_title(tables[position].iloc[1:]).dropna().iloc[:, 0]
        for position in (1, 2, 3)
    ]
    record = {item: _first_value(income, item) for item in _INCOME_ITEMS}
    record[_CASH_ITEM] = _first_value(cash, _CASH_ITEM)
    # The totals are labelled differently across years/companies, so accept
    # either spelling instead of guessing from the fiscal year.
    record[_EQUITY_ITEM] = _first_value(balance, '權益總額', '權益總計')
    record[_ASSETS_ITEM] = _first_value(balance, '資產總額', '資產總計')
    return record


def _parse_legacy_report(tables):
    """Extract the current-period summary items from a t147sb02 report."""
    current = _numeric_by_title(tables[1].dropna()).iloc[:, 0]
    return {summary_title: _first_value(current, legacy_title)
            for legacy_title, summary_title in _LEGACY_ITEMS.items()}


def financial_statement(stock, delay=5, first_year=2009, last_year=None, read_tables=None):
    """Download yearly (season 4) financial statements from MOPS and summarise them.

    Parameters
    ----------
    stock : str or int
        Company code, e.g. '2611'.
    delay : float, default 5
        Seconds to sleep after each request so the site does not ban the client.
    first_year, last_year : int
        Inclusive range of fiscal years to request. `last_year` defaults to
        the current calendar year.
    read_tables : callable, optional
        `read_tables(url)` must return the list of tables found at `url`.
        Defaults to `pd.read_html(url, encoding='utf16', header=0)`.

    Returns
    -------
    pandas.DataFrame
        One row per fiscal year that could be loaded, sorted ascending. As in
        the original implementation, fiscal year `y` is indexed by 1 January
        of `y + 1`. Columns are the ten statement items followed by the margin,
        return and growth ratios. Years that fail to load are reported and
        skipped; if nothing loads, an empty frame with all columns is returned.
    """
    stock = str(stock)
    if last_year is None:
        last_year = datetime.now().year
    if read_tables is None:
        def read_tables(url):
            return pd.read_html(url, encoding='utf16', header=0)

    records = {}
    for y in range(last_year, first_year - 1, -1):
        print (y)
        if y > _LAST_LEGACY_YEAR:
            url, parse = _MODERN_URL % (stock, y), _parse_modern_report
        else:
            url, parse = _LEGACY_URL % (stock, y), _parse_legacy_report
        print (url)

        try:
            records[pd.Timestamp(year=y + 1, month=1, day=1)] = parse(read_tables(url))
        except Exception as err:
            # Best effort: a missing or malformed report skips that year only,
            # but the real cause is reported instead of a generic "404".
            print('could not load %d statement for %s (%s: %s)'
                  % (y, stock, type(err).__name__, err))

        # Pause between requests, otherwise the site bans the client.
        if delay:
            time.sleep(delay)

    if not records:
        return pd.DataFrame(columns=_SHEET_ITEMS + _DERIVED_ITEMS,
                            index=pd.DatetimeIndex([]), dtype=float)

    df_compsheet = pd.DataFrame(list(records.values()),
                                index=pd.DatetimeIndex(list(records)),
                                columns=_SHEET_ITEMS).astype(float).sort_index()

    revenue = df_compsheet['營業收入合計']
    pretax = df_compsheet['繼續營業單位稅前淨利（淨損）']
    df_compsheet['營業毛利率(%)'] = df_compsheet['營業毛利（毛損）'] / revenue * 100.
    df_compsheet['營業利益率(%)'] = df_compsheet['營業利益（損失）'] / revenue * 100.
    df_compsheet['純益率(%)'] = pretax / revenue * 100.
    df_compsheet['股東權益率(%)'] = pretax / df_compsheet[_EQUITY_ITEM] * 100.
    df_compsheet['總資產報酬率(%)'] = pretax / df_compsheet[_ASSETS_ITEM] * 100.

    # NOTE: pct_change divides by the previous value, so the sign of the growth
    # rate is misleading whenever the previous year's figure was negative.
    df_compsheet['營收成長率'] = revenue.pct_change()
    df_compsheet['淨利成長率'] = pretax.pct_change()

    return df_compsheet


In [100]:
# Load the monthly-revenue tables saved by an earlier crawl.
# NOTE: pickle.load can execute arbitrary code - only open pickle files you
# created yourself. The `with` block closes the file, so no explicit close().
with open('Monthly-revenue-201211to201911.pickle', 'rb') as file:
    data = pickle.load(file)

In [None]:
# Crawl the yearly statements of every company listed in the 2019-11 revenue table.
data_fstatement = {}
failed_symbols = []  # companies whose statements could not be built

for sym in data['2019-11-01']['公司代號']:
    # The table ends with an "all listed companies" total row, not a company.
    if sym == '全部國內上市公司合計':
        continue

    print ('company symbol: ',sym)
    try:
        data_fstatement[sym] = financial_statement(sym)
    except Exception as err:
        # One bad company must not abort the whole (hours-long) crawl.
        failed_symbols.append(sym)
        print ('skipped %s (%s: %s)' % (sym, type(err).__name__, err))



company symbol:  1101
2019
https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1101&SYEAR=2019&SSEASON=4&REPORT_ID=C
get 404, please check if anything is wrong
2018
https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1101&SYEAR=2018&SSEASON=4&REPORT_ID=C
2017
https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1101&SYEAR=2017&SSEASON=4&REPORT_ID=C
2016
https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1101&SYEAR=2016&SSEASON=4&REPORT_ID=C
2015
https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1101&SYEAR=2015&SSEASON=4&REPORT_ID=C
2014
https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1101&SYEAR=2014&SSEASON=4&REPORT_ID=C
2013
https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1101&SYEAR=2013&SSEASON=4&REPORT_ID=C
2012
https://mops.twse.com.tw/server-java/t147sb02?t203sb01Form=t203sb01Form&step=0&comp_id=1101&YEAR1=2012&SEASON1=4&R_TYPE1=B
2011
https://mops.twse.com.tw/server-java/t147sb02?t203sb01Form=t203sb01Form&step=0&comp_id=1

2015
https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1110&SYEAR=2015&SSEASON=4&REPORT_ID=C
2014
https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1110&SYEAR=2014&SSEASON=4&REPORT_ID=C
2013
https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1110&SYEAR=2013&SSEASON=4&REPORT_ID=C
2012
https://mops.twse.com.tw/server-java/t147sb02?t203sb01Form=t203sb01Form&step=0&comp_id=1110&YEAR1=2012&SEASON1=4&R_TYPE1=B
2011
https://mops.twse.com.tw/server-java/t147sb02?t203sb01Form=t203sb01Form&step=0&comp_id=1110&YEAR1=2011&SEASON1=4&R_TYPE1=B
2010
https://mops.twse.com.tw/server-java/t147sb02?t203sb01Form=t203sb01Form&step=0&comp_id=1110&YEAR1=2010&SEASON1=4&R_TYPE1=B
2009
https://mops.twse.com.tw/server-java/t147sb02?t203sb01Form=t203sb01Form&step=0&comp_id=1110&YEAR1=2009&SEASON1=4&R_TYPE1=B
company symbol:  1201
2019
https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1201&SYEAR=2019&SSEASON=4&REPORT_ID=C
get 404, please check if anything is wrong
2018
https://mo

2010
https://mops.twse.com.tw/server-java/t147sb02?t203sb01Form=t203sb01Form&step=0&comp_id=1216&YEAR1=2010&SEASON1=4&R_TYPE1=B
2009
https://mops.twse.com.tw/server-java/t147sb02?t203sb01Form=t203sb01Form&step=0&comp_id=1216&YEAR1=2009&SEASON1=4&R_TYPE1=B
company symbol:  1217
2019
https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1217&SYEAR=2019&SSEASON=4&REPORT_ID=C
get 404, please check if anything is wrong
2018
https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1217&SYEAR=2018&SSEASON=4&REPORT_ID=C
2017
https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1217&SYEAR=2017&SSEASON=4&REPORT_ID=C
2016
https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1217&SYEAR=2016&SSEASON=4&REPORT_ID=C
2015
https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1217&SYEAR=2015&SSEASON=4&REPORT_ID=C
2014
https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1217&SYEAR=2014&SSEASON=4&REPORT_ID=C
2013
https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=121

2017
https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1229&SYEAR=2017&SSEASON=4&REPORT_ID=C
2016
https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1229&SYEAR=2016&SSEASON=4&REPORT_ID=C
2015
https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1229&SYEAR=2015&SSEASON=4&REPORT_ID=C
2014
https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1229&SYEAR=2014&SSEASON=4&REPORT_ID=C
2013
https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1229&SYEAR=2013&SSEASON=4&REPORT_ID=C
2012
https://mops.twse.com.tw/server-java/t147sb02?t203sb01Form=t203sb01Form&step=0&comp_id=1229&YEAR1=2012&SEASON1=4&R_TYPE1=B
2011
https://mops.twse.com.tw/server-java/t147sb02?t203sb01Form=t203sb01Form&step=0&comp_id=1229&YEAR1=2011&SEASON1=4&R_TYPE1=B
2010
https://mops.twse.com.tw/server-java/t147sb02?t203sb01Form=t203sb01Form&step=0&comp_id=1229&YEAR1=2010&SEASON1=4&R_TYPE1=B
2009
https://mops.twse.com.tw/server-java/t147sb02?t203sb01Form=t203sb01Form&step=0&comp_id=1229&YEAR1=200

---
## Test block ##

In [99]:
df_compsheet = financial_statement('2611')

# Colour map -> columns shaded with it (one gradient per column).
gradient_columns = {
    'Reds': ['營業收入合計', '營業毛利（毛損）'],
    'coolwarm': [
        '營業利益（損失）',
        '繼續營業單位稅前淨利（淨損）',
        '基本每股盈餘合計',
        '本期現金及約當現金增加（減少）數',
        '營業毛利率(%)',
        '營業利益率(%)',
        '純益率(%)',
        '股東權益率(%)',
        '總資產報酬率(%)',
        '營收成長率',
        '淨利成長率',
    ],
}

styled_sheet = df_compsheet.style
for cmap_name, column_names in gradient_columns.items():
    for column_name in column_names:
        styled_sheet = styled_sheet.background_gradient(cmap_name, subset=column_name)

# Missing values are highlighted; the styler is the cell's displayed result.
styled_sheet.highlight_null()

2019
https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=2611&SYEAR=2019&SSEASON=4&REPORT_ID=C
get 404, please check if anything is wrong
2018
https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=2611&SYEAR=2018&SSEASON=4&REPORT_ID=C
2017
https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=2611&SYEAR=2017&SSEASON=4&REPORT_ID=C
2016
https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=2611&SYEAR=2016&SSEASON=4&REPORT_ID=C
2015
https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=2611&SYEAR=2015&SSEASON=4&REPORT_ID=C
2014
https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=2611&SYEAR=2014&SSEASON=4&REPORT_ID=C
2013
https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=2611&SYEAR=2013&SSEASON=4&REPORT_ID=C
2012
https://mops.twse.com.tw/server-java/t147sb02?t203sb01Form=t203sb01Form&step=0&comp_id=2611&YEAR1=2012&SEASON1=4&R_TYPE1=B
2011
https://mops.twse.com.tw/server-java/t147sb02?t203sb01Form=t203sb01Form&step=0&comp_id=2611&YEAR1=2011&SEASON1

Unnamed: 0,營業收入合計,營業毛利（毛損）,營業利益（損失）,繼續營業單位稅前淨利（淨損）,營業費用合計,本期淨利（淨損）,基本每股盈餘合計,本期現金及約當現金增加（減少）數,股東權益總額,資產總額,營業毛利率(%),營業利益率(%),純益率(%),股東權益率(%),總資產報酬率(%),營收成長率,淨利成長率
2011-01-01 00:00:00,1629300.0,325071.0,-13288,39235,338359,40901,0.06,-187275,2606330.0,4875110.0,19.9516,-0.815567,2.4081,1.50537,0.804802,,
2012-01-01 00:00:00,1403750.0,213370.0,-165018,-189718,378388,-209391,-0.66,-31203,2423510.0,5124320.0,15.2,-11.7555,-13.5151,-7.82824,-3.7023,-0.138433,-5.83543
2013-01-01 00:00:00,3822840.0,1489430.0,982539,876508,506892,821885,4.61,94987,2970980.0,4605610.0,38.9614,25.7018,22.9282,29.5023,19.0313,1.72331,-5.62006
2014-01-01 00:00:00,1580580.0,297725.0,-59634,65547,357359,10987,0.12,631,3023930.0,4786880.0,18.8365,-3.77293,4.14704,2.16761,1.3693,-0.586544,-0.925218
2015-01-01 00:00:00,1438380.0,334363.0,146,70357,334217,62039,0.4,150729,2913570.0,4867810.0,23.2458,0.0101503,4.89141,2.4148,1.44535,-0.0899647,0.0733825
2016-01-01 00:00:00,1366480.0,393550.0,49319,40326,344231,32713,0.2,-194297,2779910.0,4543980.0,28.8003,3.6092,2.95109,1.45062,0.88746,-0.0499861,-0.426837
2017-01-01 00:00:00,1222280.0,372794.0,-19940,-65329,392734,-68103,-0.36,7070,2709860.0,4490960.0,30.4998,-1.63137,-5.34483,-2.41079,-1.45468,-0.105524,-2.62002
2018-01-01 00:00:00,1027060.0,293728.0,-19166,-1194,312894,1820,0.06,55897,2792780.0,4383710.0,28.599,-1.86611,-0.116254,-0.042753,-0.0272372,-0.159722,-0.981723
2019-01-01 00:00:00,937679.0,220439.0,-2884,43811,223323,50596,0.29,38024,2826860.0,4322740.0,23.509,-0.307568,4.67228,1.54981,1.0135,-0.0870243,-37.6926


In [74]:
# Scratch cell: replays the per-year parsing done inside financial_statement
# for one hard-coded company (1101) and fiscal year, to inspect the result.
y = 2017
# url = 'https://mops.twse.com.tw/server-java/t147sb02?t203sb01Form=t203sb01Form&step=0&comp_id=1101&YEAR1=2014&SEASON1=4&R_TYPE1=B'
url = 'https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=1101&SYEAR=%d&SSEASON=4&REPORT_ID=C' %(y)
# url = 'https://mops.twse.com.tw/server-java/t164sb01?step=1&CO_ID=2611&SYEAR=2018&SSEASON=4&REPORT_ID=C'
# read_html returns every table on the page; tables 1/2/3 are used below
# (balance sheet, income statement, cash-flow statement).
getdata=pd.read_html(url,encoding='utf16',header=0)

# Income statement: drop the first data row, index rows by the first column
# (account title), coerce the remaining columns to numbers and drop rows with NaN.
getdata[2].drop(getdata[2].head(1).index, inplace=True)
getdata[2] = getdata[2].reset_index(drop=True)
getdata[2].index = getdata[2].iloc[: , 0]
getdata[2].drop(getdata[2].columns[0], axis=1, inplace=True)
for col in getdata[2].columns:
    getdata[2][col] = pd.to_numeric(getdata[2][col], 'coerce')

getdata[2] = getdata[2].dropna()

# Balance sheet: same cleaning, and its period columns are renamed
# position-by-position to the income statement's column labels so the two
# tables align when columns are copied across below.
getdata[1].drop(getdata[1].head(1).index, inplace=True)
getdata[1] = getdata[1].reset_index(drop=True)
getdata[1].index = getdata[1].iloc[: , 0]
getdata[1].drop(getdata[1].columns[0], axis=1, inplace=True)
for (col1,col2) in zip(getdata[1].columns, getdata[2].columns):
    print (col1,col2)
    getdata[1][col1] = pd.to_numeric(getdata[1][col1], 'coerce')
    getdata[1].rename(columns={col1:col2}, inplace=True)

getdata[1] = getdata[1].dropna()

# Cash-flow statement: same cleaning as the income statement.
getdata[3].drop(getdata[3].head(1).index, inplace=True)
getdata[3] = getdata[3].reset_index(drop=True)
getdata[3].index = getdata[3].iloc[: , 0]
getdata[3].drop(getdata[3].columns[0], axis=1, inplace=True)
for col in getdata[3].columns:
    getdata[3][col] = pd.to_numeric(getdata[3][col], 'coerce')

getdata[3] = getdata[3].dropna()

# Transpose so that rows are periods and columns are account titles.
getdata[1] = getdata[1].T
getdata[2] = getdata[2].T
getdata[3] = getdata[3].T

# getdata[1][['權益總計']]
# getdata[3][['本期現金及約當現金增加（減少）數']]
# Keep the income-statement items of interest, then attach the cash-flow and
# equity figures (aligned on the period labels).
getdata[2] = getdata[2][['營業收入合計','營業毛利（毛損）','營業利益（損失）',
                            '繼續營業單位稅前淨利（淨損）','營業費用合計','本期淨利（淨損）','基本每股盈餘合計']]
getdata[2]['本期現金及約當現金增加（減少）數'] = getdata[3][['本期現金及約當現金增加（減少）數']]
# The equity total is read from a different row label for these fiscal years.
if y==2017 or y==2014 or y==2013:
    getdata[2]['股東權益總額'] = getdata[1]['權益總額']
else:
    getdata[2]['股東權益總額'] = getdata[1]['權益總計']


# Back to account titles as rows; show the first period column.
getdata[2] = getdata[2].T

getdata[2][getdata[2].columns[0]]
    

2017年12月31日 2017年度
2016年12月31日 2016年度


會計項目
營業收入合計              9.831178e+07
營業毛利（毛損）            1.891291e+07
營業利益（損失）            1.396288e+07
繼續營業單位稅前淨利（淨損）      1.382466e+07
營業費用合計              4.950029e+06
本期淨利（淨損）            1.032280e+07
基本每股盈餘合計            2.030000e+00
本期現金及約當現金增加（減少）數   -1.848540e+06
股東權益總額              1.526472e+08
Name: 2017年度, dtype: float64

---
## Obsolete ##

In [3]:
#----------------- （１）評估價值是否被低估？（股票價格不會太貴） -------------
########## 去公開資訊觀測站，把本益比、股價淨值比爬下來 ##########
# Fetch the yield, P/E and P/B figures of every listed stock from the TWSE
# exchange-report endpoint; the timestamp suffix is only a cache buster.
url = 'http://www.twse.com.tw/exchangeReport/BWIBBU_d?response=json&date=&selectType=&_=' + str(time.time())
print (url)
list_req = requests.get(url, timeout=30)
list_req.raise_for_status()  # fail loudly on an HTTP error instead of parsing an error page
# The response body is JSON, so decode it directly rather than through an HTML parser.
getjson = list_req.json()

# The payload is tabular, so a DataFrame is the convenient container.
stockdf = pd.DataFrame(getjson['data'],columns=["證券代號","證券名稱","殖利率(%)","股利年度","本益比","股價淨值比","財報年/季"])
PBR = pd.to_numeric(stockdf['股價淨值比'], errors='coerce') < 0.5 # price-to-book ratio below 0.5
EPS = pd.to_numeric(stockdf['本益比'], errors='coerce') < 15 # price-to-earnings ratio below 15 (the mask filters P/E, despite its name)
candidate= stockdf[(PBR & EPS)] # stocks that satisfy both conditions
candidate

http://www.twse.com.tw/exchangeReport/BWIBBU_d?response=json&date=&selectType=&_=1576944476.0300171


Unnamed: 0,證券代號,證券名稱,殖利率(%),股利年度,本益比,股價淨值比,財報年/季
431,2611,志信,5.86,107,14.29,0.48,108/3
468,2841,台開,0.0,107,14.77,0.35,108/3
883,8429,金麗-KY,2.67,107,10.23,0.3,108/3
