In [59]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
from lxml import html

headers = {
  'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36'  
}

In [60]:
def get_cash_dividend_payouts( stock_id, 
                               year_to_check=5):
    res = requests.get("https://histock.tw/stock/{}/%E9%99%A4%E6%AC%8A%E9%99%A4%E6%81%AF".format(stock_id), headers = headers)
    res.encoding = 'utf-8'
    
    soup = BeautifulSoup(res.text, 'lxml')
    data = soup.select_one("table", class_="tb-outline")

    dfs = pd.read_html(data.prettify())

    df = dfs[0]
    df = df[1:]
    
    cash_dividend_payouts = df["現金股利"][:year_to_check].astype('float')
    return list(cash_dividend_payouts)

In [61]:
def get_ROEs( stock_id, 
              year_to_check=5):
    
    res = requests.get("https://histock.tw/stock/financial.aspx?no={}&t=3&st=2&q=3".format(stock_id), headers = headers)
    res.encoding = 'utf-8'

    soup = BeautifulSoup(res.text, 'lxml')
    data = soup.select_one("table", class_="tb-outline")

    dfs = pd.read_html(data.prettify())

    df = dfs[0]
    ROEs = [float(ROE.strip('%')) for ROE in df["年度ROE"][:year_to_check]]
    return ROEs

In [62]:
def get_PEratios( stock_id,
                  year_to_check=20):
    
    res = requests.get("https://histock.tw/stock/{}/%E6%9C%AC%E7%9B%8A%E6%AF%94".format(stock_id), headers = headers)
    res.encoding = 'utf-8'

    soup = BeautifulSoup(res.text, 'lxml')
    data = soup.select_one("table", class_="tb-outline")

    dfs = pd.read_html(data.prettify())

    df = dfs[0]
    df = df[1:]
    
    PEratio_cols = [col for col in df.columns if '本益比' in col]
    PEratios = []
    for col in PEratio_cols:
        PEratios += list(df[col])

    return PEratios[:year_to_check]

In [63]:
def get_prev_net_val_per_share( stock_id, 
                                prev_year="2019"):

    res = requests.get("https://histock.tw/stock/{}/%E6%AF%8F%E8%82%A1%E6%B7%A8%E5%80%BC".format(stock_id), headers = headers)
    res.encoding = 'utf-8'

    soup = BeautifulSoup(res.text, 'lxml')
    data = soup.select_one("table", class_="tb-outline")
    dfs = pd.read_html(data.prettify())

    df = dfs[0]

    prev_net_val_per_share = df.iloc[3][prev_year]
    return prev_net_val_per_share

In [64]:
def get_EPSs( stock_id, 
              year_to_check=5):
    
    res = requests.get("https://histock.tw/stock/{}/%E9%99%A4%E6%AC%8A%E9%99%A4%E6%81%AF".format(stock_id), headers = headers)
    res.encoding = 'utf-8'
    
    soup = BeautifulSoup(res.text, 'lxml')
    data = soup.select_one("table", class_="tb-outline")

    dfs = pd.read_html(data.prettify())

    df = dfs[0]
    df = df[1:]

    EPSs = df["EPS"][:year_to_check].astype('float')
    return list(EPSs)

In [65]:
def getCompanyAttrs(stock_id):
    res = requests.get("https://histock.tw/stock/financial.aspx?no={}".format(stock_id), headers = headers)
    res.encoding = 'utf-8'
    tree = html.fromstring(res.text)
    company_name = tree.xpath('//*[@id="form1"]/div[4]/div[4]/div/div[1]/div[2]/div[1]/div/h3/a')[0].text

    cur_price = tree.xpath('//*[@id="CPHB1_Price1_lbTPrice"]/a/span')[0].text

    volume = tree.xpath('//*[@id="CPHB1_Price1_lbTVolume"]/span')[0].text
    attr = tree.xpath('//*[@id="CPHB1_Price1_lbStockClass"]')[0].text.split(' ')
    stock_type = attr[0]
    industry = attr[1]
    
    return company_name, cur_price, volume, stock_type, industry

In [69]:
def getBuyPrice(cur_dividend_payout, 
                sell_price, 
                ror, 
                year_to_hold,
                display_log=False):
    
    buy_price = 0
    for year in range(1, year_to_hold+1):
        if year == year_to_hold:
            buy_price += (cur_dividend_payout + sell_price) / ((1 + ror) ** year )
        else: 
            buy_price += cur_dividend_payout / ((1 + ror) ** year )
        
        if display_log:
            print ("year {} => buy_price: {:.2f}".format(year, buy_price))
    
    if display_log:
        print ("buy_price: {:.2f}\n".format(buy_price))
    
    return buy_price

In [74]:
def analyzeStock(stock_id,    
                 year_to_hold = 8,
                 prev_year="2019",
                 max_PEratio=12, 
                 verbose = 0 ):
                 
    cash_dividend_payouts = get_cash_dividend_payouts( stock_id, year_to_check=5 )
    EPSs = get_EPSs( stock_id, year_to_check=5 )
    ROEs = get_ROEs( stock_id, year_to_check=5 )
    PEratios = get_PEratios( stock_id, year_to_check=20 )
    prev_net_val_per_share = get_prev_net_val_per_share( stock_id, prev_year )
    dividend_payout_ratios = np.array(cash_dividend_payouts) / np.array(EPSs)
    
    exptect_ROE = np.mean(ROEs) / 100 
    exptect_dividend_payout_ratio = np.mean(dividend_payout_ratios) 

    
    prev_EPS = EPSs[-1]
    prev_dividend_payout = cash_dividend_payouts[-1]
    cur_net_val_per_share = prev_net_val_per_share + (prev_EPS - prev_dividend_payout)

    cur_EPS = exptect_ROE * cur_net_val_per_share

    cur_dividend_payout = cur_EPS * exptect_dividend_payout_ratio

    EPS_8th = exptect_ROE * prev_net_val_per_share

    expect_PEratio = np.mean(PEratios)
    expect_PEratio = min( expect_PEratio, max_PEratio )

    sell_price = EPS_8th * expect_PEratio
    
    if verbose >= 1:
        print ('{:>36}  {}'.format("stock id", stock_id ))
        print ('{:>36}  {:>4.2f} (year)'.format("exptect_ROE", exptect_ROE ))
        print ('{:>36}  {:>4.2f}'.format("exptect_dividend_payout_ratio", exptect_dividend_payout_ratio ))
        print ('{:>36}  {:>4.2f}'.format("cur_net_val_per_share (accumulated)", cur_net_val_per_share ))
        print ('{:>36}  {:>4.2f}'.format("cur_EPS", cur_EPS ))
        print ('{:>36}  {:>4.2f}'.format("cur_dividend_payout", cur_dividend_payout ))
        print ('{:>36}  {:>4.2f}'.format("EPS_8th", EPS_8th ))
        print ('{:>36}  {:>4.2f}'.format("expect_PEratio", expect_PEratio ))
        print ('{:>36}  {:>4.2f}\n'.format("sell_price", sell_price ))    
    
    
    buy_price_ror10 = getBuyPrice( cur_dividend_payout, sell_price, 0.10, year_to_hold, display_log=verboe>=2 )
    buy_price_ror15 = getBuyPrice( cur_dividend_payout, sell_price, 0.15, year_to_hold, display_log=verboe>=2 )

    company_name, cur_price, volume, stock_type, industry = getCompanyAttrs(stock_id)
    
    return buy_price_ror10, buy_price_ror15, sell_price, company_name, cur_price, volume, stock_type, industry

In [75]:
buy_prices_ror10 = []
buy_prices_ror15 = []
sell_prices = []
company_names = []
cur_prices = []
volumes = []
stock_types = []
industrys = []
stock_ids = [2904, 3567]

for stock_id in stock_ids:
    
    try:
        buy_price_ror10, buy_price_ror15, sell_price, company_name, cur_price, volume, stock_type, industry = \
            analyzeStock(stock_id, verbose=0)

        buy_prices_ror10.append(buy_price_ror10)
        buy_prices_ror15.append(buy_price_ror15)
        sell_prices.append(sell_price)
        company_names.append(company_name)
        cur_prices.append(cur_price)
        volumes.append(volume)
        stock_types.append(stock_type)
        industrys.append(industry)
    except:
        print ("fail stock id:", stock_id)

fail stock id: 2904
fail stock id: 3567


In [76]:
pd.options.display.float_format = "{:,.2f}".format
data = {
    "id": stock_ids,
    "公司": company_names,
    "種類": stock_types,
    "產業": industrys,
    "成交量": volumes,
    "買入價 (ror=10%)": buy_prices_ror10,
    "買入價 (ror=15%)": buy_prices_ror15,
    "賣出價": sell_prices,
    "現價": cur_prices,
}
df = pd.DataFrame(data) 
df

ValueError: arrays must all be same length