`# Import dependencies`

In [1]:
import datetime as dt
import pandas as pd

import concurrent.futures as cf
from yahoofinancials import YahooFinancials

import re
import ast
import time
import requests
from bs4 import BeautifulSoup

# import numpy as np
# import yfinance as yf
# from pandas_datareader import data as pdr
# from scipy import stats

`# Get list of stocks we want to narrow down`

In [2]:
# Pages whose constituent tables are scraped for stock codes, one per index.
asx_200, all_ords, small_ords = (
    "https://www.asx200list.com/",
    "https://www.allordslist.com/",
    "https://www.smallordslist.com/",
)

`# Webscraping list of stocks`

In [3]:
# Browser-style User-Agent string, split across lines only for readability;
# the concatenated value is what gets sent as the request header.
_USER_AGENT = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/39.0.2171.95 Safari/537.36"
)
header = {"User-Agent": _USER_AGENT}

In [4]:
# One result list per index; asx_list lets the loop fill them by position.
ASX200, ALLORDS, SMALLORDS = [], [], []
asx_list = [ASX200, ALLORDS, SMALLORDS]
for index, url in enumerate([asx_200, all_ords, small_ords]):
    # The timeout keeps a stalled server from hanging the notebook, and
    # raise_for_status() keeps an HTTP error page from being parsed as data.
    res = requests.get(url, headers=header, timeout=30)
    res.raise_for_status()
    soup = BeautifulSoup(res.text, 'html.parser')

    table = soup.find('table', class_='tableizer-table sortable')
    tbody = table.find('tbody') if table is not None else None
    if tbody is None:
        raise ValueError('constituent table not found at ' + url)

    for row in tbody.find_all('tr'):
        cells = row.find_all('td')
        # Skip rows that are not multi-column data rows.
        if len(cells) < 2:
            continue
        # The stock code is the text of the first cell. Reading it through the
        # parser (rather than stripping characters out of the row's HTML) keeps
        # working if a cell gains attributes or nested tags.
        code = cells[0].get_text(strip=True)
        if code:
            asx_list[index].append(code)

print('ASX200', ASX200)
print('SMALLORDS', SMALLORDS)
print('ALLORDS', ALLORDS)

ASX200 ['A2M', 'AAA', 'ABC', 'ABP', 'AFI', 'AGL', 'AIA', 'ALD', 'ALL', 'ALQ', 'ALU', 'ALX', 'AMC', 'AMP', 'ANN', 'ANZ', 'APA', 'APE', 'APT', 'APX', 'ARB', 'ARG', 'AST', 'ASX', 'AWC', 'AZJ', 'BAP', 'BEN', 'BGA', 'BHP', 'BIN', 'BKW', 'BLD', 'BOQ', 'BPT', 'BRG', 'BSL', 'BWP', 'BXB', 'CAR', 'CBA', 'CCL', 'CCP', 'CDA', 'CGF', 'CHC', 'CHN', 'CIA', 'CIM', 'CLW', 'CMW', 'CNU', 'COH', 'COL', 'CPU', 'CQR', 'CSL', 'CSR', 'CTD', 'CWN', 'CWY', 'DEG', 'DHG', 'DMP', 'DOW', 'DRR', 'DXS', 'EBO', 'ELD', 'EML', 'EVN', 'EVT', 'FBU', 'FLT', 'FMG', 'FPH', 'GMG', 'GNE', 'GOZ', 'GPT', 'GXY', 'HLS', 'HVN', 'IAG', 'IEL', 'IFL', 'IFT', 'IGO', 'ILU', 'IOO', 'IOZ', 'IPL', 'IRE', 'IVV', 'JBH', 'JHX', 'LFG', 'LFS', 'LLC', 'LNK', 'LYC', 'MCY', 'MEZ', 'MFG', 'MGF', 'MGOC', 'MGR', 'MIN', 'MLT', 'MP1', 'MPL', 'MQG', 'MTS', 'NAB', 'NCM', 'NEC', 'NHF', 'NIC', 'NSR', 'NST', 'NUF', 'NWL', 'NXT', 'ORA', 'ORE', 'ORG', 'ORI', 'OSH', 'OZL', 'PBH', 'PDL', 'PLS', 'PME', 'PMGOLD', 'PMV', 'PNI', 'PNV', 'PPT', 'PTM', 'QAN', 'QBE', '

`# Check for duplicates`

In [5]:
stockList = ASX200
# Yahoo tickers for ASX listings are the stock code plus the '.AX' suffix.
stocks = [stock + '.AX' for stock in stockList]
stocks_set = set(stocks)
# A set drops repeats, so a size mismatch means at least one ticker occurs
# more than once. This comparison is the single source of truth for the flag.
contains_duplicates = len(stocks_set) != len(stocks)
print(len(stocks_set), len(stocks), contains_duplicates)

200 200 False


`# Use Yahoo Financials to retrieve FY data`

In [6]:
# Module-level stores filled by retrieve_stock_data: ticker -> statement history.
balanceSheet = {}
incomeStatement = {}
cashStatement = {}
def retrieve_stock_data(stock):
    """Fetch the annual financial statements for one ticker and store them.

    Requests the annual balance sheet, income statement and cash-flow
    statement through YahooFinancials and, if all three are obtained, records
    them in ``balanceSheet``, ``incomeStatement`` and ``cashStatement`` under
    ``stock``.

    Any failure is reported on stdout and leaves all three dicts untouched for
    this ticker, so the dicts never hold a partial record.

    Args:
        stock: Ticker symbol as passed to YahooFinancials (e.g. 'BHP.AX').

    Returns:
        None. Results are written to the module-level dicts.
    """
    print(stock)
    try:
        yahoo_financials = YahooFinancials(stock)
        balance_sheet_data = yahoo_financials.get_financial_stmts('annual', 'balance')
        income_statement_data = yahoo_financials.get_financial_stmts('annual', 'income')
        cash_statement_data = yahoo_financials.get_financial_stmts('annual', 'cash')
        # Extract everything before storing anything: a missing key in a later
        # statement must not leave an earlier dict already updated.
        balance = balance_sheet_data['balanceSheetHistory'][stock]
        income = income_statement_data['incomeStatementHistory'][stock]
        cash = cash_statement_data['cashflowStatementHistory'][stock]
    except Exception as exc:
        # Exception (not a bare except) so Ctrl-C / kernel interrupt still works.
        print('error with retrieving stock data', stock, repr(exc))
        return
    balanceSheet[stock] = balance
    incomeStatement[stock] = income
    cashStatement[stock] = cash

`# Multithreading - the API calls are I/O-bound`

In [None]:
start = time.time()
# The context manager shuts the pool down and joins its worker threads even if
# submitting or waiting raises, so re-running the cell does not leak threads.
with cf.ThreadPoolExecutor(max_workers=16) as executor:
    futures = [executor.submit(retrieve_stock_data, stock) for stock in stocks]
    cf.wait(futures)
end = time.time()
print('  time taken {:.2f} s'.format(end-start))

A2M.AX
AAA.AX
ABC.AX
ABP.AX
AFI.AX
AGL.AX
AIA.AX
ALD.AX
ALL.AX
ALQ.AX
ALU.AX
ALX.AX
AMC.AX
AMP.AX
ANN.AX
ANZ.AX


`# Open data from files`

In [None]:
# Each file is parsed with ast.literal_eval, so it must contain a single Python
# literal (presumably the dict saved from an earlier download - the code that
# writes these files is not shown here).
# The handles are deliberately not named `input`, which would rebind the
# built-in input() for every later cell.
with open('balanceSheet.txt', 'r') as balance_file:
    balanceSheet = ast.literal_eval(balance_file.read())

with open('incomeStatement.txt', 'r') as income_file:
    incomeStatement = ast.literal_eval(income_file.read())

`# Evaluate ROE & EPS Growth`

In [None]:
# Per-ticker results: ticker -> (average, list of individual values).
roe_dict, epsg_dict = {}, {}
count_missing, count_cond, count_eps_0 = 0, 0, 0


def _statement_years(statements):
    """Return the period keys of a statement history, in stored order."""
    return [period for entry in statements for period, _ in entry.items()]


def _statement_field(statements, field):
    """Return `field` from every period of a statement history, in stored order."""
    return [values[field] for entry in statements for _, values in entry.items()]


for keyB, valB in balanceSheet.items():
    # Pair the two statements by ticker rather than by dict position: the dicts
    # are not guaranteed to share an insertion order, and a positional zip
    # would silently skip every ticker after the first misalignment.
    if keyB not in incomeStatement:
        continue
    valI = incomeStatement[keyB]
    try:
        # Only compare statements that cover exactly the same periods.
        if _statement_years(valI) != _statement_years(valB):
            continue
        count_cond += 1
        equity = _statement_field(valB, 'totalStockholderEquity')
        commonStock = _statement_field(valB, 'commonStock')

        profit = _statement_field(valI, 'grossProfit')
        # Not used in the ratios below, but the lookup still requires the field
        # to be present (a missing key counts the ticker as "data missing").
        revenue = _statement_field(valI, 'totalRevenue')
        netIncome = _statement_field(valI, 'netIncome')

        # Return on equity per period, in percent, and its plain average.
        roe = [round(net / eq * 100, 2) for net, eq in zip(netIncome, equity)]
        roe_dict[keyB] = (round(sum(roe) / len(roe), 2), roe)

        # NOTE(review): this "EPS" is grossProfit / commonStock; confirm this
        # is the intended proxy for earnings per share.
        eps = [round(earn / stono, 2) for earn, stono in zip(profit, commonStock)]

        try:
            # The formulas treat a lower index as the later period (presumably
            # the data is most-recent-first - TODO confirm).
            # For each earlier period: from the second one on, the compound
            # annual growth from that period to index 0, then the growth over
            # the single step to the adjacent period.
            epsg = []
            for ep in range(1, len(eps)):
                if ep > 3:
                    print('More than 4 years of FY data')
                    continue
                if ep > 1:
                    epsg.append(round(100 * ((eps[0] / eps[ep]) ** (1 / ep) - 1), 2))
                epsg.append(round(100 * ((eps[ep - 1] / eps[ep]) - 1), 2))

            epsg_dict[keyB] = (round(sum(epsg) / len(epsg), 2), epsg)
        except Exception:
            # Growth is undefined here (e.g. a zero EPS, a sign change under a
            # fractional power, or fewer than two periods). Record a zero
            # average alongside the raw EPS values.
            count_eps_0 += 1
            epsg_dict[keyB] = (0, eps)

    except Exception:
        # A required field is missing or not numeric for this ticker.
        count_missing += 1

print('Yearly data avail',count_cond, 'out of', len(balanceSheet))
print('Some key data missing', count_missing, 'out of', len(balanceSheet))
print('EPS Growth NaN', count_eps_0, 'out of', len(balanceSheet))

`# Apply conditions on ROE & EPS Growth`

In [None]:
# Minimum average values a stock must reach; both are compared against the
# first element of each (average, values) tuple.
ROE_req = 10
EPSG_req = 10

print('-'*50, 'RETURN ON EQUITY','-'*50)
# Keep tickers whose average meets the threshold and that have no negative
# individual value.
roe_crit = {
    k: v for k, v in roe_dict.items()
    if v[0] >= ROE_req and not any(n < 0 for n in v[1])
}
# print(roe_crit)
print('-'*50, 'EARNINGS PER SHARE GROWTH','-'*50)
eps_crit = {
    k: v for k, v in epsg_dict.items()
    if v[0] >= EPSG_req and not any(n < 0 for n in v[1])
}
# print(eps_crit)

print('-'*50, 'ROE & EPS Growth Critera','-'*50)
# Tickers passing both screens, in roe_crit order. Dict membership replaces a
# nested scan over both key sets.
both = [key for key in roe_crit if key in eps_crit]
print(both)