In [None]:
import datetime as dt 
import pandas as pd
import concurrent.futures as cf
from yahoofinancials import YahooFinancials
import re
import ast
import time
import requests
from bs4 import BeautifulSoup

In [None]:
# Load the ticker symbols from the CSV and check the list for duplicates.
asxComp = pd.read_csv('DJI.csv')
stockList = asxComp.loc[:, 'Symbol'].to_list()

# Independent copy of the symbol list; the fetch cells iterate over `stocks`.
stocks = list(stockList)
stocks_set = set(stocks)

# A set drops repeated symbols, so a size mismatch means duplicates exist.
# This single O(n) comparison replaces a second, quadratic `list.count` pass
# that recomputed (and overwrote) the very same flag.
contains_duplicates = len(stocks_set) != len(stocks)
print(len(stocks_set), len(stocks), contains_duplicates)

In [None]:
# Fetch the three annual statements for the first ticker only and report how
# long the round trip took.
balanceSheet, incomeStatement, cashStatement = {}, {}, {}

# Slicing to the first element restricts the run to a single ticker.
for stock in stocks[:1]:
    print(stock)
    start = time.time()

    yahoo_financials = YahooFinancials(stock)
    balance_sheet_data = yahoo_financials.get_financial_stmts('annual', 'balance')
    income_statement_data = yahoo_financials.get_financial_stmts('annual', 'income')
    cash_statement_data = yahoo_financials.get_financial_stmts('annual', 'cash')

    # Dump the raw balance-sheet response so its structure can be inspected.
    print(balance_sheet_data)

    # Each response is indexed first by statement name, then by ticker.
    for store, payload, history_key in (
        (balanceSheet, balance_sheet_data, 'balanceSheetHistory'),
        (incomeStatement, income_statement_data, 'incomeStatementHistory'),
        (cashStatement, cash_statement_data, 'cashflowStatementHistory'),
    ):
        store[stock] = payload[history_key][stock]

    end = time.time()
    print('Time taken: {:.2f} s'.format(end - start))

In [None]:
# Module-level stores filled by `retrieve_stock_data`, keyed by ticker symbol.
balanceSheet = {}
incomeStatement = {}
cashStatement = {}


def retrieve_stock_data(stock):
    """Fetch the annual balance, income and cash-flow statements for one ticker.

    On success the three statement histories are stored under ``stock`` in the
    module-level ``balanceSheet``, ``incomeStatement`` and ``cashStatement``
    dicts. On failure nothing is stored for this ticker, a message naming the
    ticker and the error is printed, and the exception is not re-raised.

    Args:
        stock: Ticker symbol passed to ``YahooFinancials``.

    Returns:
        None.
    """
    try:
        print(stock)
        yahoo_financials = YahooFinancials(stock)
        balance_sheet_data = yahoo_financials.get_financial_stmts('annual', 'balance')
        income_statement_data = yahoo_financials.get_financial_stmts('annual', 'income')
        cash_statement_data = yahoo_financials.get_financial_stmts('annual', 'cash')

        # Resolve all three histories before touching the shared dicts, so a
        # failure part-way through cannot leave a ticker in only some of them.
        balance_history = balance_sheet_data['balanceSheetHistory'][stock]
        income_history = income_statement_data['incomeStatementHistory'][stock]
        cash_history = cash_statement_data['cashflowStatementHistory'][stock]
    except Exception as exc:
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate; the message says which ticker failed and why.
        print('error with retrieving stock data for {}: {!r}'.format(stock, exc))
        return

    balanceSheet[stock] = balance_history
    incomeStatement[stock] = income_history
    cashStatement[stock] = cash_history

In [None]:
# Fetch every ticker concurrently and report the total wall-clock time.
start = time.time()

# The context manager shuts the pool down when the block exits, so the worker
# threads (up to 16) are released instead of lingering in the kernel.
with cf.ThreadPoolExecutor(16) as executor:
    futures = [executor.submit(retrieve_stock_data, stock) for stock in stocks]
    # Block until every submitted fetch has finished.
    cf.wait(futures)

end = time.time()
print('  time taken {:.2f} s'.format(end-start))

In [None]:
# Persist each statement dict to its own text file as the dict's `str()` form.
for filename, statements in (
    ('balanceSheet.txt', balanceSheet),
    ('incomeStatement.txt', incomeStatement),
    ('cashStatement.txt', cashStatement),
):
    with open(filename, 'w') as output:
        output.write(str(statements))

In [None]:
# Reload the saved balance-sheet and income-statement dicts from disk.
# `ast.literal_eval` parses the stored dict literal without executing code.
# The file handle is deliberately not named `input`: that would rebind the
# builtin `input()` in the kernel namespace for every later cell.
with open('balanceSheet.txt', 'r') as infile:
    balanceSheet = ast.literal_eval(infile.read())

with open('incomeStatement.txt', 'r') as infile:
    incomeStatement = ast.literal_eval(infile.read())

In [None]:
# Compute return-on-equity (ROE) and EPS-growth figures per ticker.
#   roe_dict[ticker]  = (mean ROE in %, [ROE in % per period])
#   epsg_dict[ticker] = (mean growth in %, [growth figures in %]), or
#                       (0, [per-period EPS values]) when the growth
#                       calculation fails.
roe_dict, epsg_dict = {}, {}
count_missing, count_cond, count_eps_0 = 0, 0, 0

# Pair the statements by ticker instead of by dict position: the two dicts are
# filled independently, so their iteration orders are not guaranteed to match
# and positional pairing would silently drop tickers.
for keyB, valB in balanceSheet.items():
    try:
        # A ticker absent from incomeStatement raises KeyError here and is
        # counted as missing by the handler below.
        valI = incomeStatement[keyB]

        # Each statement is iterated as a list of {period: figures} dicts.
        yearsI = [k for year in valI for k, v in year.items()]
        yearsB = [k for year in valB for k, v in year.items()]
        if yearsI != yearsB:
            # Only tickers whose two statements cover the same periods are used.
            continue
        count_cond += 1

        figuresB = [v for year in valB for v in year.values()]
        figuresI = [v for year in valI for v in year.values()]

        equity = [rec['totalStockholderEquity'] for rec in figuresB]
        commonStock = [rec['commonStock'] for rec in figuresB]

        profit = [rec['grossProfit'] for rec in figuresI]
        # Not used in any calculation below, but the lookup still rejects
        # tickers that lack a 'totalRevenue' entry (KeyError -> counted missing).
        revenue = [rec['totalRevenue'] for rec in figuresI]
        netIncome = [rec['netIncome'] for rec in figuresI]

        roe = [round(netin/eq*100,2) for netin, eq in zip(netIncome, equity)]
        roe_dict[keyB] = (round(sum(roe)/len(roe),2), roe)

        # NOTE(review): "eps" here is grossProfit / commonStock as reported in
        # the statements, not net income per share outstanding -- confirm that
        # this proxy is intended.
        eps = [round(earn/stono,2) for earn, stono in zip(profit, commonStock)]

        try:
            epsg = []
            for ep in range(1, len(eps)):
                if ep > 3:
                    print('More than 4 years of FY data')
                    continue
                if ep >= 2:
                    # Compound growth of eps[0] relative to eps[ep] over `ep` periods.
                    epsg.append(round(100*((eps[0]/eps[ep])**(1/ep)-1),2))
                # Growth of each entry relative to the one that follows it.
                epsg.append(round(100*((eps[ep-1]/eps[ep])-1),2))

            epsg_dict[keyB] = (round(sum(epsg)/len(epsg),2), epsg)
        except Exception:
            # Reached on a zero denominator, an empty growth list, or a negative
            # ratio raised to a fractional power (complex result, which `round`
            # rejects with TypeError).
            count_eps_0 += 1
            epsg_dict[keyB] = (0, eps)

    except Exception:
        # Missing keys, None values or zero equity: skip the ticker but keep
        # going. `Exception` (not a bare except) lets KeyboardInterrupt through.
        count_missing += 1

print('Yearly data avail',count_cond, 'out of', len(balanceSheet))
print('Some key data missing', count_missing, 'out of', len(balanceSheet))
print('EPS Growth NaN', count_eps_0, 'out of', len(balanceSheet))

In [None]:
# Minimum average ROE / EPS growth (in %) a ticker must reach to pass the screen.
ROE_req = 10
EPSG_req = 10

print('-'*50, 'RETURN ON EQUITY','-'*50)
# Keep tickers whose average meets the threshold and that have no negative entry.
roe_crit = {
    k: v for k, v in roe_dict.items()
    if v[0] >= ROE_req and not any(n < 0 for n in v[1])
}
# print(roe_crit)
print('-'*50, 'EARNINGS PER SHARE GROWTH','-'*50)
eps_crit = {
    k: v for k, v in epsg_dict.items()
    if v[0] >= EPSG_req and not any(n < 0 for n in v[1])
}
# print(eps_crit)

print('-'*50, 'ROE & EPS Growth Critera','-'*50)
# Tickers passing both screens, in roe_crit order. A direct dict membership
# test replaces the nested scan over both key sets.
both = [key for key in roe_crit if key in eps_crit]
print(both)