### Using yfinance
1. [ X ] check each ticker for divendeds `has_dividends`
1. [ X ] filter out non-dividened stocks `has_dividends` > 0
1. [ X ] check each ticker (out of filtered list) for Positive TTM Free-cashflow `get_free_cashflow`
1. [ X ] filter out stocks with negative TTM Free-cashflow `get_free_cashflow` > 0
1. [ X ] check each ticker (out of filtered list) for Market Cap <= NCAV `calculate_ncav`
1. [ X ] filter out stocks without Market Cap <= NCAV `calculate_ncav` >= market cap
1. [ X ] check each ticker (out of filtered list) for net-debt `get_net_debt`
1. [ X ] filter out stocks without zero or negative net-debt `get_net_debt` <= 0 
1. [ ] check tickers HQ location
1. [ ] filter out non-whitelisted countries

### Remaining Calculations
1. [ ] Average 5Y annual Free Cash Flow yield at 10% or more
1. [ ] Get tickers full company name

In [1]:
import yfinance as yf
import pandas as pd
import json
from datetime import datetime
import os
from dotenv import load_dotenv

load_dotenv()
pd.set_option('display.float_format', lambda x: '%.3f' % x)

def read_json_file(file_path) ->list[str]:
    with open(file_path, 'r') as file:
        data = json.load(file)
    return data

def write_json_file(data, file_path):
    with open(file_path, 'w') as file:
        json.dump(data, file, indent=2)

def has_dividends(ticker: yf.Ticker) -> bool:
    return ticker.dividends.sum() > 0

def get_net_debt(ticker: yf.Ticker)->int:
    return ticker.quarterly_balance_sheet.loc['Net Debt'][0]

def get_total_liabilities(ticker: yf.Ticker)->int:
    try:
        return ticker.quarterly_balance_sheet.loc['Current Liabilities'][0]
    except:
        return -1

def get_total_debt(ticker: yf.Ticker)->int:
    try:
        return ticker.info['totalDebt']
    except:
        try:
            return ticker.quarterly_balance_sheet.loc["Total Debt"][0]
        except KeyError as k:
            return -1

def get_current_assets(ticker: yf.Ticker)->int:
    return ticker.quarterly_balance_sheet.loc['Total Assets'][0]

def get_free_cashflow(ticker: yf.Ticker)->int:
    return ticker.quarterly_cashflow.loc['Free Cash Flow'][0]

def get_net_debt(ticker: yf.Ticker) -> int:
    return ticker.quarterly_balance_sheet.loc['Net Debt'][0]

def calculate_ncav(ticker: yf.Ticker):
    '''
    Attempts two methods of calculating NCAV from yFinance API. If niether method works, returns -1. This would ensure the ticker would be removed from consideration.
    (1) uses `get_total_liabilities()` & `get_current_assets()` [both from `yf.Ticker.quarterly_balance_sheet` dataframe].
    (2) uses `get_total_debt()` & `get_current_assets()` [former uses `yf.Ticker().info`; ladder uses `yf.Ticker().quarterly_balance_sheet` dataframe].
    TODO: reduce api calls to one.
    TODO: check that `get_total_liabilities` == `get_total_debt`.
    TODO: determine better method for innermost Exception handling.
    '''
    try:
        total_liabilities = ticker.quarterly_balance_sheet.loc['Total Liabilities Net Minority Interest'][0]
        current_assets = ticker.quarterly_balance_sheet.loc['Current Assets'][0]
        return current_assets - total_liabilities
    except KeyError or TypeError as k:
        return -1 # for now
        # try:
        #     debt = get_total_debt(ticker)
        #     current_assets = get_current_assets(ticker)
        #     return current_assets - debt
        # except KeyError or TypeError as ke:
        #     return -1 # could use another function to find values elsewhere.
            
def get_country(ticker: yf.Ticker) -> str:
    return ticker.info['country']

def is_whitelist_nation(country: str) -> bool:
    countries = ['Austria', 'Belgium', 'Bulgaria', 'Croatia', 'Cyprus', 'Czech Republic',
    'Denmark', 'Estonia', 'Finland', 'France', 'Germany', 'Greece', 'Hungary',
    'Ireland', 'Italy', 'Latvia', 'Lithuania', 'Luxembourg', 'Malta', 'Netherlands',
    'Poland', 'Portugal', 'Romania', 'Slovakia', 'Slovenia', 'Spain', 'Sweden',
    'USA', 'United States', 'Canada', 'United Kingdom', 'UK', 'Japan', 'Australia']
    return country in countries

def convert_cik_list(file_path: str) -> dict:
    result_dict = {}
    
    with open(file_path, 'r') as file:
        for line in file:
            parts = line.strip().split(':')
            if len(parts) == 2:
                ticker = parts[0].strip().upper()
                value = int(parts[1].strip())
                result_dict[ticker] = value
    
    return result_dict

def create_dataframe(data_dict: dict) -> pd.DataFrame:
    df = pd.DataFrame.from_dict(data_dict, orient='index')
    return df

def driver(data_arr: list[str], debug: bool = False) -> dict[str: dict]:
    start_time = datetime.now()
    ret_dict = {i:{"Has Dividends": bool, "Net Debt": bool, "Positive Free Cashflow": bool, "Market Cap <= NCAV": bool, "HQ Country": str} for i in data_arr}
    removal_matrix = [[],[],[],[], []]
    if debug:
        print(f"{len(ret_dict)} Tickers to be screened.")
    
    for key, val in ret_dict.items(): # check "Has Dividends"
        try:
            ticker = yf.Ticker(key)
            ticker_has_dividends = has_dividends(ticker)
            if ticker_has_dividends:
               val["Has Dividends"] = True
            else:
                removal_matrix[0].append(key)
        except KeyError or TypeError as k:
            removal_matrix[0].append(key) 
    
    print(f"Time to check dividends: {datetime.now() - start_time}") # average time to check dividends: 0:20:24 – 0:21:55 (hh:mm:ss).

    for i in removal_matrix[0]: # remove Ticker symbols that don't pay dividends
        ret_dict.pop(i)

    if debug:
        print(f"{len(ret_dict)} Tickers to be screened.")
    start_market_cap = datetime.now()
    
    for key, val in ret_dict.items(): # check "Market Cap <= NCAV"
        try:
            ticker = yf.Ticker(key)
            market_cap = ticker.info['marketCap']
            is_positive_ncav = market_cap <= calculate_ncav(ticker)
            if is_positive_ncav:
                val["Market Cap <= NCAV"] = True
            else:
                removal_matrix[1].append(key) # remove tickers with Market Cap > NCAV
        except KeyError or ValueError as k:
            removal_matrix[1].append(key) 
    print(f"Time to check Market Cap <= NCAV: {datetime.now() - start_market_cap}")

    for i in removal_matrix[1]: # remove tickers with Market Cap > NCAV
        ret_dict.pop(i)

    if debug:
        print(f"{len(ret_dict)} Tickers to be screened.")
    start_ttm = datetime.now()
    
    for key, val in ret_dict.items(): # check "Positive Free Cashflow"
        try:
            ticker = yf.Ticker(key)
            has_positive_ttm = get_free_cashflow(ticker) > 0
            if has_positive_ttm:
                val["Positive Free Cashflow"] = True
            else:
                removal_matrix[2].append(key) 
        except KeyError as k:
            removal_matrix[2].append(key) 
    print(f"Time to check Positive Free Cashflow: {datetime.now() - start_ttm}")
    
    for i in removal_matrix[2]: # remove tickers with negative TTM (i.e., Positive Free Cashflow)
        ret_dict.pop(i)

    if debug:
        print(f"{len(ret_dict)} Tickers to be screened.")
    start_net_debt = datetime.now()

    for key, val in ret_dict.items(): # check "Net Debt".
        try:
            ticker = yf.Ticker(key)
            has_net_debt = get_net_debt(ticker) > 0
            if has_net_debt:
                val["Net Debt"] = True
            else:
                removal_matrix[3].append(key)
        except:
            try:
                has_net_debt = get_total_debt(ticker)
                if has_net_debt:
                    val["Net Debt"] = True
                else:
                    removal_matrix[3].append(key)
            except:
                removal_matrix[3].append(key)
    print(f"Time to check Net Debt: {datetime.now() - start_net_debt}")

    for i in removal_matrix[3]: # remove tickers without Positive "Net Debt".
        ret_dict.pop(i)
    adding_hq = datetime.now()
    
    for key, val in ret_dict.items(): # assuming all tickers have a country.
        ticker = yf.Ticker(key)
        country = get_country(ticker)
        if is_whitelist_nation(country): # remove non-whitelisted tickers
            val["HQ Country"] = country
        else:
            removal_matrix[4] = key
    print(f"Time to check HQs': {datetime.now() - adding_hq}")

    for i in removal_matrix[4]:
        if len(i) != 1:
            ret_dict.pop(i)

    cik_dict = convert_cik_list('./cik.txt')
    for key, val in ret_dict.items():
        val["CIK"] = cik_dict.get(key, "NONE")
    
    print(f"{len(ret_dict)} tickers remaining. {len(removal_matrix[0])+len(removal_matrix[1])+len(removal_matrix[2])+len(removal_matrix[3])} stocks screened out")
    print(f"tickers removed: {removal_matrix}")
    
    return ret_dict


data = read_json_file('./tickers.json')
data2 = read_json_file('./new_tickers.json')
cik_dict = convert_cik_list('./cik.txt')

  _empty_series = pd.Series()


In [None]:
market_caps = [0] * len(data)
tickers_to_remove = []

for i in range(len(data)-1):
    try:
        t = yf.Ticker(data[i])
        market_caps[i] = t.info['marketCap']
    except KeyError as k:
        print(f"Key error occurred when looking for market cap for ticker: {data[i]}")
        tickers_to_remove.append(data[i])


In [None]:
new_data = [i for i in data if i not in tickers_to_remove]
len(new_data)

with open("new_tickers.json", 'w') as file:
    json.dump(data, file, indent=2)

In [4]:
d = {i:bool for i in data2}
missing = []

for i in data2:
    try:
        t = yf.Ticker(i)
        ncav = calculate_ncav(t)
        market_cap = t.info['marketCap']
        d[i] = market_cap <= ncav
    except KeyError as ke:
        missing.append(i)

In [None]:
dd = {i:bool for i in missing}
still_missing = []
for i in missing:
    try:
        t = yf.Ticker(i)
        debt = get_total_debt(t)
        current_assets = get_current_assets(t)
        market_cap = t.info['marketCap']
        dd[i] = market_cap <= (current_assets - debt)
    except KeyError as k:
        still_missing.append(i)
len(still_missing)

In [None]:
fin = [i for i in data2 if i not in still_missing]
has_dividends_dict = {i:bool for i in fin}
for i in fin:
    try:
        t = yf.Ticker(i)
        has_dividends_dict[i] = has_dividends(t)
    except KeyError as k:
        print(f'error with {i}')

In [48]:
no_ttm = []
ttm = {i:0 for i in data2}

for i in data2:
    try:
        t = yf.Ticker(i)
        ttm[i] = get_free_cashflow(t)
    except KeyError as k:
        print(f'error with {i}')
        no_ttm.append(i)
print(len(no_ttm))

error with SMFG
error with DB
error with HGAS
error with IGIC
error with EVAX
error with LILM
error with SAI
error with APLM
error with TYG
error with CANF
error with DRD
error with IREN
error with PRZO
error with PHG
error with WAVE
error with AVAL
error with ING
error with WDS
error with IMOS
error with GRFS
error with BLTE
error with IHG
error with TURN
error with GFI
error with BAP
error with CREV
error with VCIG
error with BTDR
error with SNT
error with CACO
error with RIO
error with IVA
error with NCNC
error with CIB
error with WRNT
error with ORAN
error with MOB
error with SBSW
error with CAMT
error with SNN
error with MTEK
error with MTR
error with ADD
error with NLSP
error with VOC
error with ARVL
error with VEON
error with MSDL
error with SHLT
error with ARQQ
error with LGHL
error with MFG
error with NVNI
error with NBTX
error with GNFT
error with DEC
error with GYRO
error with OKYO
error with APM
error with SKIL
error with ALAR
error with CZOO
error with CYTO
error with BUD


In [17]:
partially_screened = driver(data2, debug= True)

5250 Tickers to be screened.
Time to check dividends: 0:22:53.417085
2312 Tickers to be screened.
Time to check Market Cap <= NCAV: 0:18:24.658967
31 Tickers to be screened.
Time to check Positive Free Cashflow: 0:00:06.811184
12 Tickers to be screened.
Time to check Net Debt: 0:00:04.842178
Time to check HQs': 0:00:00.909184
11 tickers remaining. 5239 stocks screened out
tickers removed: [['CLBT', 'AZUL', 'THRD', 'GBIO', 'SMFG', 'PFIE', 'LGMK', 'ORGS', 'CERS', 'XLO', 'AMWL', 'LBRDK', 'TBBK', 'DRRX', 'DNLI', 'FTRE', 'GTLS', 'PHUN', 'SPT', 'LTRY', 'PRLD', 'PPIH', 'BROS', 'MGAM', 'VISL', 'CPSS', 'ALLR', 'IPX', 'AMBI', 'NR', 'BKYI', 'FLL', 'THAR', 'ONTX', 'IDEX', 'CDTX', 'BCAL', 'NTLA', 'SGMO', 'MRKR', 'CLH', 'WVVI', 'EFOI', 'ARDX', 'ALT', 'HGAS', 'ANTX', 'SENS', 'RRGB', 'EVAX', 'AMPE', 'AVXL', 'LILM', 'VINO', 'OCTO', 'MTTR', 'SAI', 'MXCT', 'BDTX', 'SNCY', 'ESLA', 'APLM', 'SMMT', 'CMPS', 'DLO', 'TGL', 'NU', 'HOOD', 'NCDL', 'SNES', 'CDAY', 'EGAN', 'MYTE', 'ADOC', 'FLNC', 'NAII', 'CRMD', 'D

In [18]:
df = create_dataframe(partially_screened)
df

Unnamed: 0,Has Dividends,Net Debt,Positive Free Cashflow,Market Cap <= NCAV,HQ Country,CIK
MTBL,True,True,True,True,United States,NONE
LGL,True,True,True,True,United States,61004
FBRT,True,True,True,True,United States,1562528
EQC,True,True,True,True,United States,803649
LDWY,True,True,True,True,United States,NONE
STCN,True,True,True,True,United States,914712
JRSH,True,True,True,True,United States,1696558
IOR,True,True,True,True,United States,949961
ACTG,True,True,True,True,United States,934549
HURC,True,True,True,True,United States,315374


In [19]:
df.to_excel("full.xlsx", index=True)

In [20]:
ind = df.index.to_list()
partially_screened_sum = len(ind)
partially_screened_sum
partially_screened_also_in_cik = [i for i in ind if i in cik_dict.keys()]
len(partially_screened_also_in_cik)

9

## try to get FCF from EDGAR docs

In [None]:
ttms = [1123000,472000,72000]
average = sum(ttms)/3
average_three_year_fcf = round(average*1000, 2)
market_capitalization = 4693000000
(average_three_year_fcf / market_capitalization) * 100

In [None]:
# Calculate Score
# cash + average_three_year_fcf 

market_capitalization/(yf.Ticker("FNB").get_cashflow().loc['EndCashPosition'][0] + average_three_year_fcf)

In [10]:
yf.Ticker("FNB").quarterly_balance_sheet

Unnamed: 0,2023-09-30,2023-06-30,2023-03-31,2022-12-31
Treasury Shares Number,16110462.0,16114836.0,14575547.0,14437135.0
Preferred Shares Number,4435080.0,4435080.0,4435080.0,4435080.0
Ordinary Shares Number,358828542.0,358820568.0,360359857.0,360470110.0
Share Issued,374939004.0,374935404.0,374935404.0,374907245.0
Net Debt,2108000000.0,2391000000.0,1294000000.0,474000000.0
Total Debt,3745000000.0,4095000000.0,3017000000.0,2148000000.0
Tangible Book Value,3236000000.0,3155000000.0,3120000000.0,2980000000.0
Invested Capital,9532000000.0,9806000000.0,8698000000.0,7694000000.0
Net Tangible Assets,3343000000.0,3262000000.0,3227000000.0,3087000000.0
Common Stock Equity,5787000000.0,5711000000.0,5681000000.0,5546000000.0


In [23]:
data = read_json_file("./tickers_tradingview.json")

24

In [2]:
tv = read_json_file('./tv_extract.json')

clean = {'Japan': '.T',
 'Canada': '.TO',
 'Austria': '.VI',
 'Belgium': '.BR',
 'Estonia': '.TL',
 'France': '.PA',
 'Germany': '.DE',
 'Greece': '.AT',
 'Hungary': '.BD',
 'Italy': '.MI',
 'Latvia': '.RG',
 'Lithuania': '.VS',
 'Netherlands': '.AS',
 'Poland': '.WS',
 'Portugal': '.LS',
 'Romania': '.RO',
 'Finland': '.HE',
 'Spain': '.MC',
 'Sweden': '.ST',
 'Switzerland': '.SW',
 'United Kingdom': '.L',
 'New Zealand': '.NZ',
 'Czech Republic': '.PR',
 'USA': ''}

for k, v in tv.items():
    tv[k] = [i.split(':')[1]+clean[k] for i in tv[k]]


all_data = [item for sublist in tv.values() for item in sublist]
driva = driver(all_data, debug= True)


22657 Tickers to be screened.


9942.T: No timezone found, symbol may be delisted
  return pd.Series()
1869.T: No timezone found, symbol may be delisted
  return pd.Series()
9035.T: No timezone found, symbol may be delisted
  return pd.Series()
8228.T: No timezone found, symbol may be delisted
  return pd.Series()
2902.T: No timezone found, symbol may be delisted
  return pd.Series()
3830.T: No timezone found, symbol may be delisted
  return pd.Series()
9359.T: No timezone found, symbol may be delisted
  return pd.Series()
9402.T: No timezone found, symbol may be delisted
  return pd.Series()
6076.T: No timezone found, symbol may be delisted
  return pd.Series()
9027.T: No timezone found, symbol may be delisted
  return pd.Series()
3055.T: No timezone found, symbol may be delisted
  return pd.Series()
3504.T: No timezone found, symbol may be delisted
  return pd.Series()
7488.T: No timezone found, symbol may be delisted
  return pd.Series()
1832.T: No timezone found, symbol may be delisted
  return pd.Series()
8190.T

Time to check dividends: 7:00:08.646605
11288 Tickers to be screened.
Time to check Market Cap <= NCAV: 1:10:47.472452
308 Tickers to be screened.
Time to check Positive Free Cashflow: 0:00:53.956351
33 Tickers to be screened.
Time to check Net Debt: 0:00:11.200454
Time to check HQs': 0:00:03.343687
26 tickers remaining. 22631 stocks screened out
tickers removed: [['6525.T', '3697.T', '5838.T', '4385.T', '9552.T', '4194.T', '3994.T', '5032.T', '4180.T', '4443.T', '5253.T', '4478.T', '4587.T', '4480.T', '2160.T', '3993.T', '9166.T', '5842.T', '4485.T', '7157.T', '9519.T', '2931.T', '9348.T', '6027.T', '7342.T', '9158.T', '6740.T', '5139.T', '3479.T', '4384.T', '5595.T', '9164.T', '9416.T', '2980.T', '5027.T', '3491.T', '3694.T', '3182.T', '4413.T', '4449.T', '4431.T', '4375.T', '5574.T', '4371.T', '9424.T', '4475.T', '9338.T', '4259.T', '4592.T', '4053.T', '2998.T', '9522.T', '4477.T', '9726.T', '5337.T', '4169.T', '3914.T', '9942.T', '4599.T', '6223.T', '4419.T', '7047.T', '7373.T', '1

In [4]:
df = create_dataframe(driva)
df.to_excel('tv.xlsx', index= True)

In [22]:
import requests
import os
from dotenv import load_dotenv
load_dotenv()

key = os.environ['FMP_KEY']
def get_cashflow(ticker: str, span:int = 5) -> str:
    url = f'https://financialmodelingprep.com/api/v3/cash-flow-statement/{ticker}?period=annual&apikey={key}&limit={span}'
    response = requests.get(url)
    return response.json()

def get_five_year_fcf(ticker: str) -> list[str]:
    return [i['freeCashFlow'] for i in get_cashflow(ticker)]

def get_five_year_fcf_average(ticker: str) -> float:
    return sum(get_five_year_fcf(ticker))/5

def get_profile(ticker: str, span:int = 5) -> str:
    url = f'https://financialmodelingprep.com/api/v3/profile/{ticker}?apikey={key}'
    response = requests.get(url)
    return response.json()

def five_year_yield(ticker: str) -> float:
    return round((get_five_year_fcf_average(ticker) / get_profile(ticker)[0]['mktCap'])*100, 2)


In [24]:
driva

{'3254.T': {'Has Dividends': True,
  'Net Debt': True,
  'Positive Free Cashflow': True,
  'Market Cap <= NCAV': True,
  'HQ Country': 'Japan',
  'CIK': 'NONE'},
 '6804.T': {'Has Dividends': True,
  'Net Debt': True,
  'Positive Free Cashflow': True,
  'Market Cap <= NCAV': True,
  'HQ Country': 'Japan',
  'CIK': 'NONE'},
 '8084.T': {'Has Dividends': True,
  'Net Debt': True,
  'Positive Free Cashflow': True,
  'Market Cap <= NCAV': True,
  'HQ Country': 'Japan',
  'CIK': 'NONE'},
 '7229.T': {'Has Dividends': True,
  'Net Debt': True,
  'Positive Free Cashflow': True,
  'Market Cap <= NCAV': True,
  'HQ Country': 'Japan',
  'CIK': 'NONE'},
 '7266.T': {'Has Dividends': True,
  'Net Debt': True,
  'Positive Free Cashflow': True,
  'Market Cap <= NCAV': True,
  'HQ Country': 'Japan',
  'CIK': 'NONE'},
 '6393.T': {'Has Dividends': True,
  'Net Debt': True,
  'Positive Free Cashflow': True,
  'Market Cap <= NCAV': True,
  'HQ Country': 'Japan',
  'CIK': 'NONE'},
 'BUI.TO': {'Has Dividends':

In [23]:
drop = []
for k, v in driva.items():
    try:
        yie = five_year_yield(k)
        print(f'Five year average earnings yield for {k}: {yie}%')
    except:
        drop.append(k)
print(len(drop))

26


In [26]:
{i:'' for i in tv.keys()}

{'Japan': '',
 'Canada': '',
 'Austria': '',
 'Belgium': '',
 'Estonia': '',
 'France': '',
 'Germany': '',
 'Greece': '',
 'Hungary': '',
 'Italy': '',
 'Latvia': '',
 'Lithuania': '',
 'Netherlands': '',
 'Poland': '',
 'Portugal': '',
 'Romania': '',
 'Finland': '',
 'Spain': '',
 'Sweden': '',
 'Switzerland': '',
 'United Kingdom': '',
 'New Zealand': '',
 'Russia': '',
 'Czech Republic': '',
 'USA': ''}