### Using yfinance
1. [ X ] check each ticker for divendeds `has_dividends`
1. [ X ] filter out non-dividened stocks `has_dividends` > 0
1. [ X ] check each ticker (out of filtered list) for Positive TTM Free-cashflow `get_free_cashflow`
1. [ X ] filter out stocks with negative TTM Free-cashflow `get_free_cashflow` > 0
1. [ X ] check each ticker (out of filtered list) for Market Cap <= NCAV `calculate_ncav`
1. [ X ] filter out stocks without Market Cap <= NCAV `calculate_ncav` >= market cap
1. [ X ] check each ticker (out of filtered list) for net-debt `get_net_debt`
1. [ X ] filter out stocks without zero or negative net-debt `get_net_debt` <= 0 
1. [ ] check tickers HQ location
1. [ ] filter out non-whitelisted countries

### Remaining Calculations
1. [ ] Average 5Y annual Free Cash Flow yield at 10% or more
1. [ ] Get tickers full company name

In [1]:
import yfinance as yf
import pandas as pd
import json
from datetime import datetime
import os
from dotenv import load_dotenv

load_dotenv()
pd.set_option('display.float_format', lambda x: '%.3f' % x)

def read_json_file(file_path) ->list[str]:
    with open(file_path, 'r') as file:
        data = json.load(file)
    return data

def write_json_file(data, file_path):
    with open(file_path, 'w') as file:
        json.dump(data, file, indent=2)

def has_dividends(ticker: yf.Ticker) -> bool:
    return ticker.dividends.sum() > 0

def get_net_debt(ticker: yf.Ticker)->int:
    return ticker.quarterly_balance_sheet.loc['Net Debt'][0]

def get_total_liabilities(ticker: yf.Ticker)->int:
    try:
        return ticker.quarterly_balance_sheet.loc['Current Liabilities'][0]
    except:
        return -1

def get_total_debt(ticker: yf.Ticker)->int:
    try:
        return ticker.info['totalDebt']
    except:
        try:
            return ticker.quarterly_balance_sheet.loc["Total Debt"][0]
        except KeyError as k:
            return -1

def get_current_assets(ticker: yf.Ticker)->int:
    return ticker.quarterly_balance_sheet.loc['Total Assets'][0]

def get_free_cashflow(ticker: yf.Ticker)->int:
    return ticker.quarterly_cashflow.loc['Free Cash Flow'][0]

def get_net_debt(ticker: yf.Ticker) -> int:
    return ticker.quarterly_balance_sheet.loc['Net Debt'][0]

def calculate_ncav(ticker: yf.Ticker):
    '''
    Attempts two methods of calculating NCAV from yFinance API. If niether method works, returns -1. This would ensure the ticker would be removed from consideration.
    (1) uses `get_total_liabilities()` & `get_current_assets()` [both from `yf.Ticker.quarterly_balance_sheet` dataframe].
    (2) uses `get_total_debt()` & `get_current_assets()` [former uses `yf.Ticker().info`; ladder uses `yf.Ticker().quarterly_balance_sheet` dataframe].
    TODO: check that `get_total_liabilities` == `get_total_debt`.
    TODO: determine better method for innermost Exception handling.
    '''
    try:
        total_liabilities = get_total_liabilities(ticker)
        current_assets = get_current_assets(ticker)
        return current_assets-total_liabilities
    except KeyError or TypeError as k:
        return -1 # for now
        # try:
        #     debt = get_total_debt(ticker)
        #     current_assets = get_current_assets(ticker)
        #     return current_assets - debt
        # except KeyError or TypeError as ke:
        #     return -1 # could use another function to find values elsewhere.
            
def get_country(ticker: yf.Ticker) -> str:
    return ticker.info['country']

def is_whitelist_nation(country: str) -> bool:
    countries = ['Austria', 'Belgium', 'Bulgaria', 'Croatia', 'Cyprus', 'Czech Republic',
    'Denmark', 'Estonia', 'Finland', 'France', 'Germany', 'Greece', 'Hungary',
    'Ireland', 'Italy', 'Latvia', 'Lithuania', 'Luxembourg', 'Malta', 'Netherlands',
    'Poland', 'Portugal', 'Romania', 'Slovakia', 'Slovenia', 'Spain', 'Sweden',
    'USA', 'United States', 'Canada', 'United Kingdom', 'UK', 'Japan', 'Australia']
    return country in countries

def convert_cik_list(file_path: str) -> dict:
    result_dict = {}
    
    with open(file_path, 'r') as file:
        for line in file:
            parts = line.strip().split(':')
            if len(parts) == 2:
                ticker = parts[0].strip().upper()
                value = int(parts[1].strip())
                result_dict[ticker] = value
    
    return result_dict

def create_dataframe(data_dict: dict) -> pd.DataFrame:
    df = pd.DataFrame.from_dict(data_dict, orient='index')
    return df

def driver(data_arr: list[str], debug: bool = False) -> dict[str:dict]:
    start_time = datetime.now()
    ret_dict = {i:{"Has Dividends": bool, "Net Debt": bool, "Positive Free Cashflow": bool, "Market Cap <= NCAV": bool, "HQ Country": str} for i in data_arr}
    removal_matrix = [[],[],[],[], []]
    if debug:
        print(f"{len(ret_dict)} Tickers to be screened.")
    
    for key, val in ret_dict.items(): # check "Has Dividends"
        try:
            ticker = yf.Ticker(key)
            ticker_has_dividends = has_dividends(ticker)
            if ticker_has_dividends:
               val["Has Dividends"] = True
            else:
                removal_matrix[0].append(key)
        except KeyError or TypeError as k:
            removal_matrix[0].append(key) 
    
    print(f"Time to check dividends: {datetime.now() - start_time}") # average time to check dividends: 0:20:24 – 0:21:55 (hh:mm:ss).

    for i in removal_matrix[0]: # remove Ticker symbols that don't pay dividends
        ret_dict.pop(i)

    if debug:
        print(f"{len(ret_dict)} Tickers to be screened.")
    start_market_cap = datetime.now()
    
    for key, val in ret_dict.items(): # check "Market Cap <= NCAV"
        try:
            ticker = yf.Ticker(key)
            market_cap = ticker.info['marketCap']
            is_positive_ncav = market_cap <= calculate_ncav(ticker)
            if is_positive_ncav:
                val["Market Cap <= NCAV"] = True
            else:
                removal_matrix[1].append(key) # remove tickers with Market Cap > NCAV
        except KeyError as k:
            removal_matrix[1].append(key) 
    print(f"Time to check Market Cap <= NCAV: {datetime.now() - start_market_cap}")

    for i in removal_matrix[1]: # remove tickers with Market Cap > NCAV
        ret_dict.pop(i)

    if debug:
        print(f"{len(ret_dict)} Tickers to be screened.")
    start_ttm = datetime.now()
    
    for key, val in ret_dict.items(): # check "Positive Free Cashflow"
        try:
            ticker = yf.Ticker(key)
            has_positive_ttm = get_free_cashflow(ticker) > 0
            if has_positive_ttm:
                val["Positive Free Cashflow"] = True
            else:
                removal_matrix[2].append(key) 
        except KeyError as k:
            removal_matrix[2].append(key) 
    print(f"Time to check Positive Free Cashflow: {datetime.now() - start_ttm}")
    
    for i in removal_matrix[2]: # remove tickers with negative TTM (i.e., Positive Free Cashflow)
        ret_dict.pop(i)

    if debug:
        print(f"{len(ret_dict)} Tickers to be screened.")
    start_net_debt = datetime.now()

    for key, val in ret_dict.items(): # check "Net Debt".
        try:
            ticker = yf.Ticker(key)
            has_net_debt = get_net_debt(ticker) > 0
            if has_net_debt:
                val["Net Debt"] = True
            else:
                removal_matrix[3].append(key)
        except:
            try:
                has_net_debt = get_total_debt(ticker)
                if has_net_debt:
                    val["Net Debt"] = True
                else:
                    removal_matrix[3].append(key)
            except:
                removal_matrix[3].append(key)
    print(f"Time to check Net Debt: {datetime.now() - start_net_debt}")

    for i in removal_matrix[3]: # remove tickers without Positive "Net Debt".
        ret_dict.pop(i)
    adding_hq = datetime.now()
    
    for key, val in ret_dict.items(): # assuming all tickers have a country.
        ticker = yf.Ticker(key)
        country = get_country(ticker)
        if is_whitelist_nation(country): # remove non-whitelisted tickers
            val["HQ Country"] = country
        else:
            removal_matrix[4] = key
    print(f"Time to check HQs': {datetime.now() - adding_hq}")

    for i in removal_matrix[4]:
        if len(i) != 1:
            ret_dict.pop(i)

    cik_dict = convert_cik_list('./cik.txt')
    for key, val in ret_dict.items():
        val["CIK"] = cik_dict.get(key, "NONE")
    
    print(f"{len(ret_dict)} tickers remaining. {len(removal_matrix[0])+len(removal_matrix[1])+len(removal_matrix[2])+len(removal_matrix[3])} stocks screened out")
    print(f"tickers removed: {removal_matrix}")
    
    return ret_dict


data = read_json_file('./tickers.json')
data2 = read_json_file('./new_tickers.json')
cik_dict = convert_cik_list('./cik.txt')

In [None]:
market_caps = [0] * len(data)
tickers_to_remove = []

for i in range(len(data)-1):
    try:
        t = yf.Ticker(data[i])
        market_caps[i] = t.info['marketCap']
    except KeyError as k:
        print(f"Key error occurred when looking for market cap for ticker: {data[i]}")
        tickers_to_remove.append(data[i])


In [None]:
new_data = [i for i in data if i not in tickers_to_remove]
len(new_data)

with open("new_tickers.json", 'w') as file:
    json.dump(data, file, indent=2)

In [4]:
d = {i:bool for i in data2}
missing = []

for i in data2:
    try:
        t = yf.Ticker(i)
        ncav = calculate_ncav(t)
        market_cap = t.info['marketCap']
        d[i] = market_cap <= ncav
    except KeyError as ke:
        missing.append(i)

In [None]:
dd = {i:bool for i in missing}
still_missing = []
for i in missing:
    try:
        t = yf.Ticker(i)
        debt = get_total_debt(t)
        current_assets = get_current_assets(t)
        market_cap = t.info['marketCap']
        dd[i] = market_cap <= (current_assets - debt)
    except KeyError as k:
        still_missing.append(i)
len(still_missing)

In [None]:
fin = [i for i in data2 if i not in still_missing]
has_dividends_dict = {i:bool for i in fin}
for i in fin:
    try:
        t = yf.Ticker(i)
        has_dividends_dict[i] = has_dividends(t)
    except KeyError as k:
        print(f'error with {i}')

In [48]:
no_ttm = []
ttm = {i:0 for i in data2}

for i in data2:
    try:
        t = yf.Ticker(i)
        ttm[i] = get_free_cashflow(t)
    except KeyError as k:
        print(f'error with {i}')
        no_ttm.append(i)
print(len(no_ttm))

error with SMFG
error with DB
error with HGAS
error with IGIC
error with EVAX
error with LILM
error with SAI
error with APLM
error with TYG
error with CANF
error with DRD
error with IREN
error with PRZO
error with PHG
error with WAVE
error with AVAL
error with ING
error with WDS
error with IMOS
error with GRFS
error with BLTE
error with IHG
error with TURN
error with GFI
error with BAP
error with CREV
error with VCIG
error with BTDR
error with SNT
error with CACO
error with RIO
error with IVA
error with NCNC
error with CIB
error with WRNT
error with ORAN
error with MOB
error with SBSW
error with CAMT
error with SNN
error with MTEK
error with MTR
error with ADD
error with NLSP
error with VOC
error with ARVL
error with VEON
error with MSDL
error with SHLT
error with ARQQ
error with LGHL
error with MFG
error with NVNI
error with NBTX
error with GNFT
error with DEC
error with GYRO
error with OKYO
error with APM
error with SKIL
error with ALAR
error with CZOO
error with CYTO
error with BUD


In [2]:
partially_screened = driver(data2, debug= True)

5266 Tickers to be screened.


In [None]:
df = create_dataframe(partially_screened)
df

Unnamed: 0,Has Dividends,Net Debt,Positive Free Cashflow,Market Cap <= NCAV,HQ Country,CIK
ARI,True,True,True,True,United States,1467760
PROV,True,True,True,True,United States,1010470
PBF,True,True,True,True,United States,1534504
CCU,True,True,True,True,<class 'str'>,888746
RC,True,True,True,True,United States,1527590
...,...,...,...,...,...,...
CLMT,True,True,True,True,United States,1340122
BRX,True,True,True,True,United States,1581068
FNB,True,True,True,True,United States,37808
ARKO,True,True,True,True,United States,1823794


In [None]:
df.to_excel("full.xlsx", index=True)

In [None]:
ind = df.index.to_list()
partially_screened_sum = len(ind)
partially_screened_sum
partially_screened_also_in_cik = [i for i in ind if i in cik_dict.keys()]
len(partially_screened_also_in_cik)

['ARI',
 'PROV',
 'PBF',
 'CCU',
 'RC',
 'FAF',
 'MKSI',
 'BCBP',
 'CZNC',
 'SAFE',
 'QRTEA',
 'AEE',
 'ARTW',
 'BSET',
 'FL',
 'FBIZ',
 'HIW',
 'MLKN',
 'SF',
 'ADNT',
 'HFBL',
 'NTST',
 'CCAP',
 'MTG',
 'QUAD',
 'KBH',
 'CB',
 'FNCB',
 'DLX',
 'NWFL',
 'NMRK',
 'ASH',
 'PFE',
 'BSRR',
 'UMH',
 'CNK',
 'TCPC',
 'AIRC',
 'EPC',
 'ERIC',
 'VOYA',
 'ARMK',
 'TX',
 'SOI',
 'HBNC',
 'CNOB',
 'WRB',
 'THC',
 'ACCO',
 'ONL',
 'VALE',
 'DOW',
 'AXS',
 'TG',
 'TRV',
 'SEM',
 'FDBC',
 'BAFN',
 'NICK',
 'BP',
 'FMBH',
 'OLN',
 'TSLX',
 'STC',
 'OBE',
 'OCSL',
 'GOOD',
 'AMAL',
 'WRK',
 'LAD',
 'GPS',
 'CMCT',
 'LOCO',
 'NS',
 'ESNT',
 'FLIC',
 'SEE',
 'IVR',
 'EWBC',
 'NHI',
 'FFIN',
 'LUMN',
 'WMB',
 'GECC',
 'JEF',
 'CVE',
 'VICI',
 'MATV',
 'RWAY',
 'UVV',
 'SQFT',
 'B',
 'KARO',
 'FSBC',
 'ACNB',
 'CIO',
 'BHR',
 'OXY',
 'LDI',
 'WPC',
 'CZFS',
 'GAIA',
 'STGW',
 'NEP',
 'SKM',
 'DFS',
 'PLBC',
 'KALU',
 'BNL',
 'PSEC',
 'BDN',
 'AC',
 'LRFC',
 'BFH',
 'CRESY',
 'CPF',
 'DK',
 'LADR',
 'BOOM