In [16]:
# Importing necessary libraries
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [17]:
# List of tickers whose financial information needs to be extracted
tickers = ["MMM","AXP","AAPL","BA","CAT","CVX","CSCO","KO","DIS","DWDP",
           "XOM","GE","GS","HD","IBM","INTC","JNJ","JPM","MCD","MRK",
           "MSFT","NKE","PFE","PG","TRV","UTX","UNH","VZ","V","WMT"]

In [63]:
financial_dir = {} # directory to store financial information for each ticker

In [64]:
for ticker in tickers:
    try:
        print("scraping financial statement data for ", ticker)
        #getting balance sheet statement data
        url = "https://stockrow.com/api/companies/{}/financials.xlsx?dimension=A&section=Balance%20Sheet&sort=desc".format(ticker)
        df1 = pd.read_excel(url)
        # getting income statement data
        url = "https://stockrow.com/api/companies/{}/financials.xlsx?dimension=A&section=Income%20Statement&sort=desc".format(ticker)
        df2 = pd.read_excel(url)        
        # getting cashflow statement data
        url = "https://stockrow.com/api/companies/{}/financials.xlsx?dimension=A&section=Cash%20Flow&sort=desc".format(ticker)
        df3 = pd.read_excel(url)

        #combining all extracted information with the corresponding ticker
        df = pd.concat([df1, df2, df3])
        columns = df.columns.values
        for i in range(len(columns)):
            if columns[i] == "Unnamed: 0":
                columns[i] = "heading"
            else:
                columns[i] = columns[i].strftime("%Y-%m-%d")
        df.columns = columns
        df.set_index("heading", inplace=True)      
        financial_dir[ticker] = df
    except Exception as e:
        print(ticker, ":", e)

scraping financial statement data for  MMM
scraping financial statement data for  AXP
scraping financial statement data for  AAPL
scraping financial statement data for  BA
scraping financial statement data for  CAT
scraping financial statement data for  CVX
scraping financial statement data for  CSCO
scraping financial statement data for  KO
scraping financial statement data for  DIS
scraping financial statement data for  DWDP
DWDP : HTTP Error 404: Not Found
scraping financial statement data for  XOM
scraping financial statement data for  GE
scraping financial statement data for  GS
scraping financial statement data for  HD
scraping financial statement data for  IBM
scraping financial statement data for  INTC
scraping financial statement data for  JNJ
scraping financial statement data for  JPM
scraping financial statement data for  MCD
scraping financial statement data for  MRK
scraping financial statement data for  MSFT
scraping financial statement data for  NKE
scraping financial st

In [71]:
# Creating dataframe with relevant financial information for each stock using fundamental data
stats = ["Net Income Common",
         "Total Assets",
         "Operating Cash Flow",
         "Long Term Debt (Total)",
         "Total non-current liabilities",
         "Total current assets",
         "Total current liabilities",
         "Common Equity (Total)",
         "Revenue",
         "Gross Profit"] # change as required

indx = ["NetIncome","TotAssets","CashFlowOps","LTDebt","TotLTLiab","CurrAssets","CurrLiab","CommStock","TotRevenue","GrossProfit"]

def info_filter(df, stats, indx, lookback):
    """ function to filter relevant financial information 
    df = dataframe to be filtered
    stats = headings to filter
    indx = rename long headings
    lookback = number of years of data to be retained"""
    for stat in stats:
        if stat not in df.index:
            return
    df_new = df.loc[stats,df.columns[:lookback]]
    df_new.rename(dict(zip(stats, indx)), inplace=True)
    df_new.loc["OtherLTDebt", :] = df_new.loc["TotLTLiab",:] - df_new.loc["LTDebt", :]
    return df_new

In [72]:
# applying filtering to the financials
transformed_df = {}
for ticker in financial_dir:
    transformed_df[ticker] = info_filter(financial_dir[ticker], stats, indx, 3)

In [86]:
# Application of Piotroski F Score 
def piotroski_f(df_dict):
    # function to calculate f score of each stock and output information as dataframe
    f_score = {}
    for ticker in df_dict:
        columns = df_dict[ticker].columns
        
        ROA_FS = int(df_dict[ticker].loc["NetIncome", columns[0]]/((df_dict[ticker].loc["TotAssets", columns[0]] + df_dict[ticker].loc["TotAssets", columns[1]])/2) > 0)
        
        CFO_FS = int(df_dict[ticker].loc["CashFlowOps", columns[0]] > 0)
        
        ROA_D_FS = int((df_dict[ticker].loc["NetIncome", columns[0]]/((df_dict[ticker].loc["TotAssets", columns[0]] + df_dict[ticker].loc["TotAssets", columns[1]])/2)) >
                    (df_dict[ticker].loc["NetIncome", columns[1]]/((df_dict[ticker].loc["TotAssets", columns[1]] + df_dict[ticker].loc["TotAssets", columns[2]])/2)))
        
        CFO_ROA_FS = int(df_dict[ticker].loc["CashFlowOps", columns[0]]/df_dict[ticker].loc["TotAssets", columns[0]] >
                    (df_dict[ticker].loc["NetIncome", columns[0]]/(df_dict[ticker].loc["TotAssets", columns[0]] + df_dict[ticker].loc["TotAssets", columns[1]])/2))
        
        LTD_FS = int((df_dict[ticker].loc["LTDebt", columns[0]] + df_dict[ticker].loc["OtherLTDebt", columns[0]]) < (df_dict[ticker].loc["LTDebt", columns[1]] + df_dict[ticker].loc["OtherLTDebt", columns[1]]))
        
        CR_FS = int(df_dict[ticker].loc["CurrAssets", columns[0]] / df_dict[ticker].loc["CurrLiab", columns[0]] > df_dict[ticker].loc["CurrAssets", columns[1]] / df_dict[ticker].loc["CurrLiab", columns[1]])

        DILUTION_FS = int(df_dict[ticker].loc["CommStock", columns[0]] <= df_dict[ticker].loc["CommStock", columns[1]])

        GM_FS = int(df_dict[ticker].loc["GrossProfit", columns[0]] / df_dict[ticker].loc["TotRevenue", columns[0]] > df_dict[ticker].loc["GrossProfit", columns[1]] / df_dict[ticker].loc["TotRevenue", columns[1]])

        ATO_FS = int(df_dict[ticker].loc["TotRevenue", columns[0]]/((df_dict[ticker].loc["TotAssets", columns[0]]+df_dict[ticker].loc["TotAssets", columns[1]])/2) > 
                    df_dict[ticker].loc["TotRevenue", columns[1]] / ((df_dict[ticker].loc["TotAssets", columns[1]] + df_dict[ticker].loc["TotAssets", columns[2]])/2))

        f_score[ticker] = [ROA_FS, CFO_FS, ROA_D_FS, CFO_ROA_FS, LTD_FS, CR_FS, DILUTION_FS, GM_FS, ATO_FS]

    f_score_df = pd.DataFrame(f_score, index=["PosROA", "PosCFO", "ROAChange", "Accruals", "Leverage", "Liquidity", "Dilution", "GM", "ATO"])

    return f_score_df

In [87]:
# sorting stocks with highest Piotroski f score to lowest
f_score_df = piotroski_f(transformed_df)
f_score_df.sum().sort_values(ascending=False)

AXP     9
XOM     8
JNJ     8
DIS     7
PFE     7
IBM     7
TRV     7
MRK     7
MCD     7
KO      7
CSCO    7
CVX     7
CAT     7
V       7
AAPL    7
UNH     6
PG      6
MMM     6
WMT     6
GS      6
MSFT    5
NKE     5
JPM     5
HD      5
GE      5
INTC    5
VZ      4
BA      4
dtype: int64

TypeError: ignored