# Value Investing Indicators from SEC Filings

### Data Source: https://www.sec.gov/dera/data/financial-statement-data-sets.html



In [1]:
import pandas as pd
import os
import shutil
import glob
import sys
import warnings
import functools
from functools import reduce
warnings.simplefilter("ignore")

# Use the parent of the notebook's folder as the project root.
# os.chdir("..") is relative to the *current* working directory, so running
# this cell a second time in the same session would climb one more level and
# silently change dir_root. Only move (and record the root) the first time.
if "dir_root" not in globals():
    os.chdir("..")
    dir_root = os.getcwd()


In [2]:
# Folder that is later globbed for the *_num.txt / *_sub.txt / *_pre.txt files.
dir_raw = dir_root + u"/sec_filings/Raw"

# Pipe-delimited CIK/ticker file; only the two named columns are loaded.
cik_ticker_path = dir_root + u"/sec_filings/cik_ticker.csv"
cik_lookup = pd.read_csv(cik_ticker_path, sep="|", usecols=['CIK', 'Ticker'])


In [3]:
# XBRL tags to keep from the *_num.txt files (each tag listed once; the list
# is only used for an `isin` filter, so duplicates would add nothing).
num_tags = [
    "PreferredStockValue",
    "AssetsCurrent",
    "Liabilities",
    "EarningsPerShareBasic",
    "CommonStockSharesOutstanding",
    "LiabilitiesCurrent",
    "SharePrice",
    "StockholdersEquity",
    "NetIncomeLoss",
    "GrossProfit",
    "SalesRevenueNet",
    "StockRepurchasedAndRetiredDuringPeriodShares",
]

# Columns retained from the *_num.txt and *_sub.txt files respectively.
num_cols = ['adsh', 'tag', 'version', 'uom', 'ddate', 'qtrs', 'value']

sub_cols = ['adsh', 'ein', 'cik', 'sic', 'name', 'period', 'fye', 'form', 'filed', 'fp']

# Sorted so that the three lists line up position by position.
files_num = sorted(glob.glob(dir_raw + u'/*_num.txt'))

files_sub = sorted(glob.glob(dir_raw + u'/*_sub.txt'))

files_pre = sorted(glob.glob(dir_raw + u'/*_pre.txt'))

# zip() stops at the shortest list and pairs purely by position, so a single
# missing file would silently drop or mis-pair data sets. Fail loudly instead.
if not (len(files_num) == len(files_sub) == len(files_pre)):
    raise ValueError(
        "Mismatched raw file counts: %d *_num.txt, %d *_sub.txt, %d *_pre.txt"
        % (len(files_num), len(files_sub), len(files_pre))
    )

# Collect one merged frame per file triple and concatenate once at the end.
# DataFrame.append copied the whole accumulated frame on every iteration and
# no longer exists in pandas >= 2.0.
merged_frames = []

for num, sub, pre in zip(files_num, files_sub, files_pre):
    # Everything is read as text; numeric conversion is left to later steps.
    df_num = pd.read_csv(num, sep='\t', dtype=str, encoding="ISO-8859-1")
    df_sub = pd.read_csv(sub, sep='\t', dtype=str, encoding="ISO-8859-1")
    df_pre = pd.read_csv(pre, sep='\t', dtype=str, encoding="ISO-8859-1")
    df_sub = df_sub[sub_cols]
    df_num = df_num[num_cols]
    df_num = df_num[df_num['tag'].isin(num_tags)]
    # Inner joins: pre rows x matching num rows, then attach the sub columns.
    df_pre = df_pre.merge(df_num, on=['adsh', 'tag', 'version'], sort=True)
    sec_merge = df_sub.merge(df_pre, on='adsh', how="inner", sort=True)
    merged_frames.append(sec_merge)

# pd.concat raises on an empty list, so keep the original "empty frame" result
# when no raw files were found.
sec_df = pd.concat(merged_frames) if merged_frames else pd.DataFrame([])


In [None]:
# Show the column names of the combined frame as an array.
sec_df.columns.values

In [None]:
# Disabled code: lower-cases the cik_lookup column names and maps each
# sec_df 'cik' to a 'ticker' through cik_lookup. Kept commented out as found.
# cik_lookup.columns = cik_lookup.columns.str.lower()

# sec_df['ticker'] = sec_df['cik'].map(cik_lookup.set_index('cik')['ticker'].to_dict())

# sec_df['cik'] = sec_df['cik'].astype(str)

# cik_lookup['cik'] = cik_lookup['cik'].astype(str)

import os 

# Current working directory at the time this cell runs.
path = os.getcwd()

# Write the combined frame to test.csv in that directory, without the index.
sec_df.to_csv(path + r'/test.csv', index=False)


In [None]:
# Keep only the columns needed to build one column per tag.
sec_df = sec_df[['adsh', 'uom', 'name', 'cik', 'tag', 'fp', 'value']].copy()

# The raw files are read with dtype=str, so `value` is text at this point.
# Convert it once here so the per-tag columns can be used in arithmetic;
# anything that is not a number becomes NaN.
sec_df['value'] = pd.to_numeric(sec_df['value'], errors='coerce')


def _tag_frame(tag):
    """Return the rows of ``sec_df`` for ``tag`` with ``value`` renamed to the tag name."""
    return sec_df[sec_df["tag"] == tag].drop(columns=['tag']).rename(columns={'value': tag})


# One long-form slice per tag (these still carry the `uom` column).
PreferredStockValue = _tag_frame("PreferredStockValue")
AssetsCurrent = _tag_frame("AssetsCurrent")
Liabilities = _tag_frame("Liabilities")
EarningsPerShareBasic = _tag_frame("EarningsPerShareBasic")
CommonStockSharesOutstanding = _tag_frame("CommonStockSharesOutstanding")
LiabilitiesCurrent = _tag_frame("LiabilitiesCurrent")
SharePrice = _tag_frame("SharePrice")
StockholdersEquity = _tag_frame("StockholdersEquity")
NetIncomeLoss = _tag_frame("NetIncomeLoss")
GrossProfit = _tag_frame("GrossProfit")
SalesRevenueNet = _tag_frame("SalesRevenueNet")
StockRepurchasedAndRetiredDuringPeriodShares = _tag_frame("StockRepurchasedAndRetiredDuringPeriodShares")

print(StockRepurchasedAndRetiredDuringPeriodShares)

# Columns that identify a filing row in the wide frame.
# `uom` is deliberately NOT a join key: the unit belongs to a single tag's
# value, and an inner join on it only keeps rows where every tag shares the
# same unit.
# NOTE(review): in the SEC data sets share counts and money amounts appear to
# use different `uom` values, which would make such a join empty - confirm.
merge_keys = ['adsh', 'name', 'cik', 'fp']

# Each tag appears exactly once. Merging the same tag frame twice makes pandas
# rename the clashing columns to <tag>_x / <tag>_y, so the plain <tag> column
# would no longer exist in the result.
tag_frames = [
    PreferredStockValue,
    AssetsCurrent,
    Liabilities,
    EarningsPerShareBasic,
    CommonStockSharesOutstanding,
    LiabilitiesCurrent,
    SharePrice,
    StockholdersEquity,
    NetIncomeLoss,
    GrossProfit,
    SalesRevenueNet,
    StockRepurchasedAndRetiredDuringPeriodShares,
]

# Start from the distinct key rows rather than the long-form frame, so the
# result has no leftover tag/value/uom columns. Exact duplicate rows are
# dropped from every input because each duplicate multiplies the row count of
# all later merges.
# NOTE(review): a filing can still carry several different values for one tag
# (for example for different dates); those rows still fan out across merges.
dfList = [sec_df[merge_keys].drop_duplicates()] + [
    frame.drop(columns=['uom']).drop_duplicates() for frame in tag_frames
]

# Inner joins: only key rows that have a value for every tag survive.
sec_curated = reduce(lambda df1, df2: df1.merge(df2, on=merge_keys), dfList)


# Benjamin Graham 

## Formulas:

* NCAVPS = (Current Assets - (Total Liabilities + Preferred Stock)) ÷ Shares Outstanding
    * Less than 1.10 

* Current Ratio (stored in the `DebtToAssets` column) = Current Assets / Current Liabilities
    * Greater than 1.50

* Price / Earnings per Share ratio 
    * Less than 9.0

* PRICE TO BOOK VALUE = (P/BV) 
    * Where BV = (Total Shareholder Equity−Preferred Stock)/ Total Outstanding Shares
    * Less than 1.20. P/E ratios

## References:

Benjamin Graham rules: https://cabotwealth.com/daily/value-investing/benjamin-grahams-value-stock-criteria/ 

Benjamin Graham rules Modified: https://www.netnethunter.com/16-benjamin-graham-rules/ 



In [None]:

# Benjamin Graham indicators.
# The per-tag columns (AssetsCurrent, Liabilities, ...) live on the wide frame
# `sec_curated`; `sec_df` is long-form (one `tag`/`value` pair per row) and has
# none of them, so the ratios are computed on and stored in `sec_curated`.

# Net current asset value per share. The whole numerator is divided by the
# share count; without the outer parentheses only the liabilities term was.
sec_curated['NCAVPS'] = (
    sec_curated['AssetsCurrent']
    - (sec_curated['Liabilities'] + sec_curated['PreferredStockValue'])
) / sec_curated['CommonStockSharesOutstanding']

# Current assets over current liabilities (column name kept as-is).
sec_curated['DebtToAssets'] = sec_curated['AssetsCurrent'] / sec_curated['LiabilitiesCurrent']

# Price / earnings per share.
sec_curated['PE'] = sec_curated['SharePrice'] / sec_curated['EarningsPerShareBasic']

# Price / book value per share, where book value excludes preferred stock.
book_value_per_share = (
    sec_curated['StockholdersEquity'] - sec_curated['PreferredStockValue']
) / sec_curated['CommonStockSharesOutstanding']
sec_curated['PBV'] = sec_curated['SharePrice'] / book_value_per_share


# Warren Buffett Rules

## Formulas

* Debt/Equity= Total Liabilities / Total Shareholders’ Equity 
    * Less than 1 and ROE is greater than 10%

* Return on Equity (ROE) = Net Income / Stockholders' Equity
    * Is Positive

* Gross Profit Margin = Gross Profit / Revenue 
    * Greater than 40% 
​
* Quarter over Quarter EPS 
    * Greater than 10

* Stock Buybacks
    * Greater than last period

## References: 
https://www.oldschoolvalue.com/tutorial/this-is-how-buffett-interprets-financial-statements/

In [None]:

# Warren Buffett indicators.
# The per-tag columns live on the wide frame `sec_curated`; `sec_df` is
# long-form (one `tag`/`value` pair per row) and has none of them, so the
# indicators are computed on and stored in `sec_curated`.

# Total liabilities over stockholders' equity.
sec_curated['DebtEquity'] = sec_curated['Liabilities'] / sec_curated['StockholdersEquity']

# Net income over stockholders' equity.
sec_curated['ReturnEarnings'] = sec_curated['NetIncomeLoss'] / sec_curated['StockholdersEquity']

# Gross profit over net sales revenue.
sec_curated["GrossProfitMargin"] = sec_curated['GrossProfit'] / sec_curated['SalesRevenueNet']

# Plain copies under shorter names; no period-over-period comparison is done here.
sec_curated["EPS"] = sec_curated["EarningsPerShareBasic"]

sec_curated["StockBuybacks"] = sec_curated["StockRepurchasedAndRetiredDuringPeriodShares"]