In [1]:
# setup
import pandas as pd
import numpy as np
from os import listdir
from os.path import isfile, join
from CalcBenchHandler import CalcBenchHandler as CBH

%load_ext autoreload
%autoreload 2

In [63]:
# pull list of relevant calcbench companies
dfcorps = pd.read_csv('data/ciks/calcbench_companies.csv')

# get list of tickers with available financials
ticker_dir = 'data/financials'
tickers = [f.split('.csv')[0] for f in listdir(ticker_dir) if isfile(join(ticker_dir, f))]

# pull financials for test ticker
ticker = tickers[0]
ticker_path = join(ticker_dir, f'{ticker}.csv')
df_base = pd.read_csv(ticker_path)

In [64]:
# extract raw financials
dffin = df_base.copy()

# clean earnings_release_date
dffin.earnings_release_date = pd.to_datetime(dffin.earnings_release_date, errors='coerce')
dffin = dffin.dropna(subset=['earnings_release_date'])
dffin = dffin.sort_values(by='earnings_release_date', ascending=False)

# fill na income statement fields with with 0
cbh = CBH()
fs_cols = cbh.INS+cbh.CFS+cbh.BS
dffin = dffin.fillna(value={k: 0 for k in fs_cols})

# build instance cols
# exclude unit based items like shares outstanding
share_cols = [
    'sharesoutstandingendofperiod',
    'avgsharesoutstandingbasic',
    'avgdilutedsharesoutstanding',
    'stockrepurchasedduringperiodshares',
    'commonstockdividendspershare'
]

# tmp col storage; not currently used
debt_cols = [
    'currentlongtermdebt', 'longtermdebt',
    'totaldebt', 'lineofcreditfacilityamountoutstanding',
    'secureddebt', 'seniornotes', 'subordinateddebt',
    'convertibledebt', 'termloan', 'mortgagedebt',
    'unsecureddebt', 'mediumtermnotes',
    'trustpreferredsecurities'
]

ins_cols = [c for c in cbh.INS if c not in share_cols]
cfs_cols = [c for c in cbh.CFS if c not in share_cols]
bs_cols = [c for c in cbh.BS if c not in share_cols]

# yoy chg
ins_chg_cols = [f'{c}_yoy_chg' for c in ins_cols]
cfs_chg_cols = [f'{c}_yoy_chg' for c in cfs_cols]
bs_chg_cols = [f'{c}_yoy_chg' for c in bs_cols]

base_cols = ins_cols+cfs_cols+bs_cols
chg_cols = ins_chg_cols+cfs_chg_cols+bs_chg_cols
out_cols = base_cols+chg_cols

dffin[chg_cols] = dffin[base_cols]-dffin[base_cols].shift(4)
dffin = dffin.dropna()

# check all fields have values
assert dffin.isna().sum().sum() == 0, f'{ticker} has na fields!'

dffin.head(10)

	save_dir: None
	verbose: True


Unnamed: 0,earnings_release_date,filing_date,period,period_start,period_end,revenueadjusted,grossprofit,sgaexpense,researchanddevelopment,operatingexpenses,...,lineofcreditfacilityamountoutstanding_yoy_chg,secureddebt_yoy_chg,seniornotes_yoy_chg,subordinateddebt_yoy_chg,convertibledebt_yoy_chg,termloan_yoy_chg,mortgagedebt_yoy_chg,unsecureddebt_yoy_chg,mediumtermnotes_yoy_chg,trustpreferredsecurities_yoy_chg
38,2018-11-19,2018-12-20,2018Q3,2018-07-31,2018-10-31,1294000000.0,715000000.0,356000000.0,104000000.0,460000000.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
37,2018-08-14,2018-08-30,2018Q2,2018-05-01,2018-07-31,1203000000.0,659000000.0,341000000.0,97000000.0,438000000.0,...,0.0,0.0,497000000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
36,2018-05-14,2018-05-31,2018Q1,2018-02-01,2018-04-30,1206000000.0,643000000.0,341000000.0,92000000.0,433000000.0,...,0.0,0.0,-2000000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
35,2018-02-14,2018-03-06,2017Q4,2017-11-01,2018-01-31,1211000000.0,670000000.0,347000000.0,94000000.0,441000000.0,...,0.0,0.0,-2000000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
34,2017-11-20,2017-12-21,2017Q3,2017-07-31,2017-10-31,1189000000.0,647000000.0,325000000.0,89000000.0,414000000.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
33,2017-08-15,2017-09-06,2017Q2,2017-05-01,2017-07-31,1114000000.0,596000000.0,308000000.0,87000000.0,395000000.0,...,0.0,0.0,-2000000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
32,2017-05-22,2017-06-06,2017Q1,2017-02-01,2017-04-30,1102000000.0,592000000.0,307000000.0,84000000.0,391000000.0,...,0.0,0.0,-400000000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
31,2017-02-14,2017-03-08,2016Q4,2016-11-01,2017-01-31,1067000000.0,574000000.0,289000000.0,79000000.0,368000000.0,...,0.0,0.0,-400000000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
30,2016-11-15,2016-12-20,2016Q3,2016-07-31,2016-10-31,1111000000.0,588000000.0,321000000.0,84000000.0,405000000.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
29,2016-08-17,2016-09-07,2016Q2,2016-05-01,2016-07-31,1044000000.0,542000000.0,310000000.0,86000000.0,396000000.0,...,235000000.0,0.0,-1091000000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [65]:
# load bond-equity links and bond trades
link_path = 'data/ciks/bonds_to_equities_link.csv'
bond_px_path = 'data/bonds/clean_bond_close_pxs.csv'

dflink = pd.read_csv(link_path)
dfpxs = pd.read_csv(bond_px_path)

In [66]:
# pull bond prices for test ticker
# bond prices must have a transaction date
dfpxs.trans_dt = pd.to_datetime(dfpxs.trans_dt, errors='coerce')
dfpxs = dfpxs.dropna(subset=['trans_dt'])

# drop duplicate symbol/equity_cusip records
dfdupes = dflink.groupby(['SYMBOL', 'EQUITY_CUSIP']).count()
sym_counts = dfdupes.index.get_level_values(0).value_counts()
sym_dupes = sym_counts[sym_counts>1]
dfdupes = dfdupes.reset_index()
ser_eqy_cusip = dfdupes[~dfdupes.SYMBOL.isin(sym_dupes.index.values)].EQUITY_CUSIP
dflink = dflink[dflink.EQUITY_CUSIP.isin(ser_eqy_cusip)]

# get links to bond cusips for test ticker
df_tick_links = dflink[dflink.SYMBOL == ticker]

# get pxs for ticker bond cusips
dftxs = dfpxs[dfpxs.cusip_id.isin(df_tick_links.cusip_id)]
dftxs = dftxs.sort_values(by='trans_dt', ascending=False)
dftxs.head()

Unnamed: 0,trans_dt,trd_rpt_efctv_dt,mtrty_dt,cusip_id,bond_sym_id,company_symbol,issuer_nm,debt_type_cd,scrty_ds,cpn_rt,close_pr,close_yld
4437,2019-12-31,2019-09-05,2029-09-15,00846UAL5,A4881169,A,AGILENT TECHNOLOGIES INC,S-NT,Senior Unsecured Note,2.75,99.601,2.796985
4374,2019-12-31,2016-09-15,2026-09-22,00846UAK7,A4404722,A,AGILENT TECHNOLOGIES INC,S-NT,Senior Unsecured Note,3.05,102.339,2.654079
4129,2019-12-31,2013-06-18,2023-07-15,00846UAJ0,A4020252,A,AGILENT TECHNOLOGIES INC,S-NT,Senior Unsecured Note,3.875,105.5099,2.12916
4128,2019-12-30,2013-06-18,2023-07-15,00846UAJ0,A4020252,A,AGILENT TECHNOLOGIES INC,S-NT,Senior Unsecured Note,3.875,105.653,2.086651
3149,2019-12-30,2012-09-10,2022-10-01,00846UAH4,A3900782,A,AGILENT TECHNOLOGIES INC,S-NT,Senior Unsecured Note,3.2,102.976857,1.972428


In [206]:
# EV calcs
# pull equity data for test ticker transactions
eqy_px_path = f'data/equities/{ticker}.csv'
df_eqypxs = pd.read_csv(eqy_px_path)
df_eqypxs = df_eqypxs.rename(columns={'Adj Close': 'AdjClose'})

# fetch prices for relevant transaction dates
df_tx_matches = pd.merge(left=dftxs.set_index('trans_dt'),
                         right=df_eqypxs.set_index('Date'),
                         how='inner', left_index=True,
                         right_index=True)
df_tx_matches.index.name = 'trans_dt'
df_tx_matches = df_tx_matches.drop_duplicates()

# ev components
adds = [
    'currentlongtermdebt', 'longtermdebt',
    'restrictedcashandinvestmentscurrent',
    'trustpreferredsecurities'
]
subs = [
    'cash', 'availableforsalesecurities',
    'totalinvestments'
]
tx_col_desc = [
    'mtrty_dt', 'bond_sym_id',
    'company_symbol', 'issuer_nm', 'debt_type_cd',
    'scrty_ds', 'cpn_rt', 'close_pr', 'close_yld'
]

pcnt = 8
cols_flat = [f'{c}_{i}' for c in out_cols for i in range(pcnt)]

df_tx_in = df_tx_matches.copy()
df_tx_in.index.name = 'trans_dt'

mi = pd.MultiIndex.from_tuples(zip(df_tx_in.cusip_id, df_tx_in.index),
                               names=['cusip_id', 'trans_dt'])
df_flat = pd.DataFrame(columns=tx_col_desc+cols_flat, index=mi)
df_flat[tx_col_desc] = df_tx_in[tx_col_desc].values

for trans_dt, tx in df_tx_in.iterrows():
    # find most recent period
    df_2ltm = dffin[dffin.earnings_release_date <= trans_dt].head(pcnt)
    if df_2ltm.shape[0] == pcnt:
        df_2ltm = df_2ltm.reset_index()
        per = df_2ltm.iloc[0]
        mkt_cap = tx.AdjClose*per.avgdilutedsharesoutstanding
        ev = per[adds].sum()-per[subs].sum()+mkt_cap
        df_2ltm[out_cols] = per[out_cols].values/ev
        
        # stack rows into single column
        for i, row in df_2ltm.iterrows():
            for key in out_cols:
                df_flat.loc[(tx.cusip_id, trans_dt), f'{key}_{i}'] = row[key]

# save flattened transactions
tx_out_path = f'data/bonds/transactions/{ticker}.csv'
df_flat.to_csv(tx_out_path, index=True)
print(f'successfully saved {ticker} bond transaction data to {tx_out_path}')

successfully saved A bond transaction data to data/bonds/transactions/A.csv
