In [1]:
# setup
import pandas as pd
import numpy as np

%load_ext autoreload
%autoreload 2

In [259]:
# Read the bond master file, loading only the columns used further down.
input_mf_path = 'data/master_file.csv'

# columns to pull from the CSV
mf_col_names = [
    'cusip_id', 'bond_sym_id', 'company_symbol', 'debt_type_cd',
    'issuer_nm', 'scrty_ds', 'cpn_rt', 'cpn_type_cd',
    'trd_rpt_efctv_dt', 'mtrty_dt', 'cnvrb_fl',
]

# every non-date column is read as a string, except the coupon rate
# which is read as float64
mf_col_dtypes = {
    'cusip_id': str,
    'bond_sym_id': str,
    'company_symbol': str,
    'debt_type_cd': str,
    'issuer_nm': str,
    'scrty_ds': str,
    'cpn_rt': np.float64,
    'cpn_type_cd': str,
    'cnvrb_fl': str,
}

# columns handed to read_csv for date parsing
mf_col_dates = ['trd_rpt_efctv_dt', 'mtrty_dt']

dfmf = pd.read_csv(
    input_mf_path,
    usecols=mf_col_names,
    dtype=mf_col_dtypes,
    parse_dates=mf_col_dates,
)

In [260]:
# Narrow the master file to the bonds of interest. All row-level screens are
# combined into one boolean mask so the frame is filtered in a single pass
# instead of being copied once per condition.

# debt types to KEEP; any debt_type_cd not listed here is dropped
valid_debt_types = ['1LN-BND', '1LN-NT', '1M-BND', '1M-NT', '1STMTG', '1STMTGNT',
                    '2LN-NT', 'B-BND', 'B-BNT', 'B-DEB', 'B-NT', 'BND', 'DEB', 'MTN',
                    'NT', 'OTH', 'OTH-BND', 'OTH-NT', 'OTH-OTH', 'S-BND', 'S-BNT',
                    'S-DEB', 'S-NT', 'S-OTH', 'SB-NT', 'SBN-NT', 'SC-BND', 'SC-NT',
                    'SC-OTH', 'SECNT', 'SR', 'SRDEB', 'SRNT', 'SRSEC', 'SRSUBNT',
                    'SSC-BND', 'SSC-COV', 'SSC-NT', 'SUBDEB', 'SUBNT', 'TGNT',
                    'UN-BND', 'UN-DEB', 'UN-NT', 'UNNT'
                   ]

keep_rows = (
    # cpn_type_cd FXPV (plain vanilla fixed coupon) or OTH (other)
    dfmf.cpn_type_cd.isin(['FXPV', 'OTH'])
    # must have a cusip
    & ~dfmf.cusip_id.isna()
    # no converts (rows whose flag is anything other than 'Y' are kept)
    & ~(dfmf.cnvrb_fl == 'Y')
    # whitelisted debt types only
    & dfmf.debt_type_cd.isin(valid_debt_types)
    # coupon rate between 0.25% and 15%, both ends included; this bound also
    # rejects zero, negative and missing rates, so no separate "> 0" check
    # is needed
    & dfmf.cpn_rt.between(0.25, 15)
)
dfmf = dfmf.loc[keep_rows].drop(columns=['cpn_type_cd', 'cnvrb_fl'])

# parse maturity dates as YYYYMMDD; values that do not parse become NaT and
# those rows are dropped
dfmf = dfmf.assign(
    mtrty_dt=pd.to_datetime(dfmf.mtrty_dt, format='%Y%m%d', errors='coerce')
).dropna(subset=['mtrty_dt'])

# only credits maturing after 2011-12-31
dfmf = dfmf[dfmf.mtrty_dt > '2011-12-31']

# reorder columns
ordered_mf_cols = ['bond_sym_id', 'cusip_id', 'company_symbol',
                   'issuer_nm', 'debt_type_cd', 'scrty_ds',
                   'cpn_rt', 'trd_rpt_efctv_dt', 'mtrty_dt']
dfmf = dfmf[ordered_mf_cols]

In [261]:
# Read the bond price file, restricted to the columns needed below.
input_data_path = 'data/bond_prices.csv'

# columns to pull from the CSV
col_names = [
    'trans_dt',
    'cusip_id',
    'sub_prd_type',
    'close_pr',
    'close_yld_sign_cd',
    'close_yld',
]

# string columns stay as text; closing price and yield are float64
col_dtypes = {
    'cusip_id': str,
    'sub_prd_type': str,
    'close_pr': np.float64,
    'close_yld_sign_cd': str,
    'close_yld': np.float64,
}

# column handed to read_csv for date parsing
col_dates = ['trans_dt']

df = pd.read_csv(
    input_data_path,
    usecols=col_names,
    dtype=col_dtypes,
    parse_dates=col_dates,
)

# keep rows whose sub_prd_type is 'CORP'; the column is not needed afterwards
is_corp = df.sub_prd_type == 'CORP'
df = df.loc[is_corp].drop(columns=['sub_prd_type'])

# require a strictly positive reported close_yld (missing values fail this
# comparison and are dropped as well)
df = df.loc[df.close_yld > 0]

# apply the sign code: rows flagged '-' get their yield negated, all other
# rows keep the reported value
negative_yld = df.close_yld_sign_cd == '-'
df = df.assign(close_yld=df.close_yld.where(~negative_yld, -df.close_yld))
df = df.drop(columns='close_yld_sign_cd')

# signed yield must lie between -5% and 200%, both ends included
df = df[df.close_yld.between(-5, 200)]

# closing price must be at most 150
df = df[df.close_pr <= 150]

In [262]:
# Join closing prices to the master-file attributes. The inner join keeps only
# cusips present in both frames; the key has the same name on both sides, so
# a single `on=` is used.
dfout = pd.merge(left=df, right=dfmf, how='inner', on='cusip_id')

# keep the first row for each (trade date, cusip, closing price) combination
dfout = dfout.drop_duplicates(subset=['trans_dt', 'cusip_id', 'close_pr'])

# only trades with over 2 years (more than 730 days) of remaining life.
# The remaining life is computed as a standalone Series, so no temporary
# column has to be added to the frame and dropped again.
min_days_left = 730
days_left = (dfout.mtrty_dt - dfout.trans_dt).dt.days
dfout = dfout[days_left > min_days_left]

# select and reorder the output columns (no columns are renamed)
final_col_names = [
    'trans_dt', 'trd_rpt_efctv_dt', 'mtrty_dt', 'cusip_id',
    'bond_sym_id', 'company_symbol', 'issuer_nm', 'debt_type_cd',
    'scrty_ds', 'cpn_rt', 'close_pr', 'close_yld'
]

dfout = dfout[final_col_names]

In [267]:
# write the merged frame to disk; the row index is left out of the file
out_path = 'data/clean_bond_close_pxs.csv'
dfout.to_csv(path_or_buf=out_path, index=False)

# reached only when to_csv returned without raising
print('dataframe saved successfully!')

dataframe saved successfully!
