# U.S. Stock Market IV: Interest & Debt

In [None]:
import pandas as pd
import numpy as np
import requests, zipfile, io
import os
from pathlib import Path

from tiingo import TiingoClient                        # Tiingo client; used further down to download SPY price history.
# NOTE(review): 'XXXX' looks like a placeholder - supply a real Tiingo API key before running the download cell.
tiingo = TiingoClient({'api_key':'XXXX'})

import matplotlib.pyplot as plt                        # Basic plot library.
plt.style.use('ggplot')                                # Make plots look nice.

In [None]:
def get_items_from_SEC_files(tags, filename=None):
    """Extract, per tag, the shortest- and longest-period value of every filing.

    Reads the merged SEC files in ``data/sec/merged/``. For each tag and each
    (cik, filed) pair, only rows with the most recent ``ddate`` are candidates;
    among those, the value with the smallest ``qtrs`` and the value with the
    largest ``qtrs`` are kept side by side.

    Args:
        tags: List of tag names to extract.
        filename: Optional name of a single file inside the merged directory.
            When omitted, every non-hidden file in the directory is read.

    Returns:
        dict mapping each tag to a DataFrame indexed by (cik, filed) with the
        columns value_shortest, qtrs_shortest, value_longest, qtrs_longest,
        sorted by filing date. A tag for which no rows were found maps to an
        empty DataFrame.
    """
    tags      = list(tags)                                        # Allow any iterable; it is traversed several times.
    directory = 'data/sec/merged/'                                # Read data from here.
    filenames = [filename] if filename else os.listdir(directory) # Supplied filename or all files in "merged" directory.
    filenames = [f for f in filenames if not f.startswith(".")]   # Exclude hidden files from file list.

    sort_keys = ['cik', 'filed', 'ddate', 'qtrs']
    pieces    = {t: [] for t in tags}                             # Per tag: one partial table for each file.

    for name in filenames:                                        # Loop over all files.
        print(name)
        data = pd.read_csv(os.path.join(directory, name), parse_dates=['filed', 'ddate'])  # Read the file.

        for t in tags:                                            # Loop over all tags.
            item  = data[data.tag == t]                           # Select all data for this tag.
            # Sorting qtrs descending puts the smallest qtrs last; ascending puts the largest last.
            short = item.sort_values(sort_keys, ascending=[True, True, True, False])
            long  = item.sort_values(sort_keys, ascending=[True, True, True, True])
            short = short.groupby(['cik', 'filed']).last()[['value', 'qtrs']]  # One value for each firm and filing.
            long  = long .groupby(['cik', 'filed']).last()[['value', 'qtrs']]
            # Put shortest and longest next to each other.
            pieces[t].append(short.join(long, lsuffix='_shortest', rsuffix='_longest'))

    # Concatenate once per tag: DataFrame.append() was removed in pandas 2.0 and
    # re-copied the growing table on every call.
    results = {}
    for t, frames in pieces.items():
        non_empty = [frame for frame in frames if not frame.empty]
        if non_empty:
            results[t] = pd.concat(non_empty).sort_index(level='filed')  # Sort by filing date.
        elif frames:
            results[t] = frames[0]                                # Empty, but keeps the expected columns.
        else:
            results[t] = pd.DataFrame()                           # No file was read at all.

    return results



def combine_items(tags, items):
    """Merge the tables of several alternative tags into one table.

    The table of the first tag takes priority; each later tag only fills
    in entries that are still missing after the earlier ones.

    Args:
        tags: Non-empty list of tag names, in priority order.
        items: Mapping from tag name to its table.

    Returns:
        The combined table.
    """
    combined = items[tags[0]]
    for fallback_tag in tags[1:]:
        fallback = items[fallback_tag]
        combined = combined.combine_first(fallback)               # Keep existing values, fill gaps from the fallback.
    return combined



def calculate_quarterly_annual_values(item):
    """Derive single-quarter and trailing four-quarter values for every firm.

    Args:
        item: Table indexed by (cik, filed) with the columns value_shortest,
            qtrs_shortest, value_longest and qtrs_longest.

    Returns:
        DataFrame indexed by (cik, filed) with the columns 'valueQ' and
        'valueA', containing one row for every filing date listed for the
        firm in ``data/sec/dates/filing_dates.csv``. Entries that cannot be
        derived because earlier values are missing are NaN.
    """
    all_firms        = item.index.get_level_values('cik').unique()  # All CIKs.
    all_filing_dates = pd.read_csv('data/sec/dates/filing_dates.csv', index_col='cik', parse_dates=['filed'])

    frames = []                                                     # One table per firm; concatenated once at the end.

    for cik in all_firms:                                           # Loop over all firms.
        filing_dates = pd.Series(all_filing_dates.filed[cik])       # All filing dates for this firm (Series even if only one).
        firm         = item.loc[cik]                                # This firm's rows, indexed by filing date.

        # Quarterly values:
        # Cast to float so that NaN can be stored even if the input values are integers.
        valuesQ = firm.value_shortest.reindex(filing_dates).astype(float)  # Values with shortest reported quarters.
        qtrsQ   = firm.qtrs_shortest.astype(int)                    # Number of quarters for each value.
        for date, q in qtrsQ[qtrsQ > 1].items():                    # Loop over all dates with > 1 quarters.
            previous_values = valuesQ.loc[:date].iloc[-q:-1]        # Example: for q=3 we need to subtract 2 previous quarters.
            if len(previous_values) == q - 1:                       # If all previous values available.
                valuesQ[date] -= previous_values.sum(skipna=False)  # Subtract previous values to get quarterly value.
            else:
                valuesQ[date]  = np.nan

        # Annual values:
        valuesA = firm.value_longest.reindex(filing_dates).astype(float)   # Values with longest reported quarters.
        qtrsA   = firm.qtrs_longest.astype(int)                     # Number of quarters for each value.
        for date, q in qtrsA[qtrsA < 4].items():                    # Loop over all dates with < 4 quarters.
            previous_values = valuesQ.loc[:date].iloc[-4:-q]        # Example: for q=2 we need to add quarters -3 and -4.
            if len(previous_values) == 4 - q:                       # If all previous data available.
                valuesA[date] += previous_values.sum(skipna=False)  # Add previous values to get annual values.
            else:
                valuesA[date]  = np.nan

        frames.append(pd.DataFrame({'cik': cik, 'filed': filing_dates, 'valueQ': valuesQ.values, 'valueA': valuesA.values}))

    if not frames:                                                  # No firms: return an empty table of the same layout.
        return pd.DataFrame(columns=['cik', 'filed', 'valueQ', 'valueA']).set_index(['cik', 'filed'])

    # pd.concat replaces the repeated DataFrame.append(), which was removed in pandas 2.0.
    return pd.concat(frames).set_index(['cik', 'filed'])            # Return a table with columns 'valueQ' and 'valueA'.



def ffill_values(item, dates):
    """Spread filing-date values over the given dates by forward filling.

    Args:
        item: Series indexed by filing date and 'cik' (one value per firm
            and filing).
        dates: DatetimeIndex of the dates wanted in the result.

    Returns:
        DataFrame with one row per entry of ``dates`` and one column per
        firm, holding the most recently filed value as of each date. A firm
        whose last filing is 120 or more days older than the most recent
        filing of any firm is only filled up to its own last filing date, so
        later dates stay NaN for that firm.
    """
    data = item.unstack('cik')                                          # Rows: filing dates, columns: firms.
    data = data.reindex(dates.union(data.index)).sort_index()           # Add specified dates to index.
    filing_dates = pd.read_csv('data/sec/dates/filing_dates.csv', index_col='cik', parse_dates=['filed']).filed
    last_filing_date_all_firms = filing_dates.max()                     # Most recent date where at least 1 firm filed.

    for cik in data.columns:                                            # Loop over all firms.
        # NOTE(review): takes the last listed date, so the file is assumed to be in chronological order per firm.
        last_filing_date      = pd.Series(filing_dates[cik]).iloc[-1]   # Last date where this firm filed.
        days_since_last_filed = (last_filing_date_all_firms - last_filing_date).days
        last_date_this_firm   = dates[-1] if days_since_last_filed < 120 else last_filing_date
        # Assign the filled values back explicitly: calling ffill(inplace=True) on the
        # .loc selection only modifies a temporary object under pandas Copy-on-Write.
        data.loc[:last_date_this_firm, cik] = data.loc[:last_date_this_firm, cik].ffill()

    return data.loc[dates]                                              # Return only specified dates.

Get these tags:

In [None]:
# Candidate tags for each item, in priority order: combine_items() starts from the
# first tag and only uses later tags to fill in values that are still missing.
# NOTE(review): the us-gaap element for short-term debt is spelled 'ShortTermBorrowings'
# (plural). The original singular spelling is kept as a fallback in case it occurs in the data.
tags_shortTermDebt               = ['ShortTermBorrowings', 'ShortTermBorrowing', 'DebtCurrent']
tags_longTermDebtCurrent         = ['LongTermDebtAndCapitalLeaseObligationsCurrent', 'LongTermDebtCurrent']
tags_longTermDebtNoncurrent      = ['LongTermDebtAndCapitalLeaseObligations', 'LongTermDebtNoncurrent']
tags_interest_expense            = ['InterestExpenseDebt','InterestAndDebtExpense','InterestExpense','InterestIncomeExpenseNonoperatingNet']
tags_interest_income             = ['InvestmentIncomeInterest','InterestAndOtherIncome']

all_tags = tags_shortTermDebt + tags_longTermDebtCurrent + tags_longTermDebtNoncurrent + tags_interest_expense + tags_interest_income 

# Reads every file in the merged directory and returns one table per tag.
items = get_items_from_SEC_files( all_tags )

In [None]:
# Interest expense: merge the alternative tags, then derive quarterly and annual values.
items['interest_expense'] = combine_items(tags_interest_expense, items)
interest_expense = calculate_quarterly_annual_values(items['interest_expense'])

# Interest income: same two steps.
items['interest_income'] = combine_items(tags_interest_income, items)
interest_income = calculate_quarterly_annual_values(items['interest_income'])

interest_expense.head(3)

In [None]:
# Debt items are point-in-time values, so the alternative tags are only merged
# (no quarterly/annual conversion is applied to them).
shortTermDebt, longTermDebtCurrent, longTermDebtNoncurrent = (
    combine_items(tag_group, items)
    for tag_group in (tags_shortTermDebt, tags_longTermDebtCurrent, tags_longTermDebtNoncurrent)
)

shortTermDebt.head(3)

In [None]:
# Save files: one CSV per item, written in the order listed.
for table, path in [
    (interest_income,        'data/sec/items/InterestIncome.csv'),
    (interest_expense,       'data/sec/items/InterestExpense.csv'),
    (shortTermDebt,          'data/sec/items/ShortTermDebt.csv'),
    (longTermDebtCurrent,    'data/sec/items/LongTermDebtCurrent.csv'),
    (longTermDebtNoncurrent, 'data/sec/items/LongTermDebtNoncurrent.csv'),
]:
    table.to_csv(path)

In [None]:
# Read files (units: billion dollars)
# Every table is indexed by (filed, cik) and every column is divided by 10**9.
csv_options = dict(parse_dates=['filed'], index_col=['filed','cik'])

interest_income        = pd.read_csv('data/sec/items/InterestIncome.csv',         **csv_options) / 10**9
interest_expense       = pd.read_csv('data/sec/items/InterestExpense.csv',        **csv_options) / 10**9
shortTermDebt          = pd.read_csv('data/sec/items/ShortTermDebt.csv',          **csv_options) / 10**9
longTermDebtCurrent    = pd.read_csv('data/sec/items/LongTermDebtCurrent.csv',    **csv_options) / 10**9
longTermDebtNoncurrent = pd.read_csv('data/sec/items/LongTermDebtNoncurrent.csv', **csv_options) / 10**9

operatingIncome        = pd.read_csv('data/sec/items/OperatingIncome.csv',        **csv_options) / 10**9
operatingIncome.head(2)

In [None]:
# SIC industry codes per filing, indexed by (filed, cik) like the item tables.
sic = pd.read_csv(
    'data/sec/attributes/sic.csv',
    parse_dates=['filed'],
    index_col=['filed','cik'],
)
sic.head(2)

Fill the tables:

In [56]:
# Trading days: the dates of the SPY price history from 2009-04-15 on, with the timezone removed.
spy_prices   = tiingo.get_dataframe('SPY','2009-04-15')
trading_days = pd.to_datetime(spy_prices.index).tz_convert(None)

# Each call returns a table with one row per trading day and one column per firm.
interestExpenseQ = ffill_values(interest_expense['valueQ'], trading_days)
interestExpenseA = ffill_values(interest_expense['valueA'], trading_days)

interestIncomeQ = ffill_values(interest_income['valueQ'], trading_days)
interestIncomeA = ffill_values(interest_income['valueA'], trading_days)

operatingIncomeQ = ffill_values(operatingIncome['valueQ'], trading_days)
operatingIncomeA = ffill_values(operatingIncome['valueA'], trading_days)

# The debt names (and `sic` below) are rebound from the raw tables to the filled
# tables, so this cell cannot simply be re-run without re-reading the files first.
shortTermDebt          = ffill_values(shortTermDebt['value_shortest'],          trading_days)
longTermDebtCurrent    = ffill_values(longTermDebtCurrent['value_shortest'],    trading_days)
longTermDebtNoncurrent = ffill_values(longTermDebtNoncurrent['value_shortest'], trading_days)

sic = ffill_values(sic['sic'], trading_days)

Calculate total debt:

In [None]:
# Total debt = short-term debt + current long-term debt + non-current long-term debt.
# fill_value=0 counts a component missing on one side as zero; the sum is NaN only
# where both operands of an addition are missing.
current_debt = shortTermDebt.add(longTermDebtCurrent, fill_value=0)
debt         = current_debt.add(longTermDebtNoncurrent, fill_value=0)

debt.tail(2)

Historical debt for specific firm:

In [None]:
# Ticker symbol table; the first CSV column becomes the index (used as the firm's CIK in the next cell).
symbols = pd.read_csv('data/ticker_symbols/symbols.csv',index_col=0)

In [None]:
# The ticker is left blank in this notebook: fill in a symbol before running.
# If no row matches the ticker, `.index[0]` raises an IndexError.
ticker  = ''
matches = symbols.loc[symbols.ticker == ticker]
cik     = matches.index[0]                       # First matching row; its index value is the firm's CIK.

debt[cik].plot()

Top 10 annual interest expense:

In [36]:
# Most recent SIC code of every firm: last row of the filled table, as a one-column table.
sic_current = pd.DataFrame({'sic': sic.iloc[-1]})
sic_current.head(2)

Unnamed: 0_level_0,sic
cik,Unnamed: 1_level_1
1750,3720.0
1800,2834.0


In [2]:
# Get all 6000s: reduce each 4-digit SIC code to its leading digit, so that
# the whole 6000-6999 range becomes 6.
# NOTE(review): the divisor was blank in the original cell; 1000 is inferred from the comment above.
codes = sic.div( 1000 ).apply(np.floor)
codes[-3:]

In [None]:
# Boolean table (dates x firms): True where the firm's SIC code is in the 6000s.
# NOTE(review): the comparison value was blank in the original cell; 6 assumes that
# `codes` holds the leading digit of the SIC code (SIC / 1000, rounded down).
financials = codes[codes==6].notnull()
financials[-3:]

Top 10 financial firms annual interest expense:

Top 10 non-financial firms annual interest expense:

Total market debt (non-financials):

Total quarterly interest expense (non-financials):

Interest expense relative to debt:

In [None]:
# Totals over all non-financial firms, one value per trading day. Both right-hand
# sides were blank in the original cell; they follow the same pattern as the
# operating-income total in the next cell.
total_interestExpsense = interestExpenseQ[~financials].sum(axis='columns')   # Quarterly interest expense.
total_debt             = debt[~financials].sum(axis='columns')               # Total debt.

# Quarterly interest expense times 4 approximates a yearly figure, shown relative to total debt.
(total_interestExpsense*4/total_debt)['2013':].plot()

Operating income relative to interest expense:

In [None]:
# Quarterly operating income summed over all non-financial firms, one value per trading day.
non_financial_income  = operatingIncomeQ[~financials]
total_operatingIncome = non_financial_income.sum(axis='columns')

# Ratio of total operating income to total interest expense, plotted from 2012 on.
income_to_interest = total_operatingIncome / total_interestExpsense
income_to_interest['2012':].plot()