In [310]:
import json
import os
import sqlite3
from collections import namedtuple
from datetime import datetime, timedelta

import numpy as np
import pandas as pd

pd.set_option('display.max_columns', None)

In [311]:
# Directory that holds the SQLite database and the ticker-info CSV. It defaults to the
# original hard-coded location; set the FINANCE_ML_DATA_DIR environment variable to run
# the notebook from a different checkout or machine.
DATA_DIR = os.environ.get(
    'FINANCE_ML_DATA_DIR', '/Users/ezras/projects/personal/finance_ml/data').rstrip('/')
DB_PATH = f'{DATA_DIR}/quarterly_financial_data.db'
INFO_CSV_PATH = f'{DATA_DIR}/stock_general_info.csv'
# Names of the tables read from the SQLite database at DB_PATH.
TABLE_NAME = 'yahoo_financial_data'
STOCKPUP_TABLE_NAME = 'stockpup_data'


# Building blocks for the names of derived columns.
DELTA_PREFIX = 'Delta_'
VS_MKT_IDX = '_vs_'
AVG_REC_SCORE_PREFIX = 'AvgRecScore_'

# Placeholder categories used when a ticker has no sector / industry information.
MISSING_SECTOR = 'MissingSector'
MISSING_INDUSTRY = 'MissingIndustry'

# Month number -> quarter number. Note that this is offset by one month from calendar
# quarters: Feb-Apr -> 1, May-Jul -> 2, Aug-Oct -> 3, Nov-Jan -> 4.
MONTH_TO_QUARTER = {
    1: 4,
    2: 1,
    3: 1,
    4: 1,
    5: 2,
    6: 2,
    7: 2,
    8: 3,
    9: 3,
    10: 3,
    11: 4,
    12: 4
}
    

class StockPupColumns:
    """
    Column names of the StockPup quarterly fundamentals table.

    Source description of the dataset:
    Our dataset comes from over 20 years of 10-Q and 10-K filings made by public companies
     with the U.S. Securities and Exchange Commission. We extract data from both text and
     XBRL filings, fix reporting mistakes, and normalize the data into quarterly time series
     of final restated values.
    """
    # Date Quarter Ends
    QUARTER_END = 'QuarterEnd'
    # The total number of common shares outstanding at the end of a given quarter, including all
    # classes of common stock.
    SHARES = 'Shares'
    # The number of shares the company had at the end of a given quarter, adjusted for splits to
    # be comparable to today's shares.
    SHARES_SPLIT_ADJUSTED = 'SharesSplitAdjusted'
    # If an investor started with 1 share of stock at the end of a given quarter, the split factor
    # for that quarter indicates how many shares the investor would own today as a result of
    # subsequent stock splits.
    SPLIT_FACTOR = 'SplitFactor'
    # Total assets at the end of a quarter.
    ASSETS = 'Assets'
    # Current assets at the end of a quarter.
    CURRENT_ASSETS = 'CurrentAssets'
    # Total liabilities at the end of a quarter.
    LIABILITIES = 'Liabilities'
    # Current liabilities at the end of a quarter.
    CURRENT_LIABILITIES = 'CurrentLiabilities'
    # Total shareholders' equity at the end of a quarter, including both common and preferred
    # stockholders.
    SHAREHOLDER_EQUITY = 'ShareholdersEquity'
    # Non-controlling or minority interest, if any, excluded from Shareholders equity.
    NON_CONTROLLING_INTEREST = 'NonControllingInterest'
    # Preferred equity, if any, included in Shareholders equity.
    PREFERRED_EQUITY = 'PreferredEquity'
    # Total Goodwill and all other Intangible assets, if any.
    GOODWILL_AND_INTANGIBLES = 'GoodwillIntangibles'
    # All long-term debt including capital lease obligations.
    LONG_TERM_DEBT = 'LongTermDebt'
    # Total revenue for a given quarter.
    REVENUE = 'Revenue'
    # Earnings or Net Income for a given quarter.
    EARNINGS = 'Earnings'
    # Earnings available for common stockholders - Net income minus earnings that must be
    # distributed to preferred shareholders. May be omitted when not reported by the company.
    EARNINGS_AVAILABLE_FOR_COMMON_STOCKHOLDERS = 'EarningsAvailableForCommonStockholders'
    # Basic earnings per share for a given quarter.
    EPS_BASIC = 'EPS_basic'
    # Diluted earnings per share.
    EPS_DILUTED = 'EPS_diluted'
    # Common stock dividends paid during a quarter per share, including all regular and special
    # dividends and distributions to common shareholders.
    DIVIDEND_PER_SHARE = 'DividendPerShare'
    # Cash produced by operating activities during a given quarter, including Continuing and
    # Discontinued operations.
    CASH_FROM_OPERATING_ACTIVITES = 'CashFromOperatingActivities'
    # Cash produced by investing activities during a given quarter, including Continuing and
    # Discontinued operations.
    CASH_FROM_INVESTING_ACTIVITIES = 'CashFromInvestingActivities'
    # Cash produced by financing activities during a given quarter, including Continuing and
    # Discontinued operations.
    CASH_FROM_FINANCING_ACTIVITES = 'CashFromFinancingActivities'
    # Change in cash and cash equivalents during a given quarter, including Effect of Exchange
    # Rate Movements and Other Cash Change Adjustments, if any.
    CASH_CHANGE_DURING_PERIOD = 'CashChangeDuringPeriod'
    # Cash and cash equivalents at the end of a quarter, including Continuing and
    # Discontinued operations.
    CASH_AT_END_OF_PERIOD = 'CashAtEndOfPeriod'
    # Capital Expenditures are the cash outflows for long-term productive assets, net of cash
    # from disposals of capital assets.
    CAPITAL_EXPENDITURES = 'CapitalExpenditures'
    # The medium price per share of the company common stock during a given quarter. The prices
    # are as reported, and are not adjusted for subsequent dividends.
    PRICE = 'Price'  # Average price during quarter
    # The highest price per share of the company common stock during a given quarter.
    PRICE_HIGH = 'PriceHigh'
    # The lowest price of the company common stock during a quarter.
    PRICE_LOW = 'PriceLow'
    # Return on equity is the ratio of Earnings (available to common stockholders)
    # TTM (over the Trailing Twelve Months) to TTM average common shareholders' equity.
    ROE = 'ROE'
    # Return on assets is the ratio of total Earnings TTM to TTM average Assets.
    ROA = 'ROA'
    # Common stockholders' equity per share, also known as BVPS.
    BOOK_VALUE_OF_EQUITY_PER_SHARE = 'BookValueOfEquityPerShare'
    # The ratio of Price to Book value of equity per share as of the previous quarter.
    P_B_RATIO = 'P_B_ratio'
    # The ratio of Price to EPS diluted TTM as of the previous quarter.
    P_E_RATIO = 'P_E_ratio'
    # The aggregate amount of dividends paid per split-adjusted share of common stock from the
    # first available reporting quarter until a given quarter.
    CUM_DIVIDENDS_PER_SHARE = 'CumulativeDividendsPerShare'
    # The ratio of Dividends TTM to Earnings (available to common stockholders) TTM.
    DIVIDEND_PAYOUT_RATIO = 'DividendPayoutRatio'
    # The ratio of Long-term debt to common shareholders' equity (Shareholders equity minus
    # Preferred equity).
    LONG_TERM_DEBT_TO_EQUITY_RATIO = 'LongTermDebtToEquityRatio'
    # The ratio of common shareholders' equity (Shareholders equity minus Preferred equity) to
    # Assets.
    EQUITY_TO_ASSETS_RATIO = 'EquityToAssetsRatio'
    # The ratio of Earnings (available for common stockholders) TTM to Revenue TTM.
    NET_MARGIN = 'NetMargin'
    # The ratio of Revenue TTM to TTM average Assets.
    ASSET_TURNOVER = 'AssetTurnover'
    # Cash from operating activities minus the Capital Expenditures for a quarter.
    FREE_CASH_FLOW_PER_SHARE = 'FreeCashFlowPerShare'
    # The ratio of Current assets to Current liabilities.
    CURRENT_RATIO = 'CurrentRatio'

    @staticmethod
    def columns():
        """Return the value of every public column constant, ordered by attribute name."""
        # dir() lists attribute names alphabetically; dunder/private names and this
        # method itself are not column constants, so they are filtered out.
        public_names = (
            name for name in dir(StockPupColumns)
            if not name.startswith('_') and name != 'columns'
        )
        return [getattr(StockPupColumns, name) for name in public_names]

class QuarterlyColumns:
    """Column names used by the quarterly data frames built in this notebook."""
    # Index levels
    TICKER_SYMBOL = 'TickerSymbol'
    QUARTER = 'Quarter'
    YEAR = 'Year'
    # Price and analyst-recommendation columns
    PRICE_AVG = 'PriceAvg'
    PRICE_HI = 'PriceHigh'
    PRICE_LO = 'PriceLow'
    PRICE_AT_END_OF_QUARTER = 'PriceEoQ'
    AVG_RECOMMENDATIONS = 'AvgRecommendations'
    AVG_RECOMMENDATION_SCORE = 'AvgRecommendationScore'
    SPLIT = 'Split'
    # Financial statement columns
    EBIT = 'Ebit'
    PROFIT = 'GrossProfit'
    REVENUE = 'TotalRevenue'
    RND = 'ResearchDevelopment'
    OPERATING_EXPENSES = 'TotalOperatingExpenses'
    INCOME_PRETAX = 'IncomeBeforeTax'
    INCOME_TAX = 'IncomeTaxExpense'
    OPERATING_INCOME = 'OperatingIncome'
    NET_INCOME = 'NetIncome'
    DIVIDENDS = 'DividendsPaid'
    STOCK_REPURCHASED = 'RepurchaseOfStock'
    STOCK_ISSUED = 'IssuanceOfStock'
    DEPRECIATION = 'Depreciation'
    NET_BORROWINGS = 'NetBorrowings'
    INVESTMENTS = 'Investments'
    CASH = 'Cash'
    COMMON_STOCK = 'CommonStock'
    ASSETS = 'TotalAssets'
    LIABILITIES = 'TotalLiab'
    DEBT_LONG = 'LongTermDebt'
    DEBT_SHORT = 'ShortLongTermDebt'
    DATE = 'Date'
    VOLUME = 'Volume'
    EARNINGS = 'Earnings'
    STOCKHOLDER_EQUITY = 'TotalStockholderEquity'
    # Columns added by this notebook (joined metadata and derived metrics)
    VOLATILITY = 'Volatility'
    SECTOR = 'Sector'
    INDUSTRY = 'Industry'
    MARKET_CAP = 'MarketCap'
    AGE_OF_DATA = 'AgeOfData'
    WORKING_CAPITAL_RATIO = 'AssetsToLiabilitiesRatio'
    AVG_PE_RATIO = 'AvgPriceToEarningsRatio'
    DEBT_EQUITY_RATIO = 'DebtToEquityRatio'
    ROE = 'ReturnOnEquity'
    PRICE_BOOK_RATIO = 'PriceToBookRatio'
    FCF = 'FreeCashFlow'
    PROFIT_MARGIN = 'ProfitMargin'

    @staticmethod
    def columns():
        """Return the value of every public column constant, ordered by attribute name."""
        # dir() lists attribute names alphabetically; dunder/private names and this
        # method itself are not column constants, so they are filtered out.
        public_names = (
            name for name in dir(QuarterlyColumns)
            if not name.startswith('_') and name != 'columns'
        )
        return [getattr(QuarterlyColumns, name) for name in public_names]


# Tuple layout of the (ticker, quarter, year) MultiIndex used by the quarterly frames.
QuarterlyIndex = namedtuple(
    'QuarterlyIndex',
    [QuarterlyColumns.TICKER_SYMBOL, QuarterlyColumns.QUARTER, QuarterlyColumns.YEAR],
)
# Integer position of each field inside a QuarterlyIndex (or a plain index tuple).
TICKER_SYMBOL, QUARTER, YEAR = range(3)

# Columns that get a quarter-over-quarter delta on the market-index frame.
PRICE_ONLY_DELTA_COLUMNS = [QuarterlyColumns.PRICE_AVG, QuarterlyColumns.VOLATILITY]

# Columns that get a quarter-over-quarter delta on the company frame.
DELTA_COLUMNS = [
    QuarterlyColumns.PRICE_AVG,
    QuarterlyColumns.CASH,
    QuarterlyColumns.EARNINGS,
    QuarterlyColumns.AVG_PE_RATIO,
    QuarterlyColumns.DEBT_EQUITY_RATIO,
    QuarterlyColumns.ROE,
    QuarterlyColumns.WORKING_CAPITAL_RATIO,
    QuarterlyColumns.PRICE_BOOK_RATIO,
    QuarterlyColumns.PROFIT_MARGIN,
    QuarterlyColumns.OPERATING_INCOME,
]

CATEGORICAL_COLUMNS = [QuarterlyColumns.QUARTER, QuarterlyColumns.SECTOR, QuarterlyColumns.INDUSTRY]

# Columns that are expressed relative to each market index.
VS_MARKET_INDICES_COLUMNS = [
    DELTA_PREFIX + QuarterlyColumns.PRICE_AVG,
    QuarterlyColumns.VOLATILITY,
]

MARKET_INDICES = ['^DJI']  #, 'VTSAX', '^IXIC', '^GSPC', '^RUT', '^NYA']


def _volatility(row):
    """Price range of the quarter (high minus low) relative to the average price."""
    price_range = row[QuarterlyColumns.PRICE_HI] - row[QuarterlyColumns.PRICE_LO]
    return price_range / row[QuarterlyColumns.PRICE_AVG]


def _working_capital_ratio(row):
    """Total assets divided by total liabilities."""
    return row[QuarterlyColumns.ASSETS] / row[QuarterlyColumns.LIABILITIES]


def _age_of_data(row):
    """Days between today and the row's date, divided by 90."""
    # Depends on the wall clock, so the value changes from one run to the next.
    today = datetime.now().date()
    reported_on = datetime.strptime(row[QuarterlyColumns.DATE], '%Y-%m-%d').date()
    return (today - reported_on).days/90


def _avg_pe_ratio(row):
    """Average share price divided by the row's Earnings value."""
    # NOTE(review): Earnings looks like a company-wide total rather than a per-share
    # figure, which would make this differ from a conventional P/E - confirm.
    return row[QuarterlyColumns.PRICE_AVG] / row[QuarterlyColumns.EARNINGS]


def _debt_equity_ratio(row):
    """Long-term plus short-term debt, divided by stockholder equity."""
    total_debt = row[QuarterlyColumns.DEBT_LONG] + row[QuarterlyColumns.DEBT_SHORT]
    return total_debt / row[QuarterlyColumns.STOCKHOLDER_EQUITY]


def _return_on_equity(row):
    """Earnings minus dividends paid, divided by stockholder equity."""
    retained = row[QuarterlyColumns.EARNINGS] - row[QuarterlyColumns.DIVIDENDS]
    return retained / row[QuarterlyColumns.STOCKHOLDER_EQUITY]


def _price_book_ratio(row):
    """Assets minus liabilities, divided by market cap."""
    # NOTE(review): this is book value over market value, the inverse of what the
    # column name 'PriceToBookRatio' suggests - confirm which one is intended.
    book_value = row[QuarterlyColumns.ASSETS] - row[QuarterlyColumns.LIABILITIES]
    return book_value / row[QuarterlyColumns.MARKET_CAP]


def _profit_margin(row):
    """Net income divided by total revenue."""
    return row[QuarterlyColumns.NET_INCOME] / row[QuarterlyColumns.REVENUE]


# Derived column name -> function that computes the value from a single row.
FORMULAE = {
    QuarterlyColumns.VOLATILITY: _volatility,
    QuarterlyColumns.WORKING_CAPITAL_RATIO: _working_capital_ratio,
    QuarterlyColumns.AGE_OF_DATA: _age_of_data,
    QuarterlyColumns.AVG_PE_RATIO: _avg_pe_ratio,
    QuarterlyColumns.DEBT_EQUITY_RATIO: _debt_equity_ratio,
    QuarterlyColumns.ROE: _return_on_equity,
    QuarterlyColumns.PRICE_BOOK_RATIO: _price_book_ratio,
    QuarterlyColumns.PROFIT_MARGIN: _profit_margin,
}

# Name of the column designated as the target: the price delta relative to the ^DJI index.
TARGET_COL = DELTA_PREFIX + QuarterlyColumns.PRICE_AVG + VS_MKT_IDX + '^DJI'

# Feature columns: the raw/derived metrics, then one delta per DELTA_COLUMNS entry,
# then the volatility relative to each market index.
FEATURE_COLS = [
    # QuarterlyColumns.QUARTER,
    QuarterlyColumns.SECTOR,
    QuarterlyColumns.AGE_OF_DATA,
    QuarterlyColumns.VOLATILITY,
    QuarterlyColumns.AVG_RECOMMENDATION_SCORE,
    QuarterlyColumns.AVG_PE_RATIO,
    QuarterlyColumns.DEBT_EQUITY_RATIO,
    QuarterlyColumns.ROE,
    QuarterlyColumns.WORKING_CAPITAL_RATIO,
    QuarterlyColumns.PRICE_BOOK_RATIO,
    QuarterlyColumns.OPERATING_INCOME,
    QuarterlyColumns.PROFIT_MARGIN,
    *(f'{DELTA_PREFIX}{col}' for col in DELTA_COLUMNS),
    *(f'{QuarterlyColumns.VOLATILITY}{VS_MKT_IDX}{mkt_idx}' for mkt_idx in MARKET_INDICES),
]

In [312]:
db_conn = sqlite3.connect(DB_PATH)
try:
    quarterly_df = pd.read_sql_query(f'SELECT * FROM {TABLE_NAME}', db_conn)
finally:
    # Release the database handle even when the query raises.
    db_conn.close()

# Split off the market-index rows BEFORE the company filters below are applied:
# indices have no revenue, so those filters would drop every index row.
market_index_df = quarterly_df[quarterly_df[QuarterlyColumns.TICKER_SYMBOL].isin(MARKET_INDICES)]

# dropna() returns a new frame, so the result has to be assigned back (the previous
# bare call discarded it and dropped nothing). The copy() makes market_index_df an
# independent frame, so later column assignments do not raise SettingWithCopyWarning.
market_index_df = market_index_df.dropna(subset=[QuarterlyColumns.DATE,
                                                 QuarterlyColumns.PRICE_AVG,
                                                 QuarterlyColumns.PRICE_HI,
                                                 QuarterlyColumns.PRICE_LO,
                                                 ]).copy()

# Company rows need a date, prices, revenue and earnings to be usable.
quarterly_df = quarterly_df.dropna(subset=[QuarterlyColumns.DATE,
                                           QuarterlyColumns.REVENUE,
                                           QuarterlyColumns.PRICE_AVG,
                                           QuarterlyColumns.PRICE_HI,
                                           QuarterlyColumns.PRICE_LO,
                                           QuarterlyColumns.EARNINGS])
# Zero revenue/earnings would be used as divisors by the ratio formulas, and the
# market indices are kept separately in market_index_df.
quarterly_df = quarterly_df[((quarterly_df[QuarterlyColumns.REVENUE] != 0) &
                             (quarterly_df[QuarterlyColumns.EARNINGS] != 0) &
                             (~quarterly_df[QuarterlyColumns.TICKER_SYMBOL].isin(MARKET_INDICES)))]

quarterly_df = quarterly_df.set_index([QuarterlyColumns.TICKER_SYMBOL,
                                       QuarterlyColumns.QUARTER,
                                       QuarterlyColumns.YEAR]).sort_index()



In [313]:
db_conn = sqlite3.connect(DB_PATH)
try:
    stockpup_df = pd.read_sql_query(f'SELECT * FROM {STOCKPUP_TABLE_NAME}', db_conn)
finally:
    # Release the database handle even when the query raises.
    db_conn.close()


def _date_to_index(dt: datetime.date):
    """Map a quarter-end date to a QuarterlyIndex with an empty ticker symbol.

    Dates after the 20th of a month are moved 15 days forward, i.e. into the next
    month, before the month is translated through MONTH_TO_QUARTER. A (shifted) January
    date is attributed to the previous year.
    """
    if dt.day > 20:
        # Late-month date: treat it as belonging to the following month.
        shifted = dt + timedelta(days=15)
    else:
        shifted = dt

    quarter = MONTH_TO_QUARTER[shifted.month]

    year = shifted.year
    if shifted.month == 1:
        year -= 1
    return QuarterlyIndex('', quarter, year)


def process_stockpup_df(df, existing_df=None):
    """Convert raw StockPup rows into the column layout and index of ``quarterly_df``.

    Rows lacking any of the required StockPup values, or with zero revenue/earnings, are
    discarded. Quarter, year, date, dividends, operating income, market cap and
    short-term debt columns are derived, StockPup columns are renamed to their
    QuarterlyColumns counterparts, and only columns known to QuarterlyColumns are kept.

    Args:
        df: Raw StockPup frame. It must contain the StockPupColumns used below plus a
            ticker-symbol column. The frame is not modified.
        existing_df: Frame indexed by (ticker, quarter, year); StockPup rows whose index
            already occurs in it are dropped. Defaults to the module-level
            ``quarterly_df``.

    Returns:
        A new frame indexed and sorted by (ticker, quarter, year), with unique index
        entries that do not occur in ``existing_df``.
    """
    if existing_df is None:
        existing_df = quarterly_df

    # dropna() without inplace leaves the caller's frame untouched.
    df = df.dropna(subset=[StockPupColumns.SHARES,
                           StockPupColumns.SHARES_SPLIT_ADJUSTED,
                           StockPupColumns.FREE_CASH_FLOW_PER_SHARE,
                           StockPupColumns.EARNINGS,
                           StockPupColumns.SHAREHOLDER_EQUITY,
                           StockPupColumns.LIABILITIES,
                           StockPupColumns.PRICE])
    # NOTE(review): revenue is not part of the dropna subset, so rows with a missing
    # (NaN) revenue pass the `!= 0` test below - confirm that is intended.
    has_revenue_and_earnings = ((df[StockPupColumns.REVENUE] != 0) &
                                (df[StockPupColumns.EARNINGS] != 0))
    # copy() yields an independent frame, so the column assignments below no longer
    # trigger pandas' SettingWithCopyWarning.
    df = df[has_revenue_and_earnings].copy()

    df[StockPupColumns.QUARTER_END] = pd.to_datetime(df[StockPupColumns.QUARTER_END])

    # Derive the (quarter, year) pair once per row and reuse it for both columns.
    quarter_indices = [_date_to_index(quarter_end) for quarter_end in df[StockPupColumns.QUARTER_END]]
    df[QuarterlyColumns.QUARTER] = [quarter_index[QUARTER] for quarter_index in quarter_indices]
    df[QuarterlyColumns.YEAR] = [quarter_index[YEAR] for quarter_index in quarter_indices]
    df[QuarterlyColumns.DIVIDENDS] = df[StockPupColumns.DIVIDEND_PER_SHARE] * df[
        StockPupColumns.SHARES]
    df[QuarterlyColumns.DATE] = df[StockPupColumns.QUARTER_END].apply(
        lambda quarter_end: str(quarter_end.date()))
    # NOTE(review): this stores free cash flow (per share * shares) under the
    # OperatingIncome column - presumably as a stand-in; confirm.
    df[QuarterlyColumns.OPERATING_INCOME] = df[StockPupColumns.FREE_CASH_FLOW_PER_SHARE] * df[
        StockPupColumns.SHARES]
    df[QuarterlyColumns.MARKET_CAP] = df[StockPupColumns.SHARES_SPLIT_ADJUSTED] * df[StockPupColumns.PRICE]
    df[QuarterlyColumns.DEBT_SHORT] = 0  # I think short long term debt is figured into long term debt
    # TODO: Compute EBIT?

    df = df.rename(columns={
        StockPupColumns.ASSETS: QuarterlyColumns.ASSETS,
        StockPupColumns.REVENUE: QuarterlyColumns.REVENUE,
        StockPupColumns.LIABILITIES: QuarterlyColumns.LIABILITIES,
        StockPupColumns.LONG_TERM_DEBT: QuarterlyColumns.DEBT_LONG,
        StockPupColumns.SHAREHOLDER_EQUITY: QuarterlyColumns.STOCKHOLDER_EQUITY,
        StockPupColumns.CASH_AT_END_OF_PERIOD: QuarterlyColumns.CASH,
        StockPupColumns.PRICE: QuarterlyColumns.PRICE_AVG,
        StockPupColumns.PRICE_LOW: QuarterlyColumns.PRICE_LO,
        StockPupColumns.PRICE_HIGH: QuarterlyColumns.PRICE_HI,
        StockPupColumns.SPLIT_FACTOR: QuarterlyColumns.SPLIT,
        StockPupColumns.SHARES_SPLIT_ADJUSTED: QuarterlyColumns.COMMON_STOCK,
        StockPupColumns.EARNINGS: QuarterlyColumns.EARNINGS  # These aren't exactly the same
    })

    # int() raises on a missing (NaN) value; long-term debt is not in the dropna subset.
    df[QuarterlyColumns.DEBT_LONG] = df[QuarterlyColumns.DEBT_LONG].apply(int)
    df[QuarterlyColumns.NET_INCOME] = df[QuarterlyColumns.EARNINGS]  # These aren't exactly the same...

    # Filter only to columns in QuarterlyColumns (the lookup set is built once).
    known_columns = set(QuarterlyColumns.columns())
    df = df[[col for col in df.columns if col in known_columns]]

    df = df.set_index([QuarterlyColumns.TICKER_SYMBOL,
                       QuarterlyColumns.QUARTER,
                       QuarterlyColumns.YEAR]).sort_index()

    # Filter out all StockPup rows whose index already exists in existing_df
    df = df[~df.index.isin(existing_df.index)]

    # Drop duplicates (occurs if quarter end dates are close to each other)
    df = df.loc[~df.index.duplicated(keep='last')]

    return df

# Normalise the StockPup rows, then append them to the quarterly frame loaded earlier.
stockpup_df = process_stockpup_df(stockpup_df)

quarterly_df = pd.concat([quarterly_df, stockpup_df]).sort_index()
quarterly_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

Index(['CommonStock', 'Split', 'TotalAssets', 'TotalLiab',
       'TotalStockholderEquity', 'LongTermDebt', 'TotalRevenue', 'Earnings',
       'Cash', 'PriceAvg', 'PriceHigh', 'PriceLow', 'DividendsPaid', 'Date',
       'OperatingIncome', 'MarketCap', 'ShortLongTermDebt', 'NetIncome'],
      dtype='object')


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,PriceAvg,PriceHigh,PriceLow,PriceEoQ,AvgRecommendations,Split,Ebit,GrossProfit,TotalRevenue,ResearchDevelopment,TotalOperatingExpenses,IncomeBeforeTax,IncomeTaxExpense,OperatingIncome,NetIncome,DividendsPaid,RepurchaseOfStock,Depreciation,IssuanceOfStock,NetBorrowings,Investments,Cash,CommonStock,TotalAssets,TotalLiab,LongTermDebt,Revenue,Date,TotalStockholderEquity,Volume,Earnings,ShortLongTermDebt,MarketCap
TickerSymbol,Quarter,Year,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1
A,1,1999,59.630000,79.25,40.00,,,1,,,2.246000e+09,,,,,3.525600e+08,131000000.0,0.000000e+00,,,,,,1.368000e+09,452000000.0,7.107000e+09,2.621000e+09,0.000000e+00,,2000-01-31,4.486000e+09,,131000000.0,0.0,2.695276e+10
A,1,2000,53.030000,68.00,38.06,,,1,,,2.841000e+09,,,,,-3.106034e+08,154000000.0,0.000000e+00,,,,,,4.330000e+08,456769737.0,9.208000e+09,3.667000e+09,0.000000e+00,,2001-01-31,5.541000e+09,,154000000.0,0.0,2.422250e+10
A,1,2001,27.680000,33.30,22.06,,,1,,,1.426000e+09,,,,,-2.133756e+08,-315000000.0,0.000000e+00,,,,,,2.188000e+09,463859978.0,8.493000e+09,3.140000e+09,1.150000e+09,,2002-01-31,5.353000e+09,,-315000000.0,0.0,1.283964e+10
A,1,2002,16.750000,20.30,13.19,,,1,,,1.412000e+09,,,,,-1.366604e+08,-369000000.0,0.000000e+00,,,,,,1.754000e+09,471242690.0,7.770000e+09,3.421000e+09,1.150000e+09,,2003-01-31,4.349000e+09,,-369000000.0,0.0,7.893315e+09
A,1,2003,31.880000,38.80,24.97,,,1,,,1.643000e+09,,,,,9.615090e+06,71000000.0,0.000000e+00,,,,,,1.678000e+09,480754478.0,6.384000e+09,3.315000e+09,1.150000e+09,,2004-01-31,3.069000e+09,,71000000.0,0.0,1.532645e+10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZTS,4,2015,43.810000,48.65,38.98,,,1,,,1.274000e+09,,,,,1.988622e+08,22000000.0,4.474400e+07,,,,,,1.154000e+09,497155532.0,7.913000e+09,6.822000e+09,4.463000e+09,,2015-12-31,1.068000e+09,,22000000.0,0.0,2.178038e+10
ZTS,4,2016,50.500000,54.15,46.86,,,1,,,1.277000e+09,,,,,2.263035e+08,154000000.0,9.839281e+07,,,,,,7.270000e+08,491964064.0,7.649000e+09,6.150000e+09,4.468000e+09,,2016-12-31,1.487000e+09,,154000000.0,0.0,2.484419e+10
ZTS,4,2017,68.310000,73.58,63.03,,,1,,,1.460000e+09,,,,,5.628943e+08,81000000.0,1.116084e+08,,,,,,1.564000e+09,485253713.0,8.586000e+09,6.800000e+09,4.953000e+09,,2017-12-31,1.770000e+09,,81000000.0,0.0,3.314768e+10
ZTS,4,2018,87.730000,96.57,78.90,,,1,,,1.564000e+09,,,,,4.931351e+08,345000000.0,1.388439e+08,,,,,,1.602000e+09,478771915.0,1.077700e+10,8.592000e+09,6.443000e+09,,2018-12-31,2.185000e+09,,345000000.0,0.0,4.200266e+10


In [314]:
# Per-ticker sector / industry lookup, indexed by ticker symbol.
stock_info_df = (
    pd.read_csv(INFO_CSV_PATH)[['tickerSymbol', 'sector', 'industry']]
    .rename(columns={
        'tickerSymbol': QuarterlyColumns.TICKER_SYMBOL,
        'sector': QuarterlyColumns.SECTOR,
        'industry': QuarterlyColumns.INDUSTRY
    })
    .set_index([QuarterlyColumns.TICKER_SYMBOL])
)

quarterly_df = quarterly_df.join(stock_info_df, on=[QuarterlyColumns.TICKER_SYMBOL])

# Assign the filled columns back instead of calling fillna(inplace=True) on
# quarterly_df[col]: that form is chained assignment, which pandas 2.x deprecates and
# which leaves the frame unchanged once Copy-on-Write is enabled.
quarterly_df[QuarterlyColumns.SECTOR] = quarterly_df[QuarterlyColumns.SECTOR].fillna(MISSING_SECTOR)
quarterly_df[QuarterlyColumns.INDUSTRY] = quarterly_df[QuarterlyColumns.INDUSTRY].fillna(MISSING_INDUSTRY)
quarterly_df[QuarterlyColumns.DEBT_SHORT] = quarterly_df[QuarterlyColumns.DEBT_SHORT].fillna(0)

0

In [316]:
# Evaluate every derived-metric formula row by row and store it under its column name.
for col_name, fn in FORMULAE.items():
    quarterly_df[col_name] = quarterly_df.apply(fn, axis=1)

# The market-index frame only gets the volatility metric; it is then indexed and
# sorted the same way as quarterly_df.
market_index_df = (
    market_index_df
    .assign(**{
        QuarterlyColumns.VOLATILITY: market_index_df.apply(
            FORMULAE[QuarterlyColumns.VOLATILITY], axis=1)
    })
    .set_index([QuarterlyColumns.TICKER_SYMBOL,
                QuarterlyColumns.QUARTER,
                QuarterlyColumns.YEAR])
    .sort_index()
)

quarterly_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,PriceAvg,PriceHigh,PriceLow,PriceEoQ,AvgRecommendations,Split,Ebit,GrossProfit,TotalRevenue,ResearchDevelopment,TotalOperatingExpenses,IncomeBeforeTax,IncomeTaxExpense,OperatingIncome,NetIncome,DividendsPaid,RepurchaseOfStock,Depreciation,IssuanceOfStock,NetBorrowings,Investments,Cash,CommonStock,TotalAssets,TotalLiab,LongTermDebt,Revenue,Date,TotalStockholderEquity,Volume,Earnings,ShortLongTermDebt,MarketCap,Sector,Industry,Volatility,AssetsToLiabilitiesRatio,AgeOfData,AvgPriceToEarningsRatio,DebtToEquityRatio,ReturnOnEquity,PriceToBookRatio,ProfitMargin
TickerSymbol,Quarter,Year,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1
A,1,1999,59.630000,79.25,40.00,,,1,,,2.246000e+09,,,,,3.525600e+08,131000000.0,0.000000e+00,,,,,,1.368000e+09,452000000.0,7.107000e+09,2.621000e+09,0.000000e+00,,2000-01-31,4.486000e+09,,131000000.0,0.0,2.695276e+10,Healthcare,Diagnostics & Research,0.658226,2.711560,82.866667,4.551908e-07,0.000000,0.029202,0.166439,0.058326
A,1,2000,53.030000,68.00,38.06,,,1,,,2.841000e+09,,,,,-3.106034e+08,154000000.0,0.000000e+00,,,,,,4.330000e+08,456769737.0,9.208000e+09,3.667000e+09,0.000000e+00,,2001-01-31,5.541000e+09,,154000000.0,0.0,2.422250e+10,Healthcare,Diagnostics & Research,0.564586,2.511044,78.800000,3.443506e-07,0.000000,0.027793,0.228754,0.054206
A,1,2001,27.680000,33.30,22.06,,,1,,,1.426000e+09,,,,,-2.133756e+08,-315000000.0,0.000000e+00,,,,,,2.188000e+09,463859978.0,8.493000e+09,3.140000e+09,1.150000e+09,,2002-01-31,5.353000e+09,,-315000000.0,0.0,1.283964e+10,Healthcare,Diagnostics & Research,0.406069,2.704777,74.744444,-8.787302e-08,0.214833,-0.058846,0.416912,-0.220898
A,1,2002,16.750000,20.30,13.19,,,1,,,1.412000e+09,,,,,-1.366604e+08,-369000000.0,0.000000e+00,,,,,,1.754000e+09,471242690.0,7.770000e+09,3.421000e+09,1.150000e+09,,2003-01-31,4.349000e+09,,-369000000.0,0.0,7.893315e+09,Healthcare,Diagnostics & Research,0.424478,2.271266,70.688889,-4.539295e-08,0.264429,-0.084847,0.550973,-0.261331
A,1,2003,31.880000,38.80,24.97,,,1,,,1.643000e+09,,,,,9.615090e+06,71000000.0,0.000000e+00,,,,,,1.678000e+09,480754478.0,6.384000e+09,3.315000e+09,1.150000e+09,,2004-01-31,3.069000e+09,,71000000.0,0.0,1.532645e+10,Healthcare,Diagnostics & Research,0.433814,1.925792,66.633333,4.490141e-07,0.374715,0.023135,0.200242,0.043214
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZTS,4,2015,43.810000,48.65,38.98,,,1,,,1.274000e+09,,,,,1.988622e+08,22000000.0,4.474400e+07,,,,,,1.154000e+09,497155532.0,7.913000e+09,6.822000e+09,4.463000e+09,,2015-12-31,1.068000e+09,,22000000.0,0.0,2.178038e+10,Healthcare,Drug Manufacturers—Specialty & Generic,0.220726,1.159924,18.277778,1.991364e-06,4.178839,-0.021296,0.050091,0.017268
ZTS,4,2016,50.500000,54.15,46.86,,,1,,,1.277000e+09,,,,,2.263035e+08,154000000.0,9.839281e+07,,,,,,7.270000e+08,491964064.0,7.649000e+09,6.150000e+09,4.468000e+09,,2016-12-31,1.487000e+09,,154000000.0,0.0,2.484419e+10,Healthcare,Drug Manufacturers—Specialty & Generic,0.144356,1.243740,14.211111,3.279221e-07,3.004707,0.037396,0.060336,0.120595
ZTS,4,2017,68.310000,73.58,63.03,,,1,,,1.460000e+09,,,,,5.628943e+08,81000000.0,1.116084e+08,,,,,,1.564000e+09,485253713.0,8.586000e+09,6.800000e+09,4.953000e+09,,2017-12-31,1.770000e+09,,81000000.0,0.0,3.314768e+10,Healthcare,Drug Manufacturers—Specialty & Generic,0.154443,1.262647,10.155556,8.433333e-07,2.798305,-0.017293,0.053880,0.055479
ZTS,4,2018,87.730000,96.57,78.90,,,1,,,1.564000e+09,,,,,4.931351e+08,345000000.0,1.388439e+08,,,,,,1.602000e+09,478771915.0,1.077700e+10,8.592000e+09,6.443000e+09,,2018-12-31,2.185000e+09,,345000000.0,0.0,4.200266e+10,Healthcare,Drug Manufacturers—Specialty & Generic,0.201413,1.254306,6.100000,2.542899e-07,2.948741,0.094351,0.052021,0.220588


In [317]:
def get_prev_quarterly_index(index: QuarterlyIndex):
    """Return the index of the quarter preceding ``index`` for the same ticker.

    Quarter 1 rolls back to quarter 4 of the previous year.
    """
    ticker, quarter, year = index[TICKER_SYMBOL], index[QUARTER], index[YEAR]

    if quarter == 1:
        return QuarterlyIndex(ticker, 4, year - 1)

    return QuarterlyIndex(ticker, quarter - 1, year)


def add_delta_columns(row: pd.Series, df: pd.DataFrame, columns: list):
    """Compute the relative change of ``columns`` versus the previous quarter's row.

    Args:
        row: A row of ``df``; ``row.name`` must be its (ticker, quarter, year) index tuple.
        df: Frame in which the previous quarter's row is looked up.
        columns: Names of the columns to compute deltas for.

    Returns:
        A Series with one entry per column, in order: ``(current - previous) / previous``,
        or 0 when the previous value is 0. When ``df`` has no row for the previous
        quarter, every entry is None.
    """
    try:
        prev_quarter_row = df.loc[get_prev_quarterly_index(row.name)]  # row.name returns the multiIndex tuple
    except KeyError:
        # No row for the previous quarter. Only the lookup miss is handled here, so
        # unrelated errors (and KeyboardInterrupt) are no longer silently swallowed.
        return pd.Series([None]*len(columns))

    if prev_quarter_row.empty:
        return pd.Series([None]*len(columns))

    new_cols = []
    for col in columns:
        previous_value = prev_quarter_row[col]
        if previous_value == 0:
            # Avoid dividing by zero; a zero baseline is reported as "no change".
            new_cols.append(0)
        else:
            # converting to float to get rid of index terms
            new_cols.append(float((row[col] - previous_value) / previous_value))

    return pd.Series(new_cols)


# Quarter-over-quarter deltas for the company rows. Rows without a previous quarter
# keep empty deltas here; they are not dropped from quarterly_df.
delta_col_names = [DELTA_PREFIX + col for col in DELTA_COLUMNS]
quarterly_df[delta_col_names] = quarterly_df.apply(
    add_delta_columns, axis=1, df=quarterly_df, columns=DELTA_COLUMNS)

# Same for the market indices, restricted to price and volatility; index rows
# without a previous quarter are dropped.
delta_col_mkt_index_names = [DELTA_PREFIX + col for col in PRICE_ONLY_DELTA_COLUMNS]
market_index_df[delta_col_mkt_index_names] = market_index_df.apply(
    add_delta_columns, axis=1, df=market_index_df, columns=PRICE_ONLY_DELTA_COLUMNS)
market_index_df = market_index_df.dropna(subset=delta_col_mkt_index_names)
quarterly_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,PriceAvg,PriceHigh,PriceLow,PriceEoQ,AvgRecommendations,Split,Ebit,GrossProfit,TotalRevenue,ResearchDevelopment,TotalOperatingExpenses,IncomeBeforeTax,IncomeTaxExpense,OperatingIncome,NetIncome,DividendsPaid,RepurchaseOfStock,Depreciation,IssuanceOfStock,NetBorrowings,Investments,Cash,CommonStock,TotalAssets,TotalLiab,LongTermDebt,Revenue,Date,TotalStockholderEquity,Volume,Earnings,ShortLongTermDebt,MarketCap,Sector,Industry,Volatility,AssetsToLiabilitiesRatio,AgeOfData,AvgPriceToEarningsRatio,DebtToEquityRatio,ReturnOnEquity,PriceToBookRatio,ProfitMargin,Delta_PriceAvg,Delta_Cash,Delta_Earnings,Delta_AvgPriceToEarningsRatio,Delta_DebtToEquityRatio,Delta_ReturnOnEquity,Delta_AssetsToLiabilitiesRatio,Delta_PriceToBookRatio,Delta_ProfitMargin,Delta_OperatingIncome
TickerSymbol,Quarter,Year,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1
A,1,1999,59.630000,79.25,40.00,,,1,,,2.246000e+09,,,,,3.525600e+08,131000000.0,0.000000e+00,,,,,,1.368000e+09,452000000.0,7.107000e+09,2.621000e+09,0.000000e+00,,2000-01-31,4.486000e+09,,131000000.0,0.0,2.695276e+10,Healthcare,Diagnostics & Research,0.658226,2.711560,82.866667,4.551908e-07,0.000000,0.029202,0.166439,0.058326,,,,,,,,,,
A,1,2000,53.030000,68.00,38.06,,,1,,,2.841000e+09,,,,,-3.106034e+08,154000000.0,0.000000e+00,,,,,,4.330000e+08,456769737.0,9.208000e+09,3.667000e+09,0.000000e+00,,2001-01-31,5.541000e+09,,154000000.0,0.0,2.422250e+10,Healthcare,Diagnostics & Research,0.564586,2.511044,78.800000,3.443506e-07,0.000000,0.027793,0.228754,0.054206,,,,,,,,,,
A,1,2001,27.680000,33.30,22.06,,,1,,,1.426000e+09,,,,,-2.133756e+08,-315000000.0,0.000000e+00,,,,,,2.188000e+09,463859978.0,8.493000e+09,3.140000e+09,1.150000e+09,,2002-01-31,5.353000e+09,,-315000000.0,0.0,1.283964e+10,Healthcare,Diagnostics & Research,0.406069,2.704777,74.744444,-8.787302e-08,0.214833,-0.058846,0.416912,-0.220898,-0.456295,1.196787,-2.032787,-1.526444,0.000000,-2.015809,0.014492,0.839766,-3.442186,-4.117022
A,1,2002,16.750000,20.30,13.19,,,1,,,1.412000e+09,,,,,-1.366604e+08,-369000000.0,0.000000e+00,,,,,,1.754000e+09,471242690.0,7.770000e+09,3.421000e+09,1.150000e+09,,2003-01-31,4.349000e+09,,-369000000.0,0.0,7.893315e+09,Healthcare,Diagnostics & Research,0.424478,2.271266,70.688889,-4.539295e-08,0.264429,-0.084847,0.550973,-0.261331,-0.339250,0.499145,-3.580420,-1.256063,0.000000,-4.357690,-0.338187,0.144460,-3.498182,-0.750237
A,1,2003,31.880000,38.80,24.97,,,1,,,1.643000e+09,,,,,9.615090e+06,71000000.0,0.000000e+00,,,,,,1.678000e+09,480754478.0,6.384000e+09,3.315000e+09,1.150000e+09,,2004-01-31,3.069000e+09,,71000000.0,0.0,1.532645e+10,Healthcare,Diagnostics & Research,0.433814,1.925792,66.633333,4.490141e-07,0.374715,0.023135,0.200242,0.043214,1.170184,-0.090022,-1.300847,-8.213569,0.507657,-1.453575,-0.160474,-0.703095,-1.317877,-1.042016
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZTS,4,2015,43.810000,48.65,38.98,,,1,,,1.274000e+09,,,,,1.988622e+08,22000000.0,4.474400e+07,,,,,,1.154000e+09,497155532.0,7.913000e+09,6.822000e+09,4.463000e+09,,2015-12-31,1.068000e+09,,22000000.0,0.0,2.178038e+10,Healthcare,Drug Manufacturers—Specialty & Generic,0.220726,1.159924,18.277778,1.991364e-06,4.178839,-0.021296,0.050091,0.017268,-0.005674,0.949324,-0.883598,7.542164,0.592000,-1.177234,-0.057627,-0.123673,-0.889080,0.288340
ZTS,4,2016,50.500000,54.15,46.86,,,1,,,1.277000e+09,,,,,2.263035e+08,154000000.0,9.839281e+07,,,,,,7.270000e+08,491964064.0,7.649000e+09,6.150000e+09,4.468000e+09,,2016-12-31,1.487000e+09,,154000000.0,0.0,2.484419e+10,Healthcare,Drug Manufacturers—Specialty & Generic,0.144356,1.243740,14.211111,3.279221e-07,3.004707,0.037396,0.060336,0.120595,0.015279,0.116743,-0.355649,0.575661,0.066143,-0.752000,-0.013875,-0.072561,-0.373813,0.238539
ZTS,4,2017,68.310000,73.58,63.03,,,1,,,1.460000e+09,,,,,5.628943e+08,81000000.0,1.116084e+08,,,,,,1.564000e+09,485253713.0,8.586000e+09,6.800000e+09,4.953000e+09,,2017-12-31,1.770000e+09,,81000000.0,0.0,3.314768e+10,Healthcare,Drug Manufacturers—Specialty & Generic,0.154443,1.262647,10.155556,8.433333e-07,2.798305,-0.017293,0.053880,0.055479,0.090169,-0.210500,-0.728188,3.010746,0.123956,-1.115421,-0.011824,-0.185168,-0.749225,0.443946
ZTS,4,2018,87.730000,96.57,78.90,,,1,,,1.564000e+09,,,,,4.931351e+08,345000000.0,1.388439e+08,,,,,,1.602000e+09,478771915.0,1.077700e+10,8.592000e+09,6.443000e+09,,2018-12-31,2.185000e+09,,345000000.0,0.0,4.200266e+10,Healthcare,Drug Manufacturers—Specialty & Generic,0.201413,1.254306,6.100000,2.542899e-07,2.948741,0.094351,0.052021,0.220588,-0.008252,0.245723,-0.005764,-0.002503,-0.030363,-0.424108,0.000303,0.043866,-0.059163,0.036768


In [318]:
def compare_to_market_index(row: pd.Series, market_indices=None):
    """
    Express selected metrics of one quarterly row relative to market indices.

    For every column in VS_MARKET_INDICES_COLUMNS and every market index, the
    matching row of `market_index_df` is looked up with the key
    (index symbol, row.name[QUARTER], row.name[YEAR]) and the ratio
    row[col] / market_row[col] is recorded.

    NOTE(review): QUARTER and YEAR are defined outside this cell; they are used to
    pick the quarter and year out of `row.name` - confirm they match the layout
    of the frame's index.

    :param row: one row of the quarterly frame, as passed by DataFrame.apply(axis=1)
    :param market_indices: index symbols to compare against; any falsy value
        (None, empty list) falls back to MARKET_INDICES
    :return: pd.Series with a default integer index holding one entry per
        (column, market index) pair, column-major. An entry is the float ratio,
        0 when the market value is exactly 0 (avoids dividing by zero), or None
        when no market row exists for that quarter/year.
    """
    market_indices = market_indices or MARKET_INDICES

    ratios = []
    for col in VS_MARKET_INDICES_COLUMNS:
        for mkt_idx in market_indices:
            try:
                mkt_idx_row = market_index_df.loc[mkt_idx, row.name[QUARTER], row.name[YEAR]]
            except KeyError:
                # Only a missing (index, quarter, year) key is an expected miss; any
                # other exception signals a real problem and is allowed to propagate.
                ratios.append(None)
                continue

            if mkt_idx_row.empty:
                ratios.append(None)
            elif mkt_idx_row[col] == 0:
                ratios.append(0)
            else:
                # converting to float to drop index terms
                ratios.append(float(row[col] / mkt_idx_row[col]))

    return pd.Series(ratios)

# Name each comparison column "<metric><VS_MKT_IDX><index>". The metric-major,
# index-minor ordering mirrors the order in which compare_to_market_index emits values.
vs_market_indices_col_names = [
    f'{col}{VS_MKT_IDX}{mkt_idx}'
    for col in VS_MARKET_INDICES_COLUMNS
    for mkt_idx in MARKET_INDICES
]
# Row-wise apply fills the new columns; rows with a missing value in any of them
# are then dropped in place.
quarterly_df[vs_market_indices_col_names] = quarterly_df.apply(compare_to_market_index, axis='columns')
quarterly_df.dropna(inplace=True, subset=vs_market_indices_col_names)

In [319]:
# Display the (rows, columns) dimensions of quarterly_df at this point in the notebook.
quarterly_df.shape

(59295, 55)

In [320]:
def get_avg_recommendation_score(row: pd.Series):
    """
    Average the per-firm recommendation values stored as JSON on a row.

    The AVG_RECOMMENDATIONS cell is expected to hold a JSON object; its values
    are converted to float and averaged. JSON null values are skipped, because
    float(None) would raise TypeError.

    :param row: one row of the quarterly frame, as passed by DataFrame.apply(axis=1)
    :return: a length-1 pd.Series holding the mean score, or None when the cell
        is missing (None / NaN) or contains no non-null values
    """
    raw_recommendations = row[QuarterlyColumns.AVG_RECOMMENDATIONS]
    if raw_recommendations is None or str(raw_recommendations) == 'nan':
        return pd.Series([None])

    scores = [float(v) for v in json.loads(raw_recommendations).values() if v is not None]
    if not scores:
        # Nothing to average; report a missing score rather than np.mean([]),
        # which yields nan and emits a RuntimeWarning.
        return pd.Series([None])

    return pd.Series([np.mean(scores)])


# Row-wise apply: the helper returns a length-1 Series per row, and the result is
# stored under the AVG_RECOMMENDATION_SCORE column name.
quarterly_df[QuarterlyColumns.AVG_RECOMMENDATION_SCORE] = quarterly_df.apply(get_avg_recommendation_score, axis=1)

In [387]:
# Parse each row's JSON recommendation blob into a dict. Missing cells (None / NaN)
# and empty strings become an empty dict; NaN is truthy, so a plain truthiness test
# would pass it on to json.loads and raise.
recommendations = [
    json.loads(contents) if pd.notna(contents) and contents else {}
    for contents in quarterly_df[QuarterlyColumns.AVG_RECOMMENDATIONS]
]
# Prefix every firm name so the expanded columns are recognisable, and turn JSON
# nulls into 0.
recommendations = [
    {f'{AVG_REC_SCORE_PREFIX}{firm}': value if value is not None else 0
     for firm, value in recommendation.items()}
    for recommendation in recommendations
]
# Build the frame on quarterly_df's own index: the list is in row order, and the
# index-on-index merge below can only line rows up when both sides share the same
# labels (a default 0..n-1 index would not match).
# NOTE(review): assumes quarterly_df's index is unique - duplicate keys would
# multiply rows in the merge; confirm.
recommendations_df = pd.DataFrame(recommendations, index=quarterly_df.index).fillna(0)


quarterly_df = pd.merge(quarterly_df, recommendations_df, left_index=True, right_index=True)

In [321]:
# Keep only the columns listed in FEATURE_COLS (defined outside this section).
feature_df = quarterly_df[FEATURE_COLS]
# Bare name as the last expression so the notebook renders the frame.
feature_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Sector,AgeOfData,Volatility,AvgRecommendationScore,AvgPriceToEarningsRatio,DebtToEquityRatio,ReturnOnEquity,AssetsToLiabilitiesRatio,PriceToBookRatio,OperatingIncome,ProfitMargin,Delta_PriceAvg,Delta_Cash,Delta_Earnings,Delta_AvgPriceToEarningsRatio,Delta_DebtToEquityRatio,Delta_ReturnOnEquity,Delta_AssetsToLiabilitiesRatio,Delta_PriceToBookRatio,Delta_ProfitMargin,Delta_OperatingIncome,Volatility_vs_^DJI
TickerSymbol,Quarter,Year,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
A,1,2001,Healthcare,74.744444,0.406069,,-8.787302e-08,0.214833,-0.058846,2.704777,0.416912,-2.133756e+08,-0.220898,-0.456295,1.196787,-2.032787,-1.526444,0.000000,-2.015809,0.014492,0.839766,-3.442186,-4.117022,3.415535
A,1,2002,Healthcare,70.688889,0.424478,,-4.539295e-08,0.264429,-0.084847,2.271266,0.550973,-1.366604e+08,-0.261331,-0.339250,0.499145,-3.580420,-1.256063,0.000000,-4.357690,-0.338187,0.144460,-3.498182,-0.750237,4.201000
A,1,2003,Healthcare,66.633333,0.433814,,4.490141e-07,0.374715,0.023135,1.925792,0.200242,9.615090e+06,0.043214,1.170184,-0.090022,-1.300847,-8.213569,0.507657,-1.453575,-0.160474,-0.703095,-1.317877,-1.042016,2.682984
A,1,2004,Healthcare,62.566667,0.188926,,2.297087e-07,0.305202,0.027335,2.114134,0.324242,9.823274e+07,0.062123,0.011543,0.545115,6.923077,-0.872329,-0.250531,4.938102,0.166013,0.278729,7.004315,-0.426926,3.125036
A,1,2005,Healthcare,58.511111,0.123529,,1.207386e-08,0.358680,0.673362,2.087929,0.283920,-1.776205e+08,2.107784,0.517180,0.182289,37.054054,-0.960131,0.113156,31.476069,0.031832,-0.132082,50.897071,-1.467747,2.309428
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZTS,4,2015,Healthcare,18.277778,0.220726,,1.991364e-06,4.178839,-0.021296,1.159924,0.050091,1.988622e+08,0.017268,-0.005674,0.949324,-0.883598,7.542164,0.592000,-1.177234,-0.057627,-0.123673,-0.889080,0.288340,1.969986
ZTS,4,2016,Healthcare,14.211111,0.144356,,3.279221e-07,3.004707,0.037396,1.243740,0.060336,2.263035e+08,0.120595,0.015279,0.116743,-0.355649,0.575661,0.066143,-0.752000,-0.013875,-0.072561,-0.373813,0.238539,2.098382
ZTS,4,2017,Healthcare,10.155556,0.154443,,8.433333e-07,2.798305,-0.017293,1.262647,0.053880,5.628943e+08,0.055479,0.090169,-0.210500,-0.728188,3.010746,0.123956,-1.115421,-0.011824,-0.185168,-0.749225,0.443946,1.695123
ZTS,4,2018,Healthcare,6.100000,0.201413,,2.542899e-07,2.948741,0.094351,1.254306,0.052021,4.931351e+08,0.220588,-0.008252,0.245723,-0.005764,-0.002503,-0.030363,-0.424108,0.000303,0.043866,-0.059163,0.036768,2.036722


In [175]:
# Scratch check: the commented-out loop below looked for Delta_PriceAvg entries that
# float() cannot convert, printing the position and value of the first offender
# before re-raising.
# for i,p in enumerate(quarterly_df['Delta_PriceAvg']):
#     try:
#         float(p)
#     except:
#         print(i, p)
#         raise
# Spot-check that the entry selected by 1316 converts to a plain float.
float(quarterly_df['Delta_PriceAvg'][1316])

0.012148823082763868

In [177]:
# Show the same entry without the float() conversion; the recorded output is a
# one-element Series (still carrying its index) rather than a bare scalar.
quarterly_df['Delta_PriceAvg'][1316]

TickerSymbol  Quarter  Year
ADCT          2        2008    0.012149
Name: PriceAvg, dtype: float64

In [246]:
# Look up a single row by its (TickerSymbol, Quarter, Year) index key.
quarterly_df.loc['A',4,2000]

PriceAvg                         50.91
PriceHigh                           63
PriceLow                         38.81
PriceEoQ                           NaN
AvgRecommendations                 NaN
                              ...     
AvgPriceToEarningsRatio    1.66918e-07
DebtToEquityRatio                    0
ReturnOnEquity               0.0579297
PriceToBookRatio              0.226611
ProfitMargin                       NaN
Name: (A, 4, 2000), Length: 43, dtype: object

In [279]:
# Display the list of delta column names (defined outside this section).
delta_col_names

['Delta_PriceAvg',
 'Delta_Cash',
 'Delta_Earnings',
 'Delta_AvgPriceToEarningsRatio',
 'Delta_DebtToEquityRatio',
 'Delta_ReturnOnEquity',
 'Delta_AssetsToLiabilitiesRatio',
 'Delta_PriceToBookRatio',
 'Delta_ProfitMargin',
 'Delta_OperatingIncome']

In [299]:
# Print, for every delta column plus a few raw financial columns, how many
# entries render as the string "nan".
for col in [
    *delta_col_names,
    QuarterlyColumns.PROFIT_MARGIN,
    QuarterlyColumns.REVENUE,
    QuarterlyColumns.NET_INCOME,
    QuarterlyColumns.DIVIDENDS,
    QuarterlyColumns.EARNINGS,
]:
    nan_count = sum(1 for value in quarterly_df[col] if str(value) == "nan")
    print(f'{col} {nan_count}')

Delta_PriceAvg 0
Delta_Cash 65
Delta_Earnings 57663
Delta_AvgPriceToEarningsRatio 57663
Delta_DebtToEquityRatio 124
Delta_ReturnOnEquity 58024
Delta_AssetsToLiabilitiesRatio 3
Delta_PriceToBookRatio 658
Delta_ProfitMargin 188
Delta_OperatingIncome 0
ProfitMargin 142
TotalRevenue 142
NetIncome 0
DividendsPaid 463
Earnings 57159
