In [30]:
import numpy as np
import pandas as pd
import sqlite3
import json
from datetime import datetime

pd.set_option('display.max_columns', None)

In [31]:
DB_PATH = '/Users/ezras/projects/personal/finance_ml/data/quarterly_financial_data.db'
INFO_CSV_PATH = '/Users/ezras/projects/personal/finance_ml/data/stock_general_info.csv'
TABLE_NAME = 'yahoo_financial_data'
STOCKPUP_TABLE_NAME = 'stockpup_data'


DELTA_PREFIX = 'Delta_'
VS_MKT_IDX = '_vs_'
AVG_REC_SCORE_PREFIX = 'AvgRecScore_'

MISSING_SECTOR = 'MissingSector'
MISSING_INDUSTRY = 'MissingIndustry'

MONTH_TO_QUARTER = {
    1: 4,
    2: 1,
    3: 1,
    4: 1,
    5: 2,
    6: 2,
    7: 2,
    8: 3,
    9: 3,
    10: 3,
    11: 4,
    12: 4
}

class StockPupColumns:
    """
    Our dataset comes from over 20 years of 10-Q and 10-K filings made by public companies
     with the U.S. Securities and Exchange Commission. We extract data from both text and
     XBRL filings, fix reporting mistakes, and normalize the data into quarterly time series
     of final restated values.
    """
    # Date Quarter Ends
    QUARTER_END = 'QuarterEnd'
    # The total number of common shares outstanding at the end of a given quarter, including all
    # classes of common stock.
    SHARES = 'Shares'
    # The number of shares the company had at the end of a given quarter, adjusted for splits to
    # be comparable to today's shares.
    SHARES_SPLIT_ADJUSTED = 'SharesSplitAdjusted'
    # If an investor started with 1 share of stock at the end of a given quarter, the split factor
    # for that quarter indicates how many shares the investor would own today as a result of
    # subsequent stock splits.
    SPLIT_FACTOR = 'SplitFactor'
    # Total assets at the end of a quarter.
    ASSETS = 'Assets'
    # Current assets at the end of a quarter.
    CURRENT_ASSETS = 'CurrentAssets'
    # Total liabilities at the end of a quarter.
    LIABILITIES = 'Liabilities'
    # Current liabilities at the end of a quarter.
    CURRENT_LIABILITIES = 'CurrentLiabilities'
    # Total shareholders' equity at the end of a quarter, including both common and preferred
    # stockholders.
    SHAREHOLDER_EQUITY = 'ShareholdersEquity'
    # Non-controlling or minority interest, if any, excluded from Shareholders equity.
    NON_CONTROLLING_INTEREST = 'NonControllingInterest'
    # Preferred equity, if any, included in Shareholders equity.
    PREFERRED_EQUITY = 'PreferredEquity'
    # Total Goodwill and all other Intangible assets, if any.
    GOODWILL_AND_INTANGIBLES = 'GoodwillIntangibles'
    # All long-term debt including capital lease obligations.
    LONG_TERM_DEBT = 'LongTermDebt'
    # Total revenue for a given quarter.
    REVENUE = 'Revenue'
    # Earnings or Net Income for a given quarter.
    EARNINGS = 'Earnings'
    # Earnings available for common stockholders - Net income minus earnings that must be
    # distributed to preferred shareholders. May be omitted when not reported by the company.
    EARNINGS_AVAILABLE_FOR_COMMON_STOCKHOLDERS = 'EarningsAvailableForCommonStockholders'
    # Basic earnings per share for a given quarter.
    EPS_BASIC = 'EPS_basic'
    # Diluted earnings per share.
    EPS_DILUTED = 'EPS_diluted'
    # Common stock dividends paid during a quarter per share, including all regular and special
    # dividends and distributions to common shareholders.
    DIVIDEND_PER_SHARE = 'DividendPerShare'
    # Cash produced by operating activities during a given quarter, including Continuing and
    # Discontinued operations.
    CASH_FROM_OPERATING_ACTIVITES = 'CashFromOperatingActivities'
    # Cash produced by investing activities during a given quarter, including Continuing and
    # Discontinued operations.
    CASH_FROM_INVESTING_ACTIVITIES = 'CashFromInvestingActivities'
    # Cash produced by financing activities during a given quarter, including Continuing and
    # Discontinued operations.
    CASH_FROM_FINANCING_ACTIVITES = 'CashFromFinancingActivities'
    # Change in cash and cash equivalents during a given quarter, including Effect of Exchange
    # Rate Movements and Other Cash Change Adjustments, if any.
    CASH_CHANGE_DURING_PERIOD = 'CashChangeDuringPeriod'
    # Cash and cash equivalents at the end of a quarter, including Continuing and
    # Discontinued operations.
    CASH_AT_END_OF_PERIOD = 'CashAtEndOfPeriod'
    # Capital Expenditures are the cash outflows for long-term productive assets, net of cash
    # from disposals of capital assets.
    CAPITAL_EXPENDITURES = 'CapitalExpenditures'
    # The medium price per share of the company common stock during a given quarter. The prices
    # are as reported, and are not adjusted for subsequent dividends.
    PRICE = 'Price'  # Average price during quarter
    # The highest price per share of the company common stock during a given quarter.
    PRICE_HIGH = 'PriceHigh'
    # The lowest price of the company common stock during a quarter.
    PRICE_LOW = 'PriceLow'
    # Return on equity is the ratio of Earnings (available to common stockholders)
    # TTM (over the Trailing Twelve Months) to TTM average common shareholders' equity.
    ROE = 'ROE'
    # Return on assets is the ratio of total Earnings TTM to TTM average Assets.
    ROA = 'ROA'
    # Common stockholders' equity per share, also known as BVPS.
    BOOK_VALUE_OF_EQUITY_PER_SHARE = 'BookValueOfEquityPerShare'
    # The ratio of Price to Book value of equity per share as of the previous quarter.
    P_B_RATIO = 'P_B_ratio'
    # The ratio of Price to EPS diluted TTM as of the previous quarter.
    P_E_RATIO = 'P_E_ratio'
    # The aggregate amount of dividends paid per split-adjusted share of common stock from the
    # first available reporting quarter until a given quarter.
    CUM_DIVIDENDS_PER_SHARE = 'CumulativeDividendsPerShare'
    # The ratio of Dividends TTM to Earnings (available to common stockholders) TTM.
    DIVIDEND_PAYOUT_RATIO = 'DividendPayoutRatio'
    # The ratio of Long-term debt to common shareholders' equity (Shareholders equity minus
    # Preferred equity).
    LONG_TERM_DEBT_TO_EQUITY_RATIO = 'LongTermDebtToEquityRatio'
    # The ratio of common shareholders' equity (Shareholders equity minus Preferred equity) to
    # Assets.
    EQUITY_TO_ASSETS_RATIO = 'EquityToAssetsRatio'
    # The ratio of Earnings (available for common stockholders) TTM to Revenue TTM.
    NET_MARGIN = 'NetMargin'
    # The ratio of Revenue TTM to TTM average Assets.
    ASSET_TURNOVER = 'AssetTurnover'
    # Cash from operating activities minus the Capital Expenditures for a quarter.
    FREE_CASH_FLOW_PER_SHARE = 'FreeCashFlowPerShare'
    # The ratio of Current assets to Current liabilities.
    CURRENT_RATIO = 'CurrentRatio'

class QuarterlyColumns:
    TICKER_SYMBOL = 'TickerSymbol'
    QUARTER = 'Quarter'
    YEAR = 'Year'
    PRICE_AVG = 'PriceAvg'
    PRICE_HI = 'PriceHigh'
    PRICE_LO = 'PriceLow'
    PRICE_AT_END_OF_QUARTER = 'PriceEoQ'
    AVG_RECOMMENDATIONS = 'AvgRecommendations'
    AVG_RECOMMENDATION_SCORE = 'AvgRecommendationScore'
    SPLIT = 'Split'
    EBIT = 'Ebit'
    PROFIT = 'GrossProfit'
    REVENUE = 'TotalRevenue'
    RND = 'ResearchDevelopment'
    OPERATING_EXPENSES = 'TotalOperatingExpenses'
    INCOME_PRETAX = 'IncomeBeforeTax'
    INCOME_TAX = 'IncomeTaxExpense'
    OPERATING_INCOME = 'OperatingIncome'
    NET_INCOME = 'NetIncome'
    DIVIDENDS = 'DividendsPaid'
    STOCK_REPURCHASED = 'RepurchaseOfStock'
    STOCK_ISSUED = 'IssuanceOfStock'
    DEPRECIATION = 'Depreciation'
    NET_BORROWINGS = 'NetBorrowings'
    INVESTMENTS = 'Investments'
    CASH = 'Cash'
    COMMON_STOCK = 'CommonStock'
    ASSETS = 'TotalAssets'
    LIABILITIES = 'TotalLiab'
    DEBT_LONG = 'LongTermDebt'
    DEBT_SHORT = 'ShortLongTermDebt'
    DATE = 'Date'
    VOLUME = 'Volume'
    EARNINGS = 'Earnings'
    STOCKHOLDER_EQUITY = 'TotalStockholderEquity'
    VOLATILITY = 'Volatility'
    SECTOR = 'Sector'
    INDUSTRY = 'Industry'
    MARKET_CAP = 'MarketCap'
    AGE_OF_DATA = 'AgeOfData'
    WORKING_CAPITAL_RATIO = 'AssetsToLiabilitiesRatio'
    AVG_PE_RATIO = 'AvgPriceToEarningsRatio'
    DEBT_EQUITY_RATIO = 'DebtToEquityRatio'
    ROE = 'ReturnOnEquity'
    PRICE_BOOK_RATIO = 'PriceToBookRatio'
    FCF = 'FreeCashFlow'
    PROFIT_MARGIN = 'ProfitMargin'

PRICE_ONLY_DELTA_COLUMNS = [
    QuarterlyColumns.PRICE_AVG,
    QuarterlyColumns.VOLATILITY
]

DELTA_COLUMNS = [
    QuarterlyColumns.PRICE_AVG,
    QuarterlyColumns.EBIT,
    QuarterlyColumns.CASH,
    QuarterlyColumns.EARNINGS,
    QuarterlyColumns.AVG_PE_RATIO,
    QuarterlyColumns.DEBT_EQUITY_RATIO,
    QuarterlyColumns.ROE,
    QuarterlyColumns.WORKING_CAPITAL_RATIO,
    QuarterlyColumns.PRICE_BOOK_RATIO,
    QuarterlyColumns.FCF,
    QuarterlyColumns.PROFIT_MARGIN,
    QuarterlyColumns.OPERATING_INCOME
]

CATEGORICAL_COLUMNS = [
    QuarterlyColumns.QUARTER,
    QuarterlyColumns.SECTOR,
    QuarterlyColumns.INDUSTRY
]

VS_MARKET_INDICES_COLUMNS = [
    f'{DELTA_PREFIX}{QuarterlyColumns.PRICE_AVG}',
    QuarterlyColumns.VOLATILITY,
]

MARKET_INDICES = ['^DJI', 'VTSAX', '^IXIC', '^GSPC', '^RUT', '^NYA']


FORMULAE = {
    QuarterlyColumns.VOLATILITY: lambda row: (
        row[QuarterlyColumns.PRICE_HI] - row[QuarterlyColumns.PRICE_LO]) / row[QuarterlyColumns.PRICE_AVG],
    QuarterlyColumns.WORKING_CAPITAL_RATIO: lambda row: (
        row[QuarterlyColumns.ASSETS] / row[QuarterlyColumns.LIABILITIES]),
    QuarterlyColumns.AGE_OF_DATA: lambda row: (
        datetime.now().date() - datetime.strptime(row[QuarterlyColumns.DATE], '%Y-%m-%d').date()).days/90,
    QuarterlyColumns.AVG_PE_RATIO: lambda row: (
        row[QuarterlyColumns.PRICE_AVG] / row[QuarterlyColumns.EARNINGS]),
    QuarterlyColumns.DEBT_EQUITY_RATIO: lambda row: (
        row[QuarterlyColumns.DEBT_LONG] + row[QuarterlyColumns.DEBT_SHORT]) / row[QuarterlyColumns.STOCKHOLDER_EQUITY],
    QuarterlyColumns.ROE: lambda row: (
        row[QuarterlyColumns.EARNINGS] - row[QuarterlyColumns.DIVIDENDS]) / row[QuarterlyColumns.STOCKHOLDER_EQUITY],
    QuarterlyColumns.PRICE_BOOK_RATIO: lambda row: (
        row[QuarterlyColumns.ASSETS] - row[QuarterlyColumns.LIABILITIES]) / row[QuarterlyColumns.MARKET_CAP],
    QuarterlyColumns.PROFIT_MARGIN: lambda row: (
        row[QuarterlyColumns.NET_INCOME] / row[QuarterlyColumns.REVENUE])
}

TARGET_COL = f'{DELTA_PREFIX}{QuarterlyColumns.PRICE_AVG}{VS_MKT_IDX}^DJI'

FEATURE_COLS = [
    QuarterlyColumns.QUARTER,
    QuarterlyColumns.SECTOR,
    QuarterlyColumns.AGE_OF_DATA,
    QuarterlyColumns.EBIT,
    QuarterlyColumns.VOLATILITY,
    QuarterlyColumns.AVG_RECOMMENDATION_SCORE,
    QuarterlyColumns.AVG_PE_RATIO,
    QuarterlyColumns.DEBT_EQUITY_RATIO,
    QuarterlyColumns.ROE,
    QuarterlyColumns.WORKING_CAPITAL_RATIO,
    QuarterlyColumns.PRICE_BOOK_RATIO,
    QuarterlyColumns.FCF,
    QuarterlyColumns.PROFIT_MARGIN
] + [
    f'{DELTA_PREFIX}{col}' for col in DELTA_COLUMNS
] + [
    f'{QuarterlyColumns.VOLATILITY}{VS_MKT_IDX}{mkt_idx}' for mkt_idx in MARKET_INDICES
]

In [32]:
db_conn = sqlite3.connect(DB_PATH)

quarterly_df = pd.read_sql_query(f'SELECT * FROM {TABLE_NAME}', db_conn)
db_conn.close()

market_index_df = quarterly_df[quarterly_df[QuarterlyColumns.TICKER_SYMBOL].isin(MARKET_INDICES)]
quarterly_df = quarterly_df[((quarterly_df[QuarterlyColumns.DATE].notna()) & 
                             (quarterly_df[QuarterlyColumns.REVENUE].notna()) &
                             (quarterly_df[QuarterlyColumns.REVENUE] != 0) &
                             (quarterly_df[QuarterlyColumns.PRICE_AVG].notna()) &
                             (quarterly_df[QuarterlyColumns.EARNINGS].notna()) &
                             (quarterly_df[QuarterlyColumns.EARNINGS] != 0) &
                             (~quarterly_df[QuarterlyColumns.TICKER_SYMBOL].isin(MARKET_INDICES)))]

In [40]:
db_conn = sqlite3.connect(DB_PATH)

stockpup_df = pd.read_sql_query(f'SELECT * FROM {STOCKPUP_TABLE_NAME}', db_conn)
db_conn.close()

stockpup_df = stockpup_df[((stockpup_df[StockPupColumns.SHARES].notna()) &
                           (stockpup_df[StockPupColumns.FREE_CASH_FLOW_PER_SHARE].notna()))]


def process_stockpup_df(df):
    df = df[((df[StockPupColumns.SHARES].notna()) &
             (df[StockPupColumns.FREE_CASH_FLOW_PER_SHARE].notna()))]
    
    df[StockPupColumns.QUARTER_END] = pd.to_datetime(df[StockPupColumns.QUARTER_END])

    df[QuarterlyColumns.QUARTER] = df[StockPupColumns.QUARTER_END].apply(
        lambda r: MONTH_TO_QUARTER[r.month])
    df[QuarterlyColumns.YEAR] = df[StockPupColumns.QUARTER_END].apply(lambda r: r.year)
    df[QuarterlyColumns.DIVIDENDS] = df[StockPupColumns.DIVIDEND_PER_SHARE] * df[
        StockPupColumns.SHARES]
    df[QuarterlyColumns.DATE] = df[StockPupColumns.QUARTER_END].apply(lambda r: str(r.date()))
    df[QuarterlyColumns.OPERATING_INCOME] = df[StockPupColumns.FREE_CASH_FLOW_PER_SHARE] * df[
        StockPupColumns.SHARES]

    df.rename(columns={
        StockPupColumns.ASSETS: QuarterlyColumns.ASSETS,
        StockPupColumns.REVENUE: QuarterlyColumns.REVENUE,
        StockPupColumns.EARNINGS: QuarterlyColumns.EARNINGS,
        StockPupColumns.LIABILITIES: QuarterlyColumns.LIABILITIES,
        StockPupColumns.LONG_TERM_DEBT: QuarterlyColumns.DEBT_LONG,
        StockPupColumns.SHAREHOLDER_EQUITY: QuarterlyColumns.STOCKHOLDER_EQUITY,
        StockPupColumns.CASH_AT_END_OF_PERIOD: QuarterlyColumns.CASH,
        StockPupColumns.PRICE: QuarterlyColumns.PRICE_AVG,
        StockPupColumns.PRICE_LOW: QuarterlyColumns.PRICE_LO,
        StockPupColumns.PRICE_HIGH: QuarterlyColumns.PRICE_HI,
        StockPupColumns.SPLIT_FACTOR: QuarterlyColumns.SPLIT,
        StockPupColumns.SHARES_SPLIT_ADJUSTED: QuarterlyColumns.COMMON_STOCK
    }, inplace=True)

    # Filter only to columns in QuarterlyColumns
    df = df[[col for col in df.columns if col in [
        getattr(QuarterlyColumns, qc) for qc in dir(QuarterlyColumns) if qc[0] != '_']]]
    
    return df

stockpup_df = process_stockpup_df(stockpup_df)
stockpup_df


Unnamed: 0,TickerSymbol,CommonStock,Split,TotalAssets,TotalLiab,TotalStockholderEquity,LongTermDebt,TotalRevenue,Earnings,Cash,PriceAvg,PriceHigh,PriceLow,Quarter,Year,DividendsPaid,Date,OperatingIncome
0,WM,1705359302,1.0,3.097310e+11,2.836450e+11,2.217200e+10,0,2.857000e+09,-3.328000e+09,7.235000e+09,,,,2,2008,1.705359e+07,2008-06-30,3.052593e+09
1,WM,1058827858,1.0,3.196680e+11,2.972190e+11,1.853500e+10,0,3.744000e+09,-1.138000e+09,1.008900e+10,,,,1,2008,1.588242e+08,2008-03-31,3.282366e+09
2,WM,882557330,1.0,3.279130e+11,3.033290e+11,2.066500e+10,0,3.412000e+09,-1.867000e+09,9.560000e+09,24.57,36.07,13.07,4,2007,4.942321e+08,2007-12-31,4.995274e+09
3,WM,868722736,1.0,3.301100e+11,3.061690e+11,2.099600e+10,0,3.393000e+09,1.860000e+08,1.137000e+10,38.13,43.68,32.57,3,2007,4.864847e+08,2007-09-30,-1.250961e+09
4,WM,870584006,1.0,3.122190e+11,2.880090e+11,2.126500e+10,0,3.792000e+09,8.300000e+08,4.167000e+09,41.64,44.41,38.87,2,2007,4.788212e+08,2007-06-30,3.377866e+09
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61645,WAG,984564288,8.0,3.103458e+09,1.500512e+09,1.602946e+09,0,2.405556e+09,5.399400e+07,1.616100e+07,4.96,5.30,4.63,4,1994,2.399875e+07,1994-11-30,-8.614938e+06
61646,WAG,984564288,8.0,2.908749e+09,1.335109e+09,1.573640e+09,0,2.282526e+09,6.908300e+07,7.791500e+07,4.67,5.08,4.27,3,1994,2.092199e+07,1994-08-31,-8.614938e+06
61647,WAG,984564288,8.0,2.788329e+09,1.261590e+09,1.526739e+09,0,2.335961e+09,7.101800e+07,1.798600e+08,5.16,5.34,4.98,2,1994,2.092199e+07,1994-05-31,2.092199e+07
61648,WAG,984564288,8.0,2.682314e+09,1.204690e+09,1.477624e+09,0,2.498537e+09,9.761500e+07,4.848100e+07,4.99,5.28,4.70,1,1994,2.092199e+07,1994-02-28,7.384232e+06


In [None]:
stock_info_df = pd.read_csv(INFO_CSV_PATH)[['tickerSymbol', 'sector', 'industry']]
stock_info_df.rename(columns={
    'tickerSymbol': QuarterlyColumns.TICKER_SYMBOL,
    'sector': QuarterlyColumns.SECTOR,
    'industry': QuarterlyColumns.INDUSTRY
}, inplace=True)

quarterly_df = pd.merge(quarterly_df, stock_info_df, how='inner', on=QuarterlyColumns.TICKER_SYMBOL)

quarterly_df[QuarterlyColumns.SECTOR].fillna(MISSING_SECTOR, inplace=True)
quarterly_df[QuarterlyColumns.INDUSTRY].fillna(MISSING_INDUSTRY, inplace=True)
quarterly_df[QuarterlyColumns.DEBT_SHORT].fillna(0, inplace=True)

ticker_symbols = quarterly_df.TickerSymbol.unique()

In [381]:
for col_name, fn in FORMULAE.items():
        quarterly_df[col_name] = quarterly_df.apply(fn, axis=1)

market_index_df[QuarterlyColumns.VOLATILITY] = market_index_df.apply(FORMULAE[QuarterlyColumns.VOLATILITY], axis=1)

In [382]:
def get_prev_quarter(quarter, year):
    if quarter == 1:
        return 4, year - 1

    return quarter - 1, year

def get_row(df, ticker_symbol, quarter, year):
    return df[(df[QuarterlyColumns.TICKER_SYMBOL] == ticker_symbol) & 
              (df[QuarterlyColumns.QUARTER] == quarter) & 
              (df[QuarterlyColumns.YEAR] == year)]

def add_delta_columns(row: pd.Series, df: pd.DataFrame, columns: list):    
    prev_quarter_row = get_row(df, row.TickerSymbol, *get_prev_quarter(row.Quarter, row.Year))
    
    if not prev_quarter_row.empty:
        new_cols = []

        for col in columns:
            new_cols.append(float((row[col] - prev_quarter_row[col]) / prev_quarter_row[col]))

        return pd.Series(new_cols)

    return pd.Series([None]*len(columns))

def add_delta_columns_quarterly_df(row: pd.Series):
    return add_delta_columns(row, quarterly_df, DELTA_COLUMNS)

def add_delta_columns_market_index_df(row: pd.Series):
    return add_delta_columns(row, market_index_df, PRICE_ONLY_DELTA_COLUMNS)


delta_col_names = [f'{DELTA_PREFIX}{col}' for col in DELTA_COLUMNS]
quarterly_df[delta_col_names] = quarterly_df.apply(add_delta_columns_quarterly_df, axis=1)

delta_col_mkt_index_names = [f'{DELTA_PREFIX}{col}' for col in PRICE_ONLY_DELTA_COLUMNS]
market_index_df[delta_col_mkt_index_names] = market_index_df.apply(add_delta_columns_market_index_df, axis=1)

In [383]:
def compare_to_market_index(row: pd.Series, market_indices=None):
    market_indices = MARKET_INDICES if not market_indices else market_indices
    
    
    new_cols = []
    for col in VS_MARKET_INDICES_COLUMNS:
        for mkt_idx in market_indices:
            mkt_idx_row = get_row(market_index_df, mkt_idx, row.Quarter, row.Year)
            
            if not mkt_idx_row.empty:
                new_cols.append(float(row[f'{col}'] / mkt_idx_row[f'{col}']))
            else:
                new_cols.append(None)

    return pd.Series(new_cols)

vs_market_indices_col_names = [f'{col}{VS_MKT_IDX}{mkt_idx}' 
                               for col in VS_MARKET_INDICES_COLUMNS for mkt_idx in MARKET_INDICES]
quarterly_df[vs_market_indices_col_names] = quarterly_df.apply(compare_to_market_index, axis=1)


In [385]:
quarterly_df.shape

(2195, 70)

In [386]:
def get_avg_recommendation_score(row: pd.Series):
    if row[QuarterlyColumns.AVG_RECOMMENDATIONS] is None:
        return pd.Series([None])
    
    avg_recommendation = np.mean([float(v) for v in json.loads(row[QuarterlyColumns.AVG_RECOMMENDATIONS]).values()])
    return pd.Series([avg_recommendation])


quarterly_df[QuarterlyColumns.AVG_RECOMMENDATION_SCORE] = quarterly_df.apply(get_avg_recommendation_score, axis=1)

In [387]:
recommendations = [json.loads(contents) if contents else {} 
                   for contents in quarterly_df[QuarterlyColumns.AVG_RECOMMENDATIONS]]
recommendations = [
    {f'{AVG_REC_SCORE_PREFIX}{firm}': value if value is not None else 0 for firm, value in recommendation.items()} 
    for recommendation in recommendations]
recommendations_df = pd.DataFrame(recommendations).fillna(0)


quarterly_df = pd.merge(quarterly_df, recommendations_df, left_index=True, right_index=True)

In [396]:
feature_df = quarterly_df[FEATURE_COLS]
feature_df

Unnamed: 0,Quarter,Sector,MarketCap,AgeOfData,Ebit,Volatility,AvgRecommendationScore,AvgPriceToEarningsRatio,DebtToEquityRatio,ReturnOnEquity,AssetsToLiabilitiesRatio,PriceToBookRatio,FreeCashFlow,ProfitMargin,Delta_PriceAvg,Delta_Ebit,Delta_Cash,Delta_Earnings,Delta_AvgPriceToEarningsRatio,Delta_DebtToEquityRatio,Delta_ReturnOnEquity,Delta_AssetsToLiabilitiesRatio,Delta_PriceToBookRatio,Delta_FreeCashFlow,Delta_ProfitMargin,Volatility_vs_^DJI,Volatility_vs_VTSAX,Volatility_vs_^IXIC,Volatility_vs_^GSPC,Volatility_vs_^RUT,Volatility_vs_^NYA
0,1,Healthcare,2.745336e+10,0.655556,2.270000e+08,0.310545,0.416667,4.014219e-07,0.375000,,2.017282,0.173676,-7.840000e+08,0.081583,-0.068790,-0.013043,0.079935,-0.015464,-0.054163,0.017348,,-0.012061,-0.016502,-0.125975,-0.438028,0.761474,0.806366,0.928053,0.834559,0.663034,0.742060
1,4,Healthcare,2.745336e+10,1.655556,2.300000e+08,0.178538,0.416667,4.244093e-07,0.368606,0.051568,2.041908,0.176590,-8.970000e+08,0.145173,0.131945,-0.265176,-0.112880,-0.015228,0.149450,-0.020079,-0.012729,0.016202,0.021061,0.210526,0.022947,1.928639,1.502414,1.213024,1.556878,1.375642,1.761685
2,3,Healthcare,2.745336e+10,2.677778,3.130000e+08,0.170337,0.416667,3.692283e-07,0.376158,0.052233,2.009354,0.172948,-7.410000e+08,0.141917,0.023922,0.252000,-0.216997,0.950495,-0.475045,0.379926,0.631235,-0.096548,0.000211,-0.042636,-0.053394,2.416611,2.689583,2.275114,2.719017,2.018225,2.527764
3,2,Healthcare,2.745336e+10,3.700000,2.500000e+08,0.173849,0.416667,7.033522e-07,0.272593,0.032020,2.224085,0.172911,-7.740000e+08,0.149922,,,,,,,,,,,,2.339802,2.398075,1.646554,2.387804,1.801222,2.723679
4,1,Industrials,6.385700e+09,0.988889,-1.216000e+09,0.868725,-0.029412,3.515934e-08,-7.979135,-0.267451,0.956939,-0.412797,-1.094700e+10,-0.263183,-0.178940,-2.308934,0.692857,0.557647,-0.472885,-0.954942,-0.932566,-0.041179,21.338983,0.157800,-8.191751,2.130161,2.255744,2.596152,2.334611,1.854784,2.075853
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2190,2,Financial Services,5.872814e+09,4.044444,0.000000e+00,0.144248,0.325000,3.183991e-06,0.175743,0.010664,1.121650,1.293928,-1.570000e+08,0.290749,,,,,,,,,,,,1.941411,1.989762,1.366200,1.981240,1.494533,2.259926
2191,1,Healthcare,6.509066e+10,0.988889,5.430000e+08,0.395742,0.416667,3.555365e-07,2.177624,0.169270,1.313732,0.042295,-4.480000e+08,0.275750,0.067219,-0.093489,0.008790,-0.143187,0.245568,-0.008906,-0.104721,0.005583,0.016617,-0.058824,0.202096,0.970382,1.027590,1.182661,1.063517,0.844935,0.945642
2192,4,Healthcare,6.509066e+10,2.000000,5.990000e+08,0.137140,0.416667,2.854412e-07,2.197194,0.189069,1.306439,0.041604,-4.760000e+08,0.229391,0.029209,0.056437,0.065565,0.127604,-0.087261,-0.087314,0.095948,-0.003945,0.011202,0.057778,-0.160843,1.481445,1.154049,0.931760,1.195884,1.056672,1.353202
2193,3,Healthcare,6.509066e+10,3.022222,5.670000e+08,0.131486,0.416667,3.127302e-07,2.407394,0.172517,1.311613,0.041143,-4.500000e+08,0.273359,0.153738,0.071834,0.034188,-0.092199,0.270915,-0.099562,-0.169784,0.023765,0.110742,-0.079755,0.139854,1.865424,2.076136,1.756200,2.098857,1.557903,1.951225


[datetime.datetime(2020, 4, 30, 0, 0),
 datetime.datetime(2020, 1, 31, 0, 0),
 datetime.datetime(2019, 10, 31, 0, 0),
 datetime.datetime(2019, 7, 31, 0, 0),
 datetime.datetime(2020, 3, 31, 0, 0),
 datetime.datetime(2019, 12, 31, 0, 0),
 datetime.datetime(2019, 9, 30, 0, 0),
 datetime.datetime(2019, 6, 30, 0, 0),
 datetime.datetime(2020, 3, 31, 0, 0),
 datetime.datetime(2019, 12, 31, 0, 0),
 datetime.datetime(2019, 9, 30, 0, 0),
 datetime.datetime(2019, 6, 30, 0, 0),
 datetime.datetime(2020, 4, 18, 0, 0),
 datetime.datetime(2019, 12, 28, 0, 0),
 datetime.datetime(2019, 10, 5, 0, 0),
 datetime.datetime(2019, 7, 13, 0, 0),
 datetime.datetime(2020, 3, 28, 0, 0),
 datetime.datetime(2019, 12, 28, 0, 0),
 datetime.datetime(2019, 9, 28, 0, 0),
 datetime.datetime(2019, 6, 29, 0, 0),
 datetime.datetime(2020, 3, 31, 0, 0),
 datetime.datetime(2019, 12, 31, 0, 0),
 datetime.datetime(2019, 9, 30, 0, 0),
 datetime.datetime(2019, 6, 30, 0, 0),
 datetime.datetime(2020, 3, 31, 0, 0),
 datetime.datetime(

In [36]:
stockpup_df

Unnamed: 0,TickerSymbol,LongTermDebt,Earnings,PriceHigh,PriceLow,Quarter,Year,DividendsPaid,Date,OperatingIncome
0,WM,0,-3.328000e+09,,,2,2008,1.705359e+07,2008-06-30,3.052593e+09
1,WM,0,-1.138000e+09,,,1,2008,1.588242e+08,2008-03-31,3.282366e+09
2,WM,0,-1.867000e+09,36.07,13.07,4,2007,4.942321e+08,2007-12-31,4.995274e+09
3,WM,0,1.860000e+08,43.68,32.57,3,2007,4.864847e+08,2007-09-30,-1.250961e+09
4,WM,0,8.300000e+08,44.41,38.87,2,2007,4.788212e+08,2007-06-30,3.377866e+09
...,...,...,...,...,...,...,...,...,...,...
61645,WAG,0,5.399400e+07,5.30,4.63,4,1994,2.399875e+07,1994-11-30,-8.614938e+06
61646,WAG,0,6.908300e+07,5.08,4.27,3,1994,2.092199e+07,1994-08-31,-8.614938e+06
61647,WAG,0,7.101800e+07,5.34,4.98,2,1994,2.092199e+07,1994-05-31,2.092199e+07
61648,WAG,0,9.761500e+07,5.28,4.70,1,1994,2.092199e+07,1994-02-28,7.384232e+06
