In [136]:
import calcbench as cb
import pandas as pd
import requests
# Render every float as a dollar amount (thousands separators, two decimals) in displayed output.
pd.set_option("display.float_format", "${:,.2f}".format)

In [None]:
def get_holdings(file_name: str) -> pd.Series:
    """Load an ETF constituents CSV and return each ticker's portfolio weight.

    The file must contain a ``Symbol`` column and a ``% Holding`` column whose
    values are percentage strings such as ``"6.59%"``.

    Parameters
    ----------
    file_name : str
        Path of the CSV file to read.

    Returns
    -------
    pd.Series
        Weights expressed as fractions (``"6.59%"`` -> ``0.0659``), indexed by
        ``ticker`` with one entry per ticker, sorted from largest to smallest.
    """
    raw = pd.read_csv(file_name, header=0)
    # The final row is always discarded (presumably a non-data footer line in
    # the export -- confirm against the file). Copy so the column assignment
    # below does not write into a slice of `raw`.
    constituents = raw.iloc[:-1].copy()
    # Fold alternate share classes into a single ticker so their weights are
    # combined by the groupby below.
    constituents["ticker"] = constituents["Symbol"].replace(
        {"GOOGL": "GOOG", "BRK.B": "BRK"}
    )
    constituents = constituents.set_index("ticker")
    weights = constituents["% Holding"].str.rstrip("%").astype("float") * 0.01
    # Drop rows that have no ticker with a boolean mask. Selecting by the list
    # of non-null labels instead would return every matching row once per
    # requested label, so a ticker that appears twice (GOOG after the mapping
    # above) would come back four times and be double-counted in the sum.
    weights = weights[weights.index.notna()]
    # Sum the weights of rows that now share a ticker, largest holding first.
    return weights.groupby("ticker").sum().sort_values(ascending=False)

In [129]:
# Constituent lists downloaded from https://www.barchart.com/etfs-funds/quotes/QQQ/constituents
QQQ_holdings, SPY_holdings = map(
    get_holdings,
    (
        "./etf-constituents-06-02-2022 QQQ.csv",
        "./etf-constituents-06-02-2022 SPY.csv",
    ),
)

In [128]:
# Metrics ordered as they appear on an income statement.
# This list is passed as the `metrics` argument of the cb.standardized query and
# is also used by build_portfolio_income_statement to select and order the rows
# of its result, so the order here is the order of the final table.
income_statement_metrics = [
    "Revenue",
    "CostOfRevenue",
    "GrossProfit",
    "SGAExpense",
    "OperatingExpenses",
    "OperatingIncome",
    "EBIT",
    "InterestExpense",
    "IncomeTaxes",
    "NetIncome",
]

In [6]:
# Every ticker held by either fund, each listed once (the repeated second
# .unique() call was a no-op and has been removed).
company_identifiers = SPY_holdings.index.union(QQQ_holdings.index).unique()

In [127]:
# Request annual, most-recent-revision values of every income statement metric
# for all constituents of both funds.
d = cb.standardized(
    company_identifiers=company_identifiers,
    metrics=income_statement_metrics,
    period_type=cb.api_query_params.PeriodType.Annual,
    revisions=cb.api_query_params.Revisions.MostRecent,
)
# There are a few extraneous values from the future; keep only rows whose
# fiscal_period compares below "2023-0".
d = d.loc[d.index.get_level_values("fiscal_period") < "2023-0"]

In [112]:
def build_portfolio_income_statement(d: pd.DataFrame, holdings):
    """Build a holdings-weighted income statement expressed as a share of revenue.

    Parameters
    ----------
    d : pd.DataFrame
        Must have a ``value`` column and index levels named ``metric`` and
        ``fiscal_period``. The remaining index level is presumably the ticker,
        so that it aligns with ``holdings`` -- confirm against the query result.
    holdings
        Per-company weights; multiplied row-wise against the unstacked values.

    Returns
    -------
    pd.DataFrame
        One row per entry of the module-level ``income_statement_metrics`` (in
        that order) and one column per fiscal period, newest period first. Each
        cell is the weighted total of the metric divided by the weighted total
        of ``Revenue`` for the same period.
    """
    # One row per company, one column per (metric, fiscal_period) pair.
    wide = d[["value"]].unstack(["metric", "fiscal_period"])
    weighted = wide.mul(holdings, axis=0)
    # Collapse the companies into a single weighted total per column.
    portfolio_totals = weighted.sum()
    revenue_totals = portfolio_totals.loc[:, "Revenue"]
    share_of_revenue = portfolio_totals / revenue_totals
    by_period = share_of_revenue.unstack("fiscal_period")
    # Sort columns by fiscal year descending.
    by_period = by_period.sort_index(axis=1, ascending=False)
    return by_period.loc["value"].loc[income_statement_metrics]

In [130]:
# Income statement for the SPY portfolio, with every metric expressed as a
# fraction of the portfolio's weighted revenue.
x = build_portfolio_income_statement(d, SPY_holdings)

In [135]:
# Values are fractions of revenue (Revenue is 1.00 in every period); the "$"
# prefix comes from the display.float_format option set in the first cell.
x

fiscal_period,2022-0,2021-0,2020-0,2019-0,2018-0,2017-0,2016-0,2015-0,2014-0,2013-0,2012-0,2011-0,2010-0,2009-0,2008-0,2007-0
metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Revenue,$1.00,$1.00,$1.00,$1.00,$1.00,$1.00,$1.00,$1.00,$1.00,$1.00,$1.00,$1.00,$1.00,$1.00,$1.00,$1.00
CostOfRevenue,$0.52,$0.52,$0.54,$0.53,$0.54,$0.53,$0.50,$0.50,$0.54,$0.54,$0.52,$0.52,$0.50,$0.50,$0.54,$0.53
GrossProfit,$0.48,$0.48,$0.46,$0.47,$0.46,$0.46,$0.47,$0.47,$0.46,$0.46,$0.47,$0.48,$0.50,$0.50,$0.46,$0.47
SGAExpense,$0.13,$0.13,$0.14,$0.15,$0.14,$0.15,$0.15,$0.15,$0.16,$0.15,$0.15,$0.16,$0.17,$0.16,$0.16,$0.14
OperatingExpenses,$0.26,$0.26,$0.29,$0.27,$0.27,$0.27,$0.27,$0.27,$0.27,$0.25,$0.26,$0.25,$0.27,$0.27,$0.27,$0.27
OperatingIncome,$0.22,$0.23,$0.18,$0.18,$0.19,$0.19,$0.19,$0.19,$0.19,$0.20,$0.20,$0.20,$0.20,$0.18,$0.17,$0.18
EBIT,$0.21,$0.24,$0.18,$0.19,$0.20,$0.19,$0.19,$0.20,$0.20,$0.20,$0.21,$0.20,$0.19,$0.17,$0.16,$0.17
InterestExpense,$0.01,$0.01,$0.01,$0.02,$0.02,$0.02,$0.01,$0.01,$0.01,$0.01,$0.01,$0.02,$0.02,$0.03,$0.04,$0.06
IncomeTaxes,$0.03,$0.04,$0.03,$0.03,$0.05,$0.05,$0.05,$0.05,$0.05,$0.05,$0.05,$0.05,$0.05,$0.04,$0.05,$0.05
NetIncome,$0.18,$0.20,$0.15,$0.16,$0.15,$0.14,$0.14,$0.14,$0.15,$0.15,$0.15,$0.15,$0.14,$0.13,$0.10,$0.12
