In [7]:
import calcbench as cb
from calcbench.models.revisions import Revisions
import pandas as pd
import requests
pd.options.display.float_format = '${:,.2f}'.format # make floats look like dollar amounts.

In [2]:
def get_holdings(file_name: str):
    holdings = pd.read_csv(
        file_name, header=0
    )  
    holdings = holdings[:-1]
    holdings.Symbol = holdings.Symbol.replace({"GOOGL": "GOOG", "BRK.B": "BRK"})
    holdings = holdings.rename({"Symbol": "ticker"}, axis=1)
    holdings = holdings.set_index("ticker")
    holdings = holdings["% Holding"].str.rstrip("%").astype("float") * 0.01
    holdings = holdings[holdings.index.dropna()]
    holdings = (
        holdings.groupby("ticker").sum().sort_values(ascending=False)
    )  # remove duplicates from different share classes.
    return holdings

In [3]:
# From https://www.barchart.com/etfs-funds/quotes/QQQ/constituents
QQQ_holdings = get_holdings("./etf-constituents-06-02-2022 QQQ.csv")
SPY_holdings = get_holdings("./etf-constituents-06-02-2022 SPY.csv")

In [4]:
# Metrics ordered as they appear on an income statement.
income_statement_metrics = [
    "Revenue",
    "CostOfRevenue",
    "GrossProfit",
    "SGAExpense",
    "OperatingExpenses",
    "OperatingIncome",
    "EBIT",
    "InterestExpense",
    "IncomeTaxes",
    "NetIncome",
]

In [5]:
company_identifiers = SPY_holdings.index.union(QQQ_holdings.index).unique().unique()

In [8]:
d = cb.standardized(
    metrics=income_statement_metrics,
    company_identifiers=company_identifiers,
    period_type=cb.api_query_params.PeriodType.Annual,
    revisions=Revisions.MostRecent,
)
d = d[d.index.get_level_values("fiscal_period") < "2023-0"] # There are a few extraneous values from the future.

In [10]:
def build_portfolio_income_statement(d: pd.DataFrame, holdings):
    by_fund_weight = (
        d[["value"]].unstack(["metric", "fiscal_period"]).mul(holdings, axis=0)
    )
    totals = by_fund_weight.sum()
    percent_of_revenue = totals / totals.loc[:, "Revenue"]
    horizontal = percent_of_revenue.unstack("fiscal_period").sort_index(
        axis=1, ascending=False
    )  # Sort columns by fiscal year descending.
    return horizontal.loc["value"].loc[income_statement_metrics]

In [42]:
d

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,value,CIK,period_start,period_end,calendar_year,calendar_period
ticker,metric,fiscal_period,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
A,CostOfRevenue,2007-0,"$2,451,000,000.00",0001090872,2006-11-01,2007-10-31,2007,0
A,CostOfRevenue,2008-0,"$2,578,000,000.00",0001090872,2007-11-01,2008-10-31,2008,0
A,CostOfRevenue,2009-0,"$4,434,000,000.00",0001090872,2008-11-01,2009-10-31,2009,0
A,CostOfRevenue,2010-0,"$4,878,000,000.00",0001090872,2009-11-01,2010-10-31,2010,0
A,CostOfRevenue,2011-0,"$5,544,000,000.00",0001090872,2010-11-01,2011-10-31,2011,0
...,...,...,...,...,...,...,...,...
ZTS,SGAExpense,2018-0,"$1,484,000,000.00",0001555280,2018-01-01,2018-12-31,2018,0
ZTS,SGAExpense,2019-0,"$1,638,000,000.00",0001555280,2019-01-01,2019-12-31,2019,0
ZTS,SGAExpense,2020-0,"$1,726,000,000.00",0001555280,2020-01-01,2020-12-31,2020,0
ZTS,SGAExpense,2021-0,"$2,001,000,000.00",0001555280,2021-01-01,2021-12-31,2021,0


In [13]:
build_portfolio_income_statement(d, SPY_holdings)

AssertionError: Length of order must be same as number of levels (3), got 2

In [44]:
d[['value']]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,value
ticker,metric,fiscal_period,Unnamed: 3_level_1
A,CostOfRevenue,2007-0,"$2,451,000,000.00"
A,CostOfRevenue,2008-0,"$2,578,000,000.00"
A,CostOfRevenue,2009-0,"$4,434,000,000.00"
A,CostOfRevenue,2010-0,"$4,878,000,000.00"
A,CostOfRevenue,2011-0,"$5,544,000,000.00"
...,...,...,...
ZTS,SGAExpense,2018-0,"$1,484,000,000.00"
ZTS,SGAExpense,2019-0,"$1,638,000,000.00"
ZTS,SGAExpense,2020-0,"$1,726,000,000.00"
ZTS,SGAExpense,2021-0,"$2,001,000,000.00"


In [45]:
SPY_holdings

ticker
GOOG    $0.08
AAPL    $0.07
MSFT    $0.06
AMZN    $0.03
TSLA    $0.02
BRK     $0.02
JNJ     $0.01
UNH     $0.01
NVDA    $0.01
FB      $0.01
XOM     $0.01
JPM     $0.01
PG      $0.01
V       $0.01
CVX     $0.01
HD      $0.01
MA      $0.01
PFE     $0.01
BAC     $0.01
ABBV    $0.01
KO      $0.01
LLY     $0.01
AVGO    $0.01
MRK     $0.01
PEP     $0.01
TMO     $0.01
VZ      $0.01
COST    $0.01
ABT     $0.01
DIS     $0.01
CMCSA   $0.01
ADBE    $0.01
CSCO    $0.01
ACN     $0.01
MCD     $0.01
WMT     $0.01
INTC    $0.01
WFC     $0.01
LIN     $0.00
DHR     $0.00
AMD     $0.00
TXN     $0.00
PM      $0.00
BMY     $0.00
QCOM    $0.00
CRM     $0.00
T       $0.00
NKE     $0.00
NEE     $0.00
COP     $0.00
RTX     $0.00
AMGN    $0.00
UNP     $0.00
MDT     $0.00
HON     $0.00
UPS     $0.00
LOW     $0.00
CVS     $0.00
IBM     $0.00
ANTM    $0.00
MS      $0.00
SPGI    $0.00
INTU    $0.00
AMT     $0.00
CAT     $0.00
GS      $0.00
ORCL    $0.00
LMT     $0.00
SCHW    $0.00
AXP     $0.00
AMAT    $0.00