# By Thomas Philips - tp55@nyu.edu

In [None]:
%pip install calcbench-api-client[Pandas]>=5.13.0 numpy matplotlib scipy

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import calcbench as cb

# Get the version of scipy.stats that explicitly allows for masked arrays, as there are missing values, NaNs etc. in the data
import scipy.stats.mstats as stats

In [None]:
# The available metrics are listed at https://www.calcbench.com/home/standardizedmetrics
# Download every data item the P/B-ROE model needs for the S&P 500 constituents.
# The raw result is a Pandas dataframe indexed by a MultiIndex, because it has
# 3 dimensions (data items, securities and time); see
# https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html for details.
tickers = cb.tickers(index="SP500")
data_items = ["ROE", "StockholdersEquity", "MarketCapAtEndOfPeriod"]

# Fetch, then pivot the "metric" index level into columns and keep only the
# "value" column, leaving one column per requested data item.
data = cb.standardized(
    company_identifiers=tickers,
    metrics=data_items,
    fiscal_year=2021,
    fiscal_period=0,
).unstack(level="metric")["value"]

In [None]:
# ln(P/B) = ln(market cap / book equity) for every observation.
# Zero or negative book equity (and zero market cap) produce inf / NaN here;
# the numpy warnings are silenced because those observations are masked below.
with np.errstate(divide="ignore", invalid="ignore"):
    lnPB = np.log(data.MarketCapAtEndOfPeriod / data.StockholdersEquity)

# Now create masked arrays to identify all the missing / invalid data items.
# masked_invalid hides +/-inf as well as NaN (an unmasked inf would turn the
# OLS fit into NaN), and ravel() guarantees flat 1-D arrays of observations.
lnPB_masked = np.ma.masked_invalid(np.asarray(lnPB, dtype=float)).ravel()
ROE_masked = np.ma.masked_invalid(np.asarray(data.ROE, dtype=float)).ravel()

# Theil-Sen parameters are entered as (y,x), OLS parameters are entered as (x,y)
# Regress y on x and x on y to compute a robust R squared: the product of the
# two slopes equals R squared for OLS, and is used as its robust analogue for
# the Theil-Sen fits.
robPB_ROE_fit = stats.theilslopes(lnPB_masked, ROE_masked)
olsPB_ROE_fit = stats.linregress(ROE_masked, lnPB_masked)
robROE_PB_fit = stats.theilslopes(ROE_masked, lnPB_masked)
olsROE_PB_fit = stats.linregress(lnPB_masked, ROE_masked)

robR2_PB_ROE = robPB_ROE_fit[0] * robROE_PB_fit[0]
olsR2_PB_ROE = olsPB_ROE_fit[0] * olsROE_PB_fit[0]

print("\nln(P/B) vs. ROE: Robust R2=", robR2_PB_ROE, "OLS R2=", olsR2_PB_ROE)
print(robPB_ROE_fit)
print(olsPB_ROE_fit)

# The slope product is symmetric in x and y, so the same R2 values are reported
# for the reverse regression.
print("\nROE vs. ln(P/B): Robust R2=", robR2_PB_ROE, "OLS R2=", olsR2_PB_ROE)
print(robROE_PB_fit)
print(olsROE_PB_fit)

# Plot every observation that is valid in BOTH series (indexing the arrays
# with [0] would select a single observation rather than the whole sample).
both_valid = ~(np.ma.getmaskarray(ROE_masked) | np.ma.getmaskarray(lnPB_masked))
roe_valid = ROE_masked.data[both_valid]
lnPB_valid = lnPB_masked.data[both_valid]

# A straight line only needs its two end points.
roe_span = np.array([roe_valid.min(), roe_valid.max()])

fig = plt.figure()
axes = fig.add_subplot(111)
axes.set_xlim([-2, 3])
axes.set_ylim([-2, 6])
axes.plot(roe_valid, lnPB_valid, "b.", label="Companies")
axes.plot(roe_span, robPB_ROE_fit[1] + robPB_ROE_fit[0] * roe_span, "g-", label="Theil-Sen")
axes.plot(roe_span, olsPB_ROE_fit[1] + olsPB_ROE_fit[0] * roe_span, "r-", label="OLS")
axes.set_xlabel("ROE")
axes.set_ylabel("ln(P/B)")
axes.legend()
plt.show()