In [4]:
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import statsmodels.api as sm
import wrds
import pandas_datareader.data as web

def get_factors(factors='CAPM',freq='daily'):   
    
    if freq=='monthly':
        freq_label=''
    else:
        freq_label='_'+freq


    if factors=='CAPM':
        fama_french = web.DataReader("F-F_Research_Data_Factors"+freq_label, "famafrench",start="1921-01-01")
        daily_data = fama_french[0]
    
     
        df_factor = daily_data[['RF','Mkt-RF']] 
    elif factors=='FF3':
        fama_french = web.DataReader("F-F_Research_Data_Factors"+freq_label, "famafrench",start="1921-01-01")
        daily_data = fama_french[0]

        df_factor = daily_data[['RF','Mkt-RF','SMB','HML']]
    elif factors=='FF5':

        fama_french = web.DataReader("F-F_Research_Data_Factors"+freq_label, "famafrench",start="1921-01-01")
        daily_data = fama_french[0]

        df_factor = daily_data[['RF','Mkt-RF','SMB','HML']]
        fama_french2 = web.DataReader("F-F_Research_Data_5_Factors_2x3"+freq_label, "famafrench",start="1921-01-01")
        daily_data2 = fama_french2[0]

        df_factor2 = daily_data2[['RMW','CMA']]
        df_factor=df_factor.merge(df_factor2,on='Date',how='outer')    
        
    else:
        fama_french = web.DataReader("F-F_Research_Data_Factors"+freq_label, "famafrench",start="1921-01-01")
        daily_data = fama_french[0]

        df_factor = daily_data[['RF','Mkt-RF','SMB','HML']]
        fama_french2 = web.DataReader("F-F_Research_Data_5_Factors_2x3"+freq_label, "famafrench",start="1921-01-01")
        daily_data2 = fama_french2[0]

        df_factor2 = daily_data2[['RMW','CMA']]
        df_factor=df_factor.merge(df_factor2,on='Date',how='outer')   
        fama_french = web.DataReader("F-F_Momentum_Factor"+freq_label, "famafrench",start="1921-01-01")
        df_factor=df_factor.merge(fama_french[0],on='Date')
        df_factor.columns=['RF','Mkt-RF','SMB','HML','RMW','CMA','MOM']    
    if freq=='monthly':
        df_factor.index = pd.to_datetime(df_factor.index.to_timestamp())
    else:
        df_factor.index = pd.to_datetime(df_factor.index)
        


    return df_factor/100

def get_daily_wrds_multiple_ticker(tickers,conn):
  
    # Retrieve PERMNOs for the specified tickers
    permnos = conn.get_table(library='crsp', table='stocknames', columns=['permno', 'ticker', 'namedt', 'nameenddt'])
    permnos['nameenddt']=pd.to_datetime(permnos['nameenddt'])
    permnos = permnos[(permnos['ticker'].isin(tickers)) & (permnos['nameenddt']==permnos['nameenddt'].max())]
    # Extract unique PERMNOs
    permno_list = permnos['permno'].unique().tolist()
    print(permno_list)

    # Query daily stock file for the specified PERMNOs
    query = f"""
        SELECT permno, date, ret, retx, prc       
        FROM crsp.dsf
        WHERE permno IN ({','.join(map(str, permno_list))})
        ORDER BY date
    """
    daily_returns = conn.raw_sql(query, date_cols=['date'])
    daily_returns = daily_returns.merge(permnos[['permno', 'ticker']], on='permno', how='left')
    # Pivot data to have dates as index and tickers as columns
    daily_returns = daily_returns.pivot(index='date', columns='ticker', values='ret')    
    daily_returns=daily_returns[tickers]



    return daily_returns

# Multi-factor models

So far we have focused on the market as our single factor. 

In practice it is standard to use factor models with many factors

Additional factors

- soak up risk making measure of alpha easier
- Difference out other sources of expected excess returns that are easy to get access to
- Allows for better risk management


We deal with this, by simply adding more factors to our model. Say we now have $m$ different factors

$$r_t^i=b_{i,1}f_t^1+b_{i,2}f_t^2+b_{i,3}f_t^3+...+b_{i,m}f_t^m+\epsilon_{i,t}$$


Where $b_{i,j}$ measures the exposure of asset $i$ to factor $j$

IF we stack these exposures in a m by 1 vector $B_i=[b_{i,1},b_{i,2},...b_{i,M}]$ and the factors in a m by 1 vector $F_t=[f^1_t,f^2_t,...,f^m_t]$ we can write this in matrix notation

$$r_t^i=B_i@F_t+u_{i,t}$$


As before we can also stack the individual returns :

$$R_t=B@F_t+U_t$$

where

-  $R_t$ is a n by 1 vector with the excess returns of the n assets
-  $B$ is n by m matrix where each row has the exposure of an asset with respect to each M factor and each column has the exposures of the different assets with respect to a particular factor 
- $U_t$ as before is a n by 1 vector with the residual risk of each asset


## Estimating a multi-factor model: The Time-series approach

Again, this is a multivariate regression

For each asset We run a tim-series regression with the excess returns of the asset as the the dependent variable and the excess returns on the factors as the independent variables



## Application

What do you get when you invest in a Momentum ETF?

1. Get daily return data on the larger ETFs claiming to implement the momentum factor
2. Get factors excess returns: Market, Size (SMB), Value (HML), Profitability (RMW), Investment (CMA) , and Momentum (MOM)
3. Run a time series regression for each ETF on the factors
4. Look at alphas and betas



In [11]:
tickers = ["MTUM", "SPMO", "XMMO", "IMTM", "XSMO", "PDP", "JMOM", "DWAS", "VFMO", "XSVM", "QMOM"]
conn=wrds.Connection()
# Get daily returns for the specified tickers
df_ETF=get_daily_wrds_multiple_ticker(tickers,conn)
# Get daily factors
df_factor=get_factors('FF6','daily')
# Align the dataframes
df_ETF, df_factor = df_ETF.align(df_factor, join='inner', axis=0)
# Subtract risk-free rate from ETF returns
df_ETF=df_ETF.subtract(df_factor['RF'],axis=0)

WRDS recommends setting up a .pgpass file.
Created .pgpass file successfully.
You can create this file yourself at any time with the create_pgpass_file() function.
Loading library list...
Done
[13512, 13851, 15161, 15725, 17085, 17392, 17622, 90621, 90622, 90623, 91876]


  df = read_csv(StringIO("Date" + src[start:]), **params)
  df = read_csv(StringIO("Date" + src[start:]), **params)
  df = read_csv(StringIO("Date" + src[start:]), **params)


In [12]:


import statsmodels.api as sm

X = df_factor.drop(columns=['RF'])
X = sm.add_constant(X)  # Adds a constant term to the predictor
y = df_ETF[tickers[1]]
X=X[y.isna()==False]
y=y[y.isna()==False]
model = sm.OLS(y, X).fit(dropna=True)
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:                   SPMO   R-squared:                       0.867
Model:                            OLS   Adj. R-squared:                  0.867
Method:                 Least Squares   F-statistic:                     2242.
Date:                Sat, 21 Dec 2024   Prob (F-statistic):               0.00
Time:                        20:05:28   Log-Likelihood:                 8217.0
No. Observations:                2069   AIC:                        -1.642e+04
Df Residuals:                    2062   BIC:                        -1.638e+04
Df Model:                           6                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const       4.572e-05      0.000      0.454      0.6

In [None]:
Results=pd.DataFrame([],index=tickers,columns=X.columns)
for ticker in tickers:
    y = df_ETF[ticker]
    X = df_factor.drop(columns=['RF'])
    X = sm.add_constant(X) 
    X=X[y.isna()==False]
    y=y[y.isna()==False]
    model = sm.OLS(y, X).fit(dropna=True)
    Results.loc[ticker,:]=model.params
    Results.at[ticker,'t_alpha']=model.tvalues['const']
    Results.at[ticker,'ivol']=model.resid.std()*252**0.5
    #Results.at[ticker,X.columns[1:]]=model.params[X.columns[1:]]

Results.loc[:,'const']=Results.loc[:,'const']*252
Results.rename(columns={'const':'alpha'},inplace=True)
Results=Results[['alpha','t_alpha','Mkt-RF','SMB','HML','RMW','CMA','MOM','ivol']]
Results

Unnamed: 0,alpha,t_alpha,Mkt-RF,SMB,HML,RMW,CMA,MOM,ivol
MTUM,-0.005012,-0.300789,1.01803,-0.107,-0.062138,-0.117292,-0.011854,0.316422,0.054317
SPMO,0.011521,0.454353,0.999749,-0.167316,-0.038834,-0.024316,0.076116,0.245338,0.072404
XMMO,0.006211,0.322732,1.043315,0.320758,0.020201,0.030898,-0.090631,0.193039,0.083264
IMTM,-0.028412,-0.849759,0.798808,-0.019567,0.075919,-0.126253,0.078522,0.127906,0.099686
XSMO,-0.011826,-0.570276,0.987091,0.846153,0.173362,0.084136,-0.077522,0.161215,0.089721
PDP,-0.012504,-0.811631,1.054572,0.138325,-0.006651,-0.079877,-0.205302,0.252667,0.063018
JMOM,0.006281,0.276272,0.963352,0.007072,-0.07626,-0.07507,-0.052893,0.099189,0.056012
DWAS,-0.001649,-0.076007,1.093378,1.079318,0.233582,-0.230806,-0.087208,0.408747,0.07313
VFMO,0.018697,0.859105,1.030837,0.446082,0.183954,-0.222156,-0.053664,0.396948,0.052463
XSVM,-0.002226,-0.115417,0.935784,0.977751,0.502337,0.319454,0.254056,-0.064561,0.083454


How should we evaluate these funds?

Which fund is "better"? Is it all about alpha in this case?

What are other things that we should be looking at?

Is this table providing a fair comparison across funds?


## Variance decomposition

The betas measure the exposure of the asset return to the factor, but it does not give an accurate way of thinking which factor drives the most of the variation in the asset as factors can have very different variances

$$1=\frac{Cov(r^i_t,r^i_t)}{Var(r^i_t)}=\frac{Cov(r^i_t,\sum_j^m \beta_{i,j}f^j_t+\epsilon^i_t)}{Var(r^i_t)}$$

$$1=\frac{\sum_j^m \beta_{i,j}Cov(r^i_t,f^j_t)+\sigma^2_{\epsilon}}{Var(r^i_t)}$$


The the variance share of factor j is $\frac{\beta_{i,j}Cov(r^i_t,f^j_t)}{Var(r^i_t)}$ and the share of non-factor variance is $\frac{\sigma^2_{\epsilon}}{Var(r^i_t)}$



In [None]:
Results=pd.DataFrame([],index=tickers,columns=X.columns[1:])
for ticker in tickers:
    y = df_ETF[ticker]
    X = df_factor.drop(columns=['RF'])
    X = sm.add_constant(X) 
    X=X[y.isna()==False]
    y=y[y.isna()==False]
    model = sm.OLS(y, X).fit(dropna=True)
    # get the covariance matrix of the factors and the dependent variable
    CovMatrix=pd.concat([y,X.iloc[:,1:]],axis=1).cov()
    # get the column of the covariance matrix corresponding to the dependent variable and exclude itself to get the covariance of 
    # the dependent variable with each factor
    Results.loc[ticker,:]=model.params[1:]*CovMatrix.iloc[1:,0]/y.var()*100
    # Get the residual variance
    Results.at[ticker,'epsilon']=model.resid.var()/y.var()*100



np.floor(Results)

Unnamed: 0,Mkt-RF,SMB,HML,RMW,CMA,MOM,epsilon
MTUM,85,-1,0,1,0,4,8.0
SPMO,85,-1,0,0,-1,2,13.0
XMMO,83,4,0,-1,0,-2,13.0
IMTM,68,-1,-1,1,-1,-1,31.0
XSMO,70,16,1,-1,0,-3,14.0
PDP,90,1,-1,0,1,-2,7.0
JMOM,90,0,1,0,0,-1,6.0
DWAS,64,23,-1,2,0,1,8.0
VFMO,82,7,-2,2,0,2,4.0
XSVM,60,18,10,-2,0,1,10.0


What do we learn?

Decompositions like that extensively in the money management industry

- Used to classify managers in terms of styles--often called style analysis
- Used to control own portfolio factor risk to satisfy investment mandates 

When looking at a portfolio/fund you have two approaches to measure it's factor exposures
- Top down: what we did so far. run a time-series regression of the portfolio returns on the factor return
- Bottom up: from the asset factor exposures build the portfolio factor exposure
  - The bottom up is more data intensive as it requires data on portfolio holding
  - The bottom up allows allows for sharp changes in factor exposures as the portfolio composition changes


## From assets factor Risk to portfolio factor risk

The basic approach:

Consider portfolio $X$ that earns excess returns $r=X@R$ where R is the vector of asset excess returns.

Asset's excess returns satisfy a factor model

$$R=A+B@F+U$$

then the portfolio satisfies

$$r=X@R=X@(A+B@F+U)=X@A+X@B@F+X@U$$

In scalar notation this is simply

$$r=\sum_i x_i r_i=\sum_i x_i\alpha_i+ \sum_j \sum_i x_i \beta_{i,j}f_j+\sum_i x_i\epsilon_i$$

So the portfolio exposure to factor j is simply the dollar-weighted average of the asset betas

$$\beta_{p,j}=\sum_i x_i \beta_{i,j}$$

- For portfolios with high turnover, this approach will lead to better measurement of factor risk
- For portfolios that do not trade, then measuring individual asset betas might introduce unnecessary noise

Example:


Things to do

- Cross-sectional regression
- Factor management?

Big to do list

- Class on the factors
- Machine learning

In [103]:
import pandas as pd

date1='2014-12-31'
date2='2016-12-31'
# Define the portfolio data
portfolio_data1 = {
    'date': [date1,date1,date1,date1,date1],
    'ticker': ['AAPL', 'GOOGL', 'MSFT','NVDA','AMZN'],
    'weight': [0.25,0.25, 0.25,0.25,0.25]
}

portfolio_data2 = {
    'date': [date2,date2,date2,date2,date2],
    'ticker': ['COST', 'WMT', 'TGT','KR','AMZN'],
    'weight': [0.25,0.25, 0.25,0.25,0.25]
}
# Concatenate the two dataframes
portfolio_df1 = pd.DataFrame(portfolio_data1)
portfolio_df2 = pd.DataFrame(portfolio_data2)
portfolio_df = pd.concat([portfolio_df1, portfolio_df2], ignore_index=True)



# Convert the date column to datetime
portfolio_df['date'] = pd.to_datetime(portfolio_df['date'])

print(portfolio_df)

        date ticker  weight
0 2014-12-31   AAPL    0.25
1 2014-12-31  GOOGL    0.25
2 2014-12-31   MSFT    0.25
3 2014-12-31   NVDA    0.25
4 2014-12-31   AMZN    0.25
5 2016-12-31   COST    0.25
6 2016-12-31    WMT    0.25
7 2016-12-31    TGT    0.25
8 2016-12-31     KR    0.25
9 2016-12-31   AMZN    0.25


In [46]:
import datetime as dt
import wrds
import psycopg2 
from dateutil.relativedelta import *
from pandas.tseries.offsets import *

###################
conn=wrds.Connection() 


crsp = conn.raw_sql("""
                      select a.permno,a.permco, a.date, b.shrcd, b.exchcd,b.ticker,
                      a.ret, a.shrout, a.prc
                      from crsp.msf as a
                      left join crsp.msenames as b
                      on a.permno=b.permno
                      and b.namedt<=a.date
                      and a.date<=b.nameendt
                      where a.date between '01/31/2013' and '12/31/2018'
                      and b.exchcd between 1 and 3
                      and b.shrcd between 10 and 11
                      """, date_cols=['date']) 


  df_holdings_tab = pd.read_csv("C:/Dropbox/Teaching/Fin 418/Datasets/Thompsonreuters_stockholdings.txt", delim_whitespace=True)



Tab separator:
      fdate     cusip fundno  shares
0  20061231  D1819089      4   35198
1  20061231  N7248210      4  164919
2  20061231  00176510      7  200000
3  20061231  00911910      7  118387
4  20061231  01958930      7  950141


In [3]:
Beta=pd.DataFrame(index=Assets.columns[:-1],columns=Factors.columns,dtype=float)
VarU=pd.DataFrame(index=Assets.columns[:-1],columns=['VarU'],dtype=float)
x= sm.add_constant(Factors)
for stocki in Assets.columns:
    y= Assets[stocki]
    # run the regression
    results= sm.OLS(y,x).fit()
    # store beta parameters skipping the constant
    Beta.loc[stocki,:]=results.params[1:]
    # store the residual variance
    VarU.loc[stocki,'VarU']=results.resid.var()

Beta

Unnamed: 0,Mkt-RF,SMB,HML,Cnsmr,Manuf,HiTec,Hlth,Other
CTL,1.773661,-0.584068,0.013224,0.591461,-0.151144,-0.368082,0.024628,-0.729825
T,-0.765983,-0.70887,0.519897,0.565646,0.168277,0.789765,0.338093,-0.314557
CSCO,-1.209827,0.063438,0.304196,-0.0304,-0.0173,1.789234,0.132417,0.326659
FCX,0.579567,0.240257,-0.104024,0.38478,1.676865,-0.040493,-0.662841,-0.3798
XL,-2.138662,0.087942,-0.098236,0.743264,0.194891,0.325159,-0.088233,2.389057
IVZ,-2.943381,0.143413,-0.059142,0.369939,0.983337,1.722916,0.600754,1.188089
AMT,-3.074392,0.521988,0.450322,-0.642305,1.21921,2.357371,0.86701,0.144035
WHR,-0.123852,0.510154,1.116104,1.142745,-0.24719,0.323533,0.157214,0.360687
IR,-1.389932,0.025733,0.109128,1.001246,0.943648,0.638801,-0.102587,0.521383
WFT,-2.30703,0.411222,-0.381759,-0.359601,2.801833,0.651049,-0.450652,0.940941


## Reconstructing the co-variance matrix

We have

$$R_t=B@F_t+u_t$$

Then

$$Var(R_t)=B@Var(F_t)@B.T+Var(U_t)$$

The big difference is that now $F$ is a vector of factors


so $Var(F_t)$ is a M by M variance covariance matrix

In [4]:
Var_F=Factors.cov()
Cov_F=Beta @ Var_F @ Beta.T + np.diag(VarU['VarU'].values)
Cov_F

Unnamed: 0,CTL,T,CSCO,FCX,XL,IVZ,AMT,WHR,IR,WFT,...,JCI,SWK,DVN,TMO,PEP,LNC,EMR,MLM,CCI,NU
CTL,0.00655,0.001408,0.002038,0.002353,0.002344,0.003068,0.001478,0.002345,0.002524,0.002014,...,0.001866,0.001834,0.001465,0.001725,0.000972,0.002767,0.001863,0.001482,0.002324,0.000972
T,0.001408,0.004688,0.00184,0.001529,0.002097,0.002673,0.001342,0.002003,0.002108,0.001364,...,0.001478,0.001464,0.001073,0.001628,0.000898,0.002289,0.001655,0.001333,0.001848,0.000682
CSCO,0.002038,0.00184,0.011381,0.004141,0.003665,0.007108,0.005885,0.004123,0.004269,0.003808,...,0.003918,0.002949,0.001776,0.003202,0.000737,0.004472,0.002989,0.002185,0.006565,0.000827
FCX,0.002353,0.001529,0.004141,0.016977,0.004431,0.006325,0.004123,0.004492,0.005487,0.007251,...,0.004302,0.004351,0.005067,0.002811,0.000827,0.005794,0.003777,0.003399,0.005176,0.001782
XL,0.002344,0.002097,0.003665,0.004431,0.016149,0.00639,0.001929,0.006672,0.005633,0.004734,...,0.005489,0.00478,0.003223,0.002946,0.001755,0.008175,0.004011,0.004223,0.003393,0.001667
IVZ,0.003068,0.002673,0.007108,0.006325,0.00639,0.013659,0.006441,0.006358,0.006517,0.006437,...,0.005675,0.004862,0.004039,0.004611,0.001651,0.007721,0.004788,0.004047,0.007738,0.001823
AMT,0.001478,0.001342,0.005885,0.004123,0.001929,0.006441,0.031007,0.002818,0.003462,0.004313,...,0.002842,0.002246,0.002495,0.003106,0.000404,0.0034,0.002591,0.001729,0.006626,0.000725
WHR,0.002345,0.002003,0.004123,0.004492,0.006672,0.006358,0.002818,0.013016,0.005424,0.004185,...,0.005186,0.004573,0.002741,0.002847,0.001674,0.007106,0.003566,0.003761,0.003953,0.001647
IR,0.002524,0.002108,0.004269,0.005487,0.005633,0.006517,0.003462,0.005424,0.010436,0.005557,...,0.004651,0.004379,0.003777,0.003135,0.001402,0.006352,0.003864,0.003627,0.004728,0.001701
WFT,0.002014,0.001364,0.003808,0.007251,0.004734,0.006437,0.004313,0.004185,0.005557,0.015806,...,0.004336,0.004459,0.006108,0.002894,0.000642,0.006455,0.004048,0.003746,0.005082,0.001747


- If we compare the in-sample Variance of our minimum variance portfolios for
    - The unrestricted case
    - The single-factor covariance
    - The multi-factor covariance

- which one will have lowest variance? What will have the highest?
- Now split the sample in two. Repeat the covariance estimation procedure for each of these approaches for the first half of the sample
- Now use the weights to compute the variance of each of the portfolios in the second half
- Is the order likely to change? Why? Why not?

## Application: How will your portfolio risk change as you add positions


You have portfolio $X_0$ and you want to sell w of your positions to invest in a fund with portfolio $X_1$. How your portfolio variance will change as a function of you reallocation?

- The answer is simple

$$Var(wX_1R_t+(1-w)X_0R_t)-Var(X_0R_t)$$

- But also kind of misleading since you might not have good data to estimate the variance of the new portfolio

- Now if you know each portfolio factor betas,$\beta_0=X_0@B$ and $\beta_1=X_1@B$ , and at least one of this portfolio is large and well diversified, then for small tilts, i.e. $w$ small, we have 


$$\frac{Var(wX_1R_t+(1-w)X_0R_t)-Var(X_0R_t)}{\Delta w}|_{w\approx 0} =\beta_1Var(F)\beta_0'$$


- The fact that one is well diversified just means that you can ignore the covariance-terms of the portfolios asset specific risks

- So you see above why a large pool of money when allocating money to an active manager will want to regulate their factor exposure

- funds with similar volatilities will be perceived as very different risks depending on how the exposure of portfolio relates to the exposure of the fund




## Performance Attribution


- We can use factor models to decompose a manager strategy

- What explains their returns? 

- tilts they have?  What kind of stocks they like?


### Application: What does Cathie Wood  Likes ? 

![fig](../../assets/plots/CW_image.jfif)



Cathie Wood is a renowned stock-picker and the founder of ARK Invest, which manages around 60 billion in assets and invests in innovative technologies such as self-driving cars and genomics. She gained fame for her success in the male-dominated world of investing, her persuasive investment arguments, and her proven track record in the stock market. Prior to founding ARK Invest, she gained experience at The Capital Group, Jennison Associates, and AllianceBernstein, and co-founded Tupelo Capital Management, a hedge fund. Wood is known for her unconventional investment strategies and her advocacy for investing in disruptive technologies, which has garnered her a large following in the investing world. Her estimated net worth is around $250 million.


Citations:
https://www.nytimes.com/2021/08/22/business/cathie-wood-ark-stocks.html




In [5]:
df=pd.read_pickle('https://raw.githubusercontent.com/amoreira2/Fin418/main/assets/data/df_WarrenBAndCathieW.pkl')
_temp=df.dropna()
# select the columns to use as factors
Factors=_temp.drop(['BRK','RF','ARKK'],axis=1)
Factors.head(3)
ArK=_temp.ARKK-_temp.RF

What are these factors?

* HML is the value strategy that buys high book to market firms and sell low book to market firms

* SMB is a size strategy that buys firms with low market capitalization and sell firms with high market capitalizations

* RmW is the strategy that buys firms with high gross profitability and sell firms with low gross profitability

* CmA is the strategy that buys firms that are investing little (low CAPEX) and sell firms that are investing a lot (high CAPEX) 

* MOM is the momentum strategy that buy stocks that did well in the last 12 months and short the ones that did poorly


We will discuss more later

for now just think of them as important trading strategies that practicioners know

In [6]:

x= sm.add_constant(Factors)
y= ArK
results= sm.OLS(y,x).fit()
results.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.781
Model:,OLS,Adj. R-squared:,0.78
Method:,Least Squares,F-statistic:,1069.0
Date:,"Thu, 28 Mar 2024",Prob (F-statistic):,0.0
Time:,16:24:08,Log-Likelihood:,5908.9
No. Observations:,1804,AIC:,-11800.0
Df Residuals:,1797,BIC:,-11770.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.0004,0.000,1.821,0.069,-3.03e-05,0.001
Mkt-RF,1.1736,0.020,58.714,0.000,1.134,1.213
SMB,0.6944,0.037,18.984,0.000,0.623,0.766
HML,-0.6521,0.038,-16.938,0.000,-0.728,-0.577
RMW,-0.9037,0.054,-16.883,0.000,-1.009,-0.799
CMA,-0.5034,0.071,-7.129,0.000,-0.642,-0.365
Mom,-0.0397,0.025,-1.559,0.119,-0.090,0.010

0,1,2,3
Omnibus:,55.31,Durbin-Watson:,2.135
Prob(Omnibus):,0.0,Jarque-Bera (JB):,128.873
Skew:,0.116,Prob(JB):,1.04e-28
Kurtosis:,4.289,Cond. No.,345.0


- How much can we explain of ARKK return behavior?

- What kind of stocks CW likes?

- How much of her portfolio variance comes from market exposure alone?

- If you were to construct a replicating portfolio of her fund

- What would be the volatility of your residual risk?






## "Endogenous" Benchmarking


* it is common for large portfolio allocators to set benchmarks for the managers that they allocate to

* The most common benchmark is simply returns of the S&P500 which is almost the same thing as the returns of the market portfolio ( large caps dominate the returns of any market-cap portfolio)

* You might also have endogenous benchmarks

* Use a set of Factors F and estimate $r^b_t=\sum \beta_j F_{j,t}$ 

* I.e use as a bechmark the multifactor combination that best replicates the portfolio.

* typically this is not done contractually but implicitly: You will allocate to the different funds based on their alpha

* Captures the idea that one should pay different prices for alpha (very hard to get) and beta( easier, the gains are in implementation)



