## <center> Multi factor Model and Futures </center>

### Install packages

In [2]:
from scipy import stats
import pandas as pd
import numpy as np
import statsmodels.api as sm
import yfinance as yf
import pandas_datareader as pdr
from IPython.display import display, HTML
import datetime as dt
import getFamaFrenchFactors as gff
from fredapi import Fred
fred = Fred(api_key='8e31dd837475abc71d8beb8400da3bdf')

### Data

Get data of difference in yields between BAA and AAA rated U.S. corporate bonds.

In [3]:
BAA = fred.get_series('BAA', observation_start="1992-02-01", observation_end="2022-02-01", frequency='m')
AAA = fred.get_series('AAA', observation_start="1992-02-01", observation_end="2022-02-01", frequency='m')
credit = BAA-AAA
credit = credit.tolist()

Get data of difference in yields between 10 year and 3 months U.S. Treasuries

In [4]:
term = fred.get_series('T10Y3M', observation_start="1992-02-01", observation_end= "2022-02-01" ,frequency='m')
term = term.tolist()

Get data of S$P 500 

In [5]:
sp500 = yf.download('^GSPC','1992-01-01','2022-03-01', interval ='1mo')
sp500_rtn = sp500.pct_change()
sp500_rtn = sp500_rtn['Adj Close']
sp500_rtn.fillna(0, inplace=True)
sp500_rtn.drop(index = sp500_rtn.index[0], axis=0, inplace=True)
sp500_rtn = sp500_rtn.apply(lambda x: x* 100) # get % return
sp500_rtn.index = sp500_rtn.index + pd.offsets.MonthEnd()

[*********************100%***********************]  1 of 1 completed


Get Fama French Data

In [6]:
fama_data = gff.famaFrench3Factor(frequency="m")
fama_data.rename(columns={"date_ff_factors": 'Date'}, inplace=True)
fama_data.set_index('Date', inplace=True)
fama_data = fama_data.loc[fama_data.index >= '1992-02-01']
fama_data = fama_data.loc[fama_data.index <= '2022-02-28']
fama_data.columns = ['mkt', 'smb', 'hml', 'rf']
fama_data = fama_data.drop('mkt', axis=1)
fama_data = fama_data.apply(lambda x: x* 100) # transform data in %

In [7]:
ff_data = fama_data
ff_data['credit'] = credit
ff_data['term'] = term
ff_data['mkt'] = sp500_rtn
ff_data.fillna(0, inplace=True)
ff_data = ff_data.replace([np.inf, -np.inf], 0)
factors = ff_data.drop(['rf'], axis=1)

Load data of S&P500 Futures - Jun 2022 expiring date

In [8]:
sp500_futures = pd.read_csv('S&P 500 Futures Historical Data.csv', index_col = 0)
sp500_futures.index = pd.to_datetime(sp500_futures.index, format= '%b %y') # date index transforming
sp500_futures.index = sp500_futures.index + pd.offsets.MonthEnd()
sp500_futures_rtn = sp500_futures['Change %']
sp500_futures_rtn= sp500_futures_rtn.str.replace('%','') # remove str % from value
sp500_futures_rtn = pd.to_numeric(sp500_futures_rtn, errors='coerce')
sp500_futures_rtn.fillna(0, inplace=True)
sp500_futures_rtn = sp500_futures_rtn.iloc[::-1]
sp500_futures_rtn.drop(sp500_futures_rtn.tail(1).index,inplace=True) # adjusting size to match stocks month

Get data of The Walt Disney stock

In [9]:
disney = yf.download('DIS','1992-01-01','2022-03-1', interval ='1mo')
disney = disney.dropna()
disney_change = disney.pct_change()
disney_rtn = disney_change['Adj Close']
disney_rtn.fillna(0, inplace=True)
disney_rtn.drop(index = disney_rtn.index[0], axis=0, inplace=True)
disney_rtn = disney_rtn.apply(lambda x: x* 100) # get % return
disney_rtn.index = disney_rtn.index + pd.offsets.MonthEnd()

[*********************100%***********************]  1 of 1 completed


Get Stocks data for portfolio creation

In [10]:
tickers = ['DIS', 'CVX', 'WFC', 'BAC', 'IBM', 'PEP', 'JPM', 'GE', 'AXP', 'BRK-A']
start = dt.datetime(1992,1,1)
end = dt.datetime(2022,2,28)
portfolio = pdr.get_data_yahoo(tickers, start, end, interval='m')
portfolio.fillna(0, inplace=True)
portfolio.index = portfolio.index + pd.offsets.MonthEnd()

In [11]:
single_stocks_rtn = portfolio['Adj Close'].pct_change(1, fill_method='ffill')
single_stocks_rtn.fillna(0, inplace=True)
stocks_rtn = single_stocks_rtn.replace([np.inf, -np.inf], 0)
wts1 = [0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1] #set weights of the stocks in the portfolio
port_ret = (stocks_rtn* wts1).sum(axis = 1) # total montlhy return balanced by sotcks weights
port_ret.drop(index = port_ret.index[0], axis=0, inplace=True)
port_ret = port_ret.apply(lambda x: x* 100)

### Multi-factor model

"A multi-factor model is a financial model that employs multiple factors in its calculations to explain market phenomena and/or equilibrium asset prices. A multi-factor model can be used to explain either an individual security or a portfolio of securities. It does so by comparing two or more factors to analyze relationships between variables and the resulting performance."
<a href="https://www.investopedia.com/terms/m/multifactor-model.asp#:~:text=A%20multi%2Dfactor%20model%20is,or%20a%20portfolio%20of%20securities." title="Investopedia">Investopedia</a>

In the previous project we considered the market return as the only factor affecting the return of any asset/portfolio with the following formula:  

$E_{r} : α + β_{1}(R_{m}-R_{f}) + \epsilon$  

In this project we are also considering other factors deriving the following formula:
  
$E_{r} : α + β_{1}Mkt + β_{2}SMB + β_{3}HML + β_{4}Term + β_{5}Credit + \epsilon$

Where:
- <strong>$E_{r}$</strong> : expected return of stock/portfolio
- <strong>α</strong> : intercept
- <strong>$β_{i}$</strong> : slope coefficient for each explanatory variable
- <strong>MKT</strong> : the excess return of the market. It's the value-weighted return of all CRSP firms incorporated in the US and
listed on the NYSE, AMEX, or NASDAQ minus the 1-month Treasury Bill rate.
- <strong>SMB</strong> : (Small Minus Big) measures the excess return of stocks with small market cap over those with larger market cap. It's a size discriminant factor also called (Short-Long portoflio), long on small companies stock and short on big companies stock. The use of this factor helps to include in the evaluation the size of the companies in the portfolio which is not considered with only the risk premium as factor.
- <strong>HML</strong> : (High Minus Low) measures the excess return of value stocks over growth stocks. Value stocks have high book to price ratio (B/P) than growth stocks. It is a disciminant value, usually small companies have high evaluation (book value) compared to market value. There are 2 values:
    - growth : young corporations have a market value > book value
    - mature/value stocks : corporations have market value < book value
- <strong>term</strong> : difference in yields between 10 year and 3 months U.S. Treasuries;
- <strong>credit</strong> : difference in yields between BAA and AAA rated U.S. corporate bonds
- <strong>$\epsilon$</strong> : model error term (residual)

We can say 'even if ... valuation' no statistically signif.


### Multi factor with one stock

Defining a function to make a regression with explanatory variable (our factors) and dependent variable (stocks return)

In [12]:
def regression(explanatory, dependent):
    X = explanatory
    y = dependent
    #X1 = sm.add_constant(X)
    # make regression model 
    ff_model = sm.OLS(y, X).fit()
    # fit model and print results
    print(ff_model.summary())
    global saved_values
    saved_values = ff_model.params
    saved_values = saved_values.tolist()

Regression between The Walt Disney Stock and our factors

In [13]:
regression(disney_rtn, (port_ret - ff_data['rf']))

                                 OLS Regression Results                                
Dep. Variable:                      y   R-squared (uncentered):                   0.466
Model:                            OLS   Adj. R-squared (uncentered):              0.464
Method:                 Least Squares   F-statistic:                              313.7
Date:                Sun, 03 Apr 2022   Prob (F-statistic):                    6.30e-51
Time:                        20:04:43   Log-Likelihood:                         -1003.9
No. Observations:                 361   AIC:                                      2010.
Df Residuals:                     360   BIC:                                      2014.
Df Model:                           1                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

In [14]:
def beta(valx, valy):
    X = valx # before clean_monthly_returns['^GSPC'].tail(period)
    y = valy   # before clean_monthly_returns['DISNEY'].tail(period)
    #X1 = sm.add_constant(X)
    slope, intercept, r_value, p_value, std_err = stats.linregress(X, y)
    return round(slope,4)

## Multi factor portfolio


In [15]:
regression(factors, (port_ret - ff_data['rf']))

                                 OLS Regression Results                                
Dep. Variable:                      y   R-squared (uncentered):                   0.830
Model:                            OLS   Adj. R-squared (uncentered):              0.827
Method:                 Least Squares   F-statistic:                              346.6
Date:                Sun, 03 Apr 2022   Prob (F-statistic):                   2.22e-134
Time:                        20:04:43   Log-Likelihood:                         -797.61
No. Observations:                 361   AIC:                                      1605.
Df Residuals:                     356   BIC:                                      1625.
Df Model:                           5                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

By analyzing the results we have and R^2 of 0.827, it means that the 82% variation of returns of our portfolio can be explained by our selected factors : SMB, HML, CREDIT, TERM, MARKET.

## Futures

Run a regression where the dependent variable is your portfolio returns and the
explanatory variable is %change in futures prices; the coefficient of the explanatory
variable is the “optimal hedge ratio”

In [16]:
# N of futures needed = optimal hedge * (portfolio value/value of one SP500 futures)
def futures_hedging_number():
    slope = beta(sp500_futures_rtn, port_ret.tail(len(sp500_futures_rtn))) #data len should match, we have less futures historical data
    sp500_futures['Price']= sp500_futures['Price'].str.replace(',','')
    futures_price = pd.to_numeric(sp500_futures['Price'])
    futures_price = futures_price.iloc[0]
    futures_price = pd.to_numeric(futures_price)
    futures_number = slope * (1000000/futures_price)
    return futures_number
futures_hedging_number()

226.61448140900197

In [17]:
def p_val(valx, valy):
    X = valx
    y = valy
    X1 = sm.add_constant(X)
    slope, intercept, r_value, p_value, std_err = stats.linregress(X, y)
    return p_value

In [18]:
def alpha(valx, valy):
    X = valx # before clean_monthly_returns['^GSPC'].tail(period)
    y = valy   # before clean_monthly_returns['DISNEY'].tail(period)
    #X1 = sm.add_constant(X)
    slope, intercept, r_value, p_value, std_err = stats.linregress(X, y)
    return round(intercept,4)

In [19]:

def fill(valx, valy):
    results = pd.DataFrame({ "Factor": ['SMB', 'HML', 'CREDIT', 'TERM', 'MARKET'],
                "Values": [
                    saved_values[0],
                    saved_values[1],
                    saved_values[2],
                    saved_values[3],
                    saved_values[4]],
                "Beta": [ 
                    beta(valx.iloc[:, 0],(valy)),
                    beta(valx.iloc[:, 1], (valy)),
                    beta(valx.iloc[:, 2], (valy)),
                    beta(valx.iloc[:, 3], (valy)),
                    beta(valx.iloc[:, 4], (valy)),
                    ],
                "p-value": [
                    p_val(valx.iloc[:, 0], (valy)),
                    p_val(valx.iloc[:, 1], (valy)),
                    p_val(valx.iloc[:, 2], (valy)),
                    p_val(valx.iloc[:, 3], (valy)),
                    p_val(valx.iloc[:, 4], (valy)),
                    ],
                })
    results.set_index('Factor', inplace=True)
    display(HTML(results.to_html(classes='table table-stripde')))
    
fill(factors, (port_ret - ff_data['rf'] ))

Unnamed: 0_level_0,Values,Beta,p-value
Factor,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
SMB,-0.036911,-0.0136,0.8757265
HML,0.515776,0.4593,3.757393e-08
CREDIT,0.099995,-1.1162,0.1167839
TERM,0.027377,0.0077,0.975133
MARKET,1.087601,1.0684,1.7384119999999998e-100


In [20]:

def fill(valx, valy):
    results = pd.DataFrame({ "Factor": ['SMB', 'HML', 'CREDIT', 'TERM', 'MARKET', 'intercept'],

                "Beta": [ 
                    beta(valx.iloc[:, 0],(valy)),
                    beta(valx.iloc[:, 1], (valy)),
                    beta(valx.iloc[:, 2], (valy)),
                    beta(valx.iloc[:, 3], (valy)),
                    alpha(valx, valy),
                    ],
                "p-value": [
                    p_val(valx.iloc[:, 0], (valy)),
                    p_val(valx.iloc[:, 1], (valy)),
                    p_val(valx.iloc[:, 2], (valy)),
                    p_val(valx.iloc[:, 3], (valy)),
                    p_val(valx.iloc[:, 4], (valy)),
                    p_val(valx.iloc[:, 5], (valy)),
                    ],
                })
    results.set_index('Factor', inplace=True)
    display(HTML(results.to_html(classes='table table-stripde')))
    
fill(factors, (port_ret - ff_data['rf'] ))

ValueError: all the input array dimensions for the concatenation axis must match exactly, but along dimension 1, the array at index 0 has size 5 and the array at index 1 has size 361

In [24]:
def alpha(valx, valy):
    X = valx # before clean_monthly_returns['^GSPC'].tail(period)
    y = valy   # before clean_monthly_returns['DISNEY'].tail(period)
    #X1 = sm.add_constant(X)
    slope, intercept, r_value, p_value, std_err = stats.linregress(X, y)
    print(slope, intercept, p_value)
alpha(factors['mkt'] ,(port_ret - ff_data['rf'] )) 

1.0683595627599742 0.16030652441332516 1.7384123130020317e-100
