# Constrained Linear Regression Model #

### The CAPM decomposition ####

In [1]:
# Import Libraries

# Data Management
import pandas as pd
import numpy as np

#  Statistics
import statsmodels.api as sm 
from scipy.stats import t
from scipy.stats import f
from scipy.stats import chi2

# Handle Files
import sys
import os

# Import Local Functions
sys.path.append(os.path.abspath("../source"))
from functions import get_fred_data
from functions import import_financial_data

# Pretty Notation
from IPython.display import display, Math

In [65]:
# CAPM written in two equivalent forms: first in excess-return form, then with
# the risk-free terms collected so that the weights (1 - beta) and beta sum to one.

display(
    Math(r"r_{stock} = rfr + \beta\left(r_{mkt}-rfr\right) + \varepsilon"),
    Math(r"r_{stock} = \left(1-\beta\right)rfr + \beta\left(r_{mkt}\right) + \varepsilon"),
)

In [14]:
# Dependent variable: period-over-period percentage change of the AMZN
# adjusted close, with the undefined first observation removed.
stock = import_financial_data('AMZN')

y = (
    stock['adj_close']
    .pct_change()
    .dropna()
)

y

In [15]:
# Load the S&P 500 series and turn it into period-over-period returns.
# The path is assembled with os.path.join (instead of a literal with
# backslashes) so the cell also runs on macOS/Linux, where "\" is not a
# path separator.
sp500_path = os.path.join('..', 'additional_data', 'sp500.csv')

data_sp500 = pd.read_csv(sp500_path).set_index('Date')
# Use a DatetimeIndex so the series aligns by date with the other inputs
data_sp500.index = pd.to_datetime(data_sp500.index)
# Percentage change; the first row has no predecessor and is dropped
data_sp500 = data_sp500.pct_change().dropna()

data_sp500

In [16]:
# Call the Risk Free Rate (FRED series DGS2)
#
# The FRED API key is read from the environment rather than written in the
# notebook: a key committed to source control is exposed to everyone who can
# read the file. NOTE(review): the key that used to be hard-coded here should
# be revoked and replaced.
key = os.environ.get('FRED_API_KEY')
if not key:
    raise RuntimeError(
        "FRED_API_KEY is not set. Export your FRED API key as the "
        "FRED_API_KEY environment variable before running this cell."
    )

data_rfr = get_fred_data('DGS2', key)
# Remove missing observations and label the series
data_rfr = data_rfr.dropna()
data_rfr.name = 'rfr'
# Keep observations from 2015 onwards
data_rfr = data_rfr.loc['2015':]

data_rfr

In [30]:
# De-annualise the rate: percent -> decimal, then the 360th root of the gross
# annual return minus one gives the equivalent daily compounded rate.
daily_rfr = (
    data_rfr
    .div(100)
    .add(1)
    .pow(1/360)
    .sub(1)
    .dropna()
)

daily_rfr

In [31]:
# Regressor matrix on the same index as y: the daily risk-free rate and the
# market returns are aligned to y's index, then gaps are filled with the last
# available observation.
x = (
    pd.DataFrame(index=y.index)
    .assign(
        daily_rfr=daily_rfr,
        mkt_returns=data_sp500,
    )
    .ffill()
)

x

In [32]:
# Pairwise correlations between the response and each regressor

correlation = pd.concat(objs=(y, x), axis="columns").corr()

correlation

In [33]:
# The matrix
#
# Add the intercept column to the regressors, then build the response vector
# and the design matrix on exactly the same observations.

x = sm.add_constant(x)

# Remove missing observations from each object ...
Y_Vector = y.dropna()
Information_Matrix = x.dropna()

# ... and keep only the dates present in BOTH. Dropping NaNs independently can
# leave the two objects with different rows (for instance leading rows of x
# that a forward fill cannot populate), which makes the OLS fit and the X'Y
# product fail or misalign.
common_index = Y_Vector.index.intersection(Information_Matrix.index)
Y_Vector = Y_Vector.loc[common_index]
Information_Matrix = Information_Matrix.loc[common_index]

In [34]:
# Sanity check: the response and the design matrix should have the same number of rows
print(Y_Vector.shape, Information_Matrix.shape, sep="\n")

In [35]:
# Unconstrained OLS of the response on the design matrix; rows with missing
# values are dropped by statsmodels.
model = sm.OLS(endog=Y_Vector, exog=Information_Matrix, missing='drop')

# Estimate the model
results = model.fit()

# Keep the unconstrained R-squared for the comparison with the constrained fit
R2 = results.rsquared

# Full regression report
print(results.summary())

In [36]:
# Building blocks of the normal equations, used for the betas and the penalization
Information_Matrix_T = Information_Matrix.T

# X'X
A = Information_Matrix_T @ Information_Matrix

# X'Y
b = Information_Matrix_T @ Y_Vector

In [66]:
# Constrained OLS: the unconstrained solution (X'X)^{-1}X'Y shifted by a
# penalization term P that enforces the linear restriction R'beta = q.

display(
    Math(r"\beta=(X^⊤X)^{-1}(X^⊤Y)-P"),
    Math(r"P=\frac{R^⊤(X^⊤X)^{-1}(X^⊤Y)-q}{R^⊤(X^⊤X)^{-1}R}(X^⊤X)^{-1}R"),
)

In [39]:
# Ingredients of the penalization term

# Sample size and number of columns in the design matrix
n = len(Y_Vector)
k = Information_Matrix.shape[1]

# Restriction vector: ones everywhere except the first position, so the first
# column of the design matrix does not enter the restriction.
iota = np.ones(k)
iota[0] = 0
iota_T = iota.T

# Inverse of X'X
A_inv = np.linalg.inv(A)

In [42]:
# The Penalization
#
# Implements P = [(R'A^{-1}b - q) / (R'A^{-1}R)] * A^{-1}R with R = iota and
# q = 1: the scalar in front measures how far the unconstrained solution is
# from satisfying the restriction, and A^{-1}R spreads that gap across the
# coefficients. Because iota is zero in its first position, the restriction
# only involves the coefficients after the first one.

P = (((iota_T @ A_inv @ b) - 1)/(iota_T @ A_inv @ iota)) * (A_inv @ iota)

print(P)

In [43]:
# Now Obtain the betas
#
# Unconstrained least-squares solution (X'X)^{-1} X'Y, computed from the
# inverse of A and the vector b built in the earlier cells.

betas = (A_inv @ b)

print(betas)

In [45]:
# Now the adjusted betas
#
# Constrained coefficients: the unconstrained solution minus the
# penalization vector P.

betas_adj = betas - P

betas_adj

In [47]:
# Fitted values of the constrained model: design matrix times the adjusted betas

y_fitted = Information_Matrix.dot(betas_adj).rename('fitted_values')

y_fitted

In [49]:
# Obtain the errors
#
# Residuals of the constrained fit. Y_Vector (the cleaned response that the
# model is estimated on) is used instead of the raw y, so the subtraction runs
# over the estimation sample and matches the series used for the restricted
# sum of squares. Subtracting from a series with extra dates would yield NaN
# for those dates after index alignment.

residuals = Y_Vector - y_fitted
residuals.name = 'residuals'

residuals

In [50]:
# Goodness of fit of the constrained model, reported next to the unconstrained one

# Residual sum of squares under the restriction
SSR_restr = ((Y_Vector - y_fitted) ** 2).sum()

# Total sum of squares around the sample mean of the response
SST = ((Y_Vector - Y_Vector.mean()) ** 2).sum()

# R-squared of the constrained fit
R2_restr = 1 - SSR_restr / SST

print(f"The R2 of the unconstrained regression: {R2}")
print(f"The R2 of the constrained regression: {R2_restr}")

In [51]:
# Calculate Significance of the Constrained OLS
#
# Inference for the restricted least-squares estimator under the single linear
# restriction R'beta = q, with R = iota:
#   * the residual variance divides by n - k + m, because each restriction
#     gives back one degree of freedom;
#   * Var(beta_R) = s^2 * [A^-1 - A^-1 R (R'A^-1 R)^-1 R'A^-1].
#     Using s^2 * A^-1 (the unconstrained formula) overstates the standard
#     errors of the constrained coefficients.

# Number of linear restrictions imposed on the coefficients
n_restrictions = 1
df_restr = n - k + n_restrictions

Residuals_Variance = SSR_restr / df_restr

# A^-1 R; A^-1 is symmetric, so A^-1 R R' A^-1 is the outer product of this vector
A_inv_R = A_inv @ iota
Covariance_Matrix = Residuals_Variance * (
    A_inv - np.outer(A_inv_R, A_inv_R) / (iota @ A_inv_R)
)

# Clip at zero so round-off cannot produce a tiny negative variance
Beta_Standards_Errors = np.sqrt(np.clip(Covariance_Matrix.diagonal(), 0, None))

T_Values = betas_adj / Beta_Standards_Errors

# 95% two-sided critical value from the same t distribution used for the
# p-values below (instead of the normal approximation 1.96)
t_critical = t.ppf(0.975, df_restr)

Beta_Lower_Limit = betas_adj - t_critical * Beta_Standards_Errors
Beta_Upper_Limit = betas_adj + t_critical * Beta_Standards_Errors

Proof_DF = pd.DataFrame(
    {
     "Betas": betas_adj,
     "Std": Beta_Standards_Errors,
     "T_Values": T_Values,
     "Beta_Inferior_Limit": Beta_Lower_Limit,
     "Beta_Superior_Limit": Beta_Upper_Limit
     }
    )

# Two-sided p-values; round AFTER doubling so the reported value is the
# rounded p-value rather than twice a rounded one-sided tail
Proof_DF["p-values"] = (
    2 * t.sf(abs(Proof_DF.T_Values), df_restr)
).round(3)

Proof_DF

In [52]:
# F statistic for the validity of the restriction. The denominator is written
# as SSR_OLS / (n - k), with explicit parentheses, so the rendered formula
# agrees with the computation of the statistic.
display(Math(r"F=\frac{\left(SSR_{const}-SSR_{OLS}\right)/m}{SSR_{OLS}/\left(n-k\right)}"))

In [53]:
# F-test of the restriction: compare the restricted and the unrestricted fit

# Unrestricted residual sum of squares, from the statsmodels results
residuals_ols = results.resid
SSR_ols = residuals_ols.pow(2).sum()

# Number of restrictions being tested
m = 1

# Increase in the residual sum of squares per restriction, relative to the
# unrestricted residual variance
F_stat = (
    ((SSR_restr - SSR_ols) / m)
    / (SSR_ols / (n - k))
)

F_stat

In [54]:
# The p-value
#
# Upper-tail probability of the F(m, n - k) distribution at the observed
# statistic. The survival function is used instead of 1 - cdf: for a large
# statistic the cdf is so close to 1 that the subtraction loses all
# significant digits and the p-value collapses to 0.0.

p_value = f.sf(F_stat, m, n - k)

print("p-value:", p_value)

# Conclusion
if p_value < 0.05:
    print("Reject the null hypothesis: The constraint is NOT valid.")
else:
    print("Fail to reject the null: The constraint is valid.")

In [56]:
# Wald test of the same restriction, based on the unconstrained estimates

# Restriction R beta = q: the first coefficient is excluded and the other two
# are required to sum to one

R = np.array([0, 1, 1]).reshape(1, -1)
q = np.array([1]).reshape(1, 1)

# Covariance matrix of the OLS coefficient estimates
var_beta_hat = results.cov_params()

var_beta_hat

In [57]:
# Compute Wald statistic
# Gap between the restricted linear combination of the unconstrained betas and its target q
diff = R@betas - q  # (m x 1)
# Variance of that linear combination, from the OLS coefficient covariance matrix
middle = R @ var_beta_hat @ R.T  # (m x m), scalar here since m=1

# Wald statistic (scalar)
# Quadratic form diff' * middle^{-1} * diff. With q given as a 2-D array the
# result keeps two dimensions, which is why the p-value is later read with [0][0].
W = diff.T @ np.linalg.inv(middle) @ diff
print("Wald statistic:", W)

In [60]:
# p-value from Chi-squared distribution with m degrees of freedom
#
# The survival function gives the upper-tail probability directly. Computing
# 1 - cdf instead loses all precision when the statistic is large, because the
# cdf is then indistinguishable from 1 in floating point.
m = R.shape[0]  # Number of restrictions
p_value = chi2.sf(W, df=m)

# W is a 1 x 1 array, so the p-value has the same shape
print("p-value:", p_value[0][0])

# Conclusion
if p_value < 0.05:
    print("Reject the null hypothesis: The constraint is NOT valid.")
else:
    print("Fail to reject the null hypothesis: The constraint is valid.")

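The restriction is rejected because the stock's returns are practically uncorrelated with the risk-free rate, so the data do not support forcing the coefficients on the risk-free rate and on the market return to sum to one. This also shows that we cannot simply pick a Treasury yield (here the 2-year rate, DGS2) and assume it is an adequate proxy for the risk-free rate.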