# Constrained Linear Regression Model #

### The CAPM decomposition ####

In [1]:
# Import Libraries

# Data Management
import pandas as pd
import numpy as np

#  Statistics
import statsmodels.api as sm 
from scipy.stats import t
from scipy.stats import f
from scipy.stats import chi2

# Handle Files
import sys
import os

# Import Local Functions
sys.path.append(os.path.abspath("../source"))
from functions import get_fred_data
from functions import import_financial_data

# Pretty Notation
from IPython.display import display, Math

In [2]:
# The CAPM Decomposition

display(Math(r"r_{stock} = rfr + \beta\left(r_{mkt}-rfr\right) + \varepsilon"))
display(Math(r"r_{stock} = \left(1-\beta\right)rfr + \beta\left(r_{mkt}\right) + \varepsilon"))

<IPython.core.display.Math object>

<IPython.core.display.Math object>

In [3]:
# Data for Y
stock = import_financial_data('AMZN')

y = stock['adj_close'].pct_change().dropna()

y

Date
2015-01-05   -0.020517
2015-01-06   -0.022833
2015-01-07    0.010600
2015-01-08    0.006836
2015-01-09   -0.011749
                ...   
2024-10-22    0.003332
2024-10-23   -0.026305
2024-10-24    0.009041
2024-10-25    0.007780
2024-10-28    0.002981
Name: adj_close, Length: 2471, dtype: float64

In [4]:
# Call the sp500
data_sp500 = pd.read_csv(r'..\additional_data\sp500.csv')
data_sp500.set_index('Date', inplace=True)
data_sp500.index = pd.to_datetime(data_sp500.index)
data_sp500 = data_sp500.pct_change().dropna()

data_sp500

Unnamed: 0_level_0,sp_500
Date,Unnamed: 1_level_1
2015-01-05,-0.018278
2015-01-06,-0.008893
2015-01-07,0.011630
2015-01-08,0.017888
2015-01-09,-0.008404
...,...
2024-12-24,0.011043
2024-12-26,-0.000406
2024-12-27,-0.011056
2024-12-30,-0.010702


In [5]:
# Call the Risk Free Rate
key = '0174cb93931388a2bf305663e4117fd3'
data_rfr = get_fred_data('DGS2', key)
data_rfr.dropna(inplace=True)
data_rfr.name = 'rfr'
data_rfr = data_rfr.loc['2015':]

data_rfr

2015-01-02    0.66
2015-01-05    0.68
2015-01-06    0.65
2015-01-07    0.62
2015-01-08    0.62
              ... 
2025-04-11    3.96
2025-04-14    3.84
2025-04-15    3.84
2025-04-16    3.77
2025-04-17    3.81
Name: rfr, Length: 2575, dtype: float64

In [6]:
daily_rfr = (((1 + (data_rfr.div(100)))**(1/360)) - 1)
daily_rfr.dropna(inplace=True)

daily_rfr

2015-01-02    0.000018
2015-01-05    0.000019
2015-01-06    0.000018
2015-01-07    0.000017
2015-01-08    0.000017
                ...   
2025-04-11    0.000108
2025-04-14    0.000105
2025-04-15    0.000105
2025-04-16    0.000103
2025-04-17    0.000104
Name: rfr, Length: 2575, dtype: float64

In [7]:
# Create the X Matrix
x = pd.DataFrame(index = y.index)

# Variables
x['daily_rfr'] = daily_rfr
x['mkt_returns'] = data_sp500

# Forward Fill
x = x.ffill()

x

Unnamed: 0_level_0,daily_rfr,mkt_returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-01-05,0.000019,-0.018278
2015-01-06,0.000018,-0.008893
2015-01-07,0.000017,0.011630
2015-01-08,0.000017,0.017888
2015-01-09,0.000016,-0.008404
...,...,...
2024-10-22,0.000110,-0.000475
2024-10-23,0.000111,-0.009191
2024-10-24,0.000111,0.002146
2024-10-25,0.000112,-0.000299


In [8]:
# Let us check the correlations

correlation = pd.concat([y, x], axis = 1).corr()

correlation

Unnamed: 0,adj_close,daily_rfr,mkt_returns
adj_close,1.0,-0.019391,0.62415
daily_rfr,-0.019391,1.0,-0.004452
mkt_returns,0.62415,-0.004452,1.0


In [9]:
# The matrix

x = sm.add_constant(x)

Y_Vector = y.dropna()
Information_Matrix = x.dropna()

In [10]:
print(Y_Vector.shape)
print(Information_Matrix.shape)

(2471,)
(2471, 3)


In [11]:
#Model specification
model = sm.OLS(
    Y_Vector, 
    Information_Matrix,
    missing='drop'
    )   
     
#the results of the model
results = model.fit() 
    
#The Parameters
R2 = results.rsquared  

#here we check the summary
print(results.summary())   

                            OLS Regression Results                            
Dep. Variable:              adj_close   R-squared:                       0.390
Model:                            OLS   Adj. R-squared:                  0.389
Method:                 Least Squares   F-statistic:                     788.4
Date:                Fri, 18 Apr 2025   Prob (F-statistic):          1.73e-265
Time:                        19:07:16   Log-Likelihood:                 6691.9
No. Observations:                2471   AIC:                        -1.338e+04
Df Residuals:                    2468   BIC:                        -1.336e+04
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
const           0.0011      0.001      2.108      

In [12]:
# Let us calculate the betas and the penalization
Information_Matrix_T = Information_Matrix.transpose()

# Information Matrix Squared
A = Information_Matrix_T.dot(Information_Matrix)

# X*Y
b = Information_Matrix_T.dot(Y_Vector)

In [13]:
# The Constrained OLS implies a monotonic transformation

display(Math(r"\beta=(X^⊤X)^{-1}(X^⊤Y)-P"))
display(Math(r"P=\frac{R^⊤(X^⊤X)^{-1}(X^⊤Y)-q}{R^⊤(X^⊤X)^{-1}R}(X^⊤X)^{-1}R"))

<IPython.core.display.Math object>

<IPython.core.display.Math object>

In [14]:
# Calculate the penalization components

n = len(Y_Vector)
k = len(Information_Matrix.columns)

iota = np.ones(k).transpose()
iota[0] = 0
iota_T = iota.transpose()
A_inv = np.linalg.inv(A)

In [15]:
# The Penalization

P = (((iota_T @ A_inv @ b) - 1)/(iota_T @ A_inv @ iota)) * (A_inv @ iota)

print(P)

[ 4.28100191e-04 -7.98329571e+00 -2.45110245e-04]


In [16]:
# Now Obtain the betas 

betas = (A_inv @ b)

print(betas)

[ 1.10669434e-03 -8.12704536e+00  1.14350454e+00]


In [17]:
# Now the adjusted betas

betas_adj = betas - P

betas_adj

array([ 6.78594144e-04, -1.43749648e-01,  1.14374965e+00])

In [18]:
# Obtain the fitted values

y_fitted = Information_Matrix @ betas_adj
y_fitted.name = 'fitted_values'

y_fitted

Date
2015-01-05   -0.020230
2015-01-06   -0.009496
2015-01-07    0.013978
2015-01-08    0.021136
2015-01-09   -0.008936
                ...   
2024-10-22    0.000120
2024-10-23   -0.009850
2024-10-24    0.003117
2024-10-25    0.000320
2024-10-28    0.003695
Name: fitted_values, Length: 2471, dtype: float64

In [19]:
# Obtain the errors

residuals = y - y_fitted
residuals.name = 'residuals'

residuals

Date
2015-01-05   -0.000288
2015-01-06   -0.013337
2015-01-07   -0.003378
2015-01-08   -0.014300
2015-01-09   -0.002813
                ...   
2024-10-22    0.003212
2024-10-23   -0.016455
2024-10-24    0.005924
2024-10-25    0.007460
2024-10-28   -0.000714
Name: residuals, Length: 2471, dtype: float64

In [20]:
# Calculate the R-Squared

# RSS
SSR_restr = np.sum((Y_Vector - y_fitted)**2)

#TSS
SST = np.sum((Y_Vector - np.mean(Y_Vector))**2)

# R_Squared
R2_restr = 1 - (SSR_restr / SST)

print(f"The R2 of the unconstrained regression: {R2}")
print(f"The R2 of the constrained regression: {R2_restr}")

The R2 of the unconstrained regression: 0.38983898269772455
The R2 of the constrained regression: 0.3895726971511132


In [21]:
# Calculate Significance of the Constrained OLS

Residuals_Variance = SSR_restr/(n - k)

Covariance_Matrix = (Residuals_Variance)*A_inv

Beta_Standards_Errors = np.sqrt(Covariance_Matrix.diagonal())

T_Values = betas_adj/Beta_Standards_Errors

Beta_Lower_Limit = betas_adj - 1.96*Beta_Standards_Errors
Beta_Upper_Limit = betas_adj + 1.96*Beta_Standards_Errors

Proof_DF = pd.DataFrame(
    {
     "Betas": betas_adj,
     "Std": Beta_Standards_Errors,
     "T_Values": T_Values, 
     "Beta_Inferior_Limit": Beta_Lower_Limit, 
     "Beta_Superior_Limit": Beta_Upper_Limit
     }
    )

Proof_DF["p-values"] = 2*(t.sf(
    abs(Proof_DF.T_Values), 
    n-k,
    ).round(3)
    )

Proof_DF

Unnamed: 0,Betas,Std,T_Values,Beta_Inferior_Limit,Beta_Superior_Limit,p-values
0,0.000679,0.000525,1.292058,-0.000351,0.001708,0.196
1,-0.14375,7.694065,-0.018683,-15.224117,14.936618,0.986
2,1.14375,0.028817,39.690062,1.087268,1.200231,0.0


In [22]:
display(Math(r"F=\frac{\left(SSR_{const}-SSR_{OLS}\right)/m}{SSR_{ols}/n-k}"))

<IPython.core.display.Math object>

In [23]:
# Test of validity of the constraints

# Obtain the OLS RSS
residuals_ols = results.resid
SSR_ols = np.sum(residuals_ols ** 2)

# Calculate the F-Stat

# Number of restrictions
m = 1  

# F-statistic
F_stat = ((SSR_restr - SSR_ols) / m) / (SSR_ols / (n - k))

F_stat

1.077080820309646

In [24]:
# The p-value

p_value = 1 - f.cdf(F_stat, m, n - k)

print("p-value:", p_value)

# Conclusion
if p_value < 0.05:
    print("Reject the null hypothesis: The constraint is NOT valid.")
else:
    print("Fail to reject the null: The constraint is valid.")

p-value: 0.29945318297819434
Fail to reject the null: The constraint is valid.


In [25]:
# Let us make the Wald Test

# Define the Restrictions R and q

R = np.array([[0, 1, 1]])  
q = np.array([[1]])

# get the variances of the OLS betas
var_beta_hat = results.cov_params()

var_beta_hat

Unnamed: 0,const,daily_rfr,mkt_returns
const,2.757192e-07,-0.003173,-4.553023e-07
daily_rfr,-0.00317271,59.172815,0.0009867478
mkt_returns,-4.553023e-07,0.000987,0.0008300589


In [26]:
# Compute Wald statistic
diff = R@betas - q  # (m x 1)
middle = R @ var_beta_hat @ R.T  # (m x m), scalar here since m=1

# Wald statistic (scalar)
W = diff.T @ np.linalg.inv(middle) @ diff
print("Wald statistic:", W)

Wald statistic: [[1.07708082]]


In [27]:
# p-value from Chi-squared distribution with m degrees of freedom
m = R.shape[0]  # Number of restrictions
p_value = 1 - chi2.cdf(W, df=m)

print("p-value:", p_value[0][0])

# Conclusion
if p_value < 0.05:
    print("Reject the null hypothesis: The constraint is NOT valid.")
else:
    print("Fail to reject the null hypothesis: The constraint is valid.")

p-value: 0.29935151544362903
Fail to reject the null hypothesis: The constraint is valid.


The restrictions are not valid just because the stock is practically uncorrelated with the risk-free rate. We cannot just choose the treasury bond rate to use as the risk-free rate.

### Calculate the RFR ###

In [30]:
# Calculate a Model without the risk free rate

X_alt = x['mkt_returns'].dropna()

model_alt = sm.OLS(Y_Vector, sm.add_constant(X_alt), missing='drop')

results = model_alt.fit()

print(results.summary())

                            OLS Regression Results                            
Dep. Variable:              adj_close   R-squared:                       0.390
Model:                            OLS   Adj. R-squared:                  0.389
Method:                 Least Squares   F-statistic:                     1576.
Date:                Fri, 18 Apr 2025   Prob (F-statistic):          6.08e-267
Time:                        19:13:33   Log-Likelihood:                 6691.4
No. Observations:                2471   AIC:                        -1.338e+04
Df Residuals:                    2469   BIC:                        -1.337e+04
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
const           0.0007      0.000      2.065      

In [32]:
# Calculate the beta

regression_alpha = results.params.iloc[0]
regression_beta = results.params.iloc[1]

print(f'Regression Alpha: {regression_alpha}')
print(f'Regression Beta: {regression_beta}')

Regression Alpha: 0.0006709409079134515
Regression Beta: 1.1436400617129185


In [36]:
# Real Risk Free Rate

real_rfr = regression_alpha/(1-regression_beta)

real_rfr * 100

-0.4670987327020266