# Multivariate Regression #

In [1]:
# Import Libraries

# Data Management
import pandas as pd
import numpy as np

# Visuals
import matplotlib.pyplot as plt

# Statistics
import statsmodels.api as sm

# Handle Files
import sys
import os

# Import Local Functions
sys.path.append(os.path.abspath("../source"))
from config import get_tickers
from data_downloader import get_market_data
from capm_toolkit import wexp

In [2]:
# Ticker universe for this notebook, provided by the project's config helper.
# NOTE(review): "3.4" presumably identifies the module/section whose ticker
# list is wanted — confirm against config.get_tickers.
tickers = get_tickers(mod="3.4")

# Display the selection
tickers

In [3]:
# Import data
# Each ticker's return series is collected first and the table is assembled
# with a single concat afterwards. Concatenating inside the loop re-copies the
# growing frame on every iteration and starts from an empty DataFrame.
returns_by_ticker = []

for ticker in tickers:
    df = get_market_data(
        ticker=ticker, 
        start_date='2010-01-01', 
        end_date='2025-01-01', 
        returns=True
    )
    
    # Keep only the returns column, labelled with the ticker symbol
    returns = df['returns'].rename(ticker)
    returns_by_ticker.append(returns)
    
    print(f'Data Ready for {ticker}')

# One column per ticker; rows are aligned on the union of the indexes.
# An empty ticker list still yields an empty DataFrame, as before.
data_regression = (
    pd.concat(returns_by_ticker, axis=1) if returns_by_ticker else pd.DataFrame()
)

In [4]:
# Display the combined returns table (one column per ticker)
data_regression

In [12]:
# X Variables
# Load the benchmark returns and use the parsed "Date" column as the index
benchmark = pd.read_csv("../additional_data/benchmark_returns.csv").set_index("Date")
benchmark.index = pd.to_datetime(benchmark.index)

# Keep the observations from 2010-01-05 onwards
benchmark = benchmark.loc['2010-01-05':]

benchmark

In [14]:
# Regressors (X): a constant column followed by the benchmark returns
Information_Matrix = sm.add_constant(benchmark['benchmark_returns'])

# Responses (Y): the per-ticker returns table, one column per asset
Y_Matrix = data_regression

In [15]:
def SimpleLinearRegressionCoefficients(
    y_matrix: pd.DataFrame, 
    information_matrix:pd.DataFrame
):
    """
    Fit one OLS regression per column of ``y_matrix`` on a shared design matrix.

    The coefficients solve the normal equations ``(X'X) B = X'Y``. All linear
    algebra is done on plain NumPy arrays, so the result does not depend on
    how pandas objects are wrapped or label-aligned by NumPy/pandas.

    Parameters
    ----------
    y_matrix : pd.DataFrame
        Dependent variables, one column per series.
    information_matrix : pd.DataFrame
        Design matrix with the same row index as ``y_matrix``. The coefficient
        of its first column is reported as ``alpha`` and the coefficient of
        its second column as ``beta``.

    Returns
    -------
    tuple of pd.Series
        ``(alphas, betas, sigmas)``, each indexed by ``y_matrix.columns`` and
        named ``'alpha'``, ``'beta'`` and ``'sigma'``. ``sigmas`` holds the
        sample standard deviation (``ddof=1``) of each column's residuals.

    Raises
    ------
    ValueError
        If the inputs have a different number of rows, if their row indexes
        are not identical (same labels in the same order), or if
        ``information_matrix`` has fewer than two columns.
    numpy.linalg.LinAlgError
        If ``X'X`` is singular.
    """
    # Check if both arrays have the same rows
    if information_matrix.shape[0] != y_matrix.shape[0]:
        raise ValueError("The rows are not coincident.")

    # The arithmetic below is positional, so the rows must refer to the same
    # observations in the same order.
    if not information_matrix.index.equals(y_matrix.index):
        raise ValueError("The row indexes are not aligned.")

    # alpha and beta are read from the first two coefficient rows
    if information_matrix.shape[1] < 2:
        raise ValueError(
            "The information matrix needs at least two columns "
            "(constant and regressor)."
        )

    # Set the components as float arrays
    X = information_matrix.to_numpy(dtype=float)
    Y = y_matrix.to_numpy(dtype=float)

    # Normal equations: solve (X'X) B = X'Y directly rather than forming the
    # explicit inverse, which is less accurate for ill-conditioned X'X.
    XTX = X.T @ X
    XTY = X.T @ Y
    coef = np.linalg.solve(XTX, XTY)

    # Fitted values and residuals
    residuals = Y - X @ coef

    # Sigmas: sample standard deviation of the residuals, per column
    stds = residuals.std(axis=0, ddof=1)

    # Output Series
    alphas = pd.Series(coef[0], index=y_matrix.columns, name='alpha')
    betas = pd.Series(coef[1], index=y_matrix.columns, name='beta')
    sigmas = pd.Series(stds, index=y_matrix.columns, name='sigma')

    return alphas, betas, sigmas


In [16]:
# Full-sample estimates: intercepts, slopes and residual sigmas per asset
alphas, betas, sigmas = SimpleLinearRegressionCoefficients(
    y_matrix=Y_Matrix,
    information_matrix=Information_Matrix,
)

# Gather the three Series into one table, one row per asset
coefficients = pd.DataFrame({'alpha': alphas, 'beta': betas, 'sigma': sigmas})

coefficients

In [19]:
# Cross-check with statsmodels: fit the same regression for a single asset
# (WMT). The original note states this route is computationally more
# efficient and gives the same results as the manual computation.
model = sm.OLS(endog=Y_Matrix['WMT'], exog=Information_Matrix)
results = model.fit()

print(results.summary())

In [20]:
# Now how can we make this rolling
def RollingOLSCoefficients(
    y_matrix: pd.DataFrame, 
    x_matrix: pd.DataFrame,
    window: int = 252
) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """
    Estimate rolling-window OLS coefficients for every column of ``y_matrix``.

    For each row position ``i`` from ``window`` to ``len(y_matrix) - 1`` the
    regression is fitted on the ``window`` rows immediately before row ``i``
    (row ``i`` itself is excluded) and the estimates are stamped with
    ``y_matrix.index[i]``.

    Parameters
    ----------
    y_matrix : pd.DataFrame
        Dependent variables, one column per series.
    x_matrix : pd.DataFrame
        Design matrix (constant plus regressor), sliced by the same row
        positions as ``y_matrix``.
    window : int, default 252
        Number of observations in each estimation window.

    Returns
    -------
    tuple of pd.DataFrame
        ``(alphas_df, betas_df, sigmas_df)``: one row per successfully fitted
        window, one column per series of ``y_matrix``.

    Raises
    ------
    ValueError
        If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError("The window must be a positive integer.")

    # Lists to store results
    alphas_list = []
    betas_list = []
    sigmas_list = []
    dates = []

    # Rolling window
    for i in range(window, len(y_matrix)):
        # Explicit positional slicing: the `window` rows that precede row i
        X_window = x_matrix.iloc[i - window:i]  # (window, 2)
        Y_window = y_matrix.iloc[i - window:i]  # (window, n_assets)

        # Calculate the coefficients; only the fit itself is guarded
        try:
            alpha, beta, sigma = SimpleLinearRegressionCoefficients(
                Y_window,
                pd.DataFrame(X_window, index=Y_window.index)
            )
        except np.linalg.LinAlgError:
            # Singular matrix, skip this window
            continue

        alphas_list.append(alpha)
        betas_list.append(beta)
        sigmas_list.append(sigma)
        dates.append(y_matrix.index[i])

    # Create DataFrames
    alphas_df = pd.DataFrame(alphas_list, index=dates)
    betas_df = pd.DataFrame(betas_list, index=dates)
    sigmas_df = pd.DataFrame(sigmas_list, index=dates)

    return alphas_df, betas_df, sigmas_df

In [21]:
# Rolling estimates with the function's default window length
alphas_series, betas_series, sigmas_series = RollingOLSCoefficients(
    y_matrix=Y_Matrix,
    x_matrix=Information_Matrix,
)

In [22]:
# Rolling alphas: one line per asset
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(alphas_series, label=alphas_series.columns, alpha=1)

# Titles, axis labels and legend
ax.set_title('Alphas Time Series')
ax.set_xlabel('Time')
ax.set_ylabel('Alphas')
ax.legend()

# Grid on, then render
ax.grid(True)
plt.show()

In [23]:
# Rolling betas: one line per asset
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(betas_series, label=betas_series.columns, alpha=1)

# Titles, axis labels and legend
ax.set_title('Betas Time Series')
ax.set_xlabel('Time')
ax.set_ylabel('Betas')
ax.legend()

# Grid on, then render
ax.grid(True)
plt.show()

In [24]:
# Rolling residual sigmas: one line per asset
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(sigmas_series, label=sigmas_series.columns, alpha=1)

# Titles, axis labels and legend
ax.set_title('Sigmas Time Series')
ax.set_xlabel('Time')
ax.set_ylabel('Sigmas')
ax.legend()

# Grid on, then render
ax.grid(True)
plt.show()