# Carhart Four Factor Model #

### Using Beta as a Factor ###

In [116]:
# Import Libraries

# Data Management
import pandas as pd
import numpy as np

# Plots
import matplotlib.pyplot as plt

# Statistics
import statsmodels.api as sm

# Manipulate Files
import os

# Pretty Notation
from IPython.display import display, Math

In [117]:
# Load the reference series: risk-free rate, S&P 500 and the stocks' betas.
# Paths are assembled with os.path.join so the notebook also runs on POSIX
# systems, where a backslash is not a path separator.
additional_data_dir = os.path.join("..", "additional_data")

# Risk-free rate (its dates are parsed with dayfirst=True, unlike the
# other two files)
rfr = pd.read_csv(os.path.join(additional_data_dir, "rfr.csv")).set_index('Date')
rfr.index = pd.to_datetime(rfr.index, dayfirst=True)

# S&P 500
sp500 = pd.read_csv(os.path.join(additional_data_dir, "sp500.csv")).set_index('Date')
sp500.index = pd.to_datetime(sp500.index)

# Stocks' betas
betas_df = pd.read_csv(os.path.join(additional_data_dir, "betas.csv")).set_index('Date')
betas_df.index = pd.to_datetime(betas_df.index)

In [118]:
# Folder that holds the stock CSV files (built with os.path.join so it is
# valid on every operating system)
folder_path = os.path.join("..", "stocks")

# Minimum number of rows a stock must have after cleaning to be kept
min_rows = 2000

# Dictionary to store the DataFrames, keyed by file name without extension
dataframes = {}

# os.listdir returns entries in arbitrary order; sorting makes the load
# order, and hence the column order of every frame built from `dataframes`,
# reproducible.
for file in sorted(os.listdir(folder_path)):
    if not file.endswith(".csv"):
        continue

    # Full path to the file
    file_path = os.path.join(folder_path, file)

    # Read the CSV file and index it by date
    df = pd.read_csv(file_path).set_index("Date")
    df.index = pd.to_datetime(df.index)

    # Keep only price and market cap, under shorter column names
    df = df[['Adjusted_close', 'Market_cap_calculado']].rename(columns={
        'Adjusted_close': 'adj_close',
        'Market_cap_calculado': 'market_cap',
    })

    # Fill NaNs by linear interpolation
    df['adj_close'] = df['adj_close'].interpolate(method='linear')
    df['market_cap'] = df['market_cap'].interpolate(method='linear')

    # Keep 2015 onwards and drop rows that are still incomplete. The result
    # is reassigned instead of calling dropna(inplace=True) on the slice.
    df = df.loc['2015-01-01':].dropna()

    if len(df) < min_rows:
        print(f"File skipped (less than {min_rows} rows after cleaning): {file}")
        continue

    # File name without extension
    file_name = os.path.splitext(file)[0]

    # Store in the dictionary
    dataframes[file_name] = df
    print(f"File loaded: {file_name} ({len(df)} rows)")

print(f"\nTotal files loaded: {len(dataframes)}")
print("Files loaded:", list(dataframes.keys()))

In [119]:
# Build one DataFrame holding the daily simple returns of every loaded stock

# One return series per stock; the first row is dropped because pct_change
# has no previous price to compare it with
rets_series = [
    prices['adj_close'].pct_change(1).rename(name).iloc[1:]
    for name, prices in dataframes.items()
]

# Put the series side by side, one column per stock
returns_df = pd.concat(rets_series, axis=1)

# Replace each stock's missing returns with that stock's mean return
returns_df = returns_df.fillna(returns_df.mean())

# Drop any row that still has a gap
returns_df = returns_df.dropna()

returns_df

In [120]:
# Build one DataFrame holding the (log) market cap of every loaded stock

# One market-cap series per stock, skipping each stock's first row
mktcap_series = [
    frame['market_cap'].rename(name).iloc[1:]
    for name, frame in dataframes.items()
]

# Put the series side by side, one column per stock
mktcap_df = pd.concat(mktcap_series, axis=1)

# Replace each stock's missing values with that stock's mean market cap
mktcap_df = mktcap_df.fillna(mktcap_df.mean())

# Drop any row that still has a gap
mktcap_df = mktcap_df.dropna()

# Take natural logs.
# NOTE(review): this frame is later passed to betas_decomposition, which
# divides each value by the class total, so the portfolio weights are
# proportional to log market cap rather than market cap - confirm intended.
mktcap_df = np.log(mktcap_df)
mktcap_df

In [121]:
# Keep only the betas of the stocks present in returns_df, in the same
# column order
betas_df = betas_df.loc[:, returns_df.columns]

betas_df

In [122]:
# Plot one stock's beta through time against the beta = 1 reference line

ticker = 'NVDA'

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(betas_df[ticker], label=f'{ticker} Rolling Beta', alpha=1)
ax.axhline(y=1, color='black', linestyle='dashed')

# Titles, axis labels, legend and grid
ax.set_title('Rolling Beta Time Series')
ax.set_xlabel('Time')
ax.set_ylabel('Beta')
ax.legend()
ax.grid()

# Render the figure
plt.show()

In [123]:
# Define the Decomposition Function
def _beta_class_weights(betas_row, mktcap_row):
    """Classify stocks by beta and weight each class by market cap.

    Parameters
    ----------
    betas_row : pd.Series
        Betas on the rebalance date, indexed by stock. NaN betas are ignored.
    mktcap_row : pd.Series
        Market-cap values on the rebalance date, indexed by stock.

    Returns
    -------
    dict
        Maps 'defensive', 'neutral' and 'aggressive' to a Series of weights
        (value divided by the class total), indexed by the stocks in the class.

    Raises
    ------
    KeyError
        If a stock with a valid beta has no entry in ``mktcap_row``.
    """
    betas = betas_row.dropna()
    lower = betas.quantile(0.3)
    upper = betas.quantile(0.7)

    # A stock meeting both conditions (possible when the two quantiles
    # coincide) is treated as aggressive.
    is_aggressive = betas >= upper
    is_defensive = (betas <= lower) & ~is_aggressive
    masks = {
        'defensive': is_defensive,
        'neutral': ~(is_defensive | is_aggressive),
        'aggressive': is_aggressive,
    }

    weights = {}
    for name, mask in masks.items():
        caps = mktcap_row.loc[betas[mask].index]
        weights[name] = caps / caps.sum()
    return weights


def _weighted_returns(returns, weights):
    """Return the weighted sum of ``returns`` columns for every row.

    A missing return contributes zero and the remaining weights are not
    renormalised. Rows in which none of the weighted stocks has a return
    are NaN.
    """
    held = returns.reindex(columns=weights.index)
    available = held.notna()
    contributions = held.mul(weights, axis=1).where(available, 0.0)
    totals = contributions.sum(axis=1, skipna=False)
    return totals.where(available.any(axis=1))


def betas_decomposition(
    target_df,
    mktcap_df,
    betas_df
):
    """Build daily returns of beta-sorted, market-cap-weighted portfolios.

    For every calendar month found in the dates shared by the three inputs,
    stocks are classified by their beta on the month's last available date:
    at or below the 30% quantile is 'defensive', at or above the 70% quantile
    is 'aggressive', everything else is 'neutral'. Within each class a stock's
    weight is its ``mktcap_df`` value on that date divided by the class total.
    These weights are applied to every row of ``target_df`` in that month.

    NOTE(review): the weights come from the month's last date but are also
    applied to the earlier dates of the same month - confirm that this
    look-ahead is intended.

    Parameters
    ----------
    target_df : pd.DataFrame
        Daily returns, dates as index and stocks as columns.
    mktcap_df : pd.DataFrame
        Market-cap values used for weighting, same layout.
    betas_df : pd.DataFrame
        Betas used for classification, same layout.

    Returns
    -------
    pd.DataFrame
        Indexed by the common dates, with columns 'aggressive', 'neutral'
        and 'defensive'. A date on which a portfolio has no usable return
        is NaN.

    Raises
    ------
    KeyError
        If a stock with a valid beta on a rebalance date is not a column of
        ``mktcap_df``.
    """
    # Dates present in all three inputs
    common_index = target_df.index.intersection(betas_df.index).intersection(mktcap_df.index)

    # Restrict every input to those dates
    target_df = target_df.loc[common_index]
    mktcap_df = mktcap_df.loc[common_index]
    betas_df = betas_df.loc[common_index]

    portfolio_names = ['aggressive', 'neutral', 'defensive']
    betas_portfolios = pd.DataFrame(index=common_index, columns=portfolio_names, dtype=float)

    # Month of every date, computed once instead of rescanning the index
    # for each month
    month_of_date = pd.DatetimeIndex(common_index).to_period('M')

    for _, month_returns in target_df.groupby(month_of_date):
        # Latest date of the month; max() does not depend on the index order
        rebalance_date = month_returns.index.max()

        weights = _beta_class_weights(
            betas_df.loc[rebalance_date],
            mktcap_df.loc[rebalance_date],
        )

        for name in portfolio_names:
            betas_portfolios.loc[month_returns.index, name] = _weighted_returns(
                month_returns, weights[name]
            )

    return betas_portfolios

In [124]:
# Build the daily returns of the aggressive, neutral and defensive portfolios

betas_portfolios_returns = betas_decomposition(
    target_df=returns_df,
    mktcap_df=mktcap_df,
    betas_df=betas_df,
)

betas_portfolios_returns

In [125]:
# Plot the running sum of daily returns for the three beta portfolios

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(betas_portfolios_returns.cumsum(), label=betas_portfolios_returns.columns, alpha=1)

# Titles, axis labels, legend and grid
ax.set_title('Cumulative Returns (Beta Adjusted) Time Series')
ax.set_xlabel('Time')
ax.set_ylabel('Returns')
ax.legend()
ax.grid()

# Render the figure
plt.show()

In [126]:
# Convert the annual risk-free rate (divided by 100, so presumably quoted in
# percent) into a daily rate by compounding over a 360-day year
annual_rfr = rfr['risk_free_rate'] / 100
daily_rfr = (1 + annual_rfr) ** (1 / 360) - 1

# Daily simple returns of the benchmark
benchmark_returns = sp500['sp_500'].pct_change(periods=1)

# Benchmark returns in excess of the daily risk-free rate
market_excess_returns = benchmark_returns.sub(daily_rfr)

In [127]:
# Assemble the regression inputs on the dates of the portfolio returns:
# excess returns of the two extreme portfolios and of the market
excess_return_columns = {
    'aggressive_excess_returns': betas_portfolios_returns['aggressive'] - daily_rfr,
    'defensive_excess_returns': betas_portfolios_returns['defensive'] - daily_rfr,
    'market_excess_returns': market_excess_returns,
}
beta_regression_df = pd.DataFrame(
    excess_return_columns,
    index=betas_portfolios_returns.index,
)

# Keep only the dates on which every column is available
beta_regression_df = beta_regression_df.dropna()

beta_regression_df

In [128]:
# Create the Weights function
def wexp(N, half_life):
    """Return N exponentially decaying weights that sum to one.

    The last element carries the largest weight, and an element
    ``half_life`` positions before it carries half of that.
    """
    decay_rate = np.log(0.5) / half_life
    ages = np.arange(N)
    raw = np.exp(decay_rate * ages)
    normalised = raw / raw.sum()
    # Reverse so that age 0 (largest weight) ends up in the last position
    return normalised[::-1]

In [129]:
# Weighted regression of the aggressive portfolio's excess returns on the
# market's excess returns (with an intercept)

y = beta_regression_df['aggressive_excess_returns']
x = sm.add_constant(beta_regression_df['market_excess_returns'])

# Exponential weights with a half-life of half the sample, rescaled by the
# sample size; the last row receives the largest weight
window = len(y)
weights = window * wexp(window, window / 2)

# Specify the model
model = sm.WLS(endog=y, exog=x, weights=weights, missing='drop')

# Fit it
results = model.fit()

# Print the regression summary
print(results.summary())

In [130]:
# Weighted regression of the defensive portfolio's excess returns on the
# market's excess returns (with an intercept)

y = beta_regression_df['defensive_excess_returns']
x = sm.add_constant(beta_regression_df['market_excess_returns'])

# Exponential weights with a half-life of half the sample, rescaled by the
# sample size; the last row receives the largest weight
window = len(y)
weights = window * wexp(window, window / 2)

# Specify the model
model = sm.WLS(endog=y, exog=x, weights=weights, missing='drop')

# Fit it
results = model.fit()

# Print the regression summary
print(results.summary())

In [131]:
# AMD premium: daily return of the aggressive portfolio minus that of the
# defensive portfolio

aggressive_leg = betas_portfolios_returns['aggressive']
defensive_leg = betas_portfolios_returns['defensive']
beta_regression_df['AMD'] = aggressive_leg.sub(defensive_leg)

In [132]:
# Plot the running sum of the AMD premium
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(beta_regression_df['AMD'].cumsum(), label='AMD Premium', color='salmon', alpha=1)

# Titles, axis labels, legend and grid
ax.set_title('AMD Returns Time Series')
ax.set_xlabel('Time')
ax.set_ylabel('Returns')
ax.legend()
ax.grid()

# Render the figure
plt.show()

In [133]:
# Correlation between the AMD premium and the market's excess returns

amd_premium = beta_regression_df['AMD']
market_premium = beta_regression_df['market_excess_returns']
amd_premium.corr(market_premium)

In [134]:
# Weighted regression of the AMD premium on the market's excess returns
# (with an intercept)

y = beta_regression_df['AMD']
x = sm.add_constant(beta_regression_df['market_excess_returns'])

# Exponential weights with a half-life of half the sample, rescaled by the
# sample size; the last row receives the largest weight
window = len(y)
weights = window * wexp(window, window / 2)

# Specify the model
model = sm.WLS(endog=y, exog=x, weights=weights, missing='drop')

# Fit it
results = model.fit()

# Print the regression summary
print(results.summary())

In [135]:
# Store the AMD premium next to the other reference series. The path is
# built with os.path.join so it is valid on every operating system.

beta_regression_df['AMD'].to_csv(os.path.join("..", "additional_data", "AMD.csv"))