# The Carhart Four Factor Model #

### Using Relative Strength to approach Momentum ###

In [116]:
# Import Libraries

# Data Management
import pandas as pd
import numpy as np

# Plots
import matplotlib.pyplot as plt

# Statistics
import statsmodels.api as sm

# Manipulate Files
import os

# Pretty Notation
from IPython.display import display, Math

In [117]:
# Auxiliary inputs live in ../additional_data. The paths are assembled with
# os.path.join so the notebook is not tied to the Windows path separator.
additional_data_dir = os.path.join("..", "additional_data")

# Risk-free rate; the 'Date' column of this file is parsed day-first
rfr = pd.read_csv(os.path.join(additional_data_dir, "rfr.csv"))
rfr = rfr.set_index('Date')
rfr.index = pd.to_datetime(rfr.index, dayfirst=True)

# S&P 500 series
sp500 = pd.read_csv(os.path.join(additional_data_dir, "sp500.csv"))
sp500 = sp500.set_index('Date')
sp500.index = pd.to_datetime(sp500.index)

# Per-stock betas
betas_df = pd.read_csv(os.path.join(additional_data_dir, "betas.csv"))
betas_df = betas_df.set_index('Date')
betas_df.index = pd.to_datetime(betas_df.index)

In [118]:
# Folder with one CSV per stock (built with os.path.join so it is portable)
folder_path = os.path.join("..", "stocks")

# Dictionary to store the DataFrames, keyed by file name without extension
dataframes = {}

# os.listdir returns entries in arbitrary order; sorting makes the stock order
# (and the column order of every frame built from this dict) reproducible.
for file in sorted(os.listdir(folder_path)):
    if not file.endswith(".csv"):
        continue

    # Read the CSV file and index it by date
    df = pd.read_csv(os.path.join(folder_path, file))
    df = df.set_index("Date")
    df.index = pd.to_datetime(df.index)

    # Keep only the two columns used downstream, under shorter names
    df = df[['Adjusted_close', 'Market_cap_calculado']].rename(columns={
        'Adjusted_close': 'adj_close',
        'Market_cap_calculado': 'market_cap',
    })

    # Fill NaNs by linear interpolation
    df['adj_close'] = df['adj_close'].interpolate(method='linear')
    df['market_cap'] = df['market_cap'].interpolate(method='linear')

    # Restrict to the sample period and drop rows that are still incomplete.
    # dropna() returns a new frame, so no slice is modified in place.
    df = df.loc['2015-01-01':].dropna()

    if len(df) >= 2000:
        # File name without extension
        file_name = os.path.splitext(file)[0]

        # Store in the dictionary
        dataframes[file_name] = df
        print(f"File loaded: {file_name} ({len(df)} rows)")
    else:
        print(f"File skipped (less than 2000 rows after cleaning): {file}")

print(f"\nTotal files loaded: {len(dataframes)}")
print("Files loaded:", list(dataframes.keys()))

In [119]:
# Build a single DataFrame with the daily simple returns of every stock
# (one column per stock)

rets_series = []

for stock, df in dataframes.items():
    # The first pct_change value has no predecessor, so it is discarded
    daily_returns = df['adj_close'].pct_change(1).iloc[1:]
    rets_series.append(daily_returns.rename(stock))

# Align all stocks on the union of their dates
returns_df = pd.concat(rets_series, axis=1)

# Fill each stock's gaps with that stock's own mean return,
# then drop any row that is still incomplete
returns_df = returns_df.fillna(returns_df.mean())
returns_df = returns_df.dropna()

returns_df

In [120]:
# Build a single DataFrame with the market cap of every stock
# (one column per stock)

mktcap_series = []

for stock, df in dataframes.items():
    # Skip the first row so the dates line up with the returns series
    market_cap = df['market_cap'].iloc[1:]
    mktcap_series.append(market_cap.rename(stock))

# Align all stocks on the union of their dates
mktcap_df = pd.concat(mktcap_series, axis=1)

# Fill each stock's gaps with that stock's own mean market cap,
# then drop any row that is still incomplete
mktcap_df = mktcap_df.fillna(mktcap_df.mean())
mktcap_df = mktcap_df.dropna()

# Work with log market caps from here on.
# NOTE(review): momentum_decomposition later derives portfolio weights from
# this frame, so those weights are proportional to log market cap rather than
# to market cap itself - confirm this is intended.
mktcap_df = np.log(mktcap_df)
mktcap_df

In [121]:
# Create the Weights function
def wexp(N, half_life):
    """Return N exponentially decaying weights that sum to one.

    The raw weight at lag k is exp(ln(0.5) * k / half_life), i.e. it halves
    every `half_life` steps. The normalised weights are returned in reversed
    order, so for a positive half-life the last element is the largest.
    """
    decay_rate = np.log(0.5) / half_life
    lags = np.arange(N)
    raw = np.exp(decay_rate * lags)
    normalised = raw / raw.sum()
    return normalised[::-1]

def n_days_nonmiss(returns, tiny_ret=1e-6):
    """Count, per column, the observations that carry a usable return.

    An observation is usable when its absolute value is strictly greater than
    `tiny_ret`. NaN entries compare False against the threshold and are
    therefore treated as missing. (The previous `~(abs <= tiny_ret)` form
    counted NaN as a valid observation.)

    Parameters
    ----------
    returns : array-like or pandas object
        Returns with observations along axis 0.
    tiny_ret : float
        Magnitude at or below which a return is considered missing.

    Returns
    -------
    Number of usable observations along axis 0.
    """
    is_usable = np.abs(returns) > tiny_ret
    return np.sum(is_usable, axis=0)

def calc_rstr(returns, half_life=126, min_obs=100, tiny_ret=1e-6):
    """Compute the relative strength of every column over one window.

    Relative strength is the weighted sum of log returns, log(1 + r). With a
    non-zero `half_life` the weights come from `wexp` and are multiplied by
    the window length; with `half_life == 0` every observation has weight one.

    Parameters
    ----------
    returns : pd.DataFrame
        Simple returns, observations on the rows and one column per asset.
    half_life : number
        Half-life passed to `wexp`; 0 selects equal weights.
    min_obs : int
        Columns with fewer usable observations (as counted by
        `n_days_nonmiss`) are set to NaN.
    tiny_ret : float
        Threshold forwarded to `n_days_nonmiss`.

    Returns
    -------
    pd.Series
        One value per column of `returns`, named after the last index label
        of the window.
    """
    n_periods = len(returns)
    log_returns = np.log(1. + returns)

    # Reduce explicitly along the rows instead of calling np.sum on the
    # DataFrame, which depends on pandas' handling of axis=None.
    if half_life == 0:
        rstr = log_returns.sum(axis=0)
    else:
        weights = n_periods * wexp(n_periods, half_life)
        rstr = log_returns.mul(weights, axis=0).sum(axis=0)

    # tiny_ret is now forwarded; it used to be accepted but silently ignored
    n_obs = n_days_nonmiss(returns, tiny_ret)
    rstr = rstr.where(n_obs >= min_obs)

    rstr.name = returns.index[-1]
    return rstr

def rolling_calc_rstr(
        returns,
        window_size=252,
        half_life=126,
        min_obs=100
):
    """Apply `calc_rstr` to every full window of `window_size` rows.

    Windows advance one row at a time. Each window yields one Series (named
    by `calc_rstr`); the Series are concatenated as columns, so the result has
    one column per window.
    """
    n_windows = len(returns) - window_size + 1

    per_window = [
        calc_rstr(
            returns=returns.iloc[start:start + window_size],
            half_life=half_life,
            min_obs=min_obs
        )
        for start in range(n_windows)
    ]

    return pd.concat(per_window, axis=1)


In [122]:
# Rolling relative strength over a long and a short horizon.
# rolling_calc_rstr returns one column per window, so each result is
# transposed to put the windows on the rows.
long_horizon = dict(window_size=252, half_life=126)
short_horizon = dict(window_size=28, half_life=14, min_obs=13)

relative_strenght_long = rolling_calc_rstr(returns_df, **long_horizon).T

relative_strenght_short = rolling_calc_rstr(returns_df, **short_horizon).T

In [123]:
# Momentum signal: long-horizon minus short-horizon relative strength.
# dropna() keeps only the rows where every stock has a value.
long_minus_short = relative_strenght_long - relative_strenght_short
relative_strenght = long_minus_short.dropna()

relative_strenght

In [124]:
# Plot the momentum signal of a single stock

ticker = 'NVDA'

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(relative_strenght[ticker], label=f'{ticker} Relative Strenght', alpha=1)
ax.axhline(y=0, color='black', linestyle='dashed')  # zero reference line

# Labels, legend and grid
ax.set_title('Relative Strenght Time Series')
ax.set_xlabel('Time')
ax.set_ylabel('Relative Strenght')
ax.legend()
ax.grid()

# Show
plt.show()

In [125]:
# Store the momentum signal. The path is built with os.path.join so it does
# not depend on the Windows path separator.

relative_strenght.to_csv(os.path.join("..", "additional_data", "momentum.csv"))

In [126]:
# Check what Carhart says about this
# Sharpe Ratio decay
# The signal's strength lasts about a month, so we can rebalance monthly to classify winners and losers
# We could look at other factors

### Calculate the Momentum Portfolios ###

In [128]:
# Define the Decomposition Function
def _momentum_class_weights(momentum, market_caps):
    """Build the loser / neutral / winner portfolios for one rebalance date.

    Parameters
    ----------
    momentum : pd.Series
        Momentum value per stock on the rebalance date; NaN values are ignored.
    market_caps : pd.Series
        Weighting variable per stock on the same date.

    Returns
    -------
    dict
        Maps 'loser', 'neutral' and 'winner' to a Series of weights indexed by
        stock, each normalised by the class total.
    """
    momentum = momentum.dropna()

    # Bottom 30% -> loser, top 30% -> winner, everything in between neutral
    lower = momentum.quantile(0.3)
    upper = momentum.quantile(0.7)

    labels = pd.Series('neutral', index=momentum.index)
    labels[momentum <= lower] = 'loser'
    # Assigned last so a stock sitting on both breakpoints ends up a winner
    labels[momentum >= upper] = 'winner'

    weights = {}
    for name in ('loser', 'neutral', 'winner'):
        members = labels[labels == name].index
        # Stocks without a weighting value cannot be weighted and are left out
        caps = market_caps.reindex(members).dropna()
        weights[name] = caps / caps.sum()
    return weights


def _weighted_average(values, weights):
    """Row-wise weighted mean of `values` over the stocks in `weights`.

    Weights are renormalised on every row over the stocks that actually have
    a value, so a missing observation does not shrink the result. Rows with
    no available stock yield NaN.
    """
    values = values.reindex(columns=weights.index)
    available = values.notna().astype(float)

    weight_sum = available.mul(weights, axis=1).sum(axis=1)
    weighted_sum = values.fillna(0.0).mul(weights, axis=1).sum(axis=1)
    return weighted_sum / weight_sum.where(weight_sum != 0)


def momentum_decomposition(
    target_df,
    mktcap_df,
    momentum_df
):
    """Compute daily winner / neutral / loser portfolio values.

    Stocks are classified once per calendar month, on the last available date
    of that month, using the 30% / 70% quantiles of `momentum_df`; within each
    class they are weighted by `mktcap_df`. The classification and weights are
    then held during the FOLLOWING month. Applying them to the month in which
    they were formed would use information that is only known at month end
    (look-ahead bias). Dates in the first month therefore have no portfolio
    and are NaN.

    Parameters
    ----------
    target_df : pd.DataFrame
        Quantity to aggregate (e.g. returns or betas), dates x stocks.
    mktcap_df : pd.DataFrame
        Weighting variable, dates x stocks.
    momentum_df : pd.DataFrame
        Momentum signal, dates x stocks.

    Returns
    -------
    pd.DataFrame
        Columns 'winner', 'neutral', 'loser', indexed by the dates common to
        the three inputs.
    """
    # Common Indexes
    common_index = target_df.index.intersection(momentum_df.index).intersection(mktcap_df.index)

    # Reindex
    target_df = target_df.loc[common_index]
    mktcap_df = mktcap_df.loc[common_index]
    momentum_df = momentum_df.loc[common_index]

    columns = ['winner', 'neutral', 'loser']
    momentum_portfolios = pd.DataFrame(float('nan'), index=common_index, columns=columns)
    if common_index.empty:
        return momentum_portfolios

    # Month of every date, computed once instead of rescanning the whole
    # index for each month
    month_of_date = common_index.to_period('M')

    # Last available date of each month = rebalance date (sorted by month)
    rebalance_dates = pd.Series(common_index, index=month_of_date).groupby(level=0).max()

    held_weights = None  # weights formed at the previous month end
    for month, rebalance_date in rebalance_dates.items():
        if held_weights is not None:
            in_month = month_of_date == month
            month_targets = target_df.loc[in_month]
            for name in columns:
                momentum_portfolios.loc[in_month, name] = _weighted_average(
                    month_targets, held_weights[name]
                ).to_numpy()

        # Classify at month end; these weights are used for the next month
        held_weights = _momentum_class_weights(
            momentum_df.loc[rebalance_date],
            mktcap_df.loc[rebalance_date],
        )

    return momentum_portfolios

In [129]:
# Daily returns of the winner / neutral / loser portfolios

momentum_portfolios_returns = momentum_decomposition(
    target_df=returns_df,
    mktcap_df=mktcap_df,
    momentum_df=relative_strenght,
)

momentum_portfolios_returns

In [130]:
# Plot the running sum of the daily portfolio returns

cumulative_returns = momentum_portfolios_returns.cumsum()

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(cumulative_returns, label=momentum_portfolios_returns.columns, alpha=1)

# Labels, legend and grid
ax.set_title('Cumulative Returns (Momentum Adjusted) Time Series')
ax.set_xlabel('Time')
ax.set_ylabel('Returns')
ax.legend()
ax.grid()

# Show
plt.show()

In [131]:
# Weighted betas of the winner / neutral / loser portfolios

momentum_portfolios_betas = momentum_decomposition(
    target_df=betas_df,
    mktcap_df=mktcap_df,
    momentum_df=relative_strenght,
)

momentum_portfolios_betas

In [132]:
# Plot the portfolio betas, smoothed with a 21-span exponential moving average
smoothed_betas = momentum_portfolios_betas.ewm(span=21, adjust=False).mean()

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(smoothed_betas, label=momentum_portfolios_betas.columns, alpha=1)
ax.axhline(y=1, color='black', linestyle='dashed')  # reference line at beta = 1

# Labels, legend and grid
ax.set_title('Betas (Size Adjusted) Time Series')
ax.set_xlabel('Time')
ax.set_ylabel('Betas')
ax.legend()
ax.grid()

# Show
plt.show()

In [133]:
# Per-day risk-free rate: the series is divided by 100 (presumably it is
# quoted in percent - confirm) and de-compounded over 360 days
rfr_decimal = rfr['risk_free_rate'] / 100
daily_rfr = (1 + rfr_decimal) ** (1/360) - 1

# One-period percentage change of the S&P 500 series
benchmark_returns = sp500['sp_500'].pct_change(1)

# Market return in excess of the risk-free rate
market_excess_returns = benchmark_returns - daily_rfr

In [134]:
# Assemble the regression inputs on the portfolio dates: excess returns of
# the winner and loser portfolios plus the market excess return
winner_returns = momentum_portfolios_returns['winner']
loser_returns = momentum_portfolios_returns['loser']

momentum_regression_df = pd.DataFrame(
    {
        'winners_excess_returns': winner_returns - daily_rfr,
        'losers_excess_returns': loser_returns - daily_rfr,
        'market_excess_returns': market_excess_returns,
    },
    index=momentum_portfolios_returns.index,
)

# Keep only the dates on which all three series are available
momentum_regression_df.dropna(inplace=True)

momentum_regression_df

In [135]:
# Beta of the winner portfolio: weighted least squares of its excess returns
# on the market excess returns (with an intercept)

y = momentum_regression_df['winners_excess_returns']
x = sm.add_constant(momentum_regression_df['market_excess_returns'])

# Exponentially decaying observation weights with a half-life of half the
# sample; wexp sums to one, so multiplying by the length makes them average one
window = len(y)
weights = window * wexp(window, window/2)

# Specify and fit the model
model = sm.WLS(y, x, missing='drop', weights=weights)
results = model.fit()

# Regression summary
print(results.summary())

In [136]:
# Beta of the loser portfolio: weighted least squares of its excess returns
# on the market excess returns (with an intercept)

y = momentum_regression_df['losers_excess_returns']
x = sm.add_constant(momentum_regression_df['market_excess_returns'])

# Exponentially decaying observation weights with a half-life of half the
# sample; wexp sums to one, so multiplying by the length makes them average one
window = len(y)
weights = window * wexp(window, window/2)

# Specify and fit the model
model = sm.WLS(y, x, missing='drop', weights=weights)
results = model.fit()

# Regression summary
print(results.summary())

In [137]:
# WML (winners minus losers) premium: winner portfolio return minus loser
# portfolio return, aligned on the regression dates

winner_leg = momentum_portfolios_returns['winner']
loser_leg = momentum_portfolios_returns['loser']
momentum_regression_df['WML'] = winner_leg - loser_leg

In [138]:
# Plot the running sum of the WML premium
cumulative_wml = momentum_regression_df['WML'].cumsum()

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(cumulative_wml, label='WML Premium', color='salmon', alpha=1)

# Labels, legend and grid
ax.set_title('WML Returns Time Series')
ax.set_xlabel('Time')
ax.set_ylabel('Returns')
ax.legend()
ax.grid()

# Show
plt.show()

In [139]:
# Correlation between the WML premium and the market excess return

wml_premium = momentum_regression_df['WML']
market_premium = momentum_regression_df['market_excess_returns']

wml_premium.corr(market_premium)

In [140]:
# Beta of the WML premium: weighted least squares of WML on the market
# excess returns (with an intercept)

y = momentum_regression_df['WML']
x = sm.add_constant(momentum_regression_df['market_excess_returns'])

# Exponentially decaying observation weights with a half-life of half the
# sample; wexp sums to one, so multiplying by the length makes them average one
window = len(y)
weights = window * wexp(window, window/2)

# Specify and fit the model
model = sm.WLS(y, x, missing='drop', weights=weights)
results = model.fit()

# Regression summary
print(results.summary())

In [141]:
# Store the WML premium. The path is built with os.path.join so it does not
# depend on the Windows path separator.

momentum_regression_df['WML'].to_csv(os.path.join("..", "additional_data", "WML.csv"))