# Fama and French Factor Model #

### Import Data ###

In [3]:
# Import Libraries

# Data Management
import pandas as pd
import numpy as np

# Plots
import matplotlib.pyplot as plt

# Statistics
import statsmodels.api as sm

# Manipulate Files
import os

# Pretty Notation
from IPython.display import display, Math

In [4]:
# Load the market inputs. Each CSV is indexed by its 'Date' column.
# Paths are built with os.path.join so they resolve on any operating system
# (a backslash-separated literal only points at the right file on Windows).
data_dir = os.path.join("..", "additional_data")

# Risk-free rate: this file's dates are parsed as day-first
rfr = pd.read_csv(os.path.join(data_dir, "rfr.csv"))
rfr = rfr.set_index('Date')
rfr.index = pd.to_datetime(rfr.index, dayfirst=True)

# S&P 500 series
sp500 = pd.read_csv(os.path.join(data_dir, "sp500.csv"))
sp500 = sp500.set_index('Date')
sp500.index = pd.to_datetime(sp500.index)

# Per-stock betas
betas_df = pd.read_csv(os.path.join(data_dir, "betas.csv"))
betas_df = betas_df.set_index('Date')
betas_df.index = pd.to_datetime(betas_df.index)

In [5]:
# Create the Weights function
def wexp(N, half_life):
    """Return N exponentially decaying weights that sum to one.

    The weights halve every `half_life` positions and are returned in
    ascending order, so the last element carries the largest weight.
    """
    decay_rate = np.log(0.5) / half_life
    raw = np.exp(decay_rate * np.arange(N))
    normalised = raw / raw.sum()
    # Reverse so the largest weight comes last
    return normalised[::-1]

# Create the CAPM 
def CAPM(
    stock_prices: pd.Series, 
    benchmark_prices: pd.Series = sp500['sp_500'], 
    risk_free_rate: pd.Series = rfr['risk_free_rate'], 
    window: int = 252,
    WLS: bool = True,
):
    """Estimate rolling CAPM alpha and beta for one stock.

    Excess stock returns are regressed on excess benchmark returns over a
    moving window of `window` observations.

    Parameters
    ----------
    stock_prices : pd.Series
        Price series of the stock.
    benchmark_prices : pd.Series
        Price series of the benchmark.
    risk_free_rate : pd.Series
        Annual risk-free rate in percent; converted to a daily rate with
        ``(1 + r/100) ** (1/360) - 1``.
    window : int
        Number of observations in each regression window.
    WLS : bool
        If True, fit weighted least squares with exponentially decaying
        weights (half-life ``window / 2``, scaled to sum to ``window``, most
        recent observation weighted most). If False, fit ordinary least
        squares.

    Returns
    -------
    pd.DataFrame
        Columns 'alpha' and 'beta'. The row labelled with date ``t`` is
        estimated from the `window` observations that end on the observation
        just before ``t``. Windows containing a missing value are skipped and
        produce no row.
    """
    # Align time series to the same date range
    common_index = stock_prices.index.intersection(benchmark_prices.index).intersection(risk_free_rate.index)
    stock_prices = stock_prices.loc[common_index]
    benchmark_prices = benchmark_prices.loc[common_index]
    risk_free_rate = risk_free_rate.loc[common_index]

    # Compute daily returns
    stock_returns = stock_prices.pct_change(1)
    benchmark_returns = benchmark_prices.pct_change(1)
    risk_free_daily = (((1 + (risk_free_rate.div(100)))**(1/360)) - 1)  # Convert annual rate to daily

    # Excess returns
    excess_stock = stock_returns - risk_free_daily
    excess_benchmark = benchmark_returns - risk_free_daily

    # Create weights with exponential decay (scaled so they sum to `window`)
    weights = window * wexp(window, window/2)

    alphas, betas, positions = [], [], []

    for t in range(window, len(stock_returns)):
        X = excess_benchmark.iloc[t-window:t]
        y = excess_stock.iloc[t-window:t]

        # Skip incomplete windows (the very first one always contains the
        # NaN produced by pct_change)
        if X.isnull().any() or y.isnull().any():
            continue

        design = sm.add_constant(X)

        if WLS:
            model = sm.WLS(y, design, weights=weights).fit()
        else:
            model = sm.OLS(y, design).fit()

        # params[0] is the intercept (alpha), params[1] the slope (beta)
        params = model.params
        alphas.append(params.iloc[0])
        betas.append(params.iloc[1])

        # Remember which date this estimate belongs to, so skipped windows
        # cannot desynchronise the estimates from the index
        positions.append(t)

    parameters = pd.DataFrame({
        'alpha': alphas,
        'beta': betas,
    }, index=stock_returns.index[np.asarray(positions, dtype=int)])

    return parameters

In [6]:
# Folder Path (built with os.path.join so it resolves on any operating system)
folder_path = os.path.join("..", "stocks")

# Dictionary to store the DataFrames, keyed by file name without extension
dataframes = {}

# Source column name -> name used in the rest of the notebook
column_names = {
    'Adjusted_close': 'adj_close',
    'Market_cap_calculado': 'market_cap',
    'Price_to_Book_inverse': 'book_to_price',
}

# Iterate in sorted order: os.listdir returns entries in arbitrary order,
# which would otherwise make the stock ordering differ between machines
for file in sorted(os.listdir(folder_path)):
    if not file.endswith(".csv"):
        continue

    # Full path to the file
    file_path = os.path.join(folder_path, file)

    # Read the CSV file and index it by date
    df = pd.read_csv(file_path)
    df = df.set_index("Date")
    df.index = pd.to_datetime(df.index)

    # Keep only the needed columns, under their short names
    df = df[list(column_names)].rename(columns=column_names)

    # Fill nans by linear interpolation, column by column
    for column in df.columns:
        df[column] = df[column].interpolate(method='linear')

    # Keep 2015 onwards and drop rows that are still incomplete.
    # dropna() returns a new frame instead of mutating the .loc slice in place.
    df = df.loc['2015-01-01':].dropna()

    if len(df) >= 2000:
        # File name without extension
        file_name = os.path.splitext(file)[0]

        # Store in the dictionary
        dataframes[file_name] = df
        print(f"File loaded: {file_name} ({len(df)} rows)")
    else:
        print(f"File skipped (less than 2000 rows after cleaning): {file}")

print(f"\nTotal files loaded: {len(dataframes)}")
print("Files loaded:", list(dataframes.keys()))

In [7]:
# Build one DataFrame of daily simple returns, one column per stock

# The first row of each return series is the NaN left by pct_change, so drop it
rets_series = [
    df['adj_close'].pct_change(1).rename(stock).iloc[1:]
    for stock, df in dataframes.items()
]

# Put every stock side by side on the union of their dates
returns_df = pd.concat(rets_series, axis=1)

# Replace each stock's missing values with that stock's own mean
returns_df = returns_df.fillna(returns_df.mean())

# Drop any rows that still contain nans
returns_df = returns_df.dropna()

returns_df

In [8]:
# Build one DataFrame of (log) market caps, one column per stock

# Skip the first row of every stock, as done for the return series
mktcap_series = [
    df['market_cap'].rename(stock).iloc[1:]
    for stock, df in dataframes.items()
]

# Put every stock side by side on the union of their dates
mktcap_df = pd.concat(mktcap_series, axis=1)

# Replace each stock's missing values with that stock's own mean
mktcap_df = mktcap_df.fillna(mktcap_df.mean())

# Drop any rows that still contain nans
mktcap_df = mktcap_df.dropna()

# Work with log market caps.
# NOTE(review): these log values are what later cells pass on as market
# caps - confirm that log-scaled portfolio weights are intended.
mktcap_df = np.log(mktcap_df)
# Optional smoothing, currently disabled: 21-span EMA via .ewm(span=21, adjust=False).mean()
mktcap_df

In [9]:
# Build one DataFrame of book-to-price ratios, one column per stock

# Skip the first row of every stock, as done for the return series
btp_series = [
    df['book_to_price'].rename(stock).iloc[1:]
    for stock, df in dataframes.items()
]

# Put every stock side by side on the union of their dates
btp_df = pd.concat(btp_series, axis=1)

# Replace each stock's missing values with that stock's own mean
btp_df = btp_df.fillna(btp_df.mean())

# Drop any rows that still contain nans
btp_df = btp_df.dropna()
# Optional smoothing, currently disabled: 21-span EMA via .ewm(span=21, adjust=False).mean()

btp_df

### Create the Fama & French Portfolios ###

In [11]:
# Define the Decomposition Function
# Portfolio labels, in the column order of the returned DataFrame
_FF_PORTFOLIOS = (
    'small_high', 'small_neutral', 'small_low',
    'big_high', 'big_neutral', 'big_low',
)


def _ff_portfolio_weights(mkt_cap, book_to_price):
    """Sort one cross-section into the six size/value buckets and weight it.

    Parameters
    ----------
    mkt_cap : pd.Series
        Size measure per stock on the rebalance date.
    book_to_price : pd.Series
        Book-to-price ratio per stock on the rebalance date.

    Returns
    -------
    dict
        Maps each label in ``_FF_PORTFOLIOS`` to a Series of weights
        (size measure divided by the bucket total). Empty buckets map to an
        empty Series.
    """
    mkt_cap = mkt_cap.dropna()
    book_to_price = book_to_price.dropna()

    # Breakpoints are taken over every stock that has the respective measure
    size_cut = mkt_cap.median()
    low_cut = book_to_price.quantile(0.3)
    high_cut = book_to_price.quantile(0.7)

    # Only stocks with both measures can be classified
    universe = pd.concat(
        [mkt_cap.rename('mkt_cap'), book_to_price.rename('btp')],
        axis=1,
        join='inner',
    )

    # Size: strictly above the median is 'big', everything else 'small'
    size_class = pd.Series('small', index=universe.index)
    size_class = size_class.mask(universe['mkt_cap'] > size_cut, 'big')

    # Value: bottom 30% 'low', top 30% 'high' ('high' is applied last, so it
    # wins if both cut-offs coincide)
    value_class = pd.Series('neutral', index=universe.index)
    value_class = value_class.mask(universe['btp'] <= low_cut, 'low')
    value_class = value_class.mask(universe['btp'] >= high_cut, 'high')

    ff_class = size_class + '_' + value_class

    weights = {}
    for name in _FF_PORTFOLIOS:
        caps = universe.loc[ff_class == name, 'mkt_cap']
        weights[name] = caps / caps.sum()
    return weights


def fama_and_french_decomposition(
    target_df: pd.DataFrame,
    mktcap_df: pd.DataFrame,
    value_df: pd.DataFrame,
    lag_quarters: int = 0,
) -> pd.DataFrame:
    """Aggregate a per-stock quantity into the six Fama-French 2x3 portfolios.

    Stocks are sorted every quarter into small/big (median of `mktcap_df`)
    and low/neutral/high (30% and 70% quantiles of `value_df`), using the
    values on the last available date of the quarter. Within each bucket the
    stocks are weighted by their `mktcap_df` value on that date, and the
    weighted sum of `target_df` is computed for every date.

    Parameters
    ----------
    target_df : pd.DataFrame
        Quantity to aggregate (dates x stocks), e.g. daily returns or betas.
    mktcap_df : pd.DataFrame
        Size measure (dates x stocks); used for the size sort and the weights.
    value_df : pd.DataFrame
        Book-to-price ratio (dates x stocks); used for the value sort.
    lag_quarters : int, default 0
        Number of quarters between the sort and its use. With 0 (the
        original behaviour) a quarter's buckets come from that same quarter's
        last date, so dates earlier in the quarter are classified with
        information from their future. Pass 1 to use the previous quarter's
        last date instead; quarters with no sort available are left as NaN.

    Returns
    -------
    pd.DataFrame
        Indexed by the dates common to the three inputs, with columns
        'small_high', 'small_neutral', 'small_low', 'big_high',
        'big_neutral' and 'big_low'. Stocks whose target value is missing on
        a date are left out of that date's sum without renormalising the
        weights; a bucket with no usable stock yields 0.0.

    Raises
    ------
    ValueError
        If `lag_quarters` is negative.
    """
    if lag_quarters < 0:
        raise ValueError("lag_quarters must be a non-negative integer")

    # Common Indexes
    common_index = target_df.index.intersection(value_df.index).intersection(mktcap_df.index)

    # Reindex
    target_df = target_df.loc[common_index]
    mktcap_df = mktcap_df.loc[common_index]
    value_df = value_df.loc[common_index]

    # Row mask of every quarter present in the data
    quarter_labels = pd.DatetimeIndex(common_index).to_period('Q')
    rows_by_quarter = {
        quarter: quarter_labels == quarter
        for quarter in quarter_labels.unique()
    }

    # Sort and weight the stocks on the last available date of each quarter
    weights_by_quarter = {}
    for quarter, in_quarter in rows_by_quarter.items():
        rebalance_date = common_index[in_quarter][-1]
        weights_by_quarter[quarter] = _ff_portfolio_weights(
            mktcap_df.loc[rebalance_date],
            value_df.loc[rebalance_date],
        )

    # Apply each quarter's weights to all of its dates at once
    ff_portfolios = pd.DataFrame(
        index=common_index, columns=list(_FF_PORTFOLIOS), dtype=float
    )
    for quarter, in_quarter in rows_by_quarter.items():
        weights = weights_by_quarter.get(quarter - lag_quarters)
        if weights is None:
            # No sort available for this quarter: leave its rows as NaN
            continue

        quarter_values = target_df.loc[in_quarter]
        for name in _FF_PORTFOLIOS:
            bucket_weights = weights[name]
            contributions = quarter_values.reindex(
                columns=bucket_weights.index
            ).mul(bucket_weights, axis=1)
            # sum() skips missing values, i.e. drops those stocks for the day
            ff_portfolios.loc[in_quarter, name] = (
                contributions.sum(axis=1).to_numpy(dtype=float)
            )

    return ff_portfolios

In [12]:
# Aggregate the daily stock returns into the six size/value portfolios

ff_portfolio_returns = fama_and_french_decomposition(
    target_df=returns_df,
    mktcap_df=mktcap_df,
    value_df=btp_df,
)

ff_portfolio_returns

In [13]:
# Annualized mean return per portfolio, in percent (252 periods per year)

ff_portfolio_returns.mean() * 100 * 252

In [14]:
# Annualized volatility per portfolio, in percent (252 periods per year)

ff_portfolio_returns.std() * 100 * np.sqrt(252)

In [15]:
# Risk-adjusted return per portfolio: annualized mean over annualized volatility

(ff_portfolio_returns.mean() * 100 * 252) / (ff_portfolio_returns.std() * 100 * np.sqrt(252))

In [16]:
# Plot the running sum of each portfolio's returns

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(ff_portfolio_returns.cumsum(), label=ff_portfolio_returns.columns, alpha=1)

# Titles, axis labels, legend and grid
ax.set_title('Cumulative Returns Time Series')
ax.set_xlabel('Time')
ax.set_ylabel('Returns')
ax.legend()
ax.grid()

# Show
plt.show()

In [17]:
# Aggregate the stock betas into the six size/value portfolios

ff_portfolio_betas = fama_and_french_decomposition(
    target_df=betas_df,
    mktcap_df=mktcap_df,
    value_df=btp_df,
)

ff_portfolio_betas

In [18]:
# Plot the portfolio betas, smoothed with a 21-span EMA, against beta = 1
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(ff_portfolio_betas.ewm(span=21, adjust=False).mean(), label=ff_portfolio_betas.columns, alpha=1)
ax.axhline(y=1, color='black', linestyle='dashed')

# Titles, axis labels, legend and grid
ax.set_title('Betas (Size Adjusted) Time Series')
ax.set_xlabel('Time')
ax.set_ylabel('Betas')
ax.legend()
ax.grid()

# Show
plt.show()

In [19]:
# SMB (small minus big): average of the three small portfolios
# minus the average of the three big portfolios

small_total = (
    ff_portfolio_returns['small_low']
    + ff_portfolio_returns['small_neutral']
    + ff_portfolio_returns['small_high']
)
big_total = (
    ff_portfolio_returns['big_low']
    + ff_portfolio_returns['big_neutral']
    + ff_portfolio_returns['big_high']
)

SMB = (1/3)*small_total - (1/3)*big_total

SMB

In [20]:
# Plot the running sum of the SMB premium against a zero line

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(SMB.cumsum(), label='SMB Premium', color='salmon', alpha=1)
ax.axhline(y=0, color='black', linestyle='dashed')

# Titles, axis labels, legend and grid
ax.set_title('SMB Returns Time Series')
ax.set_xlabel('Time')
ax.set_ylabel('Returns')
ax.legend()
ax.grid()

# Show
plt.show()

In [21]:
# HML (high minus low): average of the two high book-to-price portfolios
# minus the average of the two low book-to-price portfolios

high_total = ff_portfolio_returns['small_high'] + ff_portfolio_returns['big_high']
low_total = ff_portfolio_returns['small_low'] + ff_portfolio_returns['big_low']

HML = (1/2)*high_total - (1/2)*low_total

HML

In [22]:
# Plot the running sum of the HML premium against a zero line

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(HML.cumsum(), label='HML Premium', color='salmon', alpha=1)
ax.axhline(y=0, color='black', linestyle='dashed')

# Titles, axis labels, legend and grid
ax.set_title('HML Returns Time Series')
ax.set_xlabel('Time')
ax.set_ylabel('Returns')
ax.legend()
ax.grid()

# Show
plt.show()

In [23]:
# Daily risk-free rate: the annual percentage rate converted with a 360-day year
daily_rfr = (1 + rfr['risk_free_rate'] / 100) ** (1/360) - 1

# Daily simple returns of the benchmark
benchmark_returns = sp500['sp_500'].pct_change()

# Market premium: benchmark return in excess of the risk-free rate
market_excess_returns = benchmark_returns - daily_rfr

In [24]:
# Pairwise correlations between the SMB, HML and market premiums

smb_hml_corr = SMB.corr(HML)
smb_market_corr = SMB.corr(market_excess_returns)
hml_market_corr = HML.corr(market_excess_returns)

print(f'SMB premium correlation  with HML premium: {smb_hml_corr}')
print(f'SMB premium correlation  with market premium: {smb_market_corr}')
print(f'HML premium correlation  with market premium: {hml_market_corr}')

In [25]:
# Plot the running sums of the market, SMB and HML premiums together

plt.figure(figsize=(10, 6))
plt.plot(market_excess_returns.cumsum(), label='Market Premium', alpha=1)
plt.plot(SMB.cumsum(), label='SMB Premium', alpha=1)
plt.plot(HML.cumsum(), label='HML Premium', alpha=1)
plt.axhline(y=0, color='black', linestyle='dashed')

# Config (the title names all three series, not only HML)
plt.title('Factor Premiums Time Series')
plt.xlabel('Time')
plt.ylabel('Returns')
plt.legend()
plt.grid()

# Show
plt.show()

In [26]:
# Store both series as CSV files.
# Paths are built with os.path.join so they resolve on any operating system
# (a backslash-separated literal only points at the right file on Windows).
output_dir = os.path.join("..", "additional_data")

SMB.to_csv(os.path.join(output_dir, "SMB.csv"))
HML.to_csv(os.path.join(output_dir, "HML.csv"))