# Fama and French Factor Model #

### Import Data ###

In [73]:
# Import Libraries

# Data Management
import pandas as pd
import numpy as np

# Plots
import matplotlib.pyplot as plt

# Handle Files
import sys
import os

# Import Local Functions
sys.path.append(os.path.abspath("../source"))
from functions import import_stock_universe
from capm_toolkit import compute_daily_returns
from capm_toolkit import compute_excess_returns
from capm_toolkit import capm_regression
from portfolios_toolkit import calculate_analytics

In [3]:
# Load the reference series used throughout the notebook.
# Paths are built with os.path.join so the notebook also runs on systems whose
# path separator is not a backslash (on Windows the resulting strings are
# identical to the previous hard-coded ones).
data_dir = os.path.join("..", "additional_data")

# Risk-free rate, indexed by date (the file stores dates day-first)

rfr = pd.read_csv(os.path.join(data_dir, "rfr.csv"))
rfr = rfr.set_index('Date')
rfr.index = pd.to_datetime(rfr.index, dayfirst=True)

# S&P 500 series, indexed by date

sp500 = pd.read_csv(os.path.join(data_dir, "sp500.csv"))
sp500 = sp500.set_index('Date')
sp500.index = pd.to_datetime(sp500.index)

# Per-stock beta series, indexed by date

betas_df = pd.read_csv(os.path.join(data_dir, "capm_hbetas.csv"))
betas_df = betas_df.set_index('date')
betas_df.index = pd.to_datetime(betas_df.index)

In [4]:
# Dictionary of per-stock DataFrames returned by import_stock_universe.
# The folder path is built with os.path.join so it is valid on any OS
# (on Windows it is the same string as the previous hard-coded path).
folder_path = os.path.join("..", "stocks")

# The first list names the source columns to read and the second the short
# names used for them in the rest of the notebook
# (presumably a positional rename; confirm in functions.import_stock_universe).
dataframes = import_stock_universe(
    folder_path,
    ['Adjusted_close', 'Company Market Cap', 'Price_to_Book'],
    ['adj_close', 'mkt_cap', 'ptb'],
)

In [5]:
# Inspect the object returned by import_stock_universe
dataframes

In [6]:
# Assemble one DataFrame of daily simple returns, one column per stock

# Per-stock series: percentage change of the adjusted close, named after the
# ticker, with the first (undefined) observation removed
rets_series = [
    df['adj_close'].pct_change(1).rename(stock).iloc[1:]
    for stock, df in dataframes.items()
]

# Join on dates, then fill each stock's gaps with that stock's own mean return
returns_df = pd.concat(rets_series, axis=1)
returns_df = returns_df.fillna(returns_df.mean())

# Discard any date that still carries a missing value
returns_df = returns_df.dropna()

returns_df

In [14]:
# Assemble one DataFrame of market capitalisations, one column per stock
mktcap_series = [
    df['mkt_cap'].rename(stock)
    for stock, df in dataframes.items()
]

# Join on dates, take natural logs, smooth with a span-5 EMA and back-fill
# whatever gaps remain
mktcap_df = (
    pd.concat(mktcap_series, axis=1)
    .pipe(np.log)
    .ewm(span=5, adjust=False)
    .mean()
    .bfill()
)
mktcap_df

In [19]:
# Assemble one DataFrame of price-to-book ratios, one column per stock

# Each series is named after its ticker and loses its first observation
ptb_series = [
    df['ptb'].rename(stock).iloc[1:]
    for stock, df in dataframes.items()
]

# Join on dates, smooth with a span-5 EMA and back-fill whatever gaps remain
ptb_df = (
    pd.concat(ptb_series, axis=1)
    .ewm(span=5, adjust=False)
    .mean()
    .bfill()
)

ptb_df

In [21]:
# Work with book-to-price, i.e. the reciprocal of price-to-book
btp_df = ptb_df.rdiv(1)

btp_df

In [45]:
# Derive the daily risk-free series and the market excess returns with the
# capm_toolkit helpers
risk_free_series = rfr['risk_free_rate']
daily_rfr = compute_daily_returns(risk_free_series)
market_excess_returns = compute_excess_returns(sp500['sp_500'], risk_free_series)

### Testing for Size Factor ###

In [31]:
# Plot mktcap_df for a handful of sample tickers

tickers = ['NVDA', 'MSFT', 'WMT', 'DG', 'GOOG', 'FMC']

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(mktcap_df[tickers], label=tickers, alpha=1)

# Titles, legend and grid
ax.set_title('Market Capitalization Time Series')
ax.set_xlabel('Time')
ax.set_ylabel('Mkt Cap')
ax.legend()
ax.grid()

# Render
plt.show()

In [32]:
# Define the Decomposition Function
def size_decomposition(
    target_df,
    mktcap_df
):
    """Aggregate per-stock values into 'big' and 'small' weighted portfolios.

    Stocks are re-sorted every calendar quarter using ``mktcap_df`` on the last
    common date of that quarter: names strictly above the cross-sectional
    median are 'big', all others are 'small'.  Inside each group the weights
    are proportional to ``mktcap_df`` on that rebalance date, and they are
    applied to every row of ``target_df`` that falls in the same quarter.

    Parameters
    ----------
    target_df : pandas.DataFrame
        Values to aggregate, one column per stock (e.g. daily returns or
        betas).  The index must hold timestamps.
    mktcap_df : pandas.DataFrame
        Sorting / weighting variable with the same column labels and a
        timestamp index.

    Returns
    -------
    pandas.DataFrame
        Columns ``'big'`` and ``'small'``, indexed by the dates present in
        both inputs.

    Notes
    -----
    * Weights are renormalised each day over the group members that actually
      have a value in ``target_df``, so a missing observation no longer
      shrinks the portfolio value towards zero.
    * A date on which no group member has a value yields NaN (previously the
      dot product of two empty vectors silently produced 0.0).
    * NOTE(review): the sort uses end-of-quarter data and is applied to that
      same quarter, so the portfolios use information not available at the
      start of the quarter.  Left unchanged here; confirm whether a one-quarter
      lag is wanted.
    """
    # Restrict both inputs to the dates they share
    common_index = target_df.index.intersection(mktcap_df.index)
    target_df = target_df.loc[common_index]
    mktcap_df = mktcap_df.loc[common_index]

    size_portfolios = pd.DataFrame(
        float('nan'), index=common_index, columns=['big', 'small']
    )
    if common_index.empty:
        return size_portfolios

    # Quarter label of every date, computed once instead of per date per quarter
    periods = pd.DatetimeIndex(common_index).to_period('Q')

    for quarter in periods.unique():
        in_quarter = periods == quarter

        # Rebalance on the last available date of the quarter
        rebalance_date = common_index[in_quarter][-1]
        caps = mktcap_df.loc[rebalance_date].dropna()

        # Median split: strictly above the median is 'big'
        is_big = caps > caps.median()
        quarter_values = target_df.loc[in_quarter]

        for label, members in (('big', caps[is_big]), ('small', caps[~is_big])):
            weights = members / members.sum()

            # Stocks absent from target_df become all-NaN columns and are
            # therefore treated as unavailable
            values = quarter_values.reindex(columns=weights.index)
            available = values.notna()

            weighted_sum = values.fillna(0.0).mul(weights, axis=1).sum(axis=1)
            weight_in_use = available.mul(weights, axis=1).sum(axis=1)

            # Renormalise by the weight actually invested; NaN when nothing is
            portfolio = (weighted_sum / weight_in_use).where(available.any(axis=1))
            size_portfolios.loc[in_quarter, label] = portfolio.to_numpy()

    return size_portfolios

In [33]:
# Size-sorted portfolios built from the stock returns

size_portfolio_returns = size_decomposition(
    target_df=returns_df,
    mktcap_df=mktcap_df,
)

size_portfolio_returns

In [74]:
# Analytics Table
# Summarise the big / small portfolio series with the project helper
# calculate_analytics (imported from portfolios_toolkit).

size_analytics_table = calculate_analytics(size_portfolio_returns)

size_analytics_table

In [37]:
# Plot the running sum of the size-sorted portfolio returns

fig, ax = plt.subplots(figsize=(10, 6))
cumulative_size_returns = size_portfolio_returns.cumsum()
ax.plot(cumulative_size_returns, label=size_portfolio_returns.columns, alpha=1)

# Titles, legend and grid
ax.set_title('Cumulative Returns (Size Adjusted) Time Series')
ax.set_xlabel('Time')
ax.set_ylabel('Returns')
ax.legend()
ax.grid()

# Render
plt.show()

In [38]:
# Size-sorted portfolios built from the per-stock betas

size_portfolio_betas = size_decomposition(
    target_df=betas_df,
    mktcap_df=mktcap_df,
)

size_portfolio_betas

In [39]:
# Plot the size-sorted portfolio betas, smoothed with a span-21 EMA
fig, ax = plt.subplots(figsize=(10, 6))
smoothed_size_betas = size_portfolio_betas.ewm(span=21, adjust=False).mean()
ax.plot(smoothed_size_betas, label=size_portfolio_betas.columns, alpha=1)

# Reference line at beta = 1
ax.axhline(y=1, color='black', linestyle='dashed')

# Titles, legend and grid
ax.set_title('Betas (Size Adjusted) Time Series')
ax.set_xlabel('Time')
ax.set_ylabel('Betas')
ax.legend()
ax.grid()

# Render
plt.show()

In [46]:
# Regression inputs for the size portfolios: each portfolio's return less the
# daily risk-free series, next to the market excess returns. Rows with any
# missing value are dropped.
size_regression_df = (
    pd.DataFrame(index=size_portfolio_returns.index)
    .assign(
        big_excess_returns=size_portfolio_returns['big'] - daily_rfr,
        small_excess_returns=size_portfolio_returns['small'] - daily_rfr,
        market_excess_returns=market_excess_returns,
    )
    .dropna()
)

size_regression_df

In [47]:
# CAPM regression: big-portfolio excess returns on market excess returns

x = size_regression_df['market_excess_returns']
y = size_regression_df['big_excess_returns']

# The window passed to the helper equals the number of observations in y
window = y.shape[0]

# Fit the model (the meaning of the trailing True flag is defined in capm_toolkit)
results = capm_regression(y, x, window, True)

# Print the regression summary
print(results.summary())

In [48]:
# CAPM regression: small-portfolio excess returns on market excess returns

x = size_regression_df['market_excess_returns']
y = size_regression_df['small_excess_returns']

# The window passed to the helper equals the number of observations in y
window = y.shape[0]

# Fit the model (the meaning of the trailing True flag is defined in capm_toolkit)
results = capm_regression(y, x, window, True)

# Print the regression summary
print(results.summary())

In [49]:
# Small-Minus-Big (SMB) premium: small-portfolio return less big-portfolio return

size_regression_df['SMB'] = size_portfolio_returns['small'].sub(
    size_portfolio_returns['big']
)

In [50]:
# Plot the running sum of the SMB premium with a zero reference line
fig, ax = plt.subplots(figsize=(10, 6))
cumulative_smb = size_regression_df['SMB'].cumsum()
ax.plot(cumulative_smb, label='SMB Premium', color='salmon', alpha=1)
ax.axhline(y=0, color='black', linestyle='dashed')

# Titles, legend and grid
ax.set_title('SMB Returns Time Series')
ax.set_xlabel('Time')
ax.set_ylabel('Returns')
ax.legend()
ax.grid()

# Render
plt.show()

In [51]:
# Check the Correlation with the Market
# (Series.corr between the SMB premium and the market excess returns)

size_regression_df['SMB'].corr(size_regression_df['market_excess_returns'])

In [52]:
# CAPM regression: SMB premium on market excess returns

x = size_regression_df['market_excess_returns']
y = size_regression_df['SMB']

# The window passed to the helper equals the number of observations in y
window = y.shape[0]

# Fit the model (the meaning of the trailing True flag is defined in capm_toolkit)
results = capm_regression(y, x, window, True)

# Print the regression summary
print(results.summary())

### Testing for Value Factor ###

In [54]:
# Plot the book-to-price history for a handful of sample tickers

tickers = ['NVDA', 'MSFT', 'WMT', 'DG', 'GOOG', 'FMC']

plt.figure(figsize=(10, 6))
plt.plot(btp_df[tickers], label=tickers, alpha=1)

# Config
# The plotted frame is btp_df (book-to-price), so the title and axis label
# name that ratio rather than price-to-book.
plt.title('Book to Price Time Series')
plt.xlabel('Time')
plt.ylabel('BTP')
plt.legend()
plt.grid()

# Show
plt.show()

In [55]:
# Define the Decomposition Function

def value_decomposition(target_df, mktcap_df, value_df):
    """Aggregate per-stock values into 'high', 'neutral' and 'low' portfolios.

    Stocks are re-sorted every calendar quarter using ``value_df`` on the last
    common date of that quarter: values at or above the 70th percentile are
    'high', values at or below the 30th percentile are 'low' (a stock that
    meets both conditions is 'high'), everything else is 'neutral'.  Inside
    each group the weights are proportional to ``mktcap_df`` on the rebalance
    date, and they are applied to every row of ``target_df`` in that quarter.

    Parameters
    ----------
    target_df : pandas.DataFrame
        Values to aggregate, one column per stock (e.g. daily returns or
        betas).  The index must hold timestamps.
    mktcap_df : pandas.DataFrame
        Weighting variable with the same column labels and a timestamp index.
    value_df : pandas.DataFrame
        Sorting variable with the same column labels and a timestamp index.

    Returns
    -------
    pandas.DataFrame
        Columns ``'high'``, ``'neutral'`` and ``'low'``, indexed by the dates
        present in all three inputs.

    Notes
    -----
    * Stocks whose weighting value is missing on the rebalance date (or that
      are absent from ``mktcap_df``) are left out of the weights instead of
      turning the whole group into NaN or raising ``KeyError``.
    * Weights are renormalised each day over the group members that actually
      have a value in ``target_df``.
    * A date on which no group member has a value yields NaN (previously the
      dot product of two empty vectors silently produced 0.0).
    * NOTE(review): the sort uses end-of-quarter data and is applied to that
      same quarter, so the portfolios use information not available at the
      start of the quarter.  Left unchanged here; confirm whether a one-quarter
      lag is wanted.
    """
    # Restrict all inputs to the dates they share
    common_index = target_df.index.intersection(value_df.index).intersection(mktcap_df.index)
    target_df = target_df.loc[common_index]
    mktcap_df = mktcap_df.loc[common_index]
    value_df = value_df.loc[common_index]

    value_portfolios = pd.DataFrame(
        float('nan'), index=common_index, columns=['high', 'neutral', 'low']
    )
    if common_index.empty:
        return value_portfolios

    # Quarter label of every date, computed once instead of per date per quarter
    periods = pd.DatetimeIndex(common_index).to_period('Q')

    for quarter in periods.unique():
        in_quarter = periods == quarter

        # Rebalance on the last available date of the quarter
        rebalance_date = common_index[in_quarter][-1]
        factor = value_df.loc[rebalance_date].dropna()

        # 30 / 70 percentile breakpoints over every stock with a factor value
        lower = factor.quantile(0.3)
        upper = factor.quantile(0.7)
        is_high = factor >= upper
        is_low = (factor <= lower) & ~is_high  # 'high' wins when both apply
        is_neutral = ~(is_high | is_low)

        caps = mktcap_df.loc[rebalance_date].reindex(factor.index)
        quarter_values = target_df.loc[in_quarter]

        groups = (('high', is_high), ('neutral', is_neutral), ('low', is_low))
        for label, mask in groups:
            # A stock without a weighting value cannot be weighted: drop it
            members = caps[mask].dropna()
            weights = members / members.sum()

            # Stocks absent from target_df become all-NaN columns and are
            # therefore treated as unavailable
            values = quarter_values.reindex(columns=weights.index)
            available = values.notna()

            weighted_sum = values.fillna(0.0).mul(weights, axis=1).sum(axis=1)
            weight_in_use = available.mul(weights, axis=1).sum(axis=1)

            # Renormalise by the weight actually invested; NaN when nothing is
            portfolio = (weighted_sum / weight_in_use).where(available.any(axis=1))
            value_portfolios.loc[in_quarter, label] = portfolio.to_numpy()

    return value_portfolios

In [56]:
# Value-sorted portfolios built from the stock returns, sorted on book-to-price

value_portfolio_returns = value_decomposition(
    target_df=returns_df,
    mktcap_df=mktcap_df,
    value_df=btp_df,
)

value_portfolio_returns

In [75]:
# Analytics Table
# Summarise the high / neutral / low portfolio series with the project helper
# calculate_analytics (imported from portfolios_toolkit).

value_analytics_table = calculate_analytics(value_portfolio_returns)

value_analytics_table

In [60]:
# Plot the running sum of the value-sorted portfolio returns
fig, ax = plt.subplots(figsize=(10, 6))
cumulative_value_returns = value_portfolio_returns.cumsum()
ax.plot(cumulative_value_returns, label=value_portfolio_returns.columns, alpha=1)

# Titles, legend and grid
ax.set_title('Cumulative Returns (Value Adjusted) Time Series')
ax.set_xlabel('Time')
ax.set_ylabel('Returns')
ax.legend()
ax.grid()

# Render
plt.show()

In [61]:
# Value-sorted portfolios built from the per-stock betas.
# Sort on book-to-price (btp_df), the same variable used for the value-sorted
# returns; sorting on price-to-book would swap which stocks land in 'high'
# and 'low' relative to the return portfolios.

value_portfolio_betas = value_decomposition(betas_df, mktcap_df, btp_df)

value_portfolio_betas

In [62]:
# Plot the value-sorted portfolio betas, smoothed with a span-21 EMA
plt.figure(figsize=(10, 6))
plt.plot(value_portfolio_betas.ewm(span=21, adjust = False).mean(), label=value_portfolio_betas.columns, alpha=1)
plt.axhline(y=1, color='black', linestyle='dashed')  # reference line at beta = 1

# Config
plt.title('Betas (Value Adjusted) Time Series')
plt.xlabel('Time')
plt.ylabel('Betas')  # the series are betas, matching the size-betas chart
plt.legend()
plt.grid()

# Show
plt.show()

In [63]:
# Regression inputs for the value portfolios: each portfolio's return less the
# daily risk-free series, next to the market excess returns. Rows with any
# missing value are dropped.
value_regression_df = (
    pd.DataFrame(index=value_portfolio_returns.index)
    .assign(
        high_excess_returns=value_portfolio_returns['high'] - daily_rfr,
        neutral_excess_returns=value_portfolio_returns['neutral'] - daily_rfr,
        low_excess_returns=value_portfolio_returns['low'] - daily_rfr,
        market_excess_returns=market_excess_returns,
    )
    .dropna()
)

value_regression_df

In [64]:
# CAPM regression: high-portfolio excess returns on market excess returns

x = value_regression_df['market_excess_returns']
y = value_regression_df['high_excess_returns']

# The window passed to the helper equals the number of observations in y
window = y.shape[0]

# Fit the model (the meaning of the trailing True flag is defined in capm_toolkit)
results = capm_regression(y, x, window, True)

# Print the regression summary
print(results.summary())

In [65]:
# CAPM regression: neutral-portfolio excess returns on market excess returns

x = value_regression_df['market_excess_returns']
y = value_regression_df['neutral_excess_returns']

# The window passed to the helper equals the number of observations in y
window = y.shape[0]

# Fit the model (the meaning of the trailing True flag is defined in capm_toolkit)
results = capm_regression(y, x, window, True)

# Print the regression summary
print(results.summary())

In [66]:
# CAPM regression: low-portfolio excess returns on market excess returns

x = value_regression_df['market_excess_returns']
y = value_regression_df['low_excess_returns']

# The window passed to the helper equals the number of observations in y
window = y.shape[0]

# Fit the model (the meaning of the trailing True flag is defined in capm_toolkit)
results = capm_regression(y, x, window, True)

# Print the regression summary
print(results.summary())

In [67]:
# High-Minus-Low (HML) premium: high-portfolio excess return less
# low-portfolio excess return

value_regression_df['HML'] = value_regression_df['high_excess_returns'].sub(
    value_regression_df['low_excess_returns']
)

In [68]:
# Plot the running sum of the HML premium with a zero reference line
fig, ax = plt.subplots(figsize=(10, 6))
cumulative_hml = value_regression_df['HML'].cumsum()
ax.plot(cumulative_hml, label='HML Premium', color='salmon', alpha=1)
ax.axhline(y=0, color='black', linestyle='dashed')

# Titles, legend and grid
ax.set_title('HML Cumulative Returns Time Series')
ax.set_xlabel('Time')
ax.set_ylabel('Returns')
ax.legend()
ax.grid()

# Render
plt.show()

In [69]:
# Check the Correlation with the Market
# (Series.corr between the HML premium and the market excess returns)

value_regression_df['HML'].corr(value_regression_df['market_excess_returns'])

In [70]:
# CAPM regression: HML premium on market excess returns

x = value_regression_df['market_excess_returns']
y = value_regression_df['HML']

# The window passed to the helper equals the number of observations in y
window = y.shape[0]

# Fit the model (the meaning of the trailing True flag is defined in capm_toolkit)
results = capm_regression(y, x, window, True)

# Print the regression summary
print(results.summary())

### Collinearity among factors ###

In [71]:
# Correlation between the HML and SMB premiums (collinearity check between factors)
value_regression_df['HML'].corr(size_regression_df['SMB'])

In [72]:
# Compare the running sums of the HML and SMB premiums on twin y-axes
fig, ax1 = plt.subplots(dpi = 300)

# HML premium on the left axis
value_regression_df['HML'].cumsum().plot(color = 'blue', ax = ax1, alpha=0.7)
ax1.set_xlabel('Date')
ax1.set_ylabel(
    'HML Factor', 
    color='blue'
    )

# SMB premium on the right axis
ax2 = ax1.twinx()

size_regression_df['SMB'].cumsum().plot(color = 'red', ax = ax2, alpha=0.7)
# The axis label uses the same colour as the SMB line so the reader can match
# each axis to its series
ax2.set_ylabel(
    'SMB Factor', 
    color='red'
    )

plt.title('Factor vs Factor Time Series')
plt.show()

In [77]:
# Collect the per-period HML and SMB premiums side by side.
# NOTE(review): the premiums are stored as period values rather than running
# sums because df_premiums is handed to calculate_analytics, which this
# notebook otherwise feeds with period returns; confirm against
# portfolios_toolkit. Apply .cumsum() at plot time when a cumulative view is
# needed.
df_premiums = pd.DataFrame()

df_premiums['HML'] = value_regression_df['HML']
df_premiums['SMB'] = size_regression_df['SMB']

df_premiums

In [78]:
# Summary analytics for the HML and SMB columns of df_premiums, computed with
# the project helper calculate_analytics (imported from portfolios_toolkit)
premiums_analytics = calculate_analytics(df_premiums)

premiums_analytics