# Fama and French Factor Model: Understanding the Factors #

In [1]:
# Import Libraries

# Data Management
import pandas as pd

# Plots
import matplotlib.pyplot as plt

# Handle Files
import sys
import os

# Import Local Functions
sys.path.append(os.path.abspath("../source"))
from config import get_tickers
from data_downloader import get_market_data
from portfolios_toolkit import calculate_analytics

### Value Factor ###

In [9]:
# Vanguard offers nine ETFs arranged on a size (large/mid/small cap) by
# style (value/blend/growth) grid. Map each ticker to its "<size>_<style>" label.
categories_dict = dict(
    # Value
    VTV="largecap_value",
    VOE="midcap_value",
    VBR="smallcap_value",
    # Blend
    VV="largecap_blend",
    VO="midcap_blend",
    VB="smallcap_blend",
    # Growth
    VUG="largecap_growth",
    VOT="midcap_growth",
    VBK="smallcap_growth",
)

# ":)"

In [10]:
# Tickers: fetch the ticker list from the project-local config helper.
# NOTE(review): mod="5.2" presumably selects the tickers for this module/notebook
# (expected to match the keys of categories_dict) — confirm against config.get_tickers.
tickers = get_tickers(mod="5.2")

# Echo the list as the cell output
tickers

In [11]:
# Import data: download the return series of every ticker and assemble them
# column-wise into a single DataFrame (one column per ticker).
#
# The series are collected in a list and concatenated ONCE. Growing a DataFrame
# with pd.concat inside the loop re-copies every previously added column on each
# iteration, and seeding it with an empty DataFrame adds an empty operand to the
# first concat.
returns_by_ticker = []

for ticker in tickers:
    df = get_market_data(
        ticker=ticker,
        start_date='2015-01-01',
        end_date='2025-01-01',
        returns=True
    )

    # Name the series after its ticker so it becomes the column label
    returns_by_ticker.append(df['returns'].rename(ticker))

# pd.concat raises ValueError on an empty list, so keep the original
# "empty DataFrame" result when there are no tickers.
if returns_by_ticker:
    df_returns = pd.concat(returns_by_ticker, axis=1)
else:
    df_returns = pd.DataFrame()

# Replace ticker symbols with their size/style category labels
df_returns = df_returns.rename(columns=categories_dict)

In [12]:
# Display the assembled returns table (columns are the size/style category labels)
df_returns

In [13]:
# Cumulative (summed) returns of every ETF category, one line per column
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df_returns.cumsum(), label=df_returns.columns, alpha=1)

# Titles, axis labels, legend and grid
ax.set_title('Cumulative ETF Returns Time Series')
ax.set_xlabel('Time')
ax.set_ylabel('Returns')
ax.legend()
ax.grid()

# Render the figure
plt.show()

In [15]:
# Analytics table for the nine ETF categories
# (calculate_analytics is a project-local helper; its output is expected to
# expose a 'Sharpe Ratio' column, which is used for ranking below)
value_analytics_table = calculate_analytics(df_returns)

# Rank from best to worst Sharpe Ratio
value_analytics_table.sort_values('Sharpe Ratio', ascending=False)

In [16]:
# Value DataFrame: one equal-weighted portfolio per style bucket, built as the
# row-wise mean of the large-, mid- and small-cap ETF of that style.
value_df = pd.DataFrame(index=df_returns.index)

for style in ('value', 'blend', 'growth'):
    members = [f'{size}_{style}' for size in ('largecap', 'midcap', 'smallcap')]
    value_df[style] = df_returns[members].mean(axis=1)

value_df

In [17]:
# Cumulative (summed) returns of the value / blend / growth portfolios
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(value_df.cumsum(), label=value_df.columns, alpha=1)

# Titles, axis labels, legend and grid
ax.set_title('Cumulative ETF Returns Time Series')
ax.set_xlabel('Time')
ax.set_ylabel('Returns')
ax.legend()
ax.grid()

# Render the figure
plt.show()

In [21]:
# Analytics table for the three style portfolios (value, blend, growth)
value_portfolios_analytics_table = calculate_analytics(value_df)

# Rank from best to worst Sharpe Ratio
value_portfolios_analytics_table.sort_values('Sharpe Ratio', ascending=False)

In [22]:
# Annualized returns in percent: mean periodic return scaled by 252
# (trading days per year) and then by 100.
value_annualized_returns = (value_df.mean() * 252 * 100).rename('annualized_returns')

value_annualized_returns

In [23]:
# Annualized return per style bucket; the series is reversed so the x-axis
# runs growth -> blend -> value
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(value_annualized_returns.iloc[::-1], label='Annualized Returns', alpha=1, marker='o')

# Titles, axis labels, legend and grid
ax.set_title('Annualized Returns by Value')
ax.set_xlabel('Value')
ax.set_ylabel('Annualized Returns')
ax.legend()
ax.grid()

# Render the figure
plt.show()

In [25]:
# Approximate HML (High-Minus-Low) with ETFs: the average of the large- and
# small-cap value ETFs minus the average of the large- and small-cap growth ETFs.
value_leg = df_returns['largecap_value'] + df_returns['smallcap_value']
growth_leg = df_returns['largecap_growth'] + df_returns['smallcap_growth']

HML = 1/2*value_leg - 1/2*growth_leg

In [26]:
# Cumulative (summed) returns of the ETF-based HML factor
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(HML.cumsum(), label='HML', alpha=1)

# Titles, axis labels, legend and grid
ax.set_title('Cumulative HML Returns Time Series')
ax.set_xlabel('Time')
ax.set_ylabel('Returns')
ax.legend()
ax.grid()

# Render the figure
plt.show()

In [27]:
# The factor premiums can be downloaded from the Fama and French website.
# Build the path with os.path.join so it resolves on every OS: a hard-coded
# backslash path ('..\additional_data\...') is only a valid relative path on
# Windows and is treated as a single odd file name elsewhere.
premiums_path = os.path.join('..', 'additional_data', 'famafrench_premiums.csv')

# Index the table by date (no in-place mutation, so the cell is safe to re-run)
premiums_df = pd.read_csv(premiums_path).set_index('Date')
premiums_df.index = pd.to_datetime(premiums_df.index)

# Rename the four data columns; this assumes the CSV holds exactly these four
# columns, in this order, after 'Date'.
premiums_df.columns = ['mkt_premium', 'smb_premium', 'hml_premium', 'risk_free_rate']

# Scale by 1/100 (presumably the file quotes values in percent — TODO confirm)
premiums_df = premiums_df.div(100)

premiums_df

In [28]:
# Compare the ETF-based HML approximation with the published Fama-French HML
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(HML.cumsum(), label='HML (using ETFs)', alpha=1)
ax.plot(premiums_df['hml_premium'].cumsum(), label='HML (by Fama and French)', alpha=1)

# Titles, axis labels, legend and grid
ax.set_title('Cumulative HML Returns Time Series')
ax.set_xlabel('Time')
ax.set_ylabel('Returns')
ax.legend()
ax.grid()

# Render the figure
plt.show()

In [29]:
# Correlation between the ETF-based HML and the published Fama-French HML premium
fama_french_hml = premiums_df['hml_premium']
hml_correlation = HML.corr(fama_french_hml)

hml_correlation

Thanks to publicly traded ETFs, we can approximate the factors of a factor model even if we cannot purchase the market-capitalization and book-to-price data.