In [1]:
import yfinance as yf
import pandas as pd
import numpy as np

# Download historical data for the S&P 500 from a chosen start date to today
start_date = '2022-02-17'
end_date = '2025-02-17'
sp500 = yf.download('AAPL', start=start_date, end=end_date)

# Check the first few rows
sp500.head()


[*********************100%***********************]  1 of 1 completed
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['AAPL']: YFRateLimitError('Too Many Requests. Rate limited. Try after a while.')


Price,Adj Close,Close,High,Low,Open,Volume
Ticker,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2


In [None]:
# Calculate daily returns
sp500['Daily Return'] = sp500['Close'].pct_change()

# Drop missing values resulting from the pct_change calculation
daily_returns = sp500['Daily Return'].dropna()

# Calculate the average daily return
avg_daily_return = daily_returns.mean()

# Annualize the daily return
annual_return = avg_daily_return * 252
print("Historical Annualized Return of the S&P 500: {:.2%}".format(annual_return))


Historical Annualized Return of the S&P 500: nan%


In [None]:
risk_free_rate = 0.04  # 2%


In [None]:
market_risk_premium = annual_return - risk_free_rate
print("Calculated Market Risk Premium: {:.2%}".format(market_risk_premium))
print("Expected Returns: ", {annual_return + risk_free_rate})


Calculated Market Risk Premium: 23.20%
Expected Returns:  {0.3119936433535968}


Carhart - 4 factor model

In [None]:
# Step 0: Import necessary libraries
import yfinance as yf
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
import pandas_datareader.data as web
from datetime import datetime

# For reproducibility
np.random.seed(42)


In [None]:
# Define the time period
start_date = '2015-01-01'
end_date = '2023-12-31'

# Download AAPL data
aapl = yf.download('AAPL', start=start_date, end=end_date)

# Calculate daily returns (percentage change on Adjusted Close)
aapl['Return'] = aapl['Close'].pct_change()
aapl = aapl.dropna()  # Remove the first NaN value

aapl.head()


[*********************100%***********************]  1 of 1 completed


Price,Close,High,Low,Open,Volume,Return
Ticker,AAPL,AAPL,AAPL,AAPL,AAPL,Unnamed: 6_level_1
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2015-01-05,23.635283,24.169162,23.448426,24.08908,257142000,-0.028172
2015-01-06,23.637512,23.897778,23.274918,23.699798,263188400,9.4e-05
2015-01-07,23.968964,24.069065,23.735391,23.846616,160423600,0.014022
2015-01-08,24.889906,24.947743,24.180291,24.29819,237458000,0.038422
2015-01-09,24.916597,25.192435,24.516187,25.063413,214798000,0.001072


In [None]:
# Define the period for factor data
ff_start = start_date
ff_end = end_date

# Get Fama-French 3-factor data (daily)
ff_factors = web.DataReader('F-F_Research_Data_Factors_daily', 'famafrench', start_date, end_date)[0]

# Get Momentum factor data (daily)
momentum = web.DataReader('F-F_Momentum_Factor_daily', 'famafrench', start_date, end_date)[0]

# Merge the datasets on date index
factors = ff_factors.join(momentum, how='inner')

# Rename columns for clarity
factors.rename(columns={'Mkt-RF': 'MKT_RF', 'SMB': 'SMB', 'HML': 'HML', 'Mom   ': 'MOM'}, inplace=True)
factors.head()


  ff_factors = web.DataReader('F-F_Research_Data_Factors_daily', 'famafrench', start_date, end_date)[0]
  momentum = web.DataReader('F-F_Momentum_Factor_daily', 'famafrench', start_date, end_date)[0]


Unnamed: 0_level_0,MKT_RF,SMB,HML,RF,MOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2015-01-02,-0.12,-0.61,0.08,0.0,-0.08
2015-01-05,-1.84,0.33,-0.68,0.0,1.16
2015-01-06,-1.04,-0.78,-0.31,0.0,0.51
2015-01-07,1.19,0.2,-0.66,0.0,0.9
2015-01-08,1.81,-0.12,-0.28,0.0,0.14


In [None]:
# Convert percentage factors to decimals
factors = factors / 100


In [None]:
print("AAPL columns:", aapl.columns)
print("Factors columns:", factors.columns)




AAPL columns: MultiIndex([( 'Close', 'AAPL'),
            (  'High', 'AAPL'),
            (   'Low', 'AAPL'),
            (  'Open', 'AAPL'),
            ('Volume', 'AAPL'),
            ('Return',     '')],
           names=['Price', 'Ticker'])
Factors columns: Index(['MKT_RF', 'SMB', 'HML', 'RF', 'MOM'], dtype='object')


In [None]:
# 1) Flatten the MultiIndex columns for aapl
aapl.columns = aapl.columns.droplevel(1)

# Now aapl.columns should be a single level:
# ['Close', 'High', 'Low', 'Open', 'Volume', 'Return']
print(aapl.columns)

# 2) Check if the index is a DatetimeIndex
print(aapl.index)
# If it's already a DatetimeIndex, that's good. If not, you may need:
# aapl.index = pd.to_datetime(aapl.index)

# 3) For factors, confirm it has a DatetimeIndex as well
print(factors.index)
# If it's not DatetimeIndex, similarly:
# factors.index = pd.to_datetime(factors.index)

# 4) Reset the index on both so 'Date' becomes a column
aapl_reset = aapl[['Return']].reset_index()  # This creates columns ['Date', 'Return']
factors_reset = factors.reset_index()        # Typically columns ['Date', 'MKT_RF', 'SMB', 'HML', 'RF', 'MOM']

# 5) Merge on 'Date'
data = pd.merge(aapl_reset, factors_reset, on='Date', how='inner')

# 6) Compute the excess return
data['Excess_Return'] = data['Return'] - data['RF']

data.tail()


Index(['Close', 'High', 'Low', 'Open', 'Volume', 'Return'], dtype='object', name='Price')
DatetimeIndex(['2015-01-05', '2015-01-06', '2015-01-07', '2015-01-08',
               '2015-01-09', '2015-01-12', '2015-01-13', '2015-01-14',
               '2015-01-15', '2015-01-16',
               ...
               '2023-12-15', '2023-12-18', '2023-12-19', '2023-12-20',
               '2023-12-21', '2023-12-22', '2023-12-26', '2023-12-27',
               '2023-12-28', '2023-12-29'],
              dtype='datetime64[ns]', name='Date', length=2263, freq=None)
DatetimeIndex(['2015-01-02', '2015-01-05', '2015-01-06', '2015-01-07',
               '2015-01-08', '2015-01-09', '2015-01-12', '2015-01-13',
               '2015-01-14', '2015-01-15',
               ...
               '2023-12-15', '2023-12-18', '2023-12-19', '2023-12-20',
               '2023-12-21', '2023-12-22', '2023-12-26', '2023-12-27',
               '2023-12-28', '2023-12-29'],
              dtype='datetime64[ns]', name='Date', leng

Unnamed: 0,Date,Return,MKT_RF,SMB,HML,RF,MOM,Excess_Return
2258,2023-12-22,-0.005548,0.002,0.0064,0.001,0.00021,-0.0048,-0.005758
2259,2023-12-26,-0.002841,0.0048,0.0071,0.0043,0.00021,-0.0021,-0.003051
2260,2023-12-27,0.000518,0.0016,0.0014,0.001,0.00021,0.0011,0.000308
2261,2023-12-28,0.002226,-0.0001,-0.0036,0.0002,0.00021,-0.0048,0.002016
2262,2023-12-29,-0.005424,-0.0043,-0.0114,-0.0036,0.00021,0.0009,-0.005634


In [None]:
# 5) Regression setup
import statsmodels.api as sm

X = data[['MKT_RF', 'SMB', 'HML', 'MOM']]
X = sm.add_constant(X)
y = data['Excess_Return']

# 6) Fit OLS
model = sm.OLS(y, X).fit()
#print(model.summary())

# 7) Factor premia and expected return
avg_MKT_RF = data['MKT_RF'].mean()
avg_SMB = data['SMB'].mean()
avg_HML = data['HML'].mean()
avg_MOM = data['MOM'].mean()

alpha_est = model.params['const']
beta_MKT = model.params['MKT_RF']
beta_SMB = model.params['SMB']
beta_HML = model.params['HML']
beta_MOM = model.params['MOM']

expected_excess_daily = (alpha_est +
                         beta_MKT * avg_MKT_RF +
                         beta_SMB * avg_SMB +
                         beta_HML * avg_HML +
                         beta_MOM * avg_MOM)

expected_excess_annual = expected_excess_daily * 252
current_risk_free_rate = 0.04  # 2% annual
expected_return_annual = current_risk_free_rate + expected_excess_annual

print("Expected Annual Return (Carhart 4-Factor): {:.2%}".format(expected_return_annual))

Expected Annual Return (Carhart 4-Factor): 29.91%


Vanilla CAPM

5y Beta, Correlation

In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
import statsmodels.api as sm
import datetime

# --- 1) Download daily price data for Apple and S&P 500 ---
start_date = '2019-01-01'
end_date = '2023-12-31'

aapl_data = yf.download('AAPL', start=start_date, end=end_date)
sp500_data = yf.download('^GSPC', start=start_date, end=end_date)

# --- 2) Compute daily returns ---
aapl_data['AAPL_Return'] = aapl_data['Close'].pct_change()
sp500_data['SP500_Return'] = sp500_data['Close'].pct_change()

aapl_data.dropna(inplace=True)
sp500_data.dropna(inplace=True)

# --- 3) Combine into one DataFrame ---
df = pd.DataFrame({
    'AAPL_Return': aapl_data['AAPL_Return'],
    'SP500_Return': sp500_data['SP500_Return']
}).dropna()

# --- 4) Define daily risk-free rate (constant) ---
annual_rf = 0.04478  # 2% annual
daily_rf = annual_rf / 252

# --- 5) Calculate excess returns ---
df['AAPL_Excess'] = df['AAPL_Return'] - daily_rf
df['SP500_Excess'] = df['SP500_Return'] - daily_rf

# --- 6) CAPM Regression ---
Y = df['AAPL_Excess']            # Dependent variable
X = df[['SP500_Excess']]         # Independent variable
X = sm.add_constant(X)           # For alpha

capm_model = sm.OLS(Y, X).fit()
print(capm_model.summary())

alpha = capm_model.params['const']
beta = capm_model.params['SP500_Excess']

print(f"Daily Alpha: {alpha:.6f}")
print(f"Daily Beta: {beta:.4f}")

# --- 7) Calculate CAPM-Implied Expected Return ---
# Approx. market risk premium (annual) from historical S&P data
avg_sp500_return_daily = df['SP500_Return'].mean()
annual_sp500_return = avg_sp500_return_daily * 252
mrp = annual_sp500_return - annual_rf

capm_expected_return = annual_rf + beta * mrp
print(f"CAPM Expected Return (annual) = {capm_expected_return:.2%}")

# Optional: annualize alpha
alpha_annual = alpha * 252
print(f"Annualized Alpha = {alpha_annual:.2%}")


[*********************100%***********************]  1 of 1 completed
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['AAPL']: YFRateLimitError('Too Many Requests. Rate limited. Try after a while.')
[*********************100%***********************]  1 of 1 completed
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['^GSPC']: YFRateLimitError('Too Many Requests. Rate limited. Try after a while.')


ValueError: zero-size array to reduction operation maximum which has no identity

Vanilla CAPM - 3y beta correlation

In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
import statsmodels.api as sm
import datetime

# --- 1) Download daily price data for Apple and S&P 500 ---
start_date = '2022-02-14'
end_date = '2025-02-14'

aapl_data = yf.download('AAPL', start=start_date, end=end_date)
sp500_data = yf.download('^GSPC', start=start_date, end=end_date)


print(aapl_data.tail())
print(sp500_data.tail())

# --- 2) Compute daily returns ---
aapl_data['AAPL_Return'] = aapl_data['Close'].pct_change()
sp500_data['SP500_Return'] = sp500_data['Close'].pct_change()

aapl_data.dropna(inplace=True)
sp500_data.dropna(inplace=True)

# --- 3) Combine into one DataFrame ---
df = pd.DataFrame({
    'AAPL_Return': aapl_data['AAPL_Return'],
    'SP500_Return': sp500_data['SP500_Return']
}).dropna()

# --- 4) Define daily risk-free rate (constant) ---
annual_rf = 0.04478  # 2% annual
daily_rf = annual_rf / 252

# --- 5) Calculate excess returns ---
df['AAPL_Excess'] = df['AAPL_Return'] - daily_rf
df['SP500_Excess'] = df['SP500_Return'] - daily_rf

# --- 6) CAPM Regression ---
Y = df['AAPL_Excess']            # Dependent variable
X = df[['SP500_Excess']]         # Independent variable
X = sm.add_constant(X)           # For alpha

capm_model = sm.OLS(Y, X).fit()
print(capm_model.summary())

alpha = capm_model.params['const']
beta = capm_model.params['SP500_Excess']

print(f"Daily Alpha: {alpha:.6f}")
print(f"Daily Beta: {beta:.4f}")

# --- 7) Calculate CAPM-Implied Expected Return ---
# Approx. market risk premium (annual) from historical S&P data
avg_sp500_return_daily = df['SP500_Return'].mean()
annual_sp500_return = avg_sp500_return_daily * 252
mrp = annual_sp500_return - annual_rf

capm_expected_return = annual_rf + beta * mrp
print(f"CAPM Expected Return (annual) = {capm_expected_return:.2%}")

# Optional: annualize alpha
alpha_annual = alpha * 252
print(f"Annualized Alpha = {alpha_annual:.2%}")


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Price            Close        High         Low        Open    Volume
Ticker            AAPL        AAPL        AAPL        AAPL      AAPL
Date                                                                
2025-02-07  227.380005  233.743004  227.010401  232.344548  39707200
2025-02-10  227.649994  230.589996  227.199997  229.570007  33115600
2025-02-11  232.619995  235.229996  228.130005  228.199997  53718400
2025-02-12  236.869995  236.960007  230.679993  231.199997  45243300
2025-02-13  241.529999  242.339996  235.570007  236.910004  53614100
Price             Close         High          Low         Open      Volume
Ticker            ^GSPC        ^GSPC        ^GSPC        ^GSPC       ^GSPC
Date                                                                      
2025-02-07  6025.990234  6101.279785  6019.959961  6083.129883  4766900000
2025-02-10  6066.439941  6073.379883  6044.839844  6046.399902  4458760000
2025-02-11  6068.500000  6076.279785  6042.339844  6049.319824  432488000


