
# 8. Modelo CAPM y factores

## Modelo CAPM

In [None]:
import pandas as pd
import yfinance as yf
import statsmodels.api as sm

# Analysis configuration shared by the download and regression cells.

# Sample window, passed as `start` / `end` to the price download.
START_DATE = "2020-01-01"
END_DATE = "2024-12-31"

# Tickers; relabelled to "asset" and "market" once prices are loaded.
RISKY_ASSET = "AMZN"
MARKET_BENCHMARK = "^GSPC"

In [None]:
# yfinance treats `end` as exclusive, so passing END_DATE directly would drop
# that day's bar and the last monthly return would be computed from the
# previous session. Request one extra calendar day so the sample runs
# through END_DATE inclusive.
download_end = (
    pd.Timestamp(END_DATE) + pd.Timedelta(days=1)
).strftime("%Y-%m-%d")

# Adjusted prices for the risky asset and the market benchmark.
df = yf.download([RISKY_ASSET, MARKET_BENCHMARK],
                 start=START_DATE,
                 end=download_end,
                 auto_adjust=True,
                 multi_level_index=False)

[*********************100%***********************]  2 of 2 completed


In [None]:
# Month-end closing prices, with the two tickers relabelled so the
# regression formula can refer to them as `asset` and `market`.
month_end_close = (
    df["Close"]
    .rename(columns={RISKY_ASSET: "asset", MARKET_BENCHMARK: "market"})
    .resample("ME")
    .last()
)

# Simple month-over-month returns; the first month has no predecessor,
# so its NaN row is dropped.
X = month_end_close.pct_change().dropna()
X

Ticker,asset,market
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-02-29,-0.062214,-0.08411
2020-03-31,0.035021,-0.125119
2020-04-30,0.2689,0.126844
2020-05-31,-0.012785,0.045282
2020-06-30,0.129567,0.018388
2020-07-31,0.147114,0.055101
2020-08-31,0.090461,0.070065
2020-09-30,-0.087579,-0.039228
2020-10-31,-0.035754,-0.027666
2020-11-30,0.04344,0.107546


In [None]:
# Beta = cov(asset, market) / var(market), computed from the monthly returns.
cov_matrix = X.cov()
covariance = cov_matrix.loc["asset", "market"]
benchmark_variance = X["market"].var()
beta = covariance / benchmark_variance
beta

np.float64(1.1533821866569285)

In [None]:
import statsmodels.formula.api as smf

# Rebuild the monthly simple returns of the asset and the market from the
# downloaded closing prices.
X = (
    df["Close"]
    .rename(columns={RISKY_ASSET: "asset", MARKET_BENCHMARK: "market"})
    .resample("ME")
    .last()
    .pct_change()
    .dropna()
)

# Regress asset returns on market returns; the slope on `market` is the
# regression estimate of beta.
capm_spec = smf.ols(formula="asset ~ market", data=X)
capm_model = capm_spec.fit()

# Show the full regression table.
print(capm_model.summary())

                            OLS Regression Results                            
Dep. Variable:                  asset   R-squared:                       0.409
Model:                            OLS   Adj. R-squared:                  0.399
Method:                 Least Squares   F-statistic:                     39.50
Date:                Wed, 05 Nov 2025   Prob (F-statistic):           4.89e-08
Time:                        23:29:27   Log-Likelihood:                 70.999
No. Observations:                  59   AIC:                            -138.0
Df Residuals:                      57   BIC:                            -133.8
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0043      0.010      0.441      0.6

## Modelo de tres factores de Fama-French

In [None]:
import pandas as pd
import yfinance as yf
import statsmodels.formula.api as smf
import pandas_datareader.data as web

In [None]:
# Configuration for the factor-model section.

# Sample window, passed as `start` / `end` to the data readers below.
START_DATE = "2020-01-01"
END_DATE = "2024-12-31"

# Ticker whose returns are explained by the factors.
RISKY_ASSET = "AMZN"

In [None]:
# Fetch the Fama-French research factors for the sample window. The reader
# returns a dict-like object holding several tables plus a "DESCR" entry.
ff_dict = web.DataReader(
    name="F-F_Research_Data_Factors",
    data_source="famafrench",
    start=START_DATE,
    end=END_DATE,
)

  ff_dict = web.DataReader("F-F_Research_Data_Factors",
  ff_dict = web.DataReader("F-F_Research_Data_Factors",


In [None]:
# Show the dataset description stored under the "DESCR" key.
ff_description = ff_dict["DESCR"]
print(ff_description)

F-F Research Data Factors
-------------------------

This file was created using the 202508 CRSP database. The 1-month TBill rate data until 202405 are from Ibbotson Associates. Starting from 202406, the 1-month TBill rate is from ICE BofA US 1-Month Treasury Bill Index. Copyright 2025 Eugene F. Fama and Kenneth R. French

  0 : (60 rows x 4 cols)
  1 : Annual Factors: January-December (5 rows x 4 cols)


In [None]:
# Table stored under key 0 of the downloaded dataset.
raw_factors = ff_dict[0]

# Rename the market column to a formula-friendly identifier and divide by
# 100 (presumably converting percent to decimal — confirm against the
# dataset documentation).
factor_3_df = raw_factors.rename(columns={"Mkt-RF": "MKT"}) / 100

factor_3_df.tail()

Unnamed: 0_level_0,MKT,SMB,HML,RF
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2024-08,0.016,-0.0349,-0.011,0.0048
2024-09,0.0172,-0.0013,-0.0277,0.004
2024-10,-0.01,-0.0099,0.0086,0.0039
2024-11,0.0649,0.0446,0.0015,0.004
2024-12,-0.0317,-0.0271,-0.03,0.0037


In [None]:

# yfinance treats `end` as exclusive, so passing END_DATE directly would drop
# that day's bar and the last monthly return would be computed from the
# previous session. Request one extra calendar day so the sample runs
# through END_DATE inclusive.
download_end = (
    pd.Timestamp(END_DATE) + pd.Timedelta(days=1)
).strftime("%Y-%m-%d")

# Adjusted prices of the risky asset with flat (single-level) columns.
asset_df = yf.download(RISKY_ASSET,
                       start=START_DATE,
                       end=download_end,
                       auto_adjust=True,
                       multi_level_index=False)

[*********************100%***********************]  1 of 1 completed


In [None]:
# Monthly simple returns of the asset, indexed by monthly period so they
# align with the factor table, and named "rtn" for the later join.
y = (
    asset_df["Close"]
    .resample("ME")
    .last()
    .pct_change()
    .dropna()
    .to_period("M")
    .rename("rtn")
)
y.head()

Unnamed: 0_level_0,rtn
Date,Unnamed: 1_level_1
2020-02,-0.062214
2020-03,0.035021
2020-04,0.2689
2020-05,-0.012785
2020-06,0.129567


In [None]:
# Attach the asset's monthly returns to the factor table.
# `factor_3_df` is reassigned from itself, so a plain re-run of this cell
# would fail with "columns overlap but no suffix specified". Dropping any
# derived columns left by a previous run keeps the cell idempotent.
factor_3_df = (
    factor_3_df
    .drop(columns=["rtn", "excess_rtn"], errors="ignore")
    .join(y)
)

# Excess return = asset return minus the risk-free rate for the same month.
factor_3_df["excess_rtn"] = factor_3_df["rtn"] - factor_3_df["RF"]

factor_3_df.head()

Unnamed: 0_level_0,MKT,SMB,HML,RF,rtn,excess_rtn
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-01,-0.0011,-0.031,-0.0622,0.0013,,
2020-02,-0.0815,0.0108,-0.0382,0.0012,-0.062214,-0.063414
2020-03,-0.1337,-0.0469,-0.1383,0.0012,0.035021,0.033821
2020-04,0.136,0.025,-0.0134,0.0,0.2689,0.2689
2020-05,0.0557,0.024,-0.05,0.0001,-0.012785,-0.012885


In [None]:
# Three-factor regression: excess asset return on the market, size (SMB)
# and value (HML) factors.
ff_spec = smf.ols(
    formula="excess_rtn ~ MKT + SMB + HML",
    data=factor_3_df,
)
ff_model = ff_spec.fit()

# Show the full regression table.
print(ff_model.summary())

                            OLS Regression Results                            
Dep. Variable:             excess_rtn   R-squared:                       0.633
Model:                            OLS   Adj. R-squared:                  0.613
Method:                 Least Squares   F-statistic:                     31.58
Date:                Wed, 05 Nov 2025   Prob (F-statistic):           5.27e-12
Time:                        23:30:23   Log-Likelihood:                 85.111
No. Observations:                  59   AIC:                            -162.2
Df Residuals:                      55   BIC:                            -153.9
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0041      0.008      0.526      0.6

## Modelo de 4 factores

In [None]:
import pandas as pd
import yfinance as yf
import statsmodels.formula.api as smf
import pandas_datareader.data as web

# Both datasets come from the same source and cover the same window.
ff_request = dict(data_source="famafrench", start=START_DATE, end=END_DATE)

# Three research factors: table stored under key 0.
factor_3_df = web.DataReader("F-F_Research_Data_Factors", **ff_request)[0]

# Momentum factor: table stored under key 0.
momentum_df = web.DataReader("F-F_Momentum_Factor", **ff_request)[0]

  factor_3_df = web.DataReader("F-F_Research_Data_Factors",
  factor_3_df = web.DataReader("F-F_Research_Data_Factors",
  momentum_df = web.DataReader("F-F_Momentum_Factor",
  momentum_df = web.DataReader("F-F_Momentum_Factor",


In [None]:
# Factor columns in the order they appear after the joins below.
factor_cols = ["mkt", "smb", "hml", "rf", "mom"]

# Combine the three factors, the momentum factor and the asset returns,
# aligned on the shared index.
factor_4_df = factor_3_df.join(momentum_df).join(y)

# Positional relabelling: five factor columns followed by the asset return.
factor_4_df.columns = factor_cols + ["rtn"]

# Divide the factor columns by 100; the asset return column is left as is.
factor_4_df[factor_cols] = factor_4_df[factor_cols] / 100

# Excess return = asset return minus the risk-free rate.
factor_4_df["excess_rtn"] = factor_4_df["rtn"] - factor_4_df["rf"]

factor_4_df.head()

Unnamed: 0_level_0,mkt,smb,hml,rf,mom,rtn,excess_rtn
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-01,-0.0011,-0.031,-0.0622,0.0013,0.0602,,
2020-02,-0.0815,0.0108,-0.0382,0.0012,-0.0028,-0.062214,-0.063414
2020-03,-0.1337,-0.0469,-0.1383,0.0012,0.0821,0.035021,0.033821
2020-04,0.136,0.025,-0.0134,0.0,-0.0548,0.2689,0.2689
2020-05,0.0557,0.024,-0.05,0.0001,0.0016,-0.012785,-0.012885


In [None]:
# Four-factor regression: excess asset return on the market, size, value
# and momentum factors.
four_factor_spec = smf.ols(formula="excess_rtn ~ mkt + smb + hml + mom",
                           data=factor_4_df)
four_factor_model = four_factor_spec.fit()

# Show the full regression table.
print(four_factor_model.summary())

                            OLS Regression Results                            
Dep. Variable:             excess_rtn   R-squared:                       0.644
Model:                            OLS   Adj. R-squared:                  0.617
Method:                 Least Squares   F-statistic:                     24.39
Date:                Wed, 05 Nov 2025   Prob (F-statistic):           1.45e-11
Time:                        23:31:08   Log-Likelihood:                 86.011
No. Observations:                  59   AIC:                            -162.0
Df Residuals:                      54   BIC:                            -151.6
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0052      0.008      0.657      0.5