<a href="https://colab.research.google.com/github/james130625/notebook/blob/main/%ED%8C%A9%ED%84%B0_%EB%AA%A8%EB%8D%B8%EB%A7%81_Factor_Modeling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
!pip install yfinance
!pip install pandas
!pip install numpy
!pip install statsmodels

import yfinance as yf
import pandas as pd
import numpy as np
import statsmodels.api as sm
import zipfile
import io
import requests
from datetime import datetime

# 오늘 날짜 가져오기
today = datetime.today().strftime('%Y-%m-%d')

# 주식 데이터 다운로드 (예: 애플 주식)
stock_data = yf.download('AAPL', start='2020-01-01', end=today)

# 시장 데이터 다운로드 (예: S&P 500 지수)
market_data = yf.download('^GSPC', start='2020-01-01', end=today)

# 팩터 데이터 다운로드 (Fama-French 3-팩터)
url = 'https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_daily_CSV.zip'
response = requests.get(url)
with zipfile.ZipFile(io.BytesIO(response.content)) as z:
    with z.open('F-F_Research_Data_Factors_daily.CSV') as f:
        ff_factors = pd.read_csv(f, skiprows=3)

# 데이터 전처리
stock_data['Daily Return'] = stock_data['Adj Close'].pct_change()
market_data['Daily Return'] = market_data['Adj Close'].pct_change()
ff_factors = ff_factors.iloc[:-1]
ff_factors['Date'] = pd.to_datetime(ff_factors['Unnamed: 0'], format='%Y%m%d')
ff_factors.set_index('Date', inplace=True)
ff_factors = ff_factors[['Mkt-RF', 'SMB', 'HML', 'RF']].astype(float)

# 결측값 및 무한대 값 제거
stock_data = stock_data.dropna()
market_data = market_data.dropna()
ff_factors = ff_factors.replace([np.inf, -np.inf], np.nan).dropna()

# 공통 날짜 인덱스 설정
common_index = stock_data.index.intersection(market_data.index).intersection(ff_factors.index)
stock_data = stock_data.loc[common_index]
market_data = market_data.loc[common_index]
ff_factors = ff_factors.loc[common_index]

# CAPM: 주식 수익률과 시장 수익률 간의 회귀 분석
X_capm = market_data['Daily Return']
y_capm = stock_data['Daily Return'] - ff_factors['RF'] / 100

X_capm = sm.add_constant(X_capm)
capm_model = sm.OLS(y_capm, X_capm).fit()
print(capm_model.summary())

# Fama-French 3-팩터 모델: 주식 수익률과 팩터 수익률 간의 회귀 분석
ff_data = ff_factors.join(stock_data[['Daily Return']])
ff_data = ff_data.dropna()
X_ff = ff_data[['Mkt-RF', 'SMB', 'HML']]
y_ff = ff_data['Daily Return'] - ff_factors['RF'] / 100

X_ff = sm.add_constant(X_ff)
ff_model = sm.OLS(y_ff, X_ff).fit()
print(ff_model.summary())




[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.650
Model:                            OLS   Adj. R-squared:                  0.649
Method:                 Least Squares   F-statistic:                     2014.
Date:                Wed, 12 Jun 2024   Prob (F-statistic):          1.33e-249
Time:                        02:21:46   Log-Likelihood:                 3247.3
No. Observations:                1088   AIC:                            -6491.
Df Residuals:                    1086   BIC:                            -6481.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                   coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------
const            0.0003      0.000      0.870   