In [15]:
import numpy as np
import pandas as pd
import pandas_datareader.data as web
from datetime import datetime
import yfinance as yf
import warnings
warnings.filterwarnings('ignore')

## Fama-French

In [None]:
# Fama-French 5팩터 + Momentum + Short-term Reversal 다운로드 (일별 데이터)
print("Downloading Fama-French factors (daily)...")

# 시작 날짜 설정
start_date = datetime(2005, 1, 1)
end_date = datetime(2024, 12, 31) 

# Fama-French 5팩터 다운로드 (일별)
ff5_factors = web.DataReader('F-F_Research_Data_5_Factors_2x3_daily', 'famafrench', start_date, end_date)[0]

# Momentum 팩터 다운로드 (일별)
momentum = web.DataReader('F-F_Momentum_Factor_daily', 'famafrench', start_date, end_date)[0]

# Short-term Reversal 팩터 다운로드 (일별)
st_reversal = web.DataReader('F-F_ST_Reversal_Factor_daily', 'famafrench', start_date, end_date)[0]

# 데이터 병합
factors = ff5_factors.join(momentum).join(st_reversal)
factors.columns = ['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA', 'RF', 'Mom', 'ST_Rev']

# 날짜 인덱스를 날짜 형식으로 변환
factors.index = pd.to_datetime(factors.index, format='%Y%m%d')

print(f"Factors data shape: {factors.shape}")
print(f"Date range: {factors.index.min()} to {factors.index.max()}")
print("\nFactors preview:")
print(factors.head())

Downloading Fama-French factors (weekly)...
Factors data shape: (20, 8)
Date range: 2005 to 2024

Factors preview:
      Mkt-RF   SMB    HML    RMW   CMA    RF    Mom  ST_Rev
Date                                                       
2005    3.14 -0.81   9.36   1.49 -4.81  2.98  14.91   -1.36
2006   10.58  1.59  11.71   3.41  8.52  4.80  -7.70    5.98
2007    1.07 -8.42 -16.92   4.51 -7.36  4.66  21.80  -16.43
2008  -38.25  2.96   1.24  14.86  3.11  1.60  13.80   -5.67
2009   28.35  7.94  -8.22   2.51 -1.27  0.10 -83.81    0.10
Factors data shape: (20, 8)
Date range: 2005 to 2024

Factors preview:
      Mkt-RF   SMB    HML    RMW   CMA    RF    Mom  ST_Rev
Date                                                       
2005    3.14 -0.81   9.36   1.49 -4.81  2.98  14.91   -1.36
2006   10.58  1.59  11.71   3.41  8.52  4.80  -7.70    5.98
2007    1.07 -8.42 -16.92   4.51 -7.36  4.66  21.80  -16.43
2008  -38.25  2.96   1.24  14.86  3.11  1.60  13.80   -5.67
2009   28.35  7.94  -8.22   2.51 -

In [None]:
# Fama-French 49 Industry Portfolio 다운로드 (일별 데이터)
print("\nDownloading Fama-French 49 Industry Portfolios (daily)...")

# 49 Industry Portfolio 다운로드 (일별 데이터)
industry49 = web.DataReader('49_Industry_Portfolios_daily', 'famafrench', start_date, end_date)[0]

# 날짜 인덱스를 날짜 형식으로 변환
industry49.index = pd.to_datetime(industry49.index, format='%Y%m%d')

print(f"Industry 49 data shape: {industry49.shape}")
print(f"Date range: {industry49.index.min()} to {industry49.index.max()}")
print("\nIndustry 49 preview:")
print(industry49.head())
print("\nIndustry names:")
print(industry49.columns.tolist())


Downloading Fama-French 49 Industry Portfolios...
Industry 49 data shape: (5033, 49)
Date range: 2005-01-03 00:00:00 to 2024-12-31 00:00:00

Industry 49 preview:
            Agric  Food  Soda  Beer  Smoke  Toys   Fun  Books  Hshld  Clths  \
Date                                                                          
2005-01-03  -0.53 -1.12 -0.04 -0.32  -0.73 -2.73 -0.72  -1.13  -0.55  -0.89   
2005-01-04  -1.87 -0.32 -0.96 -1.01   0.56 -0.76 -1.38  -0.38  -1.12  -1.37   
2005-01-05  -1.08 -0.35 -1.07 -0.69  -0.18 -1.40 -0.44  -0.50  -0.02  -1.06   
2005-01-06  -0.06  0.22  0.83  0.27  -0.58  0.44  0.92   0.70   0.70   0.42   
2005-01-07  -0.54  0.33 -0.39  0.27   1.34 -1.43 -0.60  -0.16   0.70  -0.06   

            ...  Boxes  Trans  Whlsl  Rtail  Meals  Banks  Insur  RlEst   Fin  \
Date        ...                                                                 
2005-01-03  ...  -0.84  -1.21  -1.57   0.08  -0.94  -0.53  -0.55  -0.80 -0.60   
2005-01-04  ...  -1.89  -2.17  -1.84  -1

In [None]:
# 데이터 저장
print("\nSaving data to CSV files...")

# factors.csv 저장
factors.to_csv('factors.csv')
print("Factors data saved to factors.csv")

# industry49.csv 저장  
industry49.to_csv('industry49.csv')
print("Industry 49 portfolio data saved to industry49.csv")

print("\nData download and save completed successfully!")


Saving data to CSV files...
Factors data saved to factors.csv
Industry 49 portfolio data saved to industry49.csv

Data download and save completed successfully!


# SNP

In [3]:
import yfinance as yf
import pandas as pd

snp50_tickers = [
    'MSFT', 'AAPL', 'JNJ', 'XOM', 'JPM', 'PG', 'CVX', 'MRK', 'KO', 'PEP',
    'PFE', 'INTC', 'IBM', 'ORCL', 'MCD', 'CSCO', 'WMT', 'HD', 'MMM', 'HON',
    'CAT', 'CL', 'BA', 'AMGN', 'GE', 'ABT', 'CVS', 'BMY', 'CI', 'LLY',
    'LMT', 'AXP', 'BAC', 'GILD', 'WFC', 'GS', 'TROW', 'MS', 'F', 'WBA',
    'KR', 'COP', 'UPS', 'NOC', 'KMB', 'MO', 'ADM', 'GIS', 'PPG', 'DE'
]

start_date = '2005-01-01'
end_date = '2024-12-31'

print(f"Downloading {len(snp50_tickers)} stocks...")

snp50_data = yf.download(
    tickers=' '.join(snp50_tickers),
    start=start_date,
    end=end_date,
    group_by='ticker',
    auto_adjust=True,
    threads=True
)

snp50_close = pd.DataFrame()
for ticker in snp50_tickers:
    try:
        snp50_close[ticker] = snp50_data[ticker]['Close']
    except:
        print(f"Warning: {ticker} data not available")

print(f"\nData shape: {snp50_close.shape}")
print(f"Date range: {snp50_close.index.min()} to {snp50_close.index.max()}")

missing = snp50_close.isnull().sum()
if missing.sum() > 0:
    print(f"\nMissing data:")
    print(missing[missing > 0])

snp50_close.to_csv('snp50.csv')
print("\n✓ Data saved to snp50.csv")

print("\nData preview:")
print(snp50_close.head())

Downloading 50 stocks...


[*********************100%***********************]  50 of 50 completed



Data shape: (5032, 50)
Date range: 2005-01-03 00:00:00 to 2024-12-30 00:00:00

✓ Data saved to snp50.csv

Data preview:
                 MSFT      AAPL        JNJ        XOM        JPM         PG  \
Date                                                                          
2005-01-03  18.454880  0.949987  34.751911  25.093662  22.368937  31.104504   
2005-01-04  18.523895  0.959743  34.641403  24.923332  22.138399  30.715654   
2005-01-05  18.482491  0.968149  34.619312  24.793068  22.184502  31.036873   
2005-01-06  18.461790  0.968900  34.718746  25.108692  22.311295  31.189054   
2005-01-07  18.406567  1.039446  34.591671  24.943365  22.132635  31.515942   

                  CVX        MRK         KO        PEP  ...        KR  \
Date                                                    ...             
2005-01-03  23.147207  14.135262  11.093916  28.976854  ...  6.069591   
2005-01-04  22.988049  14.076481  10.939011  28.770435  ...  5.906789   
2005-01-05  23.138121  14.171439 