In [2]:
import pandas as pd
import numpy as np

import yfinance as yf
import pandas_datareader as pdr

# Fama-French Factor Data

In [3]:
# Date window passed as start/end to every pdr.DataReader call in this notebook.
STARTDATE, ENDDATE = '1990-01-01', '2023-05-31'

### Available datasets

In [4]:
# Display the dataset names available from the Fama-French reader; names like
# these are what the cells below pass to pdr.DataReader(..., 'famafrench').
pdr.famafrench.get_available_datasets()

['F-F_Research_Data_Factors',
 'F-F_Research_Data_Factors_weekly',
 'F-F_Research_Data_Factors_daily',
 'F-F_Research_Data_5_Factors_2x3',
 'F-F_Research_Data_5_Factors_2x3_daily',
 'Portfolios_Formed_on_ME',
 'Portfolios_Formed_on_ME_Wout_Div',
 'Portfolios_Formed_on_ME_Daily',
 'Portfolios_Formed_on_BE-ME',
 'Portfolios_Formed_on_BE-ME_Wout_Div',
 'Portfolios_Formed_on_BE-ME_Daily',
 'Portfolios_Formed_on_OP',
 'Portfolios_Formed_on_OP_Wout_Div',
 'Portfolios_Formed_on_OP_Daily',
 'Portfolios_Formed_on_INV',
 'Portfolios_Formed_on_INV_Wout_Div',
 'Portfolios_Formed_on_INV_Daily',
 '6_Portfolios_2x3',
 '6_Portfolios_2x3_Wout_Div',
 '6_Portfolios_2x3_weekly',
 '6_Portfolios_2x3_daily',
 '25_Portfolios_5x5',
 '25_Portfolios_5x5_Wout_Div',
 '25_Portfolios_5x5_Daily',
 '100_Portfolios_10x10',
 '100_Portfolios_10x10_Wout_Div',
 '100_Portfolios_10x10_Daily',
 '6_Portfolios_ME_OP_2x3',
 '6_Portfolios_ME_OP_2x3_Wout_Div',
 '6_Portfolios_ME_OP_2x3_daily',
 '25_Portfolios_ME_OP_5x5',
 '25_Portf

## Factor data

In [5]:
# Short local keys -> dataset names understood by the 'famafrench' reader.
datasets = {'factors':'F-F_Research_Data_Factors',
            'momentum': 'F-F_Momentum_Factor', 
            'portfolios':'12_Industry_Portfolios'}

key = 'factors'
# DataReader returns a collection of tables; element 0 is the one used here
# (NOTE(review): presumably the monthly table -- confirm via the 'DESCR' entry).
facs_raw = pdr.DataReader(datasets[key], 'famafrench', start=STARTDATE, end=ENDDATE)[0]

# Rename the market factor and rescale by 1/100
# (presumably percent -> decimal returns; verify against the data description).
facs_base = facs_raw.rename(columns={'Mkt-RF': 'MKT'}) / 100

# Label each monthly period with its month-end date (midnight). This yields the
# same index values as to_timestamp() followed by resample('M').last() for
# one-row-per-month data, without relying on the 'M' resample alias that
# pandas 2.2 deprecated in favour of 'ME'.
facs_base.index = facs_base.index.to_timestamp(how='end').normalize()

### Append momentum

In [6]:
key = 'momentum'
# Element 0 of the reader's result is the table used here
# (NOTE(review): presumably the monthly series -- confirm via the 'DESCR' entry).
# Rescale by 1/100 (presumably percent -> decimal returns).
mom = pdr.DataReader(datasets[key], 'famafrench', start=STARTDATE, end=ENDDATE)[0] / 100

# Remove spaces from the column labels before renaming, so the label equals
# 'Mom' exactly (presumably the raw label carries padding), then use 'UMD'.
mom.columns = mom.columns.str.replace(' ', '')
mom = mom.rename(columns={'Mom': 'UMD'})

# Label each monthly period with its month-end date (midnight). Same index
# values as to_timestamp() + resample('M').last() for one-row-per-month data,
# but independent of the 'M' alias deprecated in pandas 2.2 (now 'ME').
mom.index = mom.index.to_timestamp(how='end').normalize()

In [7]:
# Put the base factors and the momentum factor side by side, matched on date.
facs = pd.concat([facs_base, mom], axis=1)

# Split the combined table: the 'RF' column on its own, and every other column.
rf = facs['RF']
facsx = facs.drop(columns=['RF'])

## Load Portfolio data

In [8]:
key = 'portfolios'
# Element 0 of the reader's result is the table used here
# (NOTE(review): presumably the monthly returns table -- confirm via 'DESCR').
# Rescale by 1/100 (presumably percent -> decimal returns).
port = pdr.DataReader(datasets[key], 'famafrench', start=STARTDATE, end=ENDDATE)[0] / 100

# Remove any spaces from the portfolio column labels.
port.columns = port.columns.str.replace(' ', '')

# Label each monthly period with its month-end date (midnight). Same index
# values as to_timestamp() + resample('M').last() for one-row-per-month data,
# but independent of the 'M' alias deprecated in pandas 2.2 (now 'ME').
port.index = port.index.to_timestamp(how='end').normalize()

## Process Data

### Convert to Excess Returns

In [9]:
# Put rf and the portfolio table on one shared row index
# (outer join is align's default, spelled out here).
rf, port = rf.align(port, join='outer', axis=0)

# Subtract rf from every portfolio column, row by row, to get excess returns.
portx = port.subtract(rf, axis=0)

### Align

In [10]:
# Give both tables the union of their row indexes (outer join is the default);
# rows missing from one side are filled with NaN there.
portx, facsx = portx.align(facsx, join='outer', axis=0)

In [11]:
# Keep only the dates that are fully observed in BOTH tables. Dropping NaN rows
# from each table independently can remove different dates from each one and
# leave the two tables with mismatched indexes.
complete_dates = portx.dropna().index.intersection(facsx.dropna().index)
portx = portx.loc[complete_dates]
facsx = facsx.loc[complete_dates]

## Export Data

In [12]:
# Sheet name -> table, in the order the sheets are written to the workbook.
# NOTE(review): rf was aligned to the portfolio dates earlier but is not trimmed
# by the NaN-dropping step -- confirm its date range is as intended.
sheets = {
    'factors (excess)': facsx,
    'assets (excess)': portx,
    'risk-free rate': rf,
}

# Write all sheets into one workbook; the context manager saves and closes it.
with pd.ExcelWriter('../data/factor_pricing_data.xlsx') as writer:
    for sheet_name, table in sheets.items():
        table.to_excel(writer, sheet_name=sheet_name)