In [40]:
import pandas as pd
import numpy as np
import warnings
# Suppress every warning for the rest of the session. Note that this also
# hides deprecation notices emitted by the libraries used below.
warnings.filterwarnings('ignore')

In [41]:
# Date window (ISO formatted strings) passed to the downloads and used by the
# date filters in the cells below.
start_date, end_date = "1960-01-01", "2022-12-31"

# [Fama-French Data](https://www.tidy-finance.org/python/accessing-and-managing-financial-data.html#fama-french-data)

In [42]:
import pandas_datareader as pdr

In [43]:
# Time series of the market (mkt_excess), size (smb), and value (hml) factors
# alongside the risk-free rate (rf). The reader returns several tables for
# this dataset; only the one stored under key 0 is kept.
factors_ff3_monthly_raw = pdr.DataReader(
    "F-F_Research_Data_Factors",
    "famafrench",
    start=start_date,
    end=end_date,
)[0]

# Divide by 100 (presumably percent -> decimal returns), expose the index as a
# `month` timestamp column, and normalise the column labels: lower case, with
# "mkt-rf" renamed to "mkt_excess".
factors_ff3_monthly = (
    (factors_ff3_monthly_raw / 100)
    .rename_axis("month")
    .reset_index()
    .assign(month=lambda frame: pd.to_datetime(frame["month"].astype(str)))
    .rename(
        columns=lambda label: (
            "mkt_excess" if label.lower() == "mkt-rf" else label.lower()
        )
    )
)

In [44]:
# Monthly table (key 0) of the "F-F_Research_Data_5_Factors_2x3" dataset.
factors_ff5_monthly_raw = pdr.DataReader(
    "F-F_Research_Data_5_Factors_2x3",
    "famafrench",
    start=start_date,
    end=end_date,
)[0]

# Same clean-up as for the three-factor data: divide by 100, turn the index
# into a `month` timestamp column, lower-case the labels and rename "mkt-rf"
# to "mkt_excess".
factors_ff5_monthly = (
    (factors_ff5_monthly_raw / 100)
    .rename_axis("month")
    .reset_index()
    .assign(month=lambda frame: pd.to_datetime(frame["month"].astype(str)))
    .rename(
        columns=lambda label: (
            "mkt_excess" if label.lower() == "mkt-rf" else label.lower()
        )
    )
)

In [45]:
# Daily three-factor data: keep the table stored under key 0.
factors_ff3_daily_raw = pdr.DataReader(
    "F-F_Research_Data_Factors_daily",
    "famafrench",
    start=start_date,
    end=end_date,
)[0]

# Divide by 100, move the index into a `date` column (no string round-trip
# here, unlike the monthly tables), lower-case the labels and rename "mkt-rf"
# to "mkt_excess".
factors_ff3_daily = (
    (factors_ff3_daily_raw / 100)
    .rename_axis("date")
    .reset_index()
    .rename(
        columns=lambda label: (
            "mkt_excess" if label.lower() == "mkt-rf" else label.lower()
        )
    )
)

In [46]:
# Monthly table (key 0) of the "10_Industry_Portfolios" dataset.
industries_ff_monthly_raw = pdr.DataReader(
    "10_Industry_Portfolios",
    "famafrench",
    start=start_date,
    end=end_date,
)[0]

# Divide by 100, expose the index as a `month` timestamp column, and
# lower-case every column label.
industries_ff_monthly = (
    (industries_ff_monthly_raw / 100)
    .rename_axis("month")
    .reset_index()
    .assign(month=lambda frame: pd.to_datetime(frame["month"].astype(str)))
    .rename(columns=lambda label: label.lower())
)

In [47]:
# Check out the other Fama/French datasets that the reader can download.
pdr.famafrench.get_available_datasets()

['F-F_Research_Data_Factors',
 'F-F_Research_Data_Factors_weekly',
 'F-F_Research_Data_Factors_daily',
 'F-F_Research_Data_5_Factors_2x3',
 'F-F_Research_Data_5_Factors_2x3_daily',
 'Portfolios_Formed_on_ME',
 'Portfolios_Formed_on_ME_Wout_Div',
 'Portfolios_Formed_on_ME_Daily',
 'Portfolios_Formed_on_BE-ME',
 'Portfolios_Formed_on_BE-ME_Wout_Div',
 'Portfolios_Formed_on_BE-ME_Daily',
 'Portfolios_Formed_on_OP',
 'Portfolios_Formed_on_OP_Wout_Div',
 'Portfolios_Formed_on_OP_Daily',
 'Portfolios_Formed_on_INV',
 'Portfolios_Formed_on_INV_Wout_Div',
 'Portfolios_Formed_on_INV_Daily',
 '6_Portfolios_2x3',
 '6_Portfolios_2x3_Wout_Div',
 '6_Portfolios_2x3_weekly',
 '6_Portfolios_2x3_daily',
 '25_Portfolios_5x5',
 '25_Portfolios_5x5_Wout_Div',
 '25_Portfolios_5x5_Daily',
 '100_Portfolios_10x10',
 '100_Portfolios_10x10_Wout_Div',
 '100_Portfolios_10x10_Daily',
 '6_Portfolios_ME_OP_2x3',
 '6_Portfolios_ME_OP_2x3_Wout_Div',
 '6_Portfolios_ME_OP_2x3_daily',
 '25_Portfolios_ME_OP_5x5',
 '25_Portf

# [q-Factors](https://www.tidy-finance.org/python/accessing-and-managing-financial-data.html#q-factors)

In [54]:
# Monthly q-factor file hosted on global-q.org.
factors_q_monthly_link = (
  "https://global-q.org/uploads/1/2/2/6/122679606/q5_factors_monthly_2023.csv"
)

# Read the CSV, build a first-of-month `month` timestamp from the year/month
# columns, drop R_F, R_MKT and year, strip the "R_" prefix and lower-case the
# remaining labels, restrict to [start_date, end_date], and divide the four
# factor columns by 100 (presumably percent -> decimal; confirm against the
# source file).
factors_q_monthly = (pd.read_csv(factors_q_monthly_link)
    .assign(
        month=lambda x: (
            pd.to_datetime(x["year"].astype(str) + "-" +
                x["month"].astype(str) + "-01")
        )
    )
    .drop(columns=["R_F", "R_MKT", "year"])
    .rename(columns=lambda x: x.replace("R_", "").lower())
    .query(f"month >= '{start_date}' and month <= '{end_date}'")
    .assign(
        # `col=col` freezes the column name for each lambda. Without it every
        # lambda looks up `col` only when pandas calls it, i.e. after the
        # comprehension has finished, so all four columns would be filled
        # with eg / 100.
        **{
            col: (lambda x, col=col: x[col]/100)
            for col in ["me", "ia", "roe", "eg"]
        }
    )
)

# [Macroeconomic Predictors](https://www.tidy-finance.org/python/accessing-and-managing-financial-data.html#macroeconomic-predictors)

In [55]:
# Assemble the CSV-export URL of the Google Sheet from its id and sheet name.
sheet_id = "1g4LOaRj4TvwJr9RIaA_nwrXXWTOy46bP"
sheet_name = "macro_predictors.xlsx"
macro_predictors_link = (
    "https://docs.google.com/spreadsheets/d/"
    + sheet_id
    + "/gviz/tq?tqx=out:csv&sheet="
    + sheet_name
)

In [56]:
from pandas import DataFrame


# Load the macro predictor sheet (numbers use "," as thousands separator),
# derive the log-ratio and spread predictors, keep the selected columns,
# restrict to [start_date, end_date] and drop rows with missing values.
macro_predictors: DataFrame = (
    pd.read_csv(macro_predictors_link, thousands=",")
    .assign(
        month=lambda x: pd.to_datetime(x["yyyymm"], format="%Y%m"),
        # Dividend-price ratio: log dividends minus log of the current index.
        dp=lambda x: np.log(x["D12"])-np.log(x["Index"]),
        # Dividend yield: log dividends minus log of the *lagged* index level.
        # Lagging D12 instead would give log dividend growth, not a yield.
        dy=lambda x: np.log(x["D12"])-np.log(x["Index"].shift(1)),
        # Earnings-price ratio.
        ep=lambda x: np.log(x["E12"])-np.log(x["Index"]),
        # Dividend-payout ratio.
        de=lambda x: np.log(x["D12"])-np.log(x["E12"]),
        # Spread between the `lty` and `tbl` columns.
        tms=lambda x: x["lty"]-x["tbl"],
        # Spread between the `BAA` and `AAA` columns.
        dfy=lambda x: x["BAA"]-x["AAA"]
    )
    .rename(columns={"b/m": "bm"})
    .get(["month", "dp", "dy", "ep", "de", "svar", "bm",
          "ntis", "tbl", "lty", "ltr", "tms", "dfy", "infl"])
    .query("month >= @start_date and month <= @end_date")
    .dropna()
)

# [Other Macroeconomic Data](https://www.tidy-finance.org/python/accessing-and-managing-financial-data.html#other-macroeconomic-data)

In [58]:
# Download the FRED series "CPIAUCNS", expose its index as a `month` column,
# rename the value column to `cpi`, and rescale it so that the last
# observation in the sample equals 1.
cpi_monthly = (
    pdr.DataReader("CPIAUCNS", "fred", start=start_date, end=end_date)
    .rename_axis("month")
    .reset_index()
    .rename(columns={"CPIAUCNS": "cpi"})
    .assign(cpi=lambda frame: frame["cpi"].div(frame["cpi"].iloc[-1]))
)