In [19]:
import pandas as pd
import numpy as np
import sqlite3
import statsmodels.formula.api as smf
import wrds
from regtabletotext import prettify_result
from dotenv import load_dotenv

from plotnine import *
from mizani.formatters import comma_format, percent_format
from datetime import datetime

In [20]:
import os
from urllib.parse import quote

from sqlalchemy import create_engine

# Load variables from a local .env file into the process environment so the
# WRDS credentials below never have to be written into the notebook.
load_dotenv()

# Percent-encode the credentials before embedding them in the database URL:
# a raw "@", ":", "/" or "%" in the user name or password would otherwise be
# read as URL structure. quote(..., safe="") is used so that spaces become
# "%20" rather than "+". Unset variables fall back to an empty string
# instead of being rendered as the literal text "None".
connection_string = (
  "postgresql+psycopg2://"
  f"{quote(os.getenv('WRDS_USER', ''), safe='')}"
  f":{quote(os.getenv('WRDS_PASSWORD', ''), safe='')}"
  "@wrds-pgdata.wharton.upenn.edu:9737/wrds"
)

# NOTE(review): this rebinds the name `wrds`, shadowing the `wrds` module
# imported in the first cell. The name is kept because other cells may rely
# on it -- confirm before renaming.
# pool_pre_ping=True makes the pool test each connection before handing it out.
wrds = create_engine(connection_string, pool_pre_ping=True)

In [21]:
# Date window (ISO "YYYY-MM-DD" strings) handed as start/end to the data
# requests in the cells that follow.
start_date, end_date = "1960-01-01", "2023-12-31"

In [22]:
import pandas_datareader as pdr

In [23]:
# Request the "F-F_Research_Data_Factors" dataset from the famafrench source
# for the configured window; element 0 of the result is kept as the raw table.
factors_ff3_monthly_raw = pdr.DataReader(
  name="F-F_Research_Data_Factors",
  data_source="famafrench",
  start=start_date,
  end=end_date,
)[0]

# Tidy the raw table:
#   1. scale every value by 1/100 (presumably percent -> decimal; confirm
#      against the data source),
#   2. move the index into a "date" column and re-parse it via its string form,
#   3. lower-case the column names and rename "mkt-rf" to "mkt_excess".
factors_ff3_monthly = (
  (factors_ff3_monthly_raw / 100)
  .reset_index(names="date")
  .assign(date=lambda df: pd.to_datetime(df["date"].astype(str)))
  .rename(columns=str.lower)
  .rename(columns={"mkt-rf": "mkt_excess"})
)



In [24]:
# Request the "F-F_Research_Data_5_Factors_2x3" dataset from the famafrench
# source for the configured window; element 0 of the result is the raw table.
factors_ff5_monthly_raw = pdr.DataReader(
  name="F-F_Research_Data_5_Factors_2x3",
  data_source="famafrench",
  start=start_date,
  end=end_date,
)[0]

# Same clean-up as the other monthly tables: scale by 1/100, expose the index
# as a re-parsed "date" column, lower-case the headers, and rename "mkt-rf"
# to "mkt_excess".
factors_ff5_monthly = (
  (factors_ff5_monthly_raw / 100)
  .reset_index(names="date")
  .assign(date=lambda df: pd.to_datetime(df["date"].astype(str)))
  .rename(columns=str.lower)
  .rename(columns={"mkt-rf": "mkt_excess"})
)



In [25]:
# Request the "F-F_Research_Data_Factors_daily" dataset from the famafrench
# source for the configured window; element 0 of the result is the raw table.
factors_ff3_daily_raw = pdr.DataReader(
  name="F-F_Research_Data_Factors_daily",
  data_source="famafrench",
  start=start_date,
  end=end_date,
)[0]

# Scale by 1/100, expose the index as a "date" column (left as delivered, with
# no re-parsing step here), lower-case the headers, and rename "mkt-rf" to
# "mkt_excess".
factors_ff3_daily = (
  (factors_ff3_daily_raw / 100)
  .reset_index(names="date")
  .rename(columns=str.lower)
  .rename(columns={"mkt-rf": "mkt_excess"})
)



In [26]:
# Request the "10_Industry_Portfolios" dataset from the famafrench source for
# the configured window; element 0 of the result is kept as the raw table.
industries_ff_monthly_raw = pdr.DataReader(
  name="10_Industry_Portfolios",
  data_source="famafrench",
  start=start_date,
  end=end_date,
)[0]

# Scale by 1/100, expose the index as a re-parsed "date" column, and
# lower-case the column names (no further renaming for this table).
industries_ff_monthly = (
  (industries_ff_monthly_raw / 100)
  .reset_index(names="date")
  .assign(date=lambda df: pd.to_datetime(df["date"].astype(str)))
  .rename(columns=str.lower)
)



In [27]:
# Identifier of the Google Sheets document and the sheet to export from it.
sheet_id = "1bM7vCWd3WOt95Sf9qjLPZjoiafgF_8EG"
sheet_name = "macro_predictors.xlsx"

# Assemble the export URL; the "tqx=out:csv" query parameter requests CSV.
macro_predictors_link = (
  "https://docs.google.com/spreadsheets/d/" + sheet_id
  + "/gviz/tq?tqx=out:csv&sheet=" + sheet_name
)

In [28]:
# Read the exported CSV ("," is treated as a thousands separator), derive the
# predictor columns, keep the selected columns, restrict rows to the
# configured date window, and drop rows with missing values.
macro_predictors = (
  pd.read_csv(macro_predictors_link, thousands=",")
  .assign(
    # "yyyymm" is parsed with the %Y%m format.
    date=lambda df: pd.to_datetime(df["yyyymm"], format="%Y%m"),
    # log(D12) - log(Index)
    dp=lambda df: np.log(df["D12"]) - np.log(df["Index"]),
    # log(D12) - log(previous row's Index)
    dy=lambda df: np.log(df["D12"]) - np.log(df["Index"].shift(1)),
    # log(E12) - log(Index)
    ep=lambda df: np.log(df["E12"]) - np.log(df["Index"]),
    # log(D12) - log(E12)
    de=lambda df: np.log(df["D12"]) - np.log(df["E12"]),
    # lty - tbl
    tms=lambda df: df["lty"] - df["tbl"],
    # BAA - AAA
    dfy=lambda df: df["BAA"] - df["AAA"],
  )
  .rename(columns={"b/m": "bm"})
  .get([
    "date",
    "dp", "dy", "ep", "de",
    "svar", "bm", "ntis",
    "tbl", "lty", "ltr",
    "tms", "dfy", "infl",
  ])
  .query("date >= @start_date and date <= @end_date")
  .dropna()
)

In [29]:
# Fetch the "CPIAUCNS" series from the fred source for the configured window,
# expose the index as a "date" column, rename the series to "cpi", and divide
# it by its last observation so that the final row equals 1.
cpi_monthly = (
  pdr.DataReader(
    name="CPIAUCNS",
    data_source="fred",
    start=start_date,
    end=end_date,
  )
  .reset_index(names="date")
  .rename(columns={"CPIAUCNS": "cpi"})
  .assign(cpi=lambda df: df["cpi"].div(df["cpi"].iloc[-1]))
)

In [30]:
import os
import sqlite3

# sqlite3.connect() will create a missing database file but not a missing
# parent directory, so make sure "data/" exists first. Without this a fresh
# checkout fails with "unable to open database file".
os.makedirs("data", exist_ok=True)

# Connection to the local SQLite database used by the cells that follow.
tidy_finance = sqlite3.connect(database="data/db.sqlite")

In [31]:
# Write the table to SQLite as "factors_ff3_monthly", replacing an existing
# table of that name and omitting the DataFrame index. The call is the cell's
# last expression, so its return value is what the cell displays.
factors_ff3_monthly.to_sql(
  name="factors_ff3_monthly",
  con=tidy_finance,
  if_exists="replace",
  index=False,
)

768

In [32]:
# Read the date and rf columns back from the stored table. parse_dates is
# given as a list (one of the documented forms, alongside dict) rather than
# a set literal; "date" is parsed into datetimes either way.
pd.read_sql_query(
  sql="SELECT date, rf FROM factors_ff3_monthly",
  con=tidy_finance,
  parse_dates=["date"],
)

Unnamed: 0,date,rf
0,1960-01-01,0.0033
1,1960-02-01,0.0029
2,1960-03-01,0.0035
3,1960-04-01,0.0019
4,1960-05-01,0.0027
...,...,...
763,2023-08-01,0.0045
764,2023-09-01,0.0043
765,2023-10-01,0.0047
766,2023-11-01,0.0044


In [33]:
# Map each target table name to the DataFrame that should be stored under it.
data_dict = dict(
  factors_ff5_monthly=factors_ff5_monthly,
  factors_ff3_daily=factors_ff3_daily,
  industries_ff_monthly=industries_ff_monthly,
  macro_predictors=macro_predictors,
  cpi_monthly=cpi_monthly,
)

# Write every frame to SQLite under its key, replacing any existing table of
# the same name and leaving out the DataFrame index.
for table_name, table in data_dict.items():
    table.to_sql(
        name=table_name,
        con=tidy_finance,
        if_exists="replace",
        index=False,
    )

In [34]:
# Run SQLite's VACUUM statement on the database. The trailing semicolon
# keeps the notebook from echoing the returned cursor's repr, which contains
# a memory address that differs on every run.
tidy_finance.execute("VACUUM");

<sqlite3.Cursor at 0x1f9810e60c0>

In [35]:
# Close the SQLite connection held in `tidy_finance`.
tidy_finance.close()