In [6]:
import sqlite3
import pandas as pd
import numpy as np
import wrds

In [10]:
from sqlalchemy import create_engine
import os
from urllib.parse import quote_plus
from dotenv import load_dotenv

# Load variables from a local .env file (if present) into the process
# environment so the credentials never have to appear in the notebook.
load_dotenv()

wrds_user = os.getenv("WRDS_USER")
wrds_password = os.getenv("WRDS_PASSWORD")

# Fail fast: without this check a missing variable is interpolated as the
# literal text "None" and the error only surfaces later, at connect time.
if not wrds_user or not wrds_password:
  raise RuntimeError(
    "WRDS_USER and WRDS_PASSWORD must be set in the environment or in a .env file."
  )

# Credentials are percent-encoded because characters such as '@', ':' or '/'
# inside a password would otherwise be parsed as URL delimiters.
connection_string = (
  "postgresql+psycopg2://"
 f"{quote_plus(wrds_user)}:{quote_plus(wrds_password)}"
  "@wrds-pgdata.wharton.upenn.edu:9737/wrds"
)

# NOTE: this rebinds the name `wrds`, shadowing the `wrds` module imported in
# the first cell. The name is kept because the query cell passes it as `con=`.
# pool_pre_ping=True makes the pool test each connection before handing it out.
wrds = create_engine(connection_string, pool_pre_ping=True)

In [8]:
# Sample window as ISO-formatted date strings; both values are interpolated
# into the date filter of the Compustat query.
start_date, end_date = "2019-01-01", "2022-12-31"

In [17]:
# Quarterly fundamentals from comp.fundq, filtered server-side on the format /
# consolidation / currency / source flags and on the [start_date, end_date]
# window (SQL BETWEEN includes both bounds).
compustat_query = (
  "SELECT gvkey, datadate, saleq, cshoq, ceqq, ibq, prccq, rdq, tic, dvy, fyr, fqtr, fyearq "
    "FROM comp.fundq "
    "WHERE indfmt = 'INDL' "
          "AND datafmt = 'STD' "
          "AND consol = 'C' "
          "AND curcdq = 'USD' "
          "AND popsrc = 'D' "
         f"AND datadate BETWEEN '{start_date}' AND '{end_date}'"
)

# `wrds` here is the SQLAlchemy engine created in the connection cell.
compustat = pd.read_sql_query(
  sql=compustat_query,
  con=wrds,
  # Read gvkey as a string rather than letting pandas infer a numeric dtype.
  dtype={"gvkey": str},
  # pandas documents `parse_dates` as a list (or dict) of column names; the
  # previous set literal only worked through an internal fallback.
  parse_dates=["datadate"]
)

In [18]:
# Add the calendar year of `datadate`, order rows chronologically and renumber
# them 0..n-1.
compustat = (compustat
  .assign(year=lambda x: pd.DatetimeIndex(x["datadate"]).year)
  .sort_values("datadate")
  # drop=True discards the old (pre-sort) index. Without it the old index is
  # inserted as a stray `index` column that ends up in the saved table, and
  # re-running this cell keeps adding columns (`level_0`, then an error).
  .reset_index(drop=True)
)

# Persist the quarterly Compustat frame to a local SQLite file, replacing any
# existing `compustatq` table. The DataFrame index is not written.
tidy_finance = sqlite3.connect('data/compustat.db.sqlite')
try:
  compustat.to_sql(
    name="compustatq",
    con=tidy_finance,
    if_exists="replace",
    index=False,
  )
finally:
  # Always release the connection, even if the write raises, so the database
  # file handle is not leaked in the long-lived kernel process.
  tidy_finance.close()