In [2]:
import pandas as pd
import numpy as np
import sqlite3
import statsmodels.formula.api as smf
import wrds
from regtabletotext import prettify_result
from dotenv import load_dotenv

from plotnine import *
from mizani.formatters import comma_format, percent_format
from datetime import datetime

In [3]:
from sqlalchemy import create_engine
from urllib.parse import quote
import os

# Make WRDS_USER / WRDS_PASSWORD available through os.getenv below.
load_dotenv()

wrds_user = os.getenv("WRDS_USER")
wrds_password = os.getenv("WRDS_PASSWORD")

# os.getenv returns None for an unset variable; interpolating that would
# silently build a URL with the literal credentials "None:None" and only fail
# later, at query time, with a confusing authentication error.
if not wrds_user or not wrds_password:
  raise RuntimeError(
    "WRDS_USER and WRDS_PASSWORD must be set (environment or .env file)"
  )

# Percent-encode both credentials: characters such as '@', ':' or '/' inside a
# password would otherwise be parsed as URL delimiters and corrupt the
# host/port part of the connection string. safe="" forces '/' to be encoded too.
connection_string = (
  "postgresql+psycopg2://"
 f"{quote(wrds_user, safe='')}:{quote(wrds_password, safe='')}"
  "@wrds-pgdata.wharton.upenn.edu:9737/wrds"
)

# NOTE: this rebinds the name `wrds`, which the notebook's import cell bound to
# the `wrds` package; from here on `wrds` is the SQLAlchemy engine.
# pool_pre_ping=True makes the pool test a connection before handing it out.
wrds = create_engine(connection_string, pool_pre_ping=True)

In [4]:
# Date bounds as MM/DD/YYYY strings (presumably the sample window used by
# later cells — they are not referenced in the cells shown here).
start_date, end_date = "01/01/1960", "12/31/2023"

In [5]:
# Pull permno <-> gvkey links from crsp.ccmxpf_linktable.
# - Rows are restricted to linktype 'LU'/'LC', linkprim 'P'/'C' and usedflag = 1.
# - A NULL linkenddt is replaced by the current date so that the
#   `date <= linkenddt` filter applied when matching to CRSP keeps such links.
ccmxpf_linktable_query = (
  "SELECT lpermno AS permno, gvkey, linkdt, "
         "COALESCE(linkenddt, CURRENT_DATE) AS linkenddt "
    "FROM crsp.ccmxpf_linktable "
    "WHERE linktype IN ('LU', 'LC') "
          "AND linkprim IN ('P', 'C') "
          "AND usedflag = 1"
)

# parse_dates is given as a list: list and dict are the forms documented for
# read_sql_query, whereas a set is undocumented and has no defined order.
ccmxpf_linktable = pd.read_sql_query(
  sql=ccmxpf_linktable_query,
  con=wrds,
  dtype={"permno": int, "gvkey": str},
  parse_dates=["linkdt", "linkenddt"]
)


In [8]:
# Read the full crsp_monthly table from the local SQLite file, parsing `date`
# as a datetime column.
crsp_db = sqlite3.connect("data/crsp.db.sqlite")
crsp_monthly = pd.read_sql_query(
  sql="SELECT * FROM crsp_monthly",
  con=crsp_db,
  parse_dates=["date"]
)

# Pair every CRSP row with the link-table rows for the same permno, then keep
# only pairs that have a gvkey and whose date falls inside the link's
# [linkdt, linkenddt] interval (both ends inclusive).
ccm_links = (
  crsp_monthly
  .merge(ccmxpf_linktable, on="permno", how="inner")
  .loc[lambda linked: linked["gvkey"].notna()
       & (linked["date"] >= linked["linkdt"])
       & (linked["date"] <= linked["linkenddt"])]
  .get(["permno", "gvkey", "date"])
)

# Attach gvkey to the CRSP panel; the left join keeps rows without a valid
# link, which end up with a missing gvkey.
crsp_monthly = crsp_monthly.merge(
  ccm_links, on=["permno", "date"], how="left"
)

In [9]:
# Persist the linked panel as table `crsp_monthly` in the merged SQLite file,
# replacing any existing table of that name and omitting the DataFrame index.
merged = sqlite3.connect("data/merged.db.sqlite")
crsp_monthly.to_sql(
  name="crsp_monthly",
  con=merged,
  if_exists="replace",
  index=False
)

3378303

In [10]:
# Close both SQLite connections now that reading and writing are finished.
for sqlite_connection in (crsp_db, merged):
  sqlite_connection.close()