In [1]:
import pandas as pd
import numpy as np
import sqlite3
import statsmodels.formula.api as smf
import wrds
from regtabletotext import prettify_result
from dotenv import load_dotenv

from plotnine import *
from mizani.formatters import comma_format, percent_format
from datetime import datetime

In [2]:
from sqlalchemy import create_engine
from sqlalchemy.engine import URL
import os

# Pull WRDS_USER / WRDS_PASSWORD from a local .env file into the environment.
load_dotenv()

# Fail fast with a readable message instead of silently building a URL that
# contains the literal text "None:None" when a variable is missing.
wrds_user = os.getenv("WRDS_USER")
wrds_password = os.getenv("WRDS_PASSWORD")
if not wrds_user or not wrds_password:
    raise RuntimeError(
        "WRDS_USER and WRDS_PASSWORD must be set in the environment "
        "(for example via a .env file) before connecting to WRDS."
    )

# URL.create() escapes special characters (e.g. '@', ':', '/') in the
# credentials, which a hand-built connection string would mis-parse.
connection_url = URL.create(
    drivername="postgresql+psycopg2",
    username=wrds_user,
    password=wrds_password,
    host="wrds-pgdata.wharton.upenn.edu",
    port=9737,
    database="wrds",
)

# Kept as a plain string for backward compatibility with any code that still
# reads `connection_string`; the credentials in it are now properly escaped.
connection_string = connection_url.render_as_string(hide_password=False)

# NOTE(review): this rebinds `wrds`, shadowing the `wrds` package imported in
# the first cell. The name is kept because later cells pass `con=wrds`.
# pool_pre_ping=True makes the pool test a connection before handing it out.
wrds = create_engine(connection_url, pool_pre_ping=True)

In [3]:
# Sample window for the data pull, written as month/day/year strings.
# Both bounds are interpolated verbatim into the SQL date filter.
start_date, end_date = "01/01/2020", "12/31/2022"

In [11]:
# SQL that reads stock-level rows from crsp.dsf_v2 (alias `msf`) and attaches
# the crsp.stksecurityinfohist record (alias `ssih`) whose
# [secinfostartdt, secinfoenddt] interval contains the observation date.
# Rows are limited to the start_date..end_date window and to the share type,
# security type, issuer, exchange and trading-status codes listed below.
# NOTE(review): the names say "monthly", but the dsf_v2 table and dly* columns
# look like daily observations -- confirm and consider renaming notebook-wide.
crsp_monthly_query = (
    "SELECT msf.permno, "
    "date_trunc('day', msf.dlycaldt)::date AS date, "
    "msf.dlyret AS ret, msf.shrout, msf.dlyprc AS altprc, "
    "ssih.ticker, ssih.primaryexch, ssih.siccd "
    "FROM crsp.dsf_v2 AS msf "
    "INNER JOIN crsp.stksecurityinfohist AS ssih "
    "ON msf.permno = ssih.permno "
    "AND ssih.secinfostartdt <= msf.dlycaldt "
    "AND msf.dlycaldt <= ssih.secinfoenddt "
    f"WHERE msf.dlycaldt BETWEEN '{start_date}' AND '{end_date}' "
    "AND ssih.sharetype = 'NS' "
    "AND ssih.securitytype = 'EQTY' "
    "AND ssih.securitysubtype = 'COM' "
    "AND ssih.usincflg = 'Y' "
    "AND ssih.issuertype in ('ACOR', 'CORP') "
    "AND ssih.primaryexch in ('N', 'A', 'Q') "
    "AND ssih.conditionaltype in ('RW', 'NW') "
    "AND ssih.tradingstatusflg = 'A'"
)

# Execute the query on the `wrds` engine; permno and siccd are cast to int and
# the `date` column is parsed into datetimes.
crsp_monthly = pd.read_sql_query(
    sql=crsp_monthly_query,
    con=wrds,
    dtype={"permno": int, "siccd": int},
    parse_dates={"date"},
)

# Scale shrout by 1,000 (presumably the source reports thousands of shares --
# TODO confirm against the CRSP documentation).
crsp_monthly = crsp_monthly.assign(shrout=crsp_monthly["shrout"] * 1000)

In [12]:
# Add `mktcap` = shrout * altprc / 1,000,000. Exact zeros are replaced with
# NaN so that they are treated as missing rather than as a real value.
crsp_monthly = crsp_monthly.assign(
    mktcap=lambda df: (df["shrout"] * df["altprc"] / 1000000).replace(0, np.nan)
)

In [13]:
# Display the assembled frame as a sanity check; the rendered output is
# abbreviated by pandas, with the middle rows elided.
crsp_monthly

Unnamed: 0,permno,date,ret,shrout,altprc,ticker,primaryexch,siccd,mktcap
0,10032,2020-01-02,0.003769,29247000,77.23,PLXS,Q,3670,2258.74581
1,10026,2020-01-02,-0.014056,18900000,181.68,JJSF,Q,2052,3433.75200
2,10026,2020-01-03,0.017779,18900000,184.91,JJSF,Q,2052,3494.79900
3,10026,2020-01-06,0.000865,18900000,185.07,JJSF,Q,2052,3497.82300
4,10026,2020-01-07,-0.011023,18900000,183.03,JJSF,Q,2052,3459.26700
...,...,...,...,...,...,...,...,...,...
3051492,93436,2022-12-23,-0.017551,3157752000,123.15,TSLA,Q,9999,388877.15880
3051493,93436,2022-12-27,-0.114089,3157752000,109.10,TSLA,Q,9999,344510.74320
3051494,93436,2022-12-28,0.033089,3157752000,112.71,TSLA,Q,9999,355910.22792
3051495,93436,2022-12-29,0.080827,3157752000,121.82,TSLA,Q,9999,384677.34864


In [14]:
# sqlite3.connect() creates the database file on demand but not a missing
# parent folder, so create "data/" first to keep a fresh checkout from failing
# with "unable to open database file".
os.makedirs("data", exist_ok=True)

# Open (or create) the local SQLite database that stores the downloaded data.
# NOTE(review): the connection is left open here in case later cells reuse
# `tidy_finance`; call tidy_finance.close() once nothing else needs it.
tidy_finance = sqlite3.connect(database="data/crsp.db.sqlite")

# Write the frame to the "crsp_daily" table, replacing any existing table and
# omitting the DataFrame index. The cell output is to_sql's return value.
(crsp_monthly
  .to_sql(name="crsp_daily",
          con=tidy_finance,
          if_exists="replace",
          index=False)
)

3051497