# wrds_crspm

> WRDS CRSP Monthly Stock File

In [None]:
#| default_exp wrds_crspm

In [None]:
#|exports
from __future__ import annotations
from typing import List, Dict, Tuple, Callable
import os

import pandas as pd
import numpy as np
import wrds

import pandasmore as pdm
from finsets import wrds_utils, wrds_links

In [None]:
#| export
def default_raw_vars():
    """Default variables used in `download` if none are specified. Takes about 2 min to download.

    A new list is built on every call, so callers may modify the result freely.
    """
    identifiers = ['permno', 'permco', 'date']
    market_data = ['ret', 'retx', 'shrout', 'prc']
    name_history = ['shrcd', 'exchcd', 'siccd', 'ticker', 'cusip', 'ncusip']
    return [*identifiers, *market_data, *name_history]

In [None]:
print(default_raw_vars())

['permno', 'permco', 'date', 'ret', 'retx', 'shrout', 'prc', 'shrcd', 'exchcd', 'siccd', 'ticker', 'cusip', 'ncusip']


In [None]:
#| export
def download(vars: List[str]=None, # If None, downloads `default_raw_vars`; else `permno`, `permco`, and `date` are added by default
             wrds_username: str=None, #If None, looks for WRDS_USERNAME with `os.getenv`, then prompts you if needed
             start_date: str="01/01/1900", # Start date in MM/DD/YYYY format
             end_date: str=None # End date. Default is current date
             ) -> pd.DataFrame:
    """Downloads `vars` from `start_date` to `end_date` from WRDS crsp.msf and crsp.msenames libraries.

    Each requested variable is taken from `crsp.msf` when that table has a column with that name;
    otherwise it is taken from `crsp.msenames`. Names found in neither table are silently left out
    of the query. Raises `RuntimeError` if the table descriptions cannot be retrieved from WRDS.
    """

    if wrds_username is None:
        wrds_username = os.getenv('WRDS_USERNAME')
        if wrds_username is None: wrds_username = input("Enter your WRDS username: ")

    if vars is None: vars = default_raw_vars()
    # Identifier columns always come first, without duplicates.
    id_vars = ['permno','permco','date']
    vars = id_vars + [x for x in vars if x not in id_vars]

    # Look up the columns of both tables so each requested variable can be routed to `a` (msf) or `b` (msenames)
    db = wrds.Connection(wrds_username = wrds_username)
    try:
        all_msf_vars = set(db.describe_table('crsp','msf').name)
        all_mse_vars = set(db.describe_table('crsp','msenames').name)
    except Exception as err:
        # Keep the original error attached as the cause so the failure can be diagnosed.
        raise RuntimeError("Something went wrong with a WRDS database connection") from err
    finally: db.close()

    # Variables present in both tables are read from msf only.
    my_msf_vars = [f'a.{x}' for x in vars if x in all_msf_vars]
    my_mse_vars = [f'b.{x}' for x in vars if (x in all_mse_vars) and (x not in all_msf_vars)]
    # Only names matched against the table descriptions reach the SQL text.
    varlist_string = ','.join(my_msf_vars + my_mse_vars)

    # Both dates are passed as bound parameters rather than formatted into the query text.
    sql_string = f"""SELECT {varlist_string} 
                        FROM crsp.msf AS a 
                        LEFT JOIN crsp.msenames AS b
                            ON a.permno=b.permno AND b.namedt<=a.date AND a.date<=b.nameendt
                            WHERE a.date BETWEEN %(start)s AND COALESCE(%(end)s, CURRENT_DATE) 
                """
    return wrds_utils.download(sql_string, wrds_username=wrds_username,
                               params={'start':start_date, 'end':end_date})

In [None]:
#| eval: false
# Download the default variables (`default_raw_vars`) from 01/01/2022 up to the current date
raw = download(start_date='01/01/2022')

Loading library list...
Done
Approximately 4922867 rows in crsp.msf.
Approximately 111623 rows in crsp.msenames.
Loading library list...
Done


In [None]:
#| eval: false
raw

Unnamed: 0,permno,permco,date,ret,retx,shrout,prc,cusip,shrcd,exchcd,siccd,ticker,ncusip
0,10026.0,7976.0,2022-01-31,-0.039694,-0.039694,19110.0,151.690002,46603210,11.0,3.0,2052.0,JJSF,46603210
1,10028.0,7978.0,2022-01-31,-0.024570,-0.024570,26925.0,3.970000,29402E10,11.0,2.0,5094.0,ELA,29402E10
2,10032.0,7980.0,2022-01-31,-0.191574,-0.191574,27997.0,77.519997,72913210,11.0,3.0,3670.0,PLXS,72913210
3,10044.0,7992.0,2022-01-31,0.007898,0.007898,6180.0,7.912000,77467X10,11.0,3.0,2060.0,RMCF,77467X10
4,10051.0,7999.0,2022-01-31,0.000000,0.000000,38709.0,18.129999,41043F20,11.0,1.0,4813.0,HNGR,41043F20
...,...,...,...,...,...,...,...,...,...,...,...,...,...
115613,93426.0,53443.0,2022-12-30,-0.047326,-0.047326,12551.0,38.650002,92835K10,11.0,1.0,3676.0,VPG,92835K10
115614,93427.0,53445.0,2022-12-30,-0.038903,-0.038903,36587.0,128.220001,G3323L10,12.0,1.0,3827.0,FN,G3323L10
115615,93429.0,53447.0,2022-12-30,-0.010801,-0.010801,106082.0,125.470001,12503M10,11.0,5.0,6211.0,CBOE,12503M10
115616,93434.0,53427.0,2022-12-30,0.342342,0.342342,42623.0,1.490000,78513510,11.0,3.0,9999.0,SANW,78513510


In [None]:
#| export
def clean(df: pd.DataFrame=None, # If None, downloads `vars` using `download` function; else, must contain `permno` and `date` columns
          vars: List[str]=None, # If None, downloads `default_raw_vars`
          wrds_username: str=None, #If None, looks for WRDS_USERNAME with `os.getenv`, then prompts you if needed
          start_date: str="01/01/1900", # Start date in MM/DD/YYYY format
          end_date: str=None, # End date. Default is current date
          clean_kwargs: dict=None, # Params to pass to `pdm.setup_panel` other than `panel_ids`, `time_var`, and `freq`; None means no extra params
          ) -> pd.DataFrame:
    """Applies `pandasmore.setup_panel` to `df`. If `df` is None, downloads `vars` using `download` function.

    `vars`, `wrds_username`, `start_date` and `end_date` are only used when `df` is None.
    """

    # Use a fresh dict per call instead of a shared mutable default.
    if clean_kwargs is None: clean_kwargs = {}
    if df is None: df = download(vars=vars, wrds_username=wrds_username, start_date=start_date, end_date=end_date)
    return pdm.setup_panel(df, panel_ids='permno', time_var='date', freq='M', **clean_kwargs)

In [None]:
#| eval: false
# `df` is supplied, so nothing is downloaded; `raw` is passed straight to `pdm.setup_panel`
df = clean(df=raw)

In [None]:
#| eval: false
df.head(0)

Unnamed: 0_level_0,Unnamed: 1_level_0,date,dtdate,permco,ret,retx,shrout,prc,cusip,shrcd,exchcd,siccd,ticker,ncusip
permno,Mdate,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1


In [None]:
#| eval: false
# No `df` is supplied, so `clean` first calls `download` (which adds `permno`, `permco` and `date`
# to the requested variables) for the given date range, then sets up the panel
df = clean(vars=['ret','shrcd','exchcd'], start_date='01/01/2020', end_date='12/31/2020')

Loading library list...
Done
Approximately 4922867 rows in crsp.msf.
Approximately 111623 rows in crsp.msenames.
Loading library list...
Done


In [None]:
#| eval: false
df.head(0)

Unnamed: 0_level_0,Unnamed: 1_level_0,date,dtdate,permco,ret,shrcd,exchcd
permno,Mdate,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1


In [None]:
#| hide
import nbdev; nbdev.nbdev_export()