# wrds_utils

> Common utilities needed to download and process data from WRDS

In [None]:
#| default_exp wrds_utils

In [None]:
#| exports
from typing import Sequence
import os, glob 

import pandas as pd
from finsets import wrds2 as wrds  

## Examples of useful features of `wrds` library

In [None]:
# Open a WRDS connection, taking the username from the WRDS_USERNAME environment variable
# (`os.getenv` returns None when the variable is not set).
db = wrds.Connection(wrds_username=os.getenv('WRDS_USERNAME'))

Loading library list...
Done


In [None]:
# Show the first five library names returned by the connection
db.list_libraries()[:5]

['aha_sample', 'ahasamp', 'audit', 'audit_acct_os', 'audit_audit_comp']

In [None]:
# Show the first five table names in the `crsp` library
db.list_tables(library='crsp')[:5]

['acti', 'asia', 'asib', 'asic', 'asio']

In [None]:
# Describe the columns of `comp.funda` (name, nullable, type, comment) and keep the first five rows;
# the call also prints an approximate row count for the table.
db.describe_table(library='comp',table='funda').iloc[:5]

Approximately 879534 rows in comp.funda.


Unnamed: 0,name,nullable,type,comment
0,gvkey,True,VARCHAR(6),
1,datadate,True,DATE,
2,fyear,True,DOUBLE_PRECISION,
3,indfmt,True,VARCHAR(12),
4,consol,True,VARCHAR(2),


In [None]:
# Fetch only the listed columns of `ff.factors_monthly`, limited to 5 observations via `obs`
db.get_table(library='ff', 
             table='factors_monthly',
             columns=['date','mktrf','smb','hml','rf'],
             obs=5)

Unnamed: 0,date,mktrf,smb,hml,rf
0,1926-07-01,0.0296,-0.0256,-0.0243,0.0022
1,1926-08-01,0.0264,-0.0117,0.0382,0.0025
2,1926-09-01,0.0036,-0.014,0.0013,0.0023
3,1926-10-01,-0.0324,-0.0009,0.007,0.0032
4,1926-11-01,0.0253,-0.001,-0.0051,0.0031


In [None]:
# Close the connection opened for the examples above
db.close()

## Key functions used throughout the `wrds_` modules in this package

In [None]:
#| export
def download(sql_string: str=None, # PostgreSQL query to run; required even though it defaults to None
             wrds_username: str=None, #If None, looks for WRDS_USERNAME with `os.getenv`; prompts you if it can't find it
             params: Sequence=None # Params cited in the `sql_string`
             ) -> pd.DataFrame:
    """Downloads data from WRDS using the given PostgreSQL `sql_string`.

    Raises `ValueError` if `sql_string` is None or a blank string. This check
    happens before any username prompt or connection attempt.
    """

    # Fail fast: with no query there is nothing to run, so don't ask the user
    # for credentials or open a connection only to error out afterwards.
    if sql_string is None or (isinstance(sql_string, str) and not sql_string.strip()):
        raise ValueError("`sql_string` must be a non-empty SQL query")

    # Resolve the username: explicit argument, then environment variable, then interactive prompt.
    if wrds_username is None:
        wrds_username = os.getenv('WRDS_USERNAME')
        if wrds_username is None:
            wrds_username = input("Enter your WRDS username: ")

    # Cleanup of the connection is left to `wrds.Connection`'s context-manager exit.
    with wrds.Connection(wrds_username=wrds_username) as db:
        return db.raw_sql(sql=sql_string, params=params)

This function will prompt the user for their WRDS password, unless a `pgpass` file is set up.

In [None]:
# Example: pull every column of `ff.factors_monthly` with a single SQL query
download("SELECT * from ff.factors_monthly")

Loading library list...
Done


Unnamed: 0,date,mktrf,smb,hml,rf,year,month,umd,dateff
0,1926-07-01,0.0296,-0.0256,-0.0243,0.0022,1926.0,7.0,,1926-07-31
1,1926-08-01,0.0264,-0.0117,0.0382,0.0025,1926.0,8.0,,1926-08-31
2,1926-09-01,0.0036,-0.0140,0.0013,0.0023,1926.0,9.0,,1926-09-30
3,1926-10-01,-0.0324,-0.0009,0.0070,0.0032,1926.0,10.0,,1926-10-30
4,1926-11-01,0.0253,-0.0010,-0.0051,0.0031,1926.0,11.0,,1926-11-30
...,...,...,...,...,...,...,...,...,...
1158,2023-01-01,0.0665,0.0502,-0.0405,0.0035,2023.0,1.0,-0.1598,2023-01-31
1159,2023-02-01,-0.0258,0.0121,-0.0078,0.0034,2023.0,2.0,0.0021,2023-02-28
1160,2023-03-01,0.0251,-0.0559,-0.0901,0.0036,2023.0,3.0,-0.0250,2023-03-31
1161,2023-04-01,0.0061,-0.0334,-0.0003,0.0035,2023.0,4.0,0.0165,2023-04-28


In [None]:
#| hide 
# Delete everything matched by '../data/*' (the `*` pattern does not match dot-files).
for leftover in glob.glob('../data/*'):
    os.remove(leftover)

# Opening in 'w' mode creates (or truncates) an empty `.gitkeep` placeholder in the folder.
with open('../data/.gitkeep', 'w'):
    pass

In [None]:
#| hide
# Run nbdev's notebook export step.
import nbdev
nbdev.nbdev_export()