# wrds_compa

> WRDS Compustat Annual

In [None]:
#| default_exp wrds_compa

In [None]:
#|exports
from __future__ import annotations
from typing import List, Dict, Tuple, Callable
import os

import pandas as pd
import numpy as np
import wrds 

import pandasmore as pdm
from finsets import wrds_utils, wrds_links

In [None]:
#| export
def common_raw_vars():
    """Return a new list of the raw variable names that `download` requests by default."""
    default_names = [
        'datadate', 'gvkey', 'cusip', 'cik', 'tic', 'fyear', 'fyr', 'naicsh',
        'sich', 'exchg', 'lt', 'at', 'txditc', 'pstkl', 'pstkrv', 'pstk',
        'csho', 'ajex', 'rdip', 'act', 'dvc', 'xad', 'seq', 'che',
        'lct', 'dlc', 'ib', 'dvp', 'txdi', 'dp', 'txp', 'oancf',
        'ivncf', 'fincf', 'dltt', 'mib', 'ceq', 'invt', 'cogs', 'revt',
        'sale', 'capx', 'xrd', 'txdb', 'prcc_f', 'sstk', 'prstkc', 'dltis',
        'dltr', 'emp', 'dd1', 'ppegt', 'ppent', 'xint', 'txt', 'sppe',
        'gdwl', 'xrent', 're', 'dvpsx_f', 'tstk', 'wcap', 'rect', 'xsga',
        'aqc', 'oibdp', 'dpact', 'fic', 'ni', 'ivao', 'ivst', 'dv',
        'intan', 'pi', 'txfo', 'pifo', 'xpp', 'drc', 'drlt', 'ap',
        'xacc', 'itcb',
    ]
    # The literal is built inside the function, so every call hands back an independent list.
    return default_names

In [None]:
# Show the default variable list returned by `common_raw_vars`
print(common_raw_vars())

['datadate', 'gvkey', 'cusip', 'cik', 'tic', 'fyear', 'fyr', 'naicsh', 'sich', 'exchg', 'lt', 'at', 'txditc', 'pstkl', 'pstkrv', 'pstk', 'csho', 'ajex', 'rdip', 'act', 'dvc', 'xad', 'seq', 'che', 'lct', 'dlc', 'ib', 'dvp', 'txdi', 'dp', 'txp', 'oancf', 'ivncf', 'fincf', 'dltt', 'mib', 'ceq', 'invt', 'cogs', 'revt', 'sale', 'capx', 'xrd', 'txdb', 'prcc_f', 'sstk', 'prstkc', 'dltis', 'dltr', 'emp', 'dd1', 'ppegt', 'ppent', 'xint', 'txt', 'sppe', 'gdwl', 'xrent', 're', 'dvpsx_f', 'tstk', 'wcap', 'rect', 'xsga', 'aqc', 'oibdp', 'dpact', 'fic', 'ni', 'ivao', 'ivst', 'dv', 'intan', 'pi', 'txfo', 'pifo', 'xpp', 'drc', 'drlt', 'ap', 'xacc', 'itcb']


In [None]:
#|export
def download(vars: List[str]=common_raw_vars(), # Variables to download; 'datadate' and 'gvkey' are always included. None means `common_raw_vars()`
             library: str='comp.funda', # WRDS Compustat library (must start with 'comp.')
             wrds_username: str=None, #If None, looks for WRDS_USERNAME with `os.getenv`, then prompts you if needed
             ) -> pd.DataFrame:
    """Downloads `vars` from WRDS `library`.

    'datadate' and 'gvkey' are always selected first, followed by the remaining
    names in `vars` in their given order, each name appearing only once. The query
    keeps only rows matching the fixed `indfmt`/`datafmt`/`popsrc`/`consol` filter
    below and is executed through `wrds_utils.download`, whose result is returned.
    """

    if vars is None:
        vars = common_raw_vars()
    elif isinstance(vars, str):
        # A bare string would otherwise be iterated character by character.
        vars = [vars]

    always_get_these = ['datadate', 'gvkey']
    # dict.fromkeys keeps first-seen order while dropping repeats, so a name listed
    # twice (or one of the always-included keys) is not selected twice in the SQL.
    columns = list(dict.fromkeys(always_get_these + list(vars)))
    sql_string=f"""select {','.join(columns)} from {library}
                        where indfmt='INDL' and datafmt='STD' and popsrc='D' and consol='C'"""
    
    return wrds_utils.download(sql_string, wrds_username)

In [None]:
#| eval: false
# Request a single variable; `download` adds 'datadate' and 'gvkey' on its own
raw = download(vars=['at'])

Loading library list...
Done


In [None]:
#| eval: false
#| hide
# Preview the first rows of the downloaded data
raw.head()

Unnamed: 0,datadate,gvkey,at
0,1961-12-31,1000,
1,1962-12-31,1000,
2,1963-12-31,1000,
3,1964-12-31,1000,1.416
4,1965-12-31,1000,2.31


In [None]:
#| export
def clean(df: pd.DataFrame=None, # Data with a `gvkey` and `datadate` column. If None, uses the result of `download()` with its default arguments
          ) -> pd.DataFrame:
    """Adds `permno` to `df` and sets it up as an annual panel.

    `df` is first passed through `wrds_links.merge_permno_into_gvkey`, then through
    `pdm.setup_panel` with 'permno' as the panel identifier, 'datadate' as the
    time variable and yearly frequency (`freq='Y'`). The resulting frame is returned.
    """

    # Without an explicit default, calling `clean()` with no arguments raised
    # TypeError before this documented fallback could ever run.
    if df is None:
        df = download()
    df = wrds_links.merge_permno_into_gvkey(dset_using_gvkey=df)
    df = pdm.setup_panel(df, panel_ids='permno', time_var='datadate', freq='Y')
    return df 

In [None]:
#| eval: false
# Merge in permno and build the yearly panel from the sample downloaded above
df = clean(raw)

Loading library list...
Done


In [None]:
#| eval: false
#| hide
# Display the cleaned panel
df 

Unnamed: 0_level_0,Unnamed: 1_level_0,datadate,dtdate,gvkey,at
permno,Ydate,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
10000,1986,1986-10-31,1986-10-31,013007,2.115
10001,1986,1986-06-30,1986-06-30,012994,12.242
10001,1987,1987-06-30,1987-06-30,012994,11.771
10001,1988,1988-06-30,1988-06-30,012994,11.735
10001,1989,1989-06-30,1989-06-30,012994,18.565
...,...,...,...,...,...
93436,2018,2018-12-31,2018-12-31,184996,29739.614
93436,2019,2019-12-31,2019-12-31,184996,34309.000
93436,2020,2020-12-31,2020-12-31,184996,52148.000
93436,2021,2021-12-31,2021-12-31,184996,62131.000


In [None]:
#| hide
# Run nbdev's export step for this notebook
import nbdev; nbdev.nbdev_export()