# wrds_compa

> Retrieve and process data from WRDS Compustat Annual

Since this is a proprietary dataset, the documentation below cannot show any of the data that is retrieved or generated; only column names are shown.

In [None]:
#| default_exp wrds.compa

In [None]:
#|exports
from __future__ import annotations
from pathlib import Path
from typing import List
import os

import pandas as pd
import numpy as np

import pandasmore as pdm
from finsets.wrds import wrds_api
from finsets import RESOURCES

In [None]:
#| export 
def raw_metadata(rawfile: str|Path=RESOURCES/'compa_variable_descriptions.csv', # location of the raw variable labels file
             ) -> pd.DataFrame:
    "Reads the raw variable descriptions file and returns a table with columns `name`, `label`, `output_of` and `type`"

    def _label_from_row(row):
        # Remove the leading "NAME -- " prefix, then any "(NAME)" occurrence, from the description
        varname = row['Variable Name'].strip()
        without_prefix = row['Description'].replace(varname + ' -- ', '')
        return without_prefix.replace('(' + varname + ')', '')

    raw = pd.read_csv(rawfile)
    raw['output_of'] = 'wrds.compa.clean'
    raw['Variable Label'] = raw.apply(_label_from_row, axis=1)
    # Names are normalised (trimmed, lower case) only after the labels were derived from the raw names
    raw['Variable Name'] = raw['Variable Name'].str.strip().str.lower()

    new_names = {'Variable Name': 'name', 'Variable Label': 'label', 'Type': 'type'}
    selected = raw[['Variable Name', 'Variable Label', 'output_of', 'Type']]
    return selected.rename(columns=new_names)

In [None]:
raw_metadata()

Unnamed: 0,name,label,output_of,type
0,gvkey,Global Company Key,wrds.compa.clean,string
1,conm,Company Name,wrds.compa.clean,string
2,tic,Ticker Symbol,wrds.compa.clean,string
3,cusip,CUSIP,wrds.compa.clean,string
4,cik,CIK Number,wrds.compa.clean,string
...,...,...,...,...
969,prch_c,Price High - Annual - Calendar,wrds.compa.clean,double
970,prch_f,Price High - Annual - Fiscal,wrds.compa.clean,double
971,prcl_c,Price Low - Annual - Calendar,wrds.compa.clean,double
972,prcl_f,Price Low - Annual - Fiscal,wrds.compa.clean,double


The following function gives more detailed metadata but requires connecting to WRDS. If all you want is variable names and labels, then `raw_metadata` is sufficient.

In [None]:
#| export
def raw_metadata_extra(wrds_username: str=None
             ) -> pd.DataFrame:
    "Describes the WRDS `comp.funda` table and left-merges the labels from `raw_metadata` onto it; all columns are returned as strings."

    # Username resolution order: explicit argument, WRDS_USERNAME environment variable, interactive prompt
    if wrds_username is None:
        wrds_username = os.getenv('WRDS_USERNAME')
    if wrds_username is None:
        wrds_username = input("Enter your WRDS username: ")

    with wrds_api.Connection(wrds_username = wrds_username) as db:
        table_info = db.describe_table('comp','funda')
        row_count = db.get_row_count('comp','funda')

    labels = raw_metadata()[['name','label']]
    meta = (table_info[['name','type']]
            .assign(nr_rows=row_count, wrds_library='comp', wrds_table='funda')
            .merge(labels, how='left', on='name')
            .assign(output_of='wrds.compa.download'))

    meta = pdm.order_columns(meta,these_first=['name','label','output_of'])
    return meta.astype('string')

In [None]:
#| eval: false
raw_metadata_extra()

Loading library list...
Done
Approximately 879890 rows in comp.funda.


Unnamed: 0,name,label,output_of,type,nr_rows,wrds_library,wrds_table
0,gvkey,Global Company Key,wrds.compa.download,VARCHAR(6),879890,comp,funda
1,datadate,,wrds.compa.download,DATE,879890,comp,funda
2,fyear,Data Year - Fiscal,wrds.compa.download,DOUBLE_PRECISION,879890,comp,funda
3,indfmt,,wrds.compa.download,VARCHAR(12),879890,comp,funda
4,consol,,wrds.compa.download,VARCHAR(2),879890,comp,funda
...,...,...,...,...,...,...,...
943,au,Auditor,wrds.compa.download,VARCHAR(8),879890,comp,funda
944,auop,Auditor Opinion,wrds.compa.download,VARCHAR(8),879890,comp,funda
945,auopic,Auditor Opinion - Internal Control,wrds.compa.download,VARCHAR(1),879890,comp,funda
946,ceoso,Chief Executive Officer SOX Certification,wrds.compa.download,VARCHAR(1),879890,comp,funda


In [None]:
#| export
def default_raw_vars():
    """Default variables used in `download` if none are specified."""

    # Whitespace-separated so the list is easy to scan and extend; order is preserved by `split`
    names = """
        datadate gvkey cusip cik tic fyear fyr naicsh sich exchg
        lt at txditc pstkl pstkrv pstk csho ajex rdip
        act dvc xad seq che lct dlc ib dvp txdi dp
        txp oancf ivncf fincf dltt mib ceq invt cogs revt
        sale capx xrd txdb prcc_f sstk prstkc dltis dltr emp
        dd1 ppegt ppent xint txt sppe gdwl xrent re dvpsx_f
        tstk wcap rect xsga aqc oibdp dpact fic ni ivao ivst
        dv intan pi txfo pifo xpp drc drlt ap xacc itcb
    """
    return names.split()

In [None]:
print(default_raw_vars())

['datadate', 'gvkey', 'cusip', 'cik', 'tic', 'fyear', 'fyr', 'naicsh', 'sich', 'exchg', 'lt', 'at', 'txditc', 'pstkl', 'pstkrv', 'pstk', 'csho', 'ajex', 'rdip', 'act', 'dvc', 'xad', 'seq', 'che', 'lct', 'dlc', 'ib', 'dvp', 'txdi', 'dp', 'txp', 'oancf', 'ivncf', 'fincf', 'dltt', 'mib', 'ceq', 'invt', 'cogs', 'revt', 'sale', 'capx', 'xrd', 'txdb', 'prcc_f', 'sstk', 'prstkc', 'dltis', 'dltr', 'emp', 'dd1', 'ppegt', 'ppent', 'xint', 'txt', 'sppe', 'gdwl', 'xrent', 're', 'dvpsx_f', 'tstk', 'wcap', 'rect', 'xsga', 'aqc', 'oibdp', 'dpact', 'fic', 'ni', 'ivao', 'ivst', 'dv', 'intan', 'pi', 'txfo', 'pifo', 'xpp', 'drc', 'drlt', 'ap', 'xacc', 'itcb']


In [None]:
#| export
def download(vars: List[str]=None, # If None, downloads `default_raw_vars`; `permno`, `permco`, `iid`, `gvkey` and `datadate` are always included
             wrds_username: str=None, #If None, looks for WRDS_USERNAME with `os.getenv`, then prompts you if needed
             start_date: str="01/01/1900", # Start date in MM/DD/YYYY format
             end_date: str=None #End date in MM/DD/YYYY format; if None, defaults to current date
             ) -> pd.DataFrame:
    """Downloads `vars` from `start_date` to `end_date` from WRDS `comp.funda` library and adds PERMNO and PERMCO as in CCM

    `comp.funda` is inner-joined to `crsp.ccmxpf_lnkhist` on `gvkey`, keeping rows whose `datadate` falls inside
    the link window, with link types 'LU'/'LC', link primacy 'P'/'C', and the filters
    indfmt='INDL', datafmt='STD', popsrc='D', consol='C'.
    """

    if vars is None: vars = default_raw_vars()
    # `gvkey` and `datadate` always lead the selection; `dict.fromkeys` drops repeated names
    # (which would otherwise yield duplicate result columns) while keeping the requested order
    requested = dict.fromkeys(x for x in vars if x not in ('datadate', 'gvkey'))
    columns = ','.join(['a.gvkey', 'a.datadate'] + [f'a.{x}' for x in requested])

    # Column names cannot be bound as query parameters, so they are formatted into the statement;
    # both date bounds are passed as bound parameters rather than spliced into the SQL text
    sql_string=f"""SELECT b.lpermno as permno, b.lpermco as permco, b.liid as iid, {columns}
                    FROM comp.funda AS a
                    INNER JOIN crsp.ccmxpf_lnkhist AS b ON a.gvkey = b.gvkey
                    WHERE datadate BETWEEN b.linkdt AND COALESCE(b.linkenddt, CURRENT_DATE)
                            AND b.linktype IN ('LU','LC') AND b.linkprim IN ('P','C')
                            AND indfmt='INDL' AND datafmt='STD' AND popsrc='D' AND consol='C'
                            AND datadate BETWEEN %(start)s AND COALESCE(%(end)s, CURRENT_DATE)
                """
    return wrds_api.download(sql_string, wrds_username=wrds_username,
                             params={'start': start_date, 'end': end_date})

In [None]:
#| eval: false
raw = download()

Loading library list...
Done


In [None]:
#| eval: false
raw.head(0)

Unnamed: 0,permno,permco,iid,gvkey,datadate,cusip,cik,tic,fyear,fyr,...,intan,pi,txfo,pifo,xpp,drc,drlt,ap,xacc,itcb


In [None]:
#| export
def clean(df: pd.DataFrame=None,        # If None, downloads `vars` using `download` function; else, must contain `permno` and `datadate` columns
          vars: List[str]=None,         # If None, downloads `default_raw_vars`
          wrds_username: str=None,      # If None, looks for WRDS_USERNAME with `os.getenv`, then prompts you if needed
          start_date: str="01/01/1900", # Start date in MM/DD/YYYY format
          end_date: str=None,           # End date. Default is current date          
          clean_kwargs: dict=None,      # Params to pass to `pdm.setup_panel` other than `panel_ids`, `time_var`, and `freq`; None means no extra params
          ) -> pd.DataFrame:
    """Applies `pandasmore.setup_panel` to `df`. If `df` is None, downloads `vars` using `download` function."""

    if df is None: df = download(vars=vars, wrds_username=wrds_username, start_date=start_date, end_date=end_date)
    # A None default avoids sharing one mutable dict object across calls
    extra_kwargs = {} if clean_kwargs is None else clean_kwargs
    # The panel is keyed by `permno` with `datadate` as the time variable at yearly frequency
    return pdm.setup_panel(df, panel_ids='permno', time_var='datadate', freq='Y', **extra_kwargs)

In [None]:
#| eval: false
df = clean(raw)

In [None]:
#| eval: false
df.head(0)

Unnamed: 0_level_0,Unnamed: 1_level_0,datadate,dtdate,permco,iid,gvkey,cusip,cik,tic,fyear,fyr,...,intan,pi,txfo,pifo,xpp,drc,drlt,ap,xacc,itcb
permno,Ydate,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1


In [None]:
#| export
def book_equity(df: pd.DataFrame=None, # If None, downloads (and cleans) only required vars
                add_itcb=False,
                return_metadata: bool=False # If true, just returns metadata dictionary
                ) -> pd.DataFrame:
    """Computes `bookeq`, `shreq` and `pref_stock` from the inputs listed in the metadata.

    `pref_stock` is `pstkrv`, falling back to `pstkl`, then to `pstk` (missing `pstk` counts as 0).
    `shreq` is `seq`, falling back to `ceq + pstk`, then to `at - lt`.
    `bookeq` is `shreq + txditc - pref_stock` (missing `txditc` counts as 0), plus `itcb` when `add_itcb` is set.
    """

    required = ['at', 'lt', 'seq', 'ceq', 'txditc', 'pstk', 'pstkrv', 'pstkl', 'itcb']
    metadata = {'inputs': {'wrds.compa.clean': required},
                'outputs': ['bookeq','shreq','pref_stock'],
                'labels': {'bookeq': 'Book equity', 'shreq': 'Shareholder equity', 'pref_stock': 'Preferred stock'}
    }
    if return_metadata:
        return metadata

    if df is None:
        df = clean(vars=required)
    data = df[required].copy()

    par_value = data['pstk'].fillna(0)
    data['pstk'] = par_value

    # Preferred stock: first available of pstkrv, pstkl, pstk
    pref = np.where(data['pstkrv'].notnull(), data['pstkrv'], data['pstkl'])
    data['pref_stock'] = np.where(pd.isnull(pref), par_value, pref)

    # Shareholder equity: first available of seq, ceq + pstk, at - lt
    equity = np.where(data['seq'].notnull(), data['seq'], data['ceq'] + par_value)
    data['shreq'] = np.where(pd.isnull(equity), data['at'] - data['lt'], equity)

    bookeq = data['shreq'] + data['txditc'].fillna(0) - data['pref_stock']
    if add_itcb:
        bookeq = bookeq + data['itcb'].fillna(0)
    data['bookeq'] = bookeq

    return data[metadata['outputs']].copy()

In [None]:
#| eval: false
book_equity(return_metadata=True)

{'inputs': {'wrds.compa.clean': ['at',
   'lt',
   'seq',
   'ceq',
   'txditc',
   'pstk',
   'pstkrv',
   'pstkl',
   'itcb']},
 'outputs': ['bookeq', 'shreq', 'pref_stock'],
 'labels': {'bookeq': 'Book equity',
  'shreq': 'Shareholder equity',
  'pref_stock': 'Preferred stock'}}

In [None]:
#| eval: false
beq = book_equity(df)

In [None]:
#| eval: false
beq.head(0)

Unnamed: 0_level_0,Unnamed: 1_level_0,bookeq,shreq,pref_stock
permno,Ydate,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


In [None]:
#| eval: false
pdm.wins(beq).describe()

Unnamed: 0,bookeq,shreq,pref_stock
count,298888.0,298888.0,322333.0
mean,920.346,856.861213,12.553519
std,3075.027908,2825.647508,60.847518
min,-84.10013,-72.84664,0.0
25%,15.03,15.344,0.0
50%,73.1975,73.105,0.0
75%,377.767,370.5255,0.0
max,23055.29042,21232.8491,477.1482


In [None]:
#| eval: false
beq_from_scratch = book_equity()

Loading library list...
Done


In [None]:
#| eval: false
beq_from_scratch.head(0)

Unnamed: 0_level_0,Unnamed: 1_level_0,bookeq,shreq,pref_stock
permno,Ydate,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


In [None]:
#| export 
def tobin_q(df: pd.DataFrame=None,      # If None, downloads (and cleans) only required vars
            return_metadata: bool=False # If True, just returns the metadata dictionary
            ) -> pd.DataFrame:
    """Computes `tobinq` as `(at - bookeq + prcc_f*csho) / at`, with `bookeq` taken from `book_equity`.

    Infinite values are replaced with NaN.
    """

    required = ['at', 'lt', 'seq', 'ceq', 'txditc', 'pstk', 'pstkrv', 'pstkl', 'itcb','prcc_f','csho']
    metadata = {'inputs': {'wrds.compa.clean': required},
                'outputs':  ['tobinq'],
                'labels': {'tobinq': 'Tobin Q'}
    }
    if return_metadata:
        return metadata

    if df is None:
        df = clean(vars=required)
    panel = df[required].copy()
    panel = panel.join(book_equity(panel)[['bookeq']])

    market_equity = panel['prcc_f'] * panel['csho']
    panel['tobinq'] = (panel['at'] - panel['bookeq'] + market_equity) / panel['at']

    return panel[metadata['outputs']].replace([np.inf, -np.inf], np.nan)

In [None]:
#| eval: false
tobin_q(return_metadata=True)

{'inputs': {'wrds.compa.clean': ['at',
   'lt',
   'seq',
   'ceq',
   'txditc',
   'pstk',
   'pstkrv',
   'pstkl',
   'itcb',
   'prcc_f',
   'csho']},
 'outputs': ['tobinq'],
 'labels': {'tobinq': 'Tobin Q'}}

In [None]:
#| eval: false
q = tobin_q(df)

In [None]:
#| eval: false
q.head(0)

Unnamed: 0_level_0,Unnamed: 1_level_0,tobinq
permno,Ydate,Unnamed: 2_level_1


In [None]:
#| eval: false
pdm.wins(q).describe()

count    287918.000000
mean          1.839927
std           1.684207
min           0.544699
25%           0.992951
50%           1.242048
75%           1.948118
max          11.301507
Name: tobinq, dtype: float64

In [None]:
#| export
def issuance_vars(df: pd.DataFrame=None,        # If None, downloads (and cleans) only required vars
                    return_metadata: bool=False # If True, just returns the metadata dictionary
                    ) -> pd.DataFrame:
    """Computes equity and debt issuance measures, each scaled by lagged `at`.

    `_cfs` variants use `sstk - prstkc` and `dltis - dltr` (missing values count as 0), `debtiss_bs` uses
    changes in `dltt` and `dlc`, and `_tot` variants use changes in `bookeq`, `re` and `at`.
    Infinite values are replaced with NaN.
    """

    required = ['at', 'lt', 'seq', 'ceq', 'txditc', 'pstk', 'pstkrv', 'pstkl', 'itcb',
                'sstk','prstkc','dltis','dltr', 're', 'dlc','dltt']
    metadata = {'inputs': {'wrds.compa.clean': required},
                'outputs': ['equityiss_tot','equityiss_cfs', 'debtiss_tot', 'debtiss_cfs', 'debtiss_bs'],
                'labels': {'equityiss_tot':'Equity issuance','equityiss_cfs':'Equity issuance', 
                           'debtiss_tot':'Debt issuance', 'debtiss_cfs':'Debt issuance', 'debtiss_bs':'Debt issuance'}
    }
    if return_metadata:
        return metadata

    if df is None:
        df = clean(vars=required)
    panel = df[required].copy()
    panel = panel.join(book_equity(panel)[['bookeq']])

    lagged_assets = pdm.lag(panel['at'])
    bookeq_change = pdm.rdiff(panel['bookeq'])

    # Cash-flow-statement based measures
    net_stock_sales = panel['sstk'].fillna(0) - panel['prstkc'].fillna(0)
    net_debt_sales = panel['dltis'].fillna(0) - panel['dltr'].fillna(0)
    panel['equityiss_cfs'] = net_stock_sales / lagged_assets
    panel['debtiss_cfs'] = net_debt_sales / lagged_assets

    # Balance-sheet based measures
    debt_change = pdm.rdiff(panel['dltt']) + pdm.rdiff(panel['dlc'].fillna(0))
    panel['debtiss_bs'] = debt_change / lagged_assets
    panel['equityiss_tot'] = (bookeq_change - pdm.rdiff(panel['re'])) / lagged_assets
    panel['debtiss_tot'] = (pdm.rdiff(panel['at']) - bookeq_change) / lagged_assets

    return panel[metadata['outputs']].replace([np.inf, -np.inf], np.nan)

In [None]:
#| eval: false
iss = issuance_vars(df)

In [None]:
#| eval: false
pdm.wins(iss).describe()

Unnamed: 0,equityiss_tot,equityiss_cfs,debtiss_tot,debtiss_cfs,debtiss_bs
count,260554.0,274122.0,271723.0,274122.0,272317.0
mean,0.085781,0.048186,0.082399,0.022402,0.039611
std,0.28961,0.207606,0.246443,0.115986,0.161918
min,-0.174221,-0.162377,-0.432127,-0.230464,-0.319204
25%,-0.000409,-4e-06,-0.022582,-0.011087,-0.017209
50%,0.006674,0.0,0.031693,0.0,0.0
75%,0.039231,0.006823,0.119413,0.018571,0.056469
max,1.996432,1.44313,1.450205,0.683632,0.928032


In [None]:
#| export 
def investment_vars(df: pd.DataFrame=None,      # If None, downloads (and cleans) only required vars
                    return_metadata: bool=False # If True, just returns the metadata dictionary
                    ) -> pd.DataFrame:
    """Computes `ppentpch` (`pdm.rpct_change` of `ppent`) and `capx2la` (`capx` over lagged `at`).

    Infinite values are replaced with NaN.
    """

    required = ['ppent','capx','at']
    metadata = {'inputs': {'wrds.compa.clean': required},
                'outputs': ['ppentpch','capx2la'],
                'labels': {'ppentpch':'Pct change in net PPE','capx2la': 'CAPX to lagged assets'}
    }
    if return_metadata:
        return metadata

    if df is None:
        df = clean(vars=required)
    panel = df[required].copy()

    lagged_assets = pdm.lag(panel['at'])
    panel['ppentpch'] = pdm.rpct_change(panel['ppent'])
    panel['capx2la'] = panel['capx'] / lagged_assets

    return panel[metadata['outputs']].replace([np.inf, -np.inf], np.nan)

In [None]:
#| eval: false
inv = investment_vars(df)

In [None]:
#| eval: false
pdm.wins(inv).describe()

Unnamed: 0,ppentpch,capx2la
count,260899.0,255113.0
mean,0.19299,0.068316
std,0.63284,0.088292
min,-0.722275,0.0
25%,-0.045872,0.01407
50%,0.052463,0.040548
75%,0.214977,0.086128
max,4.278186,0.5281


In [None]:
#| export 
def profitability_vars(df: pd.DataFrame=None,      # If None, downloads (and cleans) only required vars
                        return_metadata: bool=False # If True, just returns the metadata dictionary
                        ) -> pd.DataFrame:
    """Computes `roa` as `ib / at`; infinite values are replaced with NaN."""

    required = ['ib','at']
    metadata = {'inputs': {'wrds.compa.clean': required},
                'outputs': ['roa'],
                'labels': {'roa':'Return on assets'}
    }
    if return_metadata:
        return metadata

    if df is None:
        df = clean(vars=required)
    panel = df[required].copy()

    panel['roa'] = panel['ib'].div(panel['at'])

    return panel[metadata['outputs']].replace([np.inf, -np.inf], np.nan)

In [None]:
#| eval: false
prof = profitability_vars(df)

In [None]:
#| eval: false
pdm.wins(prof).describe()

count    300217.000000
mean         -0.031146
std           0.235988
min          -1.375179
25%          -0.014638
50%           0.027369
75%           0.066788
max           0.255064
Name: roa, dtype: float64

In [None]:
#| export 
def cashflow_vars(df: pd.DataFrame=None,      # If None, downloads (and cleans) only required vars
                        return_metadata: bool=False # If True, just returns the metadata dictionary
                        ) -> pd.DataFrame:
    """Computes operating cash flow measures scaled by lagged `at`.

    `cflow2la_is` is `(ib + dp)`, `cflow2la_cfs` is `oancf`, both over lagged `at`; `cflow2la_full` uses
    `cflow2la_is` for rows whose `dtdate` year is before 1987 and `cflow2la_cfs` otherwise.
    Infinite values are replaced with NaN.
    """

    metadata = {'inputs': {'wrds.compa.clean': ['dtdate','oancf','ib','dp','at']},
                'outputs': ['cflow2la_is', 'cflow2la_cfs', 'cflow2la_full'],
                'labels': {'cflow2la_is':'Operating cash flows to lagged assets', 
                           'cflow2la_cfs':'Operating cash flows to lagged assets', 
                           'cflow2la_full':'Operating cash flows to lagged assets'}
    }      
    if return_metadata: return metadata

    reqs = metadata['inputs']['wrds.compa.clean']
    # `dtdate` appears in the output of `clean` but not in the raw download, so it must not be
    # requested from WRDS (it would be sent as column `a.dtdate` in the query)
    if df is None: df = clean(vars=[v for v in reqs if v != 'dtdate'])
    df = df[reqs].copy()

    lagged_assets = pdm.lag(df['at'])
    df['cflow2la_is'] = (df['ib']+df['dp']) / lagged_assets
    df['cflow2la_cfs'] = df['oancf'] / lagged_assets
    df['cflow2la_full'] = np.where(df['dtdate'].dt.year<1987, df['cflow2la_is'], df['cflow2la_cfs'])

    df = df.replace([np.inf, -np.inf], np.nan)    
    return df[metadata['outputs']].copy()

In [None]:
#| eval: false
cflow = cashflow_vars(df)

In [None]:
#| eval: false
pdm.wins(cflow).describe()

Unnamed: 0,cflow2la_is,cflow2la_cfs,cflow2la_full
count,261278.0,178780.0,254169.0
mean,0.03156,0.025371,0.044868
std,0.211337,0.208676,0.183586
min,-1.058468,-0.984805,-0.861444
25%,0.010214,-0.00229,0.012644
50%,0.07263,0.060724,0.074543
75%,0.128312,0.124443,0.130926
max,0.428791,0.442324,0.417509


In [None]:
#| export 
def liquidity_vars(df: pd.DataFrame=None,      # If None, downloads (and cleans) only required vars
                        return_metadata: bool=False # If True, just returns the metadata dictionary
                        ) -> pd.DataFrame:
    """Computes `cash2a` as `che / at`; infinite values are replaced with NaN."""

    required = ['che','at']
    metadata = {'inputs': {'wrds.compa.clean': required},
                'outputs': ['cash2a'],
                'labels': {'cash2a':'Cash holdings to assets'}
    }
    if return_metadata:
        return metadata

    if df is None:
        df = clean(vars=required)
    panel = df[required].copy()

    panel['cash2a'] = panel['che'].div(panel['at'])

    return panel[metadata['outputs']].replace([np.inf, -np.inf], np.nan)

In [None]:
#| eval: false
liq = liquidity_vars(df)

In [None]:
#| eval: false
pdm.wins(liq).describe()

count    298095.000000
mean          0.165121
std           0.209391
min           0.000340
25%           0.027438
50%           0.078031
75%           0.212501
max           0.925255
Name: cash2a, dtype: float64

In [None]:
#| export 
def leverage_vars(df: pd.DataFrame=None,      # If None, downloads (and cleans) only required vars
                        return_metadata: bool=False # If True, just returns the metadata dictionary
                        ) -> pd.DataFrame:
    """Computes `booklev` as `(dltt + dlc) / at`, limited to the interval [0, 1]."""

    required = ['dltt','dlc','at']
    metadata = {'inputs': {'wrds.compa.clean': required},
                'outputs': ['booklev'],
                'labels': {'booklev':'Book leverage'}
    }
    if return_metadata:
        return metadata

    if df is None:
        df = clean(vars=required)
    panel = df[required].copy()

    total_debt = panel['dltt'] + panel['dlc']
    # Values below 0 become 0 and values above 1 become 1 (this includes -inf/+inf); NaN stays NaN
    panel['booklev'] = (total_debt / panel['at']).clip(lower=0, upper=1)

    return panel[metadata['outputs']].replace([np.inf, -np.inf], np.nan)

In [None]:
#| eval: false
lev = leverage_vars(df)

In [None]:
#| eval: false
pdm.wins(lev).describe()

count    296551.000000
mean          0.242475
std           0.217108
min           0.000000
25%           0.053153
50%           0.203276
75%           0.373355
max           0.943807
Name: booklev, dtype: float64

In [None]:
#| export 
def payout_vars(df: pd.DataFrame=None,      # If None, downloads (and cleans) only required vars
                        return_metadata: bool=False # If True, just returns the metadata dictionary
                        ) -> pd.DataFrame:
    """Computes `div2la` (`dvc`) and `rep2la` (`prstkc`), each over lagged `at`, with missing numerators counted as 0.

    Infinite values are replaced with NaN.
    """

    required = ['dvc','prstkc','at']
    metadata = {'inputs': {'wrds.compa.clean': required},
                'outputs': ['div2la','rep2la'],
                'labels': {'div2la': 'Dividends to lagged assets',
                           'rep2la': 'Repurchases to lagged assets'}
    }
    if return_metadata:
        return metadata

    if df is None:
        df = clean(vars=required)
    panel = df[required].copy()

    lagged_assets = pdm.lag(panel['at'])
    panel['div2la'] = panel['dvc'].fillna(0) / lagged_assets
    panel['rep2la'] = panel['prstkc'].fillna(0) / lagged_assets

    return panel[metadata['outputs']].replace([np.inf, -np.inf], np.nan)

In [None]:
#| eval: false
payout = payout_vars(df)

In [None]:
#| eval: false
pdm.wins(payout).describe()

Unnamed: 0,div2la,rep2la
count,274122.0,274122.0
mean,0.012562,0.00971
std,0.023415,0.029295
min,0.0,0.0
25%,0.0,0.0
50%,0.0,0.0
75%,0.01741,0.002073
max,0.140852,0.189866


In [None]:
#| hide
import nbdev; nbdev.nbdev_export()