# core

> Functions that are not specific to individual data sources

These functions are available directly from the `finsets` namespace. For example, to use the `features_metadata` function below, import it with:

```python
from finsets import features_metadata
```


In [None]:
#| default_exp core

In [None]:
#| export
from __future__ import annotations
from typing import Literal
from importlib import import_module
from inspect import signature

import pandas as pd
from  thefuzz import process, fuzz

from fastcore.script import call_parse

from finsets import RESOURCES

In [None]:
#| exports 
# Pickle file holding the combined metadata table: written by `all_metadata`, read by `search`
METADATA_FILE = RESOURCES/'all_metadata.pkl'

In [None]:
#| export
def features_metadata(submodules: list=['wrds', 'papers'] # list of submodules to collect metadata from
                      ) -> pd.DataFrame:
    """Go through `submodules` of `finsets` and collect metadata from all functions that have `return_metadata` parameter

    Every non-underscore attribute of `finsets.<submodule>` is imported as a module, and each
    callable listed in that module's `__all__` whose signature has a `return_metadata`
    parameter is called as `func(return_metadata=True)`. The returned mapping must provide
    the keys `'outputs'`, `'labels'` and `'inputs'`.

    Returns a DataFrame with one row per (output variable, input source) pair and columns:
    `name` (output variable), `label` (`meta['labels'][name]`), `output_of`
    (`<submodule>.<module>.<function>`), `inputs` (comma-joined `meta['inputs'][source]`)
    and `inputs_generated_by` (the `meta['inputs']` key).

    Import errors and errors raised by the metadata functions themselves propagate to the caller.
    """

    columns = ['name','label','output_of','inputs','inputs_generated_by']
    # Accumulate plain dicts and build the frame once; concatenating inside the loop
    # re-copies the whole table for every added row.
    rows = []
    for name in submodules:
        module = import_module(f'finsets.{name}')
        for sub in dir(module):
            if sub.startswith('_'): continue
            submodule = import_module(f'finsets.{name}.{sub}')
            # Modules that do not declare `__all__` export nothing we can inspect.
            for func_name in getattr(submodule, '__all__', ()):
                func = getattr(submodule, func_name)
                if not callable(func): continue
                try:
                    params = signature(func).parameters
                except (TypeError, ValueError):
                    # `inspect.signature` raises these for callables it cannot introspect
                    continue
                if 'return_metadata' not in params: continue
                meta = func(return_metadata=True)
                for var_name in meta['outputs']:
                    for input_name in meta['inputs']:
                        rows.append({'name': var_name,
                                     'label': meta['labels'][var_name],
                                     'output_of': f'{name}.{sub}.{func_name}',
                                     'inputs': ','.join(meta['inputs'][input_name]),
                                     'inputs_generated_by': input_name})
    return pd.DataFrame(rows, columns=columns)

In [None]:
features_metadata()

Unnamed: 0,name,label,output_of,inputs,inputs_generated_by
0,bookeq,Book equity,wrds.compa.book_equity,"at,lt,seq,ceq,txditc,pstk,pstkrv,pstkl,itcb",wrds.compa.clean
1,shreq,Shareholder equity,wrds.compa.book_equity,"at,lt,seq,ceq,txditc,pstk,pstkrv,pstkl,itcb",wrds.compa.clean
2,pref_stock,Preferred stock,wrds.compa.book_equity,"at,lt,seq,ceq,txditc,pstk,pstkrv,pstkl,itcb",wrds.compa.clean
3,tobinq,Tobin Q,wrds.compa.tobin_q,"at,lt,seq,ceq,txditc,pstk,pstkrv,pstkl,itcb,pr...",wrds.compa.clean
4,equityiss_tot,Equity issuance,wrds.compa.issuance_vars,"at,lt,seq,ceq,txditc,pstk,pstkrv,pstkl,itcb,ss...",wrds.compa.clean
5,equityiss_cfs,Equity issuance,wrds.compa.issuance_vars,"at,lt,seq,ceq,txditc,pstk,pstkrv,pstkl,itcb,ss...",wrds.compa.clean
6,debtiss_tot,Debt issuance,wrds.compa.issuance_vars,"at,lt,seq,ceq,txditc,pstk,pstkrv,pstkl,itcb,ss...",wrds.compa.clean
7,debtiss_cfs,Debt issuance,wrds.compa.issuance_vars,"at,lt,seq,ceq,txditc,pstk,pstkrv,pstkl,itcb,ss...",wrds.compa.clean
8,debtiss_bs,Debt issuance,wrds.compa.issuance_vars,"at,lt,seq,ceq,txditc,pstk,pstkrv,pstkl,itcb,ss...",wrds.compa.clean
9,ppentpch,Pct change in net PPE,wrds.compa.investment_vars,"ppent,capx,at",wrds.compa.clean


In [None]:
#| export
def raw_metadata(submodules=['wrds', 'papers'] # list of submodules to collect metadata from
                ) -> pd.DataFrame:
    """Go through `submodules` of `finsets` and collect metadata from `raw_metadata` functions (if present)

    Every non-underscore attribute of `finsets.<submodule>` is imported as a module; when
    that module lists `raw_metadata` in its `__all__`, the function is called without
    arguments and its DataFrame is added to the result.

    Returns the row-wise concatenation of all collected frames with a fresh integer index.
    The columns `name`, `label`, `output_of` and `type` always come first; any extra
    columns supplied by individual modules follow in order of first appearance.
    """

    # The empty seed frame pins the leading column order, even when nothing is collected.
    frames = [pd.DataFrame(columns=['name','label','output_of','type'])]
    for name in submodules:
        module = import_module(f'finsets.{name}')
        for sub in dir(module):
            if sub.startswith('_'): continue
            submodule = import_module(f'finsets.{name}.{sub}')
            # Modules without `__all__` are treated as exporting nothing.
            if 'raw_metadata' in getattr(submodule, '__all__', ()):
                frames.append(submodule.raw_metadata())
    # One concat at the end avoids re-copying the accumulated table on every iteration.
    return pd.concat(frames, ignore_index=True)

In [None]:
raw_metadata()

Loading library list...
Done
Approximately 881742 rows in comp.funda.
Loading library list...
Done
Approximately 4922867 rows in crsp.msf.
Approximately 111623 rows in crsp.msenames.
Loading library list...
Done
Approximately 2750800 rows in wrdsapps_finratio_ibes.firm_ratio_ibes.


Unnamed: 0,name,label,output_of,type,nr_rows,LIBRARY,TABLE,wrds_library,wrds_table,group
0,gvkey,Global Company Key,wrds.compa.download,VARCHAR(6),881742,comp,funda,,,
1,datadate,,wrds.compa.download,DATE,881742,comp,funda,,,
2,fyear,Data Year - Fiscal,wrds.compa.download,DOUBLE_PRECISION,881742,comp,funda,,,
3,indfmt,,wrds.compa.download,VARCHAR(12),881742,comp,funda,,,
4,consol,,wrds.compa.download,VARCHAR(2),881742,comp,funda,,,
...,...,...,...,...,...,...,...,...,...,...
1085,ffi48,,wrds.ratios.download,DOUBLE_PRECISION,2750800,,,wrdsapps_finratio_ibes,firm_ratio_ibes,
1086,ffi49_desc,,wrds.ratios.download,VARCHAR(5),2750800,,,wrdsapps_finratio_ibes,firm_ratio_ibes,
1087,ffi49,,wrds.ratios.download,DOUBLE_PRECISION,2750800,,,wrdsapps_finratio_ibes,firm_ratio_ibes,
1088,ticker,EXCHANGE TICKER SYMBOL - HISTORICAL,wrds.ratios.download,VARCHAR(8),2750800,,,wrdsapps_finratio_ibes,firm_ratio_ibes,ID


In [None]:
#| export
def all_metadata(submodules=['wrds', 'papers'] # list of submodules to collect metadata from
                ) -> pd.DataFrame:
    """Collects `raw_metadata` and `features_metadata` from `submodules` of `finsets`

    The feature rows come first, followed by the raw rows, under a fresh integer index.
    As a side effect the combined table is pickled to `METADATA_FILE`; the same table is returned.
    """

    feature_rows = features_metadata(submodules)
    raw_rows = raw_metadata(submodules)
    combined = pd.concat([feature_rows, raw_rows], ignore_index=True)
    # Persist the table to the metadata pickle before handing it back
    combined.to_pickle(METADATA_FILE)
    return combined

In [None]:
meta = all_metadata()

Loading library list...
Done
Approximately 881742 rows in comp.funda.
Loading library list...
Done
Approximately 4922867 rows in crsp.msf.
Approximately 111623 rows in crsp.msenames.
Loading library list...
Done
Approximately 2750800 rows in wrdsapps_finratio_ibes.firm_ratio_ibes.


In [None]:
#| export
@call_parse
def search(query: str,              # What to search for 
           meta: str='all',   #"all", "features", or "raw"; which part of the cached metadata table to search through
           field: str='label', # Which column in the metadata table you want to search through. Use "name" to search variable names.  
           limit: int=10,           # How many results to display                      
           ) -> pd.DataFrame:
    """Search for `query` in the `field` column of the cached metadata table; return the `limit` best matches

    The table is read from `METADATA_FILE` (the pickle written by `all_metadata`), so no
    metadata function is re-run. `meta` selects which rows are searched:

    - `'all'`: every row;
    - `'features'`: rows with a non-missing `inputs_generated_by` value (the rows
      produced by `features_metadata`);
    - `'raw'`: the remaining rows.

    Rows whose `field` value is missing are never matched. Matching uses
    `thefuzz.fuzz.token_sort_ratio`.

    Returns the matching rows with upper-cased column names, indexed by match score
    (index name `SCORE`).

    Raises `ValueError` for an unknown `meta`, and `KeyError` when `field` is not a column
    of the metadata table.
    """

    if meta not in ('all', 'features', 'raw'):
        raise ValueError(f'`meta` must be "all", "features" or "raw", got {meta!r}')

    # `option_context` puts the caller's display settings back on exit; `pd.reset_option`
    # would overwrite them with the pandas defaults.
    with pd.option_context('display.max_columns', None, 'display.width', 100):
        metadata = pd.read_pickle(METADATA_FILE)

        if meta != 'all':
            # Only feature rows carry `inputs_generated_by`; raw rows leave it missing.
            if 'inputs_generated_by' in metadata.columns:
                is_feature = metadata['inputs_generated_by'].notna()
            else:
                is_feature = pd.Series(False, index=metadata.index)
            metadata = metadata[is_feature if meta == 'features' else ~is_feature]

        candidates = metadata[field].dropna().astype('string')
        results = process.extractBests(query, candidates,
                                       scorer=fuzz.token_sort_ratio,
                                       limit=limit)
        # For a Series each result is (value, score, index label). The labels must be
        # looked up with `.loc`: after `dropna`/filtering they need not equal row positions.
        labels = [key for _, _, key in results]
        scores = [score for _, score, _ in results]
        df = metadata.loc[labels].copy()
        df.index = pd.Index(scores, name="SCORE")
        df.columns = df.columns.str.upper()
    return df
       

In [None]:
meta.label.astype('string')

0                                Book equity
1                         Shareholder equity
2                            Preferred stock
3                                    Tobin Q
4                            Equity issuance
                        ...                 
1104                                    <NA>
1105                                    <NA>
1106                                    <NA>
1107    EXCHANGE TICKER SYMBOL - HISTORICAL 
1108          CUSIP IDENTIFIER - HISTORICAL 
Name: label, Length: 1109, dtype: string

In [None]:
search('tobins q')

Unnamed: 0_level_0,NAME,LABEL,OUTPUT_OF,INPUTS,INPUTS_GENERATED_BY,TYPE,NR_ROWS,LIBRARY,TABLE,WRDS_LIBRARY,WRDS_TABLE,GROUP
SCORE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
93,tobinq,Tobin Q,wrds.compa.tobin_q,"at,lt,seq,ceq,txditc,pstk,pstkrv,pstkl,itcb,pr...",wrds.compa.clean,,,,,,,
50,aqc,Acquisitions,wrds.compa.download,,,DOUBLE_PRECISION,881742.0,comp,funda,,,
45,acqlntal,Acquired Loans,wrds.compa.download,,,DOUBLE_PRECISION,881742.0,comp,funda,,,
43,optgr,Options - Granted,wrds.compa.download,,,DOUBLE_PRECISION,881742.0,comp,funda,,,
43,pvon,Provisions - Other (Net),wrds.compa.download,,,DOUBLE_PRECISION,881742.0,comp,funda,,,
42,bid,Closing Bid,wrds.crspm.download,,,DOUBLE_PRECISION,4922867.0,,,crsp,msf,
42,ask,Closing Ask,wrds.crspm.download,,,DOUBLE_PRECISION,4922867.0,,,crsp,msf,
40,optca,Options - Cancelled (-),wrds.compa.download,,,DOUBLE_PRECISION,881742.0,comp,funda,,,
40,optexd,Options - Exercised (-),wrds.compa.download,,,DOUBLE_PRECISION,881742.0,comp,funda,,,
40,wdp,Writedowns Pretax,wrds.compa.download,,,DOUBLE_PRECISION,881742.0,comp,funda,,,


In [None]:
search('total assets', meta = 'raw', limit=5)

Unnamed: 0_level_0,NAME,LABEL,OUTPUT_OF,TYPE
SCORE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
100,at,Assets - Total,wrds.compa.clean,double
75,act,Current Assets - Total,wrds.compa.clean,double
75,ao,Assets - Other,wrds.compa.clean,double
71,batr,Benefits Assumed - Total,wrds.compa.clean,double
69,dptb,Deposits - Total - Banks,wrds.compa.clean,double


In [None]:
search('cash flows', meta = 'features')

Unnamed: 0_level_0,NAME,LABEL,OUTPUT_OF,INPUTS,INPUTS_GENERATED_BY
SCORE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
54,cflow2la_is,Cash flows to lagged assets,wrds.compa.cashflow_vars,"dtdate,oancf,ib,dp,at",wrds.compa.clean
54,cflow2la_cfs,Cash flows to lagged assets,wrds.compa.cashflow_vars,"dtdate,oancf,ib,dp,at",wrds.compa.clean
54,cflow2la_full,Cash flows to lagged assets,wrds.compa.cashflow_vars,"dtdate,oancf,ib,dp,at",wrds.compa.clean
42,cash2a,Cash holdings to assets,wrds.compa.liquidity_vars,"che,at",wrds.compa.clean
32,capx2la,CAPX to lagged assets,wrds.compa.investment_vars,"ppent,capx,at",wrds.compa.clean
31,roa,Return on assets,wrds.compa.profitability_vars,"ib,at",wrds.compa.clean
28,div2la,Dividends to lagged assets,wrds.compa.payout_vars,"dvc,prstkc,at",wrds.compa.clean
26,rep2la,Repurchases to lagged assets,wrds.compa.payout_vars,"dvc,prstkc,at",wrds.compa.clean
24,tobinq,Tobin Q,wrds.compa.tobin_q,"at,lt,seq,ceq,txditc,pstk,pstkrv,pstkl,itcb,pr...",wrds.compa.clean
21,shreq,Shareholder equity,wrds.compa.book_equity,"at,lt,seq,ceq,txditc,pstk,pstkrv,pstkl,itcb",wrds.compa.clean


In [None]:
search('txditc', field='name', limit=5)

Unnamed: 0_level_0,NAME,LABEL,OUTPUT_OF,INPUTS,INPUTS_GENERATED_BY,TYPE
SCORE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
100,txditc,Deferred Taxes and Investment Tax Credit,wrds.compa.clean,,,double
80,txdi,Income Taxes - Deferred,wrds.compa.clean,,,double
80,txdc,Deferred Taxes (Cash Flow),wrds.compa.clean,,,double
67,tic,Ticker Symbol,wrds.compa.clean,,,string
67,txdbca,Deferred Tax Asset - Current,wrds.compa.clean,,,double


In [None]:
#| hide
import nbdev; nbdev.nbdev_export()