# core

> Functions that are not specific to individual data sources

These functions are available directly from the `finsets` namespace. For example, to use the `features_metadata` function below, run:

```python
from finsets import features_metadata
```


In [None]:
#| default_exp core

In [None]:
#| export
from importlib import import_module
from inspect import signature
import pandas as pd
from  thefuzz import process

In [None]:
#| export
def features_metadata(submodules: list=['wrds', 'papers'] # list of submodules to collect metadata from
                      ) -> pd.DataFrame:
    """Go through `submodules` of `finsets` and collect metadata from all functions that have `return_metadata` parameter

    Each qualifying function is called as `func(return_metadata=True)` and is expected to
    return a mapping with the keys `outputs`, `labels` and `inputs`. The result has one row
    per (output variable, input source) pair, with columns `name`, `label`, `output_of`,
    `inputs` (comma-joined input names) and `inputs_generated_by`.
    """

    columns = ['name','label','output_of','inputs','inputs_generated_by']
    rows = []
    for name in submodules:
        module = import_module(f'finsets.{name}')
        for sub in dir(module):
            if sub.startswith('_'): continue
            qualified = f'finsets.{name}.{sub}'
            try:
                submodule = import_module(qualified)
            except ModuleNotFoundError as err:
                # `dir(module)` also lists public attributes that are not submodules
                # (constants, re-exported names); skip those, but let a missing
                # dependency *inside* a real submodule propagate.
                if err.name != qualified: raise
                continue
            for func_name in getattr(submodule, '__all__', ()):
                func = getattr(submodule, func_name)
                if not callable(func): continue
                try:
                    params = signature(func).parameters
                except (TypeError, ValueError):
                    # `inspect.signature` cannot introspect every callable
                    continue
                if 'return_metadata' not in params: continue
                meta = func(return_metadata=True)
                for var_name in meta['outputs']:
                    for input_name in meta['inputs']:
                        rows.append({'name': var_name,
                                     'label': meta['labels'][var_name],
                                     'output_of': f'{name}.{sub}.{func_name}',
                                     'inputs': ','.join(meta['inputs'][input_name]),
                                     'inputs_generated_by': input_name})
    # Build the frame once instead of concatenating inside the loops
    return pd.DataFrame(rows, columns=columns)

In [None]:
# Collect feature metadata from the default submodules and display the table
features_metadata()

Unnamed: 0,name,label,output_of,inputs,inputs_generated_by
0,bookeq,Book equity,wrds.compa.book_equity,"at,lt,seq,ceq,txditc,pstk,pstkrv,pstkl,itcb",wrds.compa.clean
1,shreq,Shareholder equity,wrds.compa.book_equity,"at,lt,seq,ceq,txditc,pstk,pstkrv,pstkl,itcb",wrds.compa.clean
2,pref_stock,Preferred stock,wrds.compa.book_equity,"at,lt,seq,ceq,txditc,pstk,pstkrv,pstkl,itcb",wrds.compa.clean
3,tobinq,Tobin Q,wrds.compa.tobin_q,"at,lt,seq,ceq,txditc,pstk,pstkrv,pstkl,itcb,pr...",wrds.compa.clean
4,equityiss_tot,Equity issuance,wrds.compa.issuance_vars,"at,lt,seq,ceq,txditc,pstk,pstkrv,pstkl,itcb,ss...",wrds.compa.clean
5,equityiss_cfs,Equity issuance,wrds.compa.issuance_vars,"at,lt,seq,ceq,txditc,pstk,pstkrv,pstkl,itcb,ss...",wrds.compa.clean
6,debtiss_tot,Debt issuance,wrds.compa.issuance_vars,"at,lt,seq,ceq,txditc,pstk,pstkrv,pstkl,itcb,ss...",wrds.compa.clean
7,debtiss_cfs,Debt issuance,wrds.compa.issuance_vars,"at,lt,seq,ceq,txditc,pstk,pstkrv,pstkl,itcb,ss...",wrds.compa.clean
8,debtiss_bs,Debt issuance,wrds.compa.issuance_vars,"at,lt,seq,ceq,txditc,pstk,pstkrv,pstkl,itcb,ss...",wrds.compa.clean
9,ppentpch,Pct change in net PPE,wrds.compa.investment_vars,"ppent,capx,at",wrds.compa.clean


In [None]:
#| export
def raw_metadata(submodules: list=['wrds', 'papers'] # list of submodules to collect metadata from
                ) -> pd.DataFrame:
    """Go through `submodules` of `finsets` and collect metadata from `raw_metadata` functions (if present)

    A submodule contributes only when it lists `raw_metadata` in its `__all__`; the frames
    returned by those functions are stacked under the columns `name`, `label`, `output_of`
    and `type`.
    """

    # The empty template keeps the column set (and order) even when nothing is found
    frames = [pd.DataFrame(columns=['name','label','output_of','type'])]
    for name in submodules:
        module = import_module(f'finsets.{name}')
        for sub in dir(module):
            if sub.startswith('_'): continue
            qualified = f'finsets.{name}.{sub}'
            try:
                submodule = import_module(qualified)
            except ModuleNotFoundError as err:
                # `dir(module)` also lists public attributes that are not submodules;
                # skip those, but let a missing dependency inside a real submodule propagate.
                if err.name != qualified: raise
                continue
            if 'raw_metadata' in getattr(submodule, '__all__', ()):
                frames.append(submodule.raw_metadata())
    # Concatenate once instead of growing the frame inside the loop
    return pd.concat(frames, ignore_index=True)

In [None]:
# Collect raw-variable metadata from the default submodules and display the table
raw_metadata()

Unnamed: 0,name,label,output_of,type
0,gvkey,Global Company Key,wrds.compa.clean,string
1,conm,Company Name,wrds.compa.clean,string
2,tic,Ticker Symbol,wrds.compa.clean,string
3,cusip,CUSIP,wrds.compa.clean,string
4,cik,CIK Number,wrds.compa.clean,string
...,...,...,...,...
1031,vwretd,Value-Weighted Return (includes distributions),wrds.crspm.clean,double
1032,vwretx,Value-Weighted Return (excluding dividends),wrds.crspm.clean,double
1033,ewretd,Equal-Weighted Return (includes distributions),wrds.crspm.clean,double
1034,ewretx,Equal-Weighted Return (excluding dividends),wrds.crspm.clean,double


In [None]:
#| export
def all_metadata(submodules=['wrds', 'papers'] # list of submodules to collect metadata from
                ) -> pd.DataFrame:
    "Stack the output of `features_metadata` and `raw_metadata` for `submodules` of `finsets` into one table"

    # Feature rows come first, raw-variable rows second; the index is renumbered from 0
    frames = [collect(submodules) for collect in (features_metadata, raw_metadata)]
    return pd.concat(frames, ignore_index=True)

In [None]:
# Combine feature and raw-variable metadata into a single table and display it
meta = all_metadata()
meta

Unnamed: 0,name,label,output_of,inputs,inputs_generated_by,type
0,bookeq,Book equity,wrds.compa.book_equity,"at,lt,seq,ceq,txditc,pstk,pstkrv,pstkl,itcb",wrds.compa.clean,
1,shreq,Shareholder equity,wrds.compa.book_equity,"at,lt,seq,ceq,txditc,pstk,pstkrv,pstkl,itcb",wrds.compa.clean,
2,pref_stock,Preferred stock,wrds.compa.book_equity,"at,lt,seq,ceq,txditc,pstk,pstkrv,pstkl,itcb",wrds.compa.clean,
3,tobinq,Tobin Q,wrds.compa.tobin_q,"at,lt,seq,ceq,txditc,pstk,pstkrv,pstkl,itcb,pr...",wrds.compa.clean,
4,equityiss_tot,Equity issuance,wrds.compa.issuance_vars,"at,lt,seq,ceq,txditc,pstk,pstkrv,pstkl,itcb,ss...",wrds.compa.clean,
...,...,...,...,...,...,...
1050,vwretd,Value-Weighted Return (includes distributions),wrds.crspm.clean,,,double
1051,vwretx,Value-Weighted Return (excluding dividends),wrds.crspm.clean,,,double
1052,ewretd,Equal-Weighted Return (includes distributions),wrds.crspm.clean,,,double
1053,ewretx,Equal-Weighted Return (excluding dividends),wrds.crspm.clean,,,double


In [None]:
#| export
def search(label: str, # text to fuzzy-match against the `label` column
           meta_func: callable=all_metadata, # function returning the metadata table to search
           limit: int=10, # maximum number of matches to return
           score_cutoff: int=85 # minimum match score for a row to be kept
           ) -> pd.DataFrame:
    """Search for `label` in metadata returned by `meta_func` and return `limit` number of results with `score_cutoff`

    Returns the matching rows of the metadata table, in the order reported by
    `process.extractBests`.
    """

    metadata = meta_func()
    # Key the choices by row position: the matcher reports the key of each hit, and
    # the rows are selected positionally below. Passing the Series itself would
    # report index labels, which only coincide with positions for a default RangeIndex.
    choices = dict(enumerate(metadata['label']))
    results = process.extractBests(label, choices, limit=limit, score_cutoff=score_cutoff)
    positions = [pos for _, _, pos in results]
    return metadata.iloc[positions]

In [None]:
# Inexact wording still matches: 'tobins q' finds the row labelled 'Tobin Q'
search('tobins q')

Unnamed: 0,name,label,output_of,inputs,inputs_generated_by,type
3,tobinq,Tobin Q,wrds.compa.tobin_q,"at,lt,seq,ceq,txditc,pstk,pstkrv,pstkl,itcb,pr...",wrds.compa.clean,


In [None]:
# A broader query; with the default `limit`, at most 10 rows are returned
search('total asset')

Unnamed: 0,name,label,output_of,inputs,inputs_generated_by,type
110,at,Assets - Total,wrds.compa.clean,,,double
88,aco,Current Assets Other Total,wrds.compa.clean,,,double
92,acoxar,Current Assets - Other - Total As Reported,wrds.compa.clean,,,double
93,act,Current Assets - Total,wrds.compa.clean,,,double
109,artfs,Accounts Receivable/Debtors - Total,wrds.compa.clean,,,double
116,ceq,Common/Ordinary Equity - Total,wrds.compa.clean,,,double
134,clt,Contingent Liabilities - Total,wrds.compa.clean,,,double
156,dlc,Debt in Current Liabilities - Total,wrds.compa.clean,,,double
160,dltt,Long-Term Debt - Total,wrds.compa.clean,,,double
176,dptb,Deposits - Total - Banks,wrds.compa.clean,,,double


In [None]:
#| hide
# Run nbdev's notebook export step
import nbdev; nbdev.nbdev_export()