# stata helpers

> Helper functions that require Stata to be running (no Jupyter dependence)

In [None]:
#| default_exp stata_helpers
# Development convenience: load IPython's autoreload extension and use mode 2,
# which re-imports modules before each cell runs so source edits take effect.
%load_ext autoreload
%autoreload 2

In [None]:
#| hide
from nbdev.showdoc import *
from fastcore.test import test_eq

In [None]:
#| export
import pandas as pd
import numpy as np
from nbstata.config import set_pystata_path
from nbstata.helpers import launch_stata

In [None]:
#| export
# NOTE(review): this call runs before `import pystata` on purpose; presumably it
# makes Stata's bundled Python package importable — confirm in nbstata.config.
set_pystata_path()
import pystata

In [None]:
#| export
def count():
    """Return the total number of observations reported by `sfi.Data`."""
    # `sfi` is imported lazily, inside the function, as elsewhere in this module.
    from sfi import Data
    return Data.getObsTotal()

In [None]:
#|eval: false
# Demo: start Stata with the splash banner suppressed, then show the
# observation count of the dataset currently in memory.
launch_stata(splash=False)
count()

NameError: name 'set_pystata_path' is not defined

In [None]:
#| export
def resolve_macro(macro):
    """Expand a single Stata macro reference to its contents.

    Surrounding whitespace is stripped first. Recognised forms:

    * `` `name' `` -> contents of the local macro ``name``
    * ``$_name``   -> contents of the local macro ``name``
    * ``$name``    -> contents of the global macro ``name``

    Any other text is returned (stripped) unchanged.
    """
    import sfi
    text = macro.strip()

    is_quoted_local = text.startswith("`") and text.endswith("'")
    if is_quoted_local:
        return sfi.Macro.getLocal(text[1:-1])
    # "$_" must be tested before the bare "$" prefix, which it also matches.
    if text.startswith("$_"):
        return sfi.Macro.getLocal(text[2:])
    if text.startswith("$"):
        return sfi.Macro.getGlobal(text[1:])
    return text

In [None]:
#|eval: false
# Demo: define a Stata global macro via the %stata line magic, then resolve it
# from Python using the `$name` form.
%stata global test = "hello world"
resolve_macro("$test")

In [None]:
#| export
def better_dataframe_from_stata(stfr, var, obs, selectvar, valuelabel, missingval):
    """Build a pandas DataFrame from Stata data, indexed by observation number.

    Parameters
    ----------
    stfr : str or None
        Name of the Stata frame to read; ``None`` reads through ``sfi.Data``.
    var, obs, selectvar, valuelabel, missingval
        Forwarded unchanged to ``getAsDict`` of the selected data handle.

    Returns
    -------
    pandas.DataFrame or None
        ``None`` when the data handle reports no observations. Otherwise a
        DataFrame (after ``convert_dtypes()``) whose index holds the values of
        a temporary ``_n`` variable for the selected rows, as nullable ``Int64``.
    """
    import sfi
    hdl = sfi.Data if stfr is None else sfi.Frame.connect(stfr)

    if hdl.getObsTotal() <= 0:
        return None

    # Create a temporary variable holding _n so the selected rows keep their
    # original Stata observation numbers as the DataFrame index.
    # NOTE(review): these commands run in Stata's current working frame; when
    # `stfr` names a different frame the index variable may not exist in `hdl`
    # — confirm behaviour for non-default frames.
    pystata.stata.run("""tempvar indexvar
                         generate `indexvar' = _n""", quietly=True)
    idx_var = sfi.Macro.getLocal('indexvar')

    try:
        data = hdl.getAsDict(var, obs, selectvar, valuelabel, missingval)
        if idx_var in data:
            # The index variable came along with the requested data; it must
            # not appear as a regular column.
            idx = data.pop(idx_var)
        else:
            # Fetch the index separately with the same obs/selectvar filters so
            # it lines up with the rows in `data`.
            idx = hdl.getAsDict(idx_var, obs, selectvar, valuelabel, missingval).pop(idx_var)
    finally:
        # Always remove the temporary variable, even if reading the data
        # failed, so it is not left behind in the user's dataset.
        pystata.stata.run("drop `indexvar'")

    idx = pd.array(idx, dtype='Int64')

    return pd.DataFrame(data=data, index=idx).convert_dtypes()

In [None]:
#| export
def better_pdataframe_from_data(var=None, obs=None, selectvar=None, valuelabel=False, missingval=np.nan):
    """Return Stata's current dataset as a pandas DataFrame.

    Checks that pystata is initialised, then delegates to
    `better_dataframe_from_stata` with no frame name, so the data is read
    through ``sfi.Data``.

    Parameters
    ----------
    var, obs, selectvar, valuelabel
        Forwarded unchanged to `better_dataframe_from_stata`.
    missingval
        Forwarded unchanged; defaults to ``np.nan``. (The ``np.NaN`` alias was
        removed in NumPy 2.0, so the lowercase spelling is required.)

    Returns
    -------
    pandas.DataFrame or None
        Whatever `better_dataframe_from_stata` returns.
    """
    # NOTE(review): `sfi` is not used directly in this function; presumably the
    # import is kept so a missing Stata environment fails here — confirm.
    import sfi
    pystata.config.check_initialized()

    return better_dataframe_from_stata(None, var, obs, selectvar, valuelabel, missingval)

In [None]:
#| export
def better_pdataframe_from_frame(stfr, var=None, obs=None, selectvar=None, valuelabel=False, missingval=np.nan):
    """Return the contents of the Stata frame `stfr` as a pandas DataFrame.

    Checks that pystata is initialised, then delegates to
    `better_dataframe_from_stata` with the given frame name.

    Parameters
    ----------
    stfr
        Name of the Stata frame to read; forwarded unchanged.
    var, obs, selectvar, valuelabel
        Forwarded unchanged to `better_dataframe_from_stata`.
    missingval
        Forwarded unchanged; defaults to ``np.nan``. (The ``np.NaN`` alias was
        removed in NumPy 2.0, so the lowercase spelling is required.)

    Returns
    -------
    pandas.DataFrame or None
        Whatever `better_dataframe_from_stata` returns.
    """
    # NOTE(review): `sfi` is not used directly in this function; presumably the
    # import is kept so a missing Stata environment fails here — confirm.
    import sfi
    pystata.config.check_initialized()

    return better_dataframe_from_stata(stfr, var, obs, selectvar, valuelabel, missingval)