# helpers

> Helper functions that require Stata to be running (but no Jupyter dependence)

`sfi` is [Stata's python API](https://www.stata.com/python/api17/index.html), originally intended for interacting with Stata from python *within Stata*. As such, it can only be imported with Stata running.
`pystata.stata.run` [enables running Stata code from python](https://www.stata.com/python/pystata/stata.html#pystata.stata.run).

In [None]:
#| default_exp helpers
%load_ext autoreload
%autoreload 2

In [None]:
#| hide
from nbdev.showdoc import *
from fastcore.test import test_eq

In [None]:
#| export
from nbstata.config import launch_stata
from nbstata.parsers import break_out_prog_blocks
import pandas as pd
import numpy as np

In [None]:
#| export
def count():
    """Return the total number of observations reported by `sfi.Data`."""
    # `sfi` can only be imported while Stata is running, so import it lazily.
    from sfi import Data
    return Data.getObsTotal()

In [None]:
#| eval: false
# The helpers in this module need a running Stata, so launch it first.
launch_stata(splash=False)
count()

0

`pystata.stata.run` echoes multiple commands even when the `echo` parameter is set to `False`.

In [None]:
#| eval: false
# A single command with echo=False: only the command's output is shown.
from pystata.stata import run
run('disp "test 1"', echo=False)

test 1


In [None]:
#| eval: false
# Two commands in one call: both commands are echoed despite echo=False.
two_lines_of_code = '''disp "test 1"
disp "test 2"'''
run(two_lines_of_code, echo=False)


. disp "test 1"
test 1

. disp "test 2"
test 2

. 


As a workaround when echo is not desired, we can run multiple commands as a Stata program.

In [None]:
#| export
def _run_as_program(clean_non_prog_code):
    """Run Stata code without command echo by wrapping it in a temporary program.

    The code is placed in the body of a program with a fixed, reserved name;
    the program is defined quietly, executed with `echo=False`, and then
    dropped.

    Parameters
    ----------
    clean_non_prog_code : str
        Stata code containing no program-definition blocks (it becomes the
        body of a `program ... end` definition).

    Raises
    ------
    Whatever `pystata.stata.run` raises while defining or running the
    program; the temporary program is dropped before the exception propagates.
    """
    from pystata.stata import run
    _program_name = "temp_nbstata_kernel_program_name"
    _program_define_code = f"program {_program_name}\n{clean_non_prog_code}\nend\n"
    try:
        run(_program_define_code, quietly=True)
        run(_program_name, quietly=False, inline=True, echo=False)
    finally:
        # Always clean up: because the name is fixed, a leftover definition
        # would make every later call fail when it tries to define the program
        # again. `capture` keeps the cleanup itself from raising (and masking
        # the original error) when the definition step never succeeded.
        run(f"capture program drop {_program_name}", quietly=True)

In [None]:
#| eval: false
# The same two commands, wrapped in a temporary program: no command echo.
_run_as_program(two_lines_of_code)


test 1
test 2


This has the unwanted side effect, though, of an initial blank line. When there is only one command to run, we can avoid that blank line by running it directly.

In [None]:
#| export
def run_non_prog_noecho(clean_non_prog_code):
    """Run non-program Stata code without echoing the commands.

    Code consisting of exactly one line is handed straight to
    `pystata.stata.run`; anything else is executed through
    `_run_as_program`.
    """
    from pystata.stata import run
    n_lines = len(clean_non_prog_code.splitlines())
    if n_lines != 1:
        _run_as_program(clean_non_prog_code)
        return
    # A lone command is not echoed when echo=False, and running it directly
    # avoids the extra blank line the program wrapper would print.
    run(clean_non_prog_code, quietly=False, inline=True, echo=False)

In [None]:
#| eval: false
# Single-line code is run directly, so no leading blank line is printed.
run_non_prog_noecho('disp "test 1"')

test 1


Program-definition blocks (`program`, `mata`, or `python`) cannot themselves be wrapped inside a Stata program, however. Instead, we run them directly, using `quietly` to prevent the echo. The exception is `mata` blocks: there `quietly` would also block the output, so they are run without it.

In [None]:
#| export
def run_prog_noecho(clean_prog_code):
    """Run a program-definition block (`program`, `mata` or `python`) directly.

    The block is run quietly to suppress the command echo, unless its first
    line is exactly `mata` or `mata:`; in that case it is run without
    `quietly`.
    """
    from pystata.stata import run
    opening_line = clean_prog_code.splitlines()[0]
    is_mata_block = opening_line == 'mata' or opening_line == 'mata:'
    # 'quietly' blocks all mata output, so only non-mata blocks are run quietly
    run(clean_prog_code, quietly=not is_mata_block, inline=True, echo=False)

In [None]:
#| eval: false
# Drop any earlier definition of display1 so this cell can be re-run.
_run_as_program("capture program drop display1")
prog_block_code = """program define display1
    disp "display1 output"
end
"""
# Define the program without echo, then call it to show it was defined.
run_prog_noecho(prog_block_code)
run("display1", echo=False)



display1 output


In [None]:
#| eval: false
# A python block is run quietly; the output of print() is still shown.
python_block_code = """python:
print("hello")
end
"""
run_prog_noecho(python_block_code)

hello



In [None]:
#| eval: false
# A mata block is run without `quietly`, so its output appears, along with
# the echoed mata session.
mata_block_code = """mata:
display("hello")
end
"""
run_prog_noecho(mata_block_code)


. mata:
------------------------------------------------- mata (type end to exit) -----
: display("hello")
hello

: end
-------------------------------------------------------------------------------

. 


In [None]:
#| export
def run_noecho(code):
    """
    Run Stata code without command echo, one block at a time.

    `break_out_prog_blocks` splits `code` into blocks; each block's
    `'std_code'` is passed to `run_prog_noecho` when its `'is_prog'` entry is
    truthy and to `run_non_prog_noecho` otherwise.
    """
    for block in break_out_prog_blocks(code):
        runner = run_prog_noecho if block['is_prog'] else run_non_prog_noecho
        runner(block['std_code'])

In [None]:
#| eval: false
# Mixed input: program definitions interleaved with ordinary commands.
# Only the output of the final `display2` call is shown.
run_noecho('''
capture program drop ender
program define ender
    disp "ender output"
end
capture program drop display2
program define display2
    ender
end
display2
''')



ender output


In [None]:
#| export
def resolve_macro(macro):
    """Return the contents of a Stata macro reference, or the text itself.

    Surrounding whitespace is stripped first. Recognized forms:

    * `` `name' ``  -> local macro `name`
    * ``$_name``    -> local macro `name`
    * ``${name}``   -> global macro `name`
    * ``$name``     -> global macro `name`

    Anything else is returned unchanged (after stripping) and does not touch
    Stata at all.

    Parameters
    ----------
    macro : str
        A macro reference in one of the forms above, or literal text.

    Returns
    -------
    The value returned by `sfi.Macro.getLocal` / `sfi.Macro.getGlobal` for a
    macro reference, otherwise the stripped input string.
    """
    macro = macro.strip()
    is_quoted_local = macro.startswith("`") and macro.endswith("'")
    if not (is_quoted_local or macro.startswith("$")):
        # Literal text: nothing to look up, so no need to import `sfi`
        # (which is only importable while Stata is running).
        return macro

    import sfi
    if is_quoted_local:
        return sfi.Macro.getLocal(macro[1:-1])
    if macro.startswith("$_"):
        return sfi.Macro.getLocal(macro[2:])
    if macro.startswith("${") and macro.endswith("}"):
        # Brace-delimited global reference: the braces are not part of the name.
        return sfi.Macro.getGlobal(macro[2:-1])
    return sfi.Macro.getGlobal(macro[1:])

In [None]:
#| eval: false
# Define a global macro in Stata, then read it back from Python.
import pystata
pystata.stata.run('global test = "hello world"')
resolve_macro("$test")

'hello world'

[getAsDict example](https://blog.stata.com/2020/11/05/stata-python-integration-part-8-using-the-stata-function-interface-to-copy-data-from-stata-to-python/)

In [None]:
#| export
def better_dataframe_from_stata(stfr, var, obs, selectvar, valuelabel, missingval):
    """Export Stata data to a pandas DataFrame indexed by observation number.

    A temporary variable holding `_n` is generated so that the rows selected
    by `obs` / `selectvar` keep their original Stata observation numbers as
    the DataFrame index. The temporary variable is dropped again before
    returning, even if reading the data fails.

    Parameters
    ----------
    stfr : str or None
        Name of the Stata frame to read (passed to `sfi.Frame.connect`), or
        None to read through `sfi.Data`.
    var, obs, selectvar, valuelabel, missingval
        Passed through unchanged to `getAsDict`.

    Returns
    -------
    pandas.DataFrame or None
        The selected data after `convert_dtypes()`, with a nullable `Int64`
        index of observation numbers; None when there are no observations.
    """
    import sfi, pystata
    hdl = sfi.Data if stfr is None else sfi.Frame.connect(stfr)

    if hdl.getObsTotal() <= 0:
        return None

    # `tempvar` only reserves a name; the variable itself must be created in
    # the frame that is being read, otherwise the index lookup on a
    # non-current frame cannot find it.
    frame_prefix = "" if stfr is None else f"frame {stfr}: "
    pystata.stata.run(
        f"tempvar indexvar\n{frame_prefix}generate `indexvar' = _n",
        quietly=True,
    )
    idx_var = sfi.Macro.getLocal('indexvar')

    try:
        data = hdl.getAsDict(var, obs, selectvar, valuelabel, missingval)
        if idx_var in data:
            # `var` selected every variable, so the index came along with it
            idx = data.pop(idx_var)
        else:
            idx = hdl.getAsDict(idx_var, obs, selectvar, valuelabel, missingval).pop(idx_var)
    finally:
        # Never leave the helper variable behind in the user's data, even
        # when `getAsDict` raises (e.g. for an invalid `var` or `obs`).
        pystata.stata.run(f"{frame_prefix}drop {idx_var}", quietly=True)

    idx = pd.array(idx, dtype='Int64')

    return pd.DataFrame(data=data, index=idx).convert_dtypes()

In [None]:
#| export
def better_pdataframe_from_data(var=None, obs=None, selectvar=None, valuelabel=False, missingval=np.nan):
    """Export the data behind `sfi.Data` to a pandas DataFrame.

    Calls `pystata.config.check_initialized()` first, then delegates to
    `better_dataframe_from_stata` with no frame name.

    Parameters
    ----------
    var, obs, selectvar, valuelabel, missingval
        Forwarded unchanged to `better_dataframe_from_stata`. `missingval`
        defaults to NaN.

    Returns
    -------
    Whatever `better_dataframe_from_stata` returns: a DataFrame, or None when
    there are no observations.
    """
    # The default is spelled `np.nan` because the `np.NaN` alias was removed
    # in NumPy 2.0 and would make this `def` fail at import time.
    import pystata
    pystata.config.check_initialized()

    return better_dataframe_from_stata(None, var, obs, selectvar, valuelabel, missingval)

In [None]:
#| export
def better_pdataframe_from_frame(stfr, var=None, obs=None, selectvar=None, valuelabel=False, missingval=np.nan):
    """Export the Stata frame named `stfr` to a pandas DataFrame.

    Calls `pystata.config.check_initialized()` first, then delegates to
    `better_dataframe_from_stata`.

    Parameters
    ----------
    stfr
        Frame identifier forwarded to `better_dataframe_from_stata`.
    var, obs, selectvar, valuelabel, missingval
        Forwarded unchanged to `better_dataframe_from_stata`. `missingval`
        defaults to NaN.

    Returns
    -------
    Whatever `better_dataframe_from_stata` returns: a DataFrame, or None when
    there are no observations.
    """
    # The default is spelled `np.nan` because the `np.NaN` alias was removed
    # in NumPy 2.0 and would make this `def` fail at import time.
    import pystata
    pystata.config.check_initialized()

    return better_dataframe_from_stata(stfr, var, obs, selectvar, valuelabel, missingval)

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()