# helpers

> Helper functions that require Stata to be running (but no Jupyter dependence)

`sfi` is [Stata's python API](https://www.stata.com/python/api17/index.html), originally intended for interacting with Stata from python *within Stata*. As such, it can only be imported with Stata running.
`pystata.stata.run` [enables running Stata code from python](https://www.stata.com/python/pystata/stata.html#pystata.stata.run).

In [None]:
#| default_exp helpers
%load_ext autoreload
%autoreload 2

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
from nbstata.config import launch_stata
from nbstata.utils import break_out_prog_blocks, HiddenPrints
import sys
from io import StringIO
from textwrap import dedent
import pandas as pd
import numpy as np

## Simple helpers

In [None]:
#| export
def count():
    """Return the observation total reported by `sfi.Data.getObsTotal`."""
    # `sfi` is imported lazily: it can only be imported with Stata running.
    from sfi import Data
    return Data.getObsTotal()

In [None]:
#| eval: false
launch_stata(splash=False)
count()

0

In [None]:
#| export
def resolve_macro(macro):
    """Return the contents of a Stata macro reference, else the stripped input.

    Surrounding whitespace is stripped first. Recognized forms:

    * `` `name' `` -> local macro ``name``
    * ``$_name``   -> local macro ``name``
    * ``$name``    -> global macro ``name``

    Any other text is returned (stripped) unchanged.
    """
    import sfi
    text = macro.strip()
    if text.startswith("$"):
        # "$_name" must be tested before the general "$name" global form.
        if text.startswith("$_"):
            return sfi.Macro.getLocal(text[2:])
        return sfi.Macro.getGlobal(text[1:])
    if text.startswith("`") and text.endswith("'"):
        return sfi.Macro.getLocal(text[1:-1])
    return text

In [None]:
#| eval: false
from pystata.stata import run
run('global test = "hello world"')
resolve_macro("$test")

'hello world'

In [None]:
#| export
class Selectvar():
    """Class for generating Stata selectvar for getAsDict

    Given Stata `if` code (e.g. ``" if var1==0"``), a temporary Stata variable
    is generated that equals 1 where the condition holds and 0 otherwise.
    Its name is stored in `varname`. When the code contains no condition,
    nothing is run in Stata and `varname` stays None.
    """
    # Name of the generated temporary variable; None when no condition was given.
    varname = None

    def __init__(self, stata_if_code):
        condition = self._condition_from(stata_if_code)
        if condition:
            # Imported only when Stata work is actually needed.
            import sfi
            import pystata
            cmd = f"tempvar __selectionVar\ngenerate `__selectionVar' = cond({condition},1,0)"
            pystata.stata.run(cmd, quietly=True)
            self.varname = sfi.Macro.getLocal("__selectionVar")

    @staticmethod
    def _condition_from(stata_if_code):
        """Strip a leading `if` keyword (and surrounding whitespace) from `stata_if_code`."""
        import re
        # Anchor at the start so only the leading keyword is removed, never an
        # "if " occurring inside the condition; `\b` also accepts `if(` / `if<tab>`.
        return re.sub(r"^\s*if\b", "", stata_if_code, count=1).strip()

    def clear(self):
        """Remove temporary selectvar from Stata dataset"""
        if self.varname is not None:
            import pystata
            pystata.stata.run(f"capture drop {self.varname}", quietly=True)

`Selectvar.varname` is a temporary Stata variable for use in `sfi.Data.getAsDict`

In [None]:
#| eval: false
run("""\
set obs 5
gen var1 = _n > 3
""", quietly=True)
sel_var = Selectvar(" if var1==0")
print(f"sel_var.varname: {sel_var.varname}")
run("list, clean", echo=False)



sel_var.varname: __000000

       var1   __000000  
  1.      0          1  
  2.      0          1  
  3.      0          1  
  4.      1          0  
  5.      1          0  


In [None]:
show_doc(Selectvar.clear)

---

[source](https://github.com/hugetim/nbstata/blob/main/nbstata/helpers.py#L48){target="_blank" style="float:right; font-size:smaller"}

### Selectvar.clear

>      Selectvar.clear ()

Remove temporary selectvar from Stata dataset

In [None]:
#| eval: false
sel_var.clear()
run("desc, simple")

var1


## Run multi-line Stata commands no-echo

`pystata.stata.run` can only suppress the "echo" of single commands, not multi-line Stata code:

In [None]:
#| eval: false
run('disp "test 1"', echo=False)

test 1


In [None]:
#| eval: false
two_lines_of_code = dedent('''\
    disp "test 1"
    disp "test 2"
    ''')
run(two_lines_of_code, echo=False)


. disp "test 1"
test 1

. disp "test 2"
test 2

. 


As a workaround when echo is not desired, we can run multiple commands as a Stata program:

In [None]:
#| eval: false
run(f"program temp_nbstata_program_name\n{two_lines_of_code}\nend\n", quietly=True)




In [None]:
#| eval: false
run("temp_nbstata_program_name", quietly=False, inline=True, echo=False)

test 1
test 2


In [None]:
#| eval: false
run(f"program drop temp_nbstata_program_name", quietly=True)

The first `run` (defining the temp. program) prints an unwanted blank line despite setting "quietly" to True, so we block its printed output in the function version.

(Note: This and the following two functions assume input Stata code standardized by `standardize_code`, which will be ensured by the `break_out_prog_blocks` within the final `run_noecho` wrapper function below.)

In [None]:
#| export
def run_as_program(std_non_prog_code):
    """Run Stata code without echo by wrapping it in a temporary Stata program.

    The code is defined as the body of a program named
    `temp_nbstata_program_name`, that program is run with echo off, and the
    program is dropped again afterwards, whether or not an error occurred.

    `std_non_prog_code` is assumed to be standardized, non-program-block code.
    Exceptions raised by `pystata.stata.run` propagate to the caller.
    """
    from pystata.stata import run
    _program_name = "temp_nbstata_program_name"
    _program_define_code = f"program {_program_name}\n{std_non_prog_code}\nend\n"
    try:
        # Defining the program prints an unwanted blank line even with
        # quietly=True, so its printed output is hidden.
        with HiddenPrints():
            run(_program_define_code, quietly=True)
        run(_program_name, quietly=False, inline=True, echo=False)
    finally:
        # `capture` keeps this cleanup from raising (and masking the original
        # error) when the definition itself failed and there is nothing to drop.
        run(f"capture program drop {_program_name}", quietly=True)

In [None]:
#| eval: false
run_as_program(two_lines_of_code)

test 1
test 2


To test resilience to Stata errors, let's intentionally run error-causing Stata code to ensure the error stays isolated to one code cell:

In [None]:
from fastcore.test import ExceptionExpected

In [None]:
#| eval: false
with ExceptionExpected(SystemError):
    run_as_program("error-causing code")

In [None]:
#| eval: false
run_as_program(two_lines_of_code)

test 1
test 2


In [None]:
#| export
def run_non_prog_noecho(std_non_prog_code, run_as_prog=run_as_program):
    """Run standardized non-program Stata code without echoing the commands.

    Code that is exactly one line long is passed straight to
    `pystata.stata.run` with `echo=False`; anything else is delegated to
    `run_as_prog`.
    """
    from pystata.stata import run
    is_single_line = len(std_non_prog_code.splitlines()) == 1
    if not is_single_line:
        run_as_prog(std_non_prog_code)
        return
    # to keep it simple when we can
    run(std_non_prog_code, quietly=False, inline=True, echo=False)

In [None]:
#| eval: false
run_non_prog_noecho('disp "test 1"')

test 1


In [None]:
#| export
def run_prog_noecho(std_prog_code):
    """Run a standardized program block directly, quietly unless it is a mata block.

    The block counts as mata when its first line is exactly `mata` or `mata:`.
    """
    from pystata.stata import run
    first_line = std_prog_code.splitlines()[0]
    # b/c 'quietly' blocks mata output
    run_quietly = first_line not in {'mata', 'mata:'}
    run(std_prog_code, quietly=run_quietly, inline=True, echo=False)

Program blocks (that is, Stata's `program define`, as well as [mata](https://www.stata.com/manuals/m-1first.pdf) or [python](https://www.stata.com/stata-news/news35-3/python-blogs/) blocks) cannot, however, be defined or run from within a Stata program. Instead, we run them directly, using `quietly` to prevent the echo. The exception is mata blocks: there `quietly` would also block the output, so they are run without it (and are therefore echoed).

In [None]:
#| eval: false
#| hide
run_as_program("capture program drop display1")

In [None]:
#| eval: false
prog_block_code = dedent("""\
    program define display1
        disp "display1 output"
    end
    """)
run_prog_noecho(prog_block_code)
run("display1", echo=False)


display1 output


In [None]:
#| eval: false
python_block_code = dedent("""\
    python:
    print("hello")
    end
    """)
run_prog_noecho(python_block_code)

hello



In [None]:
#| eval: false
mata_block_code = dedent("""\
    mata:
    display("hello")
    end
    """)
run_prog_noecho(mata_block_code)


. mata:
------------------------------------------------- mata (type end to exit) -----
: display("hello")
hello

: end
-------------------------------------------------------------------------------

. 


In [None]:
#| export
def run_noecho(code, starting_delimiter=None, run_as_prog=run_as_program):
    """After `break_out_prog_blocks`, run each prog and non-prog block noecho

    Program blocks go to `run_prog_noecho`; all other blocks go to
    `run_non_prog_noecho`, which receives `run_as_prog`.
    """
    for blk in break_out_prog_blocks(code, starting_delimiter):
        if not blk['is_prog']:
            run_non_prog_noecho(blk['std_code'], run_as_prog=run_as_prog)
            continue
        run_prog_noecho(blk['std_code'])

In [None]:
#| eval: false
run_noecho(dedent('''\
    capture program drop ender
    program define ender
        disp "ender output"
    end
    capture program drop display2
    program define display2
        ender
    end
    display2
    '''))



ender output


## Divert Stata output to string

In [None]:
#| export
def diverted_stata_output(std_code, noecho=True):
    """Run Stata code and return what it printed as a string.

    `sys.stdout` is redirected to an in-memory buffer while the code runs and
    is always restored afterwards, even if the Stata code raises. The code is
    bracketed by `capture log off` / `capture log on`.

    With `noecho=True` the code is run through `run_noecho`; otherwise it is
    run directly with `pystata.stata.run`, which echoes multi-line commands.
    Exceptions from running the code propagate to the caller.
    """
    import pystata
    from contextlib import redirect_stdout
    with redirect_stdout(StringIO()) as diverted:
        try:
            if noecho:
                code = f"capture log off\n{std_code}\ncapture log on"
                run_noecho(code)  # multi-line code run as a program, which clears locals
            else:
                pystata.stata.run("capture log off", quietly=True)
                code = f"{std_code}\ncapture log on"
                pystata.stata.run(code, quietly=False, inline=True, echo=False)
        except Exception:
            # The trailing `capture log on` was likely skipped by the error, so
            # switch the log back on before re-raising.
            pystata.stata.run("capture log on", quietly=True)
            raise
        return diverted.getvalue()

In [None]:
#| eval: false
print(diverted_stata_output(two_lines_of_code))

test 1
test 2



In [None]:
#| eval: false
print(diverted_stata_output(two_lines_of_code, noecho=False))


. disp "test 1"
test 1

. disp "test 2"
test 2

. 
. capture log on

. 



In [None]:
#| eval: false
import time
tic = time.perf_counter()
diverted_stata_output(two_lines_of_code)
toc = time.perf_counter()
print(f"Noecho diverted run in {toc - tic:0.4f} seconds")
tic = time.perf_counter()
diverted_stata_output(two_lines_of_code, noecho=False)
toc = time.perf_counter()
print(f"Echo diverted run in {toc - tic:0.4f} seconds")

Noecho diverted run in 0.1510 seconds
Echo diverted run in 0.0934 seconds


In [None]:
#| hide
#| eval: false
tic = time.perf_counter()
diverted_stata_output("disp 1")
toc = time.perf_counter()
print(f"Noecho diverted run in {toc - tic:0.4f} seconds")

Noecho diverted run in 0.1339 seconds


## Stata-to-pandas

Reference Stata docs: [getAsDict example](https://blog.stata.com/2020/11/05/stata-python-integration-part-8-using-the-stata-function-interface-to-copy-data-from-stata-to-python/)

In [None]:
#| export
def better_dataframe_from_stata(stfr, var, obs, selectvar, valuelabel, missingval):
    """Build a pandas DataFrame from Stata data, indexed by observation number.

    Data is read from `sfi.Data` when `stfr` is None, otherwise from the frame
    obtained with `sfi.Frame.connect(stfr)`. The remaining arguments are
    passed through to `getAsDict`. A temporary Stata variable holding `_n`
    is created to supply the index and is dropped again afterwards, even if
    reading the data fails.

    Returns None when there are no observations; otherwise a DataFrame with
    an `Int64` index, after `convert_dtypes()`.
    """
    import sfi
    import pystata
    hdl = sfi.Data if stfr is None else sfi.Frame.connect(stfr)

    if hdl.getObsTotal() <= 0:
        return None

    # NOTE(review): this generates the index variable with a plain Stata
    # command, presumably in the current frame -- confirm it behaves as
    # intended when `stfr` names a different frame.
    pystata.stata.run("""tempvar indexvar
                         generate `indexvar' = _n""", quietly=True)
    idx_var = sfi.Macro.getLocal('indexvar')

    try:
        data = hdl.getAsDict(var, obs, selectvar, valuelabel, missingval)
        if idx_var in data:
            idx = data.pop(idx_var)
        else:
            # The index variable was not among the requested variables, so
            # fetch it separately with the same obs/selectvar selection.
            idx = hdl.getAsDict(idx_var, obs, selectvar, valuelabel, missingval).pop(idx_var)
    finally:
        # Always remove the temporary variable; `capture` keeps this cleanup
        # from masking an error raised above.
        pystata.stata.run(f"capture drop {idx_var}", quietly=True)

    idx = pd.array(idx, dtype='Int64')
    return pd.DataFrame(data=data, index=idx).convert_dtypes()

In [None]:
#| export
def better_pdataframe_from_data(var=None, obs=None, selectvar=None, valuelabel=False, missingval=np.nan):
    """Return `better_dataframe_from_stata` output for `sfi.Data` (no frame).

    Calls `pystata.config.check_initialized()` first, then forwards all
    arguments with `stfr=None`.
    """
    # `np.nan` rather than the `np.NaN` alias, which NumPy 2.0 removed.
    import pystata
    pystata.config.check_initialized()

    return better_dataframe_from_stata(None, var, obs, selectvar, valuelabel, missingval)

In [None]:
#| export
def better_pdataframe_from_frame(stfr, var=None, obs=None, selectvar=None, valuelabel=False, missingval=np.nan):
    """Return `better_dataframe_from_stata` output for the Stata frame `stfr`.

    Calls `pystata.config.check_initialized()` first, then forwards all
    arguments unchanged.
    """
    # `np.nan` rather than the `np.NaN` alias, which NumPy 2.0 removed.
    import pystata
    pystata.config.check_initialized()

    return better_dataframe_from_stata(stfr, var, obs, selectvar, valuelabel, missingval)

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()