# helpers

> Helper functions which require a Stata installation

In [None]:
#| default_exp helpers
%load_ext autoreload
%autoreload 2

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
from nbstata.utils import DivertedPrints, break_out_prog_blocks
from nbstata.stata import run_direct, run_as_program, get_local, run_prog_noecho, run_non_prog_noecho
from textwrap import dedent
import functools
import re

In [None]:
#| eval: false
from nbstata.config import launch_stata

## Simple Helpers

In [None]:
#| export
class SelectVar():
    """Context manager that creates a temporary 0/1 Stata selection variable.

    Given Stata `if` code (e.g. `" if var1==0"`), a tempvar is generated that
    equals 1 where the condition holds and 0 otherwise. Its name is stored in
    `varname` (for use as the select variable in `sfi.Data.getAsDict`) and is
    the value bound by the `with` statement. When the `if` code holds no
    condition, nothing is created and `varname` stays `None`.
    """
    varname = None  # name of the temporary variable; None when no condition was given

    def __init__(self, stata_if_code):
        # Remove the first "if " and surrounding whitespace to isolate the expression.
        expression = stata_if_code.replace('if ', '', 1).strip()
        if not expression:
            return
        stata_cmd = dedent(f"""\
                tempvar __selectionVar
                generate `__selectionVar' = cond({expression},1,0)""")
        run_direct(stata_cmd, quietly=True)
        # The tempvar's actual name is held in the Stata local `__selectionVar`.
        self.varname = get_local("__selectionVar")

    def clear(self):
        """Drop the temporary variable and blank the `__selectionVar` local."""
        import sfi
        if not self.varname:
            return
        sfi.Data.dropVar(self.varname)
        sfi.Macro.setLocal("__selectionVar", "")

    def __enter__(self):
        """Return the temporary variable's name (or `None`)."""
        return self.varname

    def __exit__(self, exc_type, exc_value, exc_tb):
        """Clean up the temporary variable when leaving the `with` block."""
        self.clear()

`SelectVar.varname` is a temporary Stata variable for use in `sfi.Data.getAsDict`

In [None]:
from fastcore.test import test_eq

In [None]:
#| eval: false
launch_stata(splash=False)
run_direct("""\
set obs 5
gen var1 = _n > 3
""", quietly=True)
with SelectVar(" if var1==0") as sel_varname:
    print(f"varname: {sel_varname}")
    run_direct("list, clean")
    test_eq(bool(get_local("__selectionVar")), True)
run_direct("desc, simple")
test_eq(get_local("__selectionVar"), "")



varname: __000000

       var1   __000000  
  1.      0          1  
  2.      0          1  
  3.      0          1  
  4.      1          0  
  5.      1          0  
var1


In [None]:
#| export
class IndexVar:
    """Context manager providing a temporary Stata variable equal to `_n`.

    On entry, a tempvar holding the observation number is generated and its
    name is returned (and kept in `self.idx_var`). On exit, that variable is
    dropped and the Stata local `indexvar` is reset to an empty string.
    """

    def __enter__(self):
        create_cmd = """\
            tempvar indexvar
            generate `indexvar' = _n"""
        run_direct(create_cmd, quietly=True)
        # The tempvar's actual name is held in the Stata local `indexvar`.
        self.idx_var = get_local('indexvar')
        return self.idx_var

    def __exit__(self, exc_type, exc_value, exc_tb):
        from sfi import Data, Macro
        Data.dropVar(self.idx_var)
        Macro.setLocal('indexvar', "")

In [None]:
#| eval: False
with IndexVar() as idx_var:
    run_direct("desc, simple")
    test_eq(bool(get_local('indexvar')), True)
run_direct("desc, simple")
test_eq(get_local('indexvar'), "")


var1      __000001
var1


## Divert Stata output to string

Here we want to get output from some Stata commands without changing the Stata environment. Preserving `r()` return values is a particular challenge, in part because the `log on`/`off` commands needed to ensure this output is not logged are themselves r-class. But the input `std_code` may also contain r-class commands. And capturing multi-line Stata output without the commands echoed poses additional, `run_as_program`-related challenges with regard to local variables.

To start, we set aside the latter two issues and simply use `run_direct` to run the Stata code. We handle the first issue by running the `log` commands inside an r-class program with [`return add`](https://www.stata.com/help.cgi?return) at the start.

A custom code `runner` may be specified. This may be useful if, for instance, the input `std_code` needs to access the `r()` results.

In [None]:
#| export
def diverted_stata_output(std_code, runner=None):
    """Run `std_code` in Stata and return what it printed as a string.

    Stata logging is switched off before the code runs and switched back on
    afterwards, even if the runner raises. Each log command is run inside an
    r-class program that starts with `return add`.

    `runner` is the callable used to execute `std_code`. When omitted,
    `run_direct` is used with `quietly=False, inline=True, echo=False`.
    """
    log_off_code = "return add\ncapture log off"
    log_on_code = "return add\ncapture log on"
    execute = runner
    if execute is None:
        execute = functools.partial(run_direct, quietly=False, inline=True, echo=False)
    with DivertedPrints() as captured:
        run_as_program(log_off_code, prog_def_option_code="rclass")
        try:
            execute(std_code)
        finally:
            # Always re-enable logging, whether or not the code succeeded.
            run_as_program(log_on_code, prog_def_option_code="rclass")
        out = captured.getvalue()
    return out

In [None]:
from textwrap import dedent

In [None]:
#| eval: false
two_lines_of_code = dedent('''\
    disp "test 1"
    disp "test 2"
    ''')
out = diverted_stata_output(two_lines_of_code)

In [None]:
#| eval: false
print(out)


. disp "test 1"
test 1

. disp "test 2"
test 2

. 



In [None]:
#| eval: false
print(diverted_stata_output('disp "test 1"', run_as_program))

test 1



If we know the code we're running is non-program code, we can get a speed improvement by running the `log` code together with the input `std_non_prog_code`.

In [None]:
#| export
def diverted_stata_output_quicker(std_non_prog_code):
    """Return the Stata output of `std_non_prog_code` using a single program run.

    The `log off`/`log on` commands are placed in the same r-class program as
    the input code (after an initial `return add`), so only one call to
    `run_as_program` is needed on the success path. The input must not
    contain program definitions, since it is wrapped in a program itself.

    Raises:
        SystemError: re-raised from `run_as_program` after logging has been
            switched back on.
    """
    code = f"return add\ncapture log off\n{std_non_prog_code}\ncapture log on"
    with DivertedPrints() as diverted:
        try:
            run_as_program(code, prog_def_option_code="rclass")
        except SystemError:
            # The program aborted before reaching its own `capture log on`,
            # so switch logging back on before propagating the error.
            run_as_program("return add\ncapture log on", prog_def_option_code="rclass")
            raise
        out = diverted.getvalue()
    return out

In [None]:
#| eval: false
print(diverted_stata_output_quicker(two_lines_of_code))

test 1
test 2



In [None]:
#| eval: false
import time
tic = time.perf_counter()
diverted_stata_output(two_lines_of_code)
toc = time.perf_counter()
print(f"diverted_stata_output_direct run in {toc - tic:0.4f} seconds")
tic = time.perf_counter()
diverted_stata_output_quicker(two_lines_of_code)
toc = time.perf_counter()
print(f"diverted_stata_output_quicker run in {toc - tic:0.4f} seconds")

diverted_stata_output_direct run in 0.3039 seconds
diverted_stata_output_quicker run in 0.1424 seconds


In [None]:
#| hide
#| eval: false
tic = time.perf_counter()
diverted_stata_output(two_lines_of_code, run_as_program)
toc = time.perf_counter()
print(f"diverted_stata_output(run_as_program) run in {toc - tic:0.4f} seconds")

diverted_stata_output(run_as_program) run in 0.4286 seconds


## Run noecho as if run directly

We now have the tools needed to run generic code without echoing the commands, handling locals as if the code were run directly (rather than inside a program definition). We can use `diverted_stata_output` to pull any previously-defined locals into the program.

In [None]:
#| export
def local_names():
    """Return the names of the currently defined Stata locals as a list.

    The names are printed by Mata as one space-separated line, captured via
    `diverted_stata_output`, and split on whitespace.
    """
    mata_cmd = """mata : invtokens(st_dir("local", "macro", "*")')"""
    return diverted_stata_output(mata_cmd).split()

In [None]:
#| eval: False
run_direct("local test1 = 1 \n local test2 = 2", quietly=True)
test_eq(set(local_names()), {'test1', 'test2'})




In [None]:
#| export
def get_local_dict(_local_names=None):
    """Map Stata local names to their current values.

    `_local_names` is an iterable of local names to look up; when it is
    `None`, the names are obtained from `local_names()`.
    """
    names = local_names() if _local_names is None else _local_names
    values = {}
    for name in names:
        values[name] = get_local(name)
    return values

In [None]:
#| eval: False
test_eq(get_local_dict(), {'test1': '1', 'test2': '2'})

In [None]:
#| export
def _locals_code_from_dict(preexisting_local_dict):
    local_defs = (f"""local {name} `"{preexisting_local_dict[name]}"'"""
                  for name in preexisting_local_dict)
    return "\n".join(local_defs)

In [None]:
show_doc(_locals_code_from_dict)

---

[source](https://github.com/hugetim/nbstata/blob/main/nbstata/helpers.py#L93){target="_blank" style="float:right; font-size:smaller"}

### _locals_code_from_dict

>      _locals_code_from_dict (preexisting_local_dict)

In [None]:
#| hide
print(_locals_code_from_dict({'test1': 'blah', 'test2': 'blah blah'}))

local test1 `"blah"'
local test2 `"blah blah"'


In [None]:
#| hide
test_eq(_locals_code_from_dict({'test1': 'blah', 'test2': 'blah blah'}),
        """local test1 `"blah"\'\nlocal test2 `"blah blah"\'""")

In [None]:
#| eval: False
print(_locals_code_from_dict(get_local_dict()))

local test2 `"2"'
local test1 `"1"'


We can use `diverted_stata_output` to artificially transfer any locals created within the program to the outside scope. We will do this by making the program an s-class program and storing any locals present at the end of the program that way. Then these locals can be quietly set after the program run has completed.

In [None]:
#| export
def _run_as_program_w_locals_sreturned(std_code):
    """Run `std_code` as an s-class program, saving its locals in `s()`.

    The program first clears any existing `s()` results, then runs
    `std_code`, and finally copies each local macro defined at that point
    into an `s()` macro of the same name.
    """
    # Appended after the user's code: list all local names via Mata, then
    # `sreturn` each local's contents under its own name.
    export_locals_code = dedent("""\
        
        mata : st_local("temp_nbstata_all_locals", invtokens(st_dir("local", "macro", "*")'))
        foreach lname in `temp_nbstata_all_locals' {
            sreturn local `lname' "``lname''"
        }
        """)
    program_code = "".join(("sreturn clear\n", std_code, export_locals_code))
    run_as_program(program_code, "sclass")

In [None]:
show_doc(_run_as_program_w_locals_sreturned)

---

[source](https://github.com/hugetim/nbstata/blob/main/nbstata/helpers.py#L99){target="_blank" style="float:right; font-size:smaller"}

### _run_as_program_w_locals_sreturned

>      _run_as_program_w_locals_sreturned (std_code)

In [None]:
#| eval: False
_run_as_program_w_locals_sreturned('local test3 "3"')
run_direct("sreturn list")


macros:
              s(test3) : "3"


In [None]:
#| export
#| hide
# `parse_sreturn(text)` finds every ` s(name) : "value"` line in the given
# text (as printed by `sreturn list`) and returns a list of (name, value)
# tuples. The value group is `.+`, so a line with an empty `""` value does
# not match.
parse_sreturn = re.compile(
    r'^\s*?(?:\ss\((?P<name>\w+)\) : \"(?P<value>.+)\"\s)', flags=re.MULTILINE
).findall

In [None]:
#| hide
output = """
macros:
              s(test1) : "blah"
              s(test2) : "blah blah"

"""
parse_sreturn(output)

[('test1', 'blah'), ('test2', 'blah blah')]

In [None]:
#| export
def _local_dict_from_sreturn(sreturn_output):
    """Convert `sreturn list` output text into a `{name: value}` dict."""
    # parse_sreturn yields (name, value) pairs, which dict() consumes directly.
    return dict(parse_sreturn(sreturn_output))

In [None]:
show_doc(_local_dict_from_sreturn)

---

[source](https://github.com/hugetim/nbstata/blob/main/nbstata/helpers.py#L118){target="_blank" style="float:right; font-size:smaller"}

### _local_dict_from_sreturn

>      _local_dict_from_sreturn (sreturn_output)

In [None]:
_local_dict_from_sreturn(output)

{'test1': 'blah', 'test2': 'blah blah'}

In [None]:
#| export
def _after_local_dict():
    """Return the current `s()` macros as a `{name: value}` dict.

    The output of `sreturn list` is captured and then parsed.
    """
    return _local_dict_from_sreturn(diverted_stata_output_quicker("sreturn list"))

In [None]:
#| export
def _restore_locals_and_clear_sreturn():
    """Redefine as locals the values stored in `s()`, then clear `s()`.

    `sreturn clear` is appended only when at least one `s()` macro was found.
    """
    returned_locals = _after_local_dict()
    restore_code = _locals_code_from_dict(returned_locals)
    if returned_locals:
        restore_code = f"{restore_code}\nsreturn clear"
    # Run as non-program code to avoid clearing locals.
    run_direct(restore_code, quietly=True)

In [None]:
#| export
#| hide
# Regex alternatives for the Stata command prefixes `capture`, `quietly`,
# and `noisily`, including their abbreviations (`cap`..`capture`,
# `qui`..`quietly`, `n`..`noisily`).
pre = (
    r'(cap(t|tu|tur|ture)?'
    r'|qui(e|et|etl|etly)?'
    r'|n(o|oi|ois|oisi|oisil|oisily)?)')
kwargs = {'flags': re.MULTILINE}
# `local_def_in(code)` returns a match object (truthy) when `code` appears to
# define a local, and `None` otherwise. It matches either:
#   * a line that starts (at column 0; no leading whitespace is allowed by the
#     pattern) with zero or more of the prefixes above, each followed by a
#     single space, and then `loc`/`loca`/`local`, `tempname`, `tempvar`,
#     `tempfile`, `gettoken`, or `token`/`tokeni`/`tokeniz`/`tokenize`,
#     followed by whitespace; or
#   * the text `st_local(` anywhere in the code.
local_def_in = re.compile(
    r"(^({0} )*(loc(a|al)?|tempname|tempvar|tempfile|gettoken|token(i|iz|ize)?)\s)|st_local\(".format(pre),
    **kwargs,
).search

In [None]:
#| hide
test_eq(bool(local_def_in("sysuse auto")), False)
test_eq(bool(local_def_in("loc auto=1")), True)
test_eq(bool(local_def_in("qui n cap local auto=1")), True)
test_eq(bool(local_def_in("list local auto")), False)
test_eq(bool(local_def_in("tempfile file1")), True)
test_eq(bool(local_def_in("capture token file1")), True)
test_eq(bool(local_def_in("mata: st_local(test1, 2)")), True)

In [None]:
#| export
def run_as_program_w_locals(std_code, local_dict=None):
    """Run `std_code` as a program with existing locals made available.

    Definitions for the locals in `local_dict` (default: the current locals
    from `get_local_dict()`) are prepended to `std_code`. If `std_code`
    appears to define locals itself (per `local_def_in`), the program is run
    so that its locals are passed back via `s()` and then restored outside
    the program; otherwise it is simply run as a program.
    """
    known_locals = get_local_dict() if local_dict is None else local_dict
    code_with_locals = f"""{_locals_code_from_dict(known_locals)}\n{std_code}"""
    if local_def_in(std_code):
        _run_as_program_w_locals_sreturned(code_with_locals)
        _restore_locals_and_clear_sreturn()
    else:
        run_as_program(code_with_locals)

In [None]:
#| eval: false
run_direct(dedent("""
    macro drop _all
    local local1 = 1
    local local2 "two"
    local local3 `""3""' 
    """), quietly=True)
run_as_program_w_locals("""disp `"`local1' `local2' `local3'"' """)


1 two "3"


In [None]:
#| eval: false
code = '''\
local test1 "blah blah"
local test2 "blah"
'''
run_as_program_w_locals("""disp `"`local1' `local2' `local3'"' \n""" + code)
test_eq(get_local_dict(), 
        {'test2': 'blah',
         'test1': 'blah blah',
         'local1': '1',
         'local2': 'two',
         'local3': '"3"'})

1 two "3"



## dispatch_run

In [None]:
#| export
def run_noecho(code, sc_delimiter=False, run_as_prog=run_as_program_w_locals):
    """Run `code` block by block without echoing the commands.

    The code is split with `break_out_prog_blocks`. Blocks flagged `is_prog`
    go to `run_prog_noecho`; all other blocks go to `run_non_prog_noecho`,
    which is handed `run_as_prog`.
    """
    for blk in break_out_prog_blocks(code, sc_delimiter):
        is_prog = blk['is_prog']
        block_code = blk['std_code']
        if not is_prog:
            run_non_prog_noecho(block_code, run_as_prog=run_as_prog)
            continue
        run_prog_noecho(block_code)

In [None]:
#| eval: false
run_noecho(dedent('''\
    capture program drop ender
    program define ender
        disp "ender output"
    end
    capture program drop display2
    program define display2
        ender
    end
    display2
    '''))



ender output


In [None]:
#| eval: false
run_noecho(dedent("""\
    disp `"`local1' `local2' `local3'"'
    disp `"`local1' `local2' `local3' `test1'"'
    """), run_as_prog=run_as_program_w_locals)

1 two "3"
1 two "3" blah blah


In [None]:
#| eval: false
code = """\
local local1 "foo"
local local2 "bar"
local abcd "foo bar"
"""
run_noecho(code, run_as_prog=run_as_program_w_locals)
run_noecho(dedent("""\
    disp `"`local1' `local2' `local3'"'
    disp `"`local1' `local2' `local3' `test1'"'
    """), run_as_prog=run_as_program_w_locals)


foo bar "3"
foo bar "3" blah blah


In [None]:
#| export
def run_simple(code, quietly=False, echo=False, sc_delimiter=False):
    """Run `code` through `run_direct`.

    When `sc_delimiter` is true, a `#delimit;` line is prepended to the code.
    Output is shown inline unless `quietly` is set.
    """
    stata_code = "#delimit;\n" + code if sc_delimiter else code
    run_direct(stata_code, quietly=quietly, inline=not quietly, echo=echo)

In [None]:
#| eval: false
run_simple(dedent('''\
    capture program drop ender
    program define ender
        disp "ender output"
    end
    capture program drop display2
    program define display2
        ender
    end
    display2
    '''), quietly=True)




In [None]:
#| export
def dispatch_run(code, quietly=False, echo=False, sc_delimiter=False, noecho=False, run_as_prog=run_as_program_w_locals):
    """Run `code` with `run_noecho` or `run_simple`, depending on the options.

    `run_noecho` is used only when `noecho` is set and `quietly` is not;
    every other combination goes through `run_simple`.
    """
    if not noecho or quietly:
        run_simple(code, quietly, echo, sc_delimiter)
        return
    run_noecho(code, sc_delimiter, run_as_prog=run_as_prog)

In [None]:
#| eval: false
dispatch_run(dedent('''\
    capture program drop ender
    program define ender
        disp "ender output"
    end
    capture program drop display2
    program define display2
        ender
    end
    display2
    '''), quietly=True)




In [None]:
#| eval: false
dispatch_run(dedent('''\
    capture program drop ender
    program define ender
        disp "ender output"
    end
    capture program drop display2
    program define display2
        ender
    end
    display2
    '''), noecho=True)



ender output


## Output for kernel.do_inspect()

In [None]:
#| export
def get_inspect(code="", cursor_pos=0, detail_level=0, omit_sections=()):
    """Return text describing the current Stata session for `kernel.do_inspect()`.

    Runs `return list`, `ereturn list`, a "Last updated" timestamp line, and
    `describe, fullnames` inside an r-class program (with `return add`), and
    captures the output. The result is reordered so that the timestamp and
    dataset description come first, followed by the `return`/`ereturn`
    listings.

    The `code`, `cursor_pos`, `detail_level`, and `omit_sections` arguments
    are accepted but not used by this implementation.
    """
    runner = functools.partial(run_as_program, prog_def_option_code="rclass")
    inspect_code = """\
        return list
        ereturn list
        return add
        display "*** Last updated `c(current_time)' `c(current_date)' ***"
        describe, fullnames
        """
    raw_output = diverted_stata_output(inspect_code, runner=runner)
    # Split at the first timestamp marker. If the marker is missing,
    # partition() returns the whole output as `results` with empty
    # marker/description, so the output comes back unchanged (str.find would
    # return -1 and the slicing would move the last character to the front).
    results, marker, description = raw_output.partition('*** Last updated ')
    return marker + description + results

In [None]:
#| eval: false
run_noecho('sum \nreturn list')


    Variable |        Obs        Mean    Std. dev.       Min        Max
-------------+---------------------------------------------------------
        var1 |          5          .4    .5477226          0          1

scalars:
                  r(N) =  5
              r(sum_w) =  5
               r(mean) =  .4
                r(Var) =  .3
                 r(sd) =  .5477225575051662
                r(min) =  0
                r(max) =  1
                r(sum) =  2


In [None]:
#| hide
#| eval: false
run_noecho('return list')


scalars:
                  r(N) =  5
              r(sum_w) =  5
               r(mean) =  .4
                r(Var) =  .3
                 r(sd) =  .5477225575051662
                r(min) =  0
                r(max) =  1
                r(sum) =  2


In [None]:
#| eval: false
print(get_inspect())
print("--")
run_noecho('return list')

*** Last updated 17:23:36 11 Jan 2023 ***

Contains data
 Observations:             5                  
    Variables:             1                  
-------------------------------------------------------------------------------
Variable      Storage   Display    Value
    name         type    format    label      Variable label
-------------------------------------------------------------------------------
var1            float   %9.0g                 
-------------------------------------------------------------------------------
Sorted by: 
     Note: Dataset has changed since last saved.

scalars:
                r(sum) =  2
                r(max) =  1
                r(min) =  0
                 r(sd) =  .5477225575051662
                r(Var) =  .3
               r(mean) =  .4
              r(sum_w) =  5
                  r(N) =  5

--

scalars:
                r(sum) =  2
                r(max) =  1
                r(min) =  0
                 r(sd) =  .5477225575051662
     

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()