# stata_more

> Helper functions that expand on `pystata`/`sfi` functionality

In [None]:
#| default_exp stata_more
%load_ext autoreload
%autoreload 2

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
from nbstata.misc_utils import DivertedPrints
from nbstata.stata import run_direct, get_local, set_local, drop_var
from textwrap import dedent
import functools

In [None]:
#| eval: false
from nbstata.config import launch_stata

## Simple Helpers

In [None]:
#| export
class SelectVar():
    """Temporary Stata 0/1 indicator variable built from an `if` condition.

    The variable is meant to be passed as the selection variable to
    `sfi.Data.getAsDict`. Usable as a context manager: entering yields the
    variable name (or `None` when no condition was given) and exiting drops
    the variable again.
    """
    # Stays None when the supplied condition is empty, so no variable is created.
    varname = None

    def __init__(self, stata_if_code):
        """Create the indicator variable for `stata_if_code` (e.g. `"if x==1"`)."""
        # Remove the first "if " occurrence and surrounding whitespace to get the bare expression.
        expression = stata_if_code.replace('if ', '', 1).strip()
        if not expression:
            return
        generate_code = dedent(f"""\
                tempvar __selectionVar
                generate `__selectionVar' = cond({expression},1,0)""")
        run_direct(generate_code, quietly=True)
        # The local macro holds the actual (generated) name of the tempvar.
        self.varname = get_local("__selectionVar")

    def clear(self):
        """Remove temporary select_var from Stata dataset"""
        if not self.varname:
            return
        drop_var(self.varname)
        # Blank the macro so later code does not see a stale variable name.
        set_local("__selectionVar", "")

    def __enter__(self):
        return self.varname

    def __exit__(self, exc_type, exc_value, exc_tb):
        self.clear()

`SelectVar.varname` is a temporary Stata variable for use in `sfi.Data.getAsDict`

In [None]:
from fastcore.test import test_eq, ExceptionExpected

In [None]:
#| eval: false
launch_stata(splash=False)
run_direct("""\
set obs 5
gen var1 = _n > 3
""", quietly=True)
with SelectVar(" if var1==0") as sel_varname:
    print(f"varname: {sel_varname}")
    run_direct("list, clean")
    test_eq(bool(get_local("__selectionVar")), True)
run_direct("desc, simple")
test_eq(get_local("__selectionVar"), "")

varname: __000000

       var1   __000000  
  1.      0          1  
  2.      0          1  
  3.      0          1  
  4.      1          0  
  5.      1          0  
var1


In [None]:
#| export
class IndexVar:
    """Context manager providing a temporary Stata variable equal to `_n`.

    Entering generates the variable and yields its name; exiting drops the
    variable and blanks the `indexvar` local macro.
    """

    def __enter__(self):
        create_code = (
            "            tempvar indexvar\n"
            "            generate `indexvar' = _n"
        )
        run_direct(create_code, quietly=True)
        # The local macro holds the actual (generated) name of the tempvar.
        self.idx_var = get_local('indexvar')
        return self.idx_var

    def __exit__(self, exc_type, exc_value, exc_tb):
        # Undo both side effects of __enter__: the dataset variable and the macro.
        drop_var(self.idx_var)
        set_local('indexvar', "")

In [None]:
#| eval: False
with IndexVar() as idx_var:
    run_direct("desc, simple")
    test_eq(bool(get_local('indexvar')), True)
run_direct("desc, simple")
test_eq(get_local('indexvar'), "")

var1      __000001
var1


## Run commands as a Stata program

The original motivation for adding this functionality is that `pystata.stata.run` can only suppress the "echo" of single commands, not multi-line Stata code:

In [None]:
#| eval: false
run_direct('disp "test 1"', echo=False)

test 1


In [None]:
#| eval: false
two_lines_of_code = dedent('''\
    disp "test 1"
    disp "test 2"
    ''')
run_direct(two_lines_of_code, echo=False)


. disp "test 1"
test 1

. disp "test 2"
test 2

. 


As a workaround when echo is not desired, we can run multiple commands as a Stata program:

In [None]:
#| eval: false
run_direct(f"program temp_nbstata_program_name\n{two_lines_of_code}\nend\n", quietly=True)

In [None]:
#| eval: false
run_direct("temp_nbstata_program_name", quietly=False, inline=True, echo=False)

test 1
test 2


In [None]:
#| eval: false
run_direct(f"program drop temp_nbstata_program_name", quietly=True)

(Note: This and the following two functions assume input Stata code standardized by `standardize_code`, which will be ensured by the `break_out_prog_blocks` within the ultimate `dispatch_run` wrapper function.)

In [None]:
#| export
def run_as_program(std_non_prog_code, prog_def_option_code=""):
    """Run Stata code as a temporary program so its commands are not echoed.

    Args:
        std_non_prog_code: Standardized Stata code containing no program
            definitions (it becomes the body of a program).
        prog_def_option_code: Options for the `program` definition line,
            e.g. `"rclass"` or `"sclass"`; empty for none.

    Raises:
        Whatever `run_direct` raises when defining or running the program fails.
    """
    _program_name = "temp_nbstata_program_name"
    _options = f", {prog_def_option_code}" if prog_def_option_code else ""
    _program_define_code = (
        f"program {_program_name}{_options}\n"
        f"{std_non_prog_code}\n"
        "end\n"
    )
    try:
        run_direct(_program_define_code, quietly=True)
        run_direct(_program_name, quietly=False, inline=True, echo=False)
    finally:
        # `capture` so that a failed definition (program never created) does not
        # make the cleanup itself error out and mask the original exception.
        run_direct(f"capture program drop {_program_name}", quietly=True)

In [None]:
#| eval: false
run_as_program(two_lines_of_code)

test 1
test 2


In [None]:
#| hide
#| eval: false
run_as_program(two_lines_of_code, "sclass")

test 1
test 2


Not all code can be run within a program without modification, however: 
1. Programs cannot be defined within another program, nor can Python or Mata blocks be run.
2. A program definition is a different scope for locals, so:
    * the program code does not have access to locals defined previously, and
    * locals set within the program code do not persist outside of it.

In [None]:
#| eval: false
with ExceptionExpected(SystemError):
    run_as_program("""\
        program define prog1
            disp 1
        end
        """)

In [None]:
#| eval: false
run_direct("local test1 = 1")
run_direct("disp `test1'")

1


In [None]:
#| eval: false
run_as_program("""\
    disp `test1'
    local test2 = 2""")




In [None]:
#| eval: false
run_direct("""\
    disp `test1'
    disp `test2' """)


.     disp `test1'
1

.     disp `test2' 


. 


## Divert Stata output to string

The goal here is to get output from some Stata commands without changing the Stata environment. Preserving `r()` return values requires special treatment because the `log on`/`off` commands needed to ensure this output is not logged are themselves r-class. But the input `std_code` may also contain r-class commands. And capturing multi-line Stata output without the commands being echoed poses additional `run_as_program`-related challenges with regard to local variables.

To start, we set aside the latter two issues and simply use `run_direct` to run the Stata code. We handle the first issue by running the `log` commands inside an r-class program with [`return add`](https://www.stata.com/help.cgi?return) at the start.

A custom code `runner` may be specified. This may be useful if, for instance, the input `std_code` needs to access the `r()` results.

In [None]:
#| export
def diverted_stata_output(std_code, runner=None):
    """Run Stata code and return its printed output as a string.

    Logging is switched off before the code runs and back on afterwards, each
    inside an r-class program that starts with `return add`.

    Args:
        std_code: Stata code to run.
        runner: Callable taking the code string; defaults to `run_direct`
            with `quietly=False, inline=True, echo=False`.

    Returns:
        Everything printed while the log commands and `std_code` ran.
    """
    if runner is None:
        runner = functools.partial(run_direct, quietly=False, inline=True, echo=False)
    toggle_log = functools.partial(run_as_program, prog_def_option_code="rclass")
    with DivertedPrints() as captured:
        toggle_log("return add\ncapture log off")
        try:
            runner(std_code)
        finally:
            # Turn logging back on even when the supplied code fails.
            toggle_log("return add\ncapture log on")
        text = captured.getvalue()
    return text

In [None]:
from textwrap import dedent

In [None]:
#| eval: false
two_lines_of_code = dedent('''\
    disp "test 1"
    disp "test 2"
    ''')
out = diverted_stata_output(two_lines_of_code)

In [None]:
#| eval: false
print(out)


. disp "test 1"
test 1

. disp "test 2"
test 2

. 



In [None]:
#| eval: false
print(diverted_stata_output('disp "test 1"', run_as_program))

test 1



If we know the code we're running is non-program code, we can get a speed improvement by running the `log` code together with the input `std_non_prog_code`.

In [None]:
#| export
def diverted_stata_output_quicker(std_non_prog_code):
    """Run non-program Stata code and return its printed output as a string.

    The `log off`/`log on` commands are run together with the input code in a
    single r-class program (starting with `return add`), rather than in
    separate programs.

    Args:
        std_non_prog_code: Standardized Stata code containing no program
            definitions.

    Returns:
        Everything printed while the combined program ran.

    Raises:
        SystemError: Re-raised from `run_as_program` after logging has been
            switched back on.
    """
    with DivertedPrints() as diverted:
        code = f"return add\ncapture log off\n{std_non_prog_code}\ncapture log on"
        try:
            run_as_program(code, prog_def_option_code="rclass")
        except SystemError:
            # The trailing `capture log on` may not have been reached, so
            # switch logging back on before propagating the error.
            run_as_program("return add\ncapture log on", prog_def_option_code="rclass")
            raise
        out = diverted.getvalue()
    return out

In [None]:
#| eval: false
print(diverted_stata_output_quicker(two_lines_of_code))

test 1
test 2



In [None]:
#| eval: false
import time
tic = time.perf_counter()
diverted_stata_output(two_lines_of_code)
toc = time.perf_counter()
print(f"diverted_stata_output_direct run in {toc - tic:0.4f} seconds")
tic = time.perf_counter()
diverted_stata_output_quicker(two_lines_of_code)
toc = time.perf_counter()
print(f"diverted_stata_output_quicker run in {toc - tic:0.4f} seconds")

diverted_stata_output_direct run in 0.3325 seconds
diverted_stata_output_quicker run in 0.1559 seconds


In [None]:
#| hide
#| eval: false
tic = time.perf_counter()
diverted_stata_output(two_lines_of_code, run_as_program)
toc = time.perf_counter()
print(f"diverted_stata_output(run_as_program) run in {toc - tic:0.4f} seconds")

diverted_stata_output(run_as_program) run in 0.4741 seconds


## varlist utility

Takes a [Stata `varlist`](https://www.stata.com/help.cgi?varlist) and returns a list of full variable names, as required by [sfi.Data.getAsDict()](https://www.stata.com/python/api16/Data.html#sfi.Data.getAsDict). Frames (`stfr`) are not yet properly supported.

In [None]:
#| export
def var_from_varlist(varlist, stfr=None):
    """Expand a Stata varlist into a list of variable names.

    Args:
        varlist: Stata varlist string (abbreviations allowed).
        stfr: Frame name. When truthy, the varlist is not expanded by Stata
            and is only split on whitespace.

    Returns:
        List of variable names, or `None` when the result is empty.
    """
    if stfr:
        names_text = varlist.strip()
        return names_text.split() if names_text else None

    helper_name = "temp_nbstata_varlist_name"
    # Helper program: Stata's `syntax` expands the varlist, `disp` prints it.
    run_direct(f"""\
            program define {helper_name}
                syntax [varlist(default=none)]
                disp "`varlist'"
            end
            """, quietly=True)
    try:
        expand_and_drop = f"""\
                {helper_name} {varlist}
                program drop {helper_name}
                """
        names_text = diverted_stata_output_quicker(expand_and_drop).strip()
    except Exception:
        # The in-program drop may not have run; remove the helper before re-raising.
        run_direct(f"capture program drop {helper_name}", quietly=True)
        raise
    return names_text.split() if names_text else None

In [None]:
#| hide
#| eval: False
run_as_program("sum\nreturn list")


    Variable |        Obs        Mean    Std. dev.       Min        Max
-------------+---------------------------------------------------------
        var1 |          5          .4    .5477226          0          1

scalars:
                  r(N) =  5
              r(sum_w) =  5
               r(mean) =  .4
                r(Var) =  .3
                 r(sd) =  .5477225575051662
                r(min) =  0
                r(max) =  1
                r(sum) =  2


In [None]:
#| hide
#| eval: False
run_direct("return list")


scalars:
                  r(N) =  5
              r(sum_w) =  5
               r(mean) =  .4
                r(Var) =  .3
                 r(sd) =  .5477225575051662
                r(min) =  0
                r(max) =  1
                r(sum) =  2


In [None]:
#| eval: False
print(var_from_varlist("v", None))

['var1']


In [None]:
#| hide
#| eval: False
run_direct("return list")


scalars:
                r(sum) =  2
                r(max) =  1
                r(min) =  0
                 r(sd) =  .5477225575051662
                r(Var) =  .3
               r(mean) =  .4
              r(sum_w) =  5
                  r(N) =  5


## Get local macro info

https://www.statalist.org/forums/forum/general-stata-discussion/general/1457792-how-to-list-all-locals-and-store-them-in-a-macro

In [None]:
#| export
def local_names():
    """Return the names of Stata's local macros as a list of strings.

    Mata writes the space-joined names into a scratch local macro, which is
    read back and then blanked.
    """
    scratch_macro = 'temp_nbstata_all_locals'
    list_locals_code = """\
        mata : st_local("temp_nbstata_all_locals", invtokens(st_dir("local", "macro", "*")'))
        """
    run_direct(list_locals_code, quietly=True)
    joined_names = get_local(scratch_macro)
    # Blank the scratch macro again once its value has been read.
    set_local(scratch_macro, "")
    return joined_names.split()

In [None]:
#| hide
def _local_names_old():
    """Return local macro names by capturing Mata's printed output.

    Earlier approach based on `diverted_stata_output`; used in the timing
    comparison against `local_names`.
    """
    mata_code = """mata : invtokens(st_dir("local", "macro", "*")')"""
    return diverted_stata_output(mata_code).split()

In [None]:
#| hide
#| eval: False
import time
tic = time.perf_counter()
local_names()
toc = time.perf_counter()
print(f"new run in {toc - tic:0.4f} seconds")
tic = time.perf_counter()
_local_names_old()
toc = time.perf_counter()
print(f"old run in {toc - tic:0.4f} seconds")

new run in 0.0374 seconds
old run in 0.3268 seconds


In [None]:
#| eval: False
run_direct("local test1 = 1 \n local test2 = 2", quietly=True)
test_eq(set(local_names()), {'test1', 'test2'})

In [None]:
#| export
def get_local_dict(_local_names=None):
    """Map local macro names to their current values.

    Args:
        _local_names: Iterable of macro names; when `None`, the names are
            obtained from `local_names()`.

    Returns:
        Dict of `{name: get_local(name)}`.
    """
    names = local_names() if _local_names is None else _local_names
    values_by_name = {}
    for macro_name in names:
        values_by_name[macro_name] = get_local(macro_name)
    return values_by_name

In [None]:
#| eval: False
test_eq(get_local_dict(), {'test1': '1', 'test2': '2'})

In [None]:
#| export
def locals_code_from_dict(preexisting_local_dict):
    """Build Stata code that defines each local macro in the given dict.

    Each entry becomes one `local name `"value"'` line (value in compound
    double quotes); lines are joined with newlines.
    """
    definition_lines = [
        f"""local {macro_name} `"{macro_value}"'"""
        for macro_name, macro_value in preexisting_local_dict.items()
    ]
    return "\n".join(definition_lines)

In [None]:
#| hide
print(locals_code_from_dict({'test1': 'blah', 'test2': 'blah blah'}))

local test1 `"blah"'
local test2 `"blah blah"'


In [None]:
#| hide
test_eq(locals_code_from_dict({'test1': 'blah', 'test2': 'blah blah'}),
        """local test1 `"blah"\'\nlocal test2 `"blah blah"\'""")

In [None]:
#| eval: False
print(locals_code_from_dict(get_local_dict()))

local test2 `"2"'
local test1 `"1"'


## Output for kernel.do_inspect()

In [None]:
#| export
def get_inspect(code="", cursor_pos=0, detail_level=0, omit_sections=()):
    """Return Stata's `describe` output followed by the `return`/`ereturn` lists.

    The commands are run as an r-class program with output diverted to a
    string; the text from the "*** Last updated" line onward is then moved
    ahead of the return lists.

    Args:
        code, cursor_pos, detail_level, omit_sections: Not used by this
            function; presumably kept to mirror the kernel's `do_inspect`
            arguments — TODO confirm.

    Returns:
        The rearranged output, or the raw output unchanged if the
        "*** Last updated" marker is not found.
    """
    runner = functools.partial(run_as_program, prog_def_option_code="rclass")
    inspect_code = """\
        return list
        ereturn list
        return add
        display "*** Last updated `c(current_time)' `c(current_date)' ***"
        describe, fullnames
        """
    raw_output = diverted_stata_output(inspect_code, runner=runner)
    return_lists, marker, description = raw_output.partition('*** Last updated ')
    if not marker:
        # No marker: slicing at find()'s -1 would rotate the last character to the front.
        return raw_output
    return marker + description + return_lists

In [None]:
#| eval: false
run_direct('sum')


    Variable |        Obs        Mean    Std. dev.       Min        Max
-------------+---------------------------------------------------------
        var1 |          5          .4    .5477226          0          1


In [None]:
#| hide
#| eval: false
run_direct('return list')


scalars:
                  r(N) =  5
              r(sum_w) =  5
               r(mean) =  .4
                r(Var) =  .3
                 r(sd) =  .5477225575051662
                r(min) =  0
                r(max) =  1
                r(sum) =  2


The `r()` `return list` values remain unchanged, except that the display order is reversed for some reason--there seems to be no way to avoid that.

In [None]:
#| eval: false
print(get_inspect())
print("--")
run_direct('return list')

*** Last updated 10:26:01 13 Jan 2023 ***

Contains data
 Observations:             5                  
    Variables:             1                  
-------------------------------------------------------------------------------
Variable      Storage   Display    Value
    name         type    format    label      Variable label
-------------------------------------------------------------------------------
var1            float   %9.0g                 
-------------------------------------------------------------------------------
Sorted by: 
     Note: Dataset has changed since last saved.

scalars:
                r(sum) =  2
                r(max) =  1
                r(min) =  0
                 r(sd) =  .5477225575051662
                r(Var) =  .3
               r(mean) =  .4
              r(sum_w) =  5
                  r(N) =  5

--

scalars:
                r(sum) =  2
                r(max) =  1
                r(min) =  0
                 r(sd) =  .5477225575051662
     

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()