# completions

> Autocomplete functionality

Adapted from https://github.com/kylebarron/stata_kernel/blob/master/stata_kernel/completions.py. For now, completion is limited to variables, globals, locals, scalars, matrices, and file names.

In [None]:
#| default_exp completions
%load_ext autoreload
%autoreload 2

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
from nbstata.helpers import run_noecho
from fastcore.basics import patch_to
from textwrap import dedent
from enum import IntEnum
import sys
from io import StringIO
import os
import re
import platform

In [None]:
#| export
class CompletionsManager():
    """Compiled regexes and cached name lists used for autocompletion.

    Construction compiles the patterns that parse Stata output and detect
    the completion context, then calls `refresh()` so `self.suggestions`
    is populated immediately.

    NOTE: the magic (`%...`) and Mata completion patterns of the upstream
    stata_kernel implementation are not set up here.
    """
    def __init__(self):
        dotall_multiline = re.DOTALL | re.MULTILINE

        # Path completion: the text after the last double quote is the path.
        path_pattern = re.compile(r'^(?P<fluff>.*")(?P<path>[^"]*)\Z')
        self.path_search = path_pattern.search

        # Splits marker-delimited Stata output (as printed by `_completions`)
        # into one named group per kind of name.
        sections_pattern = re.compile(
            r"\A.*?"
            r"%varlist%(?P<varlist>.*?)"
            r"%globals%(?P<globals>.*?)"
            r"%scalars%(?P<scalars>.*?)"
            r"%matrices%(?P<matrices>.*?)(\Z|---+\s*end)",
            flags=dotall_multiline)
        self.matchall = sections_pattern.match

        # Varlist-style matching (whitespace-preceded tokens); applies to most.
        self.varlist = re.compile(r"(?:\s+)(\S+)", flags=re.MULTILINE)

        # File-style matching: a pattern for line breaks.
        self.filelist = re.compile(r"[\r\n]{1,2}", flags=re.MULTILINE)

        # Clean line breaks: matches a line break followed by a "> " marker.
        continuation_pattern = re.compile(
            r"(?=\s*)[\r\n]{1,2}?^>\s", flags=re.MULTILINE)
        self.varclean = continuation_pattern.sub

        # Match context; this is used to determine if the line starts
        # with matrix or scalar. It also matches constructs like
        #
        #     (`=)?scalar(
        prefix_commands = (
            r'(cap(t|tu|tur|ture)?'
            r'|qui(e|et|etl|etly)?'
            r'|n(o|oi|ois|oisi|oisil|oisily)?)')
        # Shared tail: an opening parenthesis whose argument is still being typed.
        open_call_tail = r"\([^\)\s]*?\Z"
        # Shared body: optional prefix commands, then the first real word.
        first_word = r"\s*(" + prefix_commands + r"\s+)*(?P<context>\S+)"

        def multiline_search(pattern):
            return re.compile(pattern, flags=re.MULTILINE).search

        self.context = {
            'function': multiline_search(
                r"(\s+|(?P<equals>\=))(?P<context>\S+?)" + open_call_tail),
            'lfunction': multiline_search(
                r"\s(?P<fluff>.*?)`\=(?P<context>\S+?)" + open_call_tail),
            'line': multiline_search(r"^" + first_word),
            'delimit_line': multiline_search(r"\A" + first_word),
        }

        self.refresh()

    def refresh(self):
        """Rebuild `self.suggestions` from `get_suggestions()`."""
        self.suggestions = self.get_suggestions()

The following helpers take the place of `self.quickdo('_StataKernelCompletions', kernel)` from the original stata_kernel code:

In [None]:
#| export
def variable_names():
    """Return the name of each variable index reported by `sfi.Data`."""
    from sfi import Data
    var_count = Data.getVarCount()
    return list(map(Data.getVarName, range(var_count)))

In [None]:
#| eval: false
from nbstata.config import launch_stata

In [None]:
#| eval: false
# Start Stata (splash disabled), create one variable, and show the
# resulting list of variable names as the cell output.
launch_stata(splash=False)
run_noecho("gen var1 = 1")
variable_names()

['var1']

In [None]:
#| export
def diverted_stata_output(code):
    """Run `code` via `run_noecho` and return what it printed to stdout.

    Logging is switched off with "capture log off" for the duration of the
    run and switched back on afterwards. `sys.stdout` is temporarily
    replaced by an in-memory buffer to capture the output.

    Args:
        code (str): Stata code to run.
    Returns:
        (str): Everything written to `sys.stdout` while `code` ran.
    Raises:
        Whatever `run_noecho` raises; `sys.stdout` and the log state are
        restored before the exception propagates.
    """
    import pystata
    pystata.stata.run("capture log off", quietly=True)
    try:
        old_stdout = sys.stdout
        diverted = StringIO()
        sys.stdout = diverted
        try:
            run_noecho(code)
        finally:
            # Always undo the redirection, otherwise a failing command
            # would leave all later output going to the buffer.
            sys.stdout = old_stdout
    finally:
        # Always re-enable logging, even when the run failed.
        pystata.stata.run("capture log on", quietly=True)
    return diverted.getvalue()

In [None]:
#| export
@patch_to(CompletionsManager)
def _completions(self):
    """Run a Stata snippet that lists current names; return its raw output.

    The snippet displays the variable list, global macros, scalars and
    matrices, each preceded by a `%name%` marker line. The captured text
    is returned unparsed.
    """
    listing_code = dedent("""\
        set more off
        set trace off
        syntax [varlist]
        disp "%varlist%"
        disp `"`varlist'"'
        disp "%globals%"
        disp `"`:all globals'"'
        * NOTE: This only works for globals; locals are, well, local ):
        * disp "%locals%"
        * mata : invtokens(st_dir("local", "macro", "*")')
        disp "%scalars%"
        disp `"`:all scalars'"'
        disp "%matrices%"
        disp `"`:all matrices'"'
    """)
    return diverted_stata_output(listing_code)

In [None]:
# Render the nbdev documentation entry for the patched method.
show_doc(CompletionsManager._completions)

---

[source](https://github.com/hugetim/nbstata/blob/main/nbstata/completions.py#L129){target="_blank" style="float:right; font-size:smaller"}

### CompletionsManager._completions

>      CompletionsManager._completions ()

In [None]:
#| export
@patch_to(CompletionsManager)
def get_suggestions(self):
    """Collect the names currently available for completion.

    Returns:
        (dict): Lists of names keyed by 'varlist', 'globals', 'scalars',
            'matrices' and 'locals'. Every list is empty when the output
            of `_completions` does not match the expected marker layout.
    """
    match = self.matchall(self._completions())
    if not match:
        # Unparseable output: offer nothing rather than fail.
        return {
            'varlist': [],
            'scalars': [],
            'matrices': [],
            'globals': [],
            'locals': [],
        }

    # Each captured section is a whitespace-separated list of names,
    # possibly wrapped over several "> "-prefixed lines.
    suggestions = {
        section: self.varlist.findall(self.varclean('', raw_text))
        for section, raw_text in match.groupdict().items()
    }

    # Locals are listed through a separate Mata call.
    locals_code = """mata : invtokens(st_dir("local", "macro", "*")')"""
    output_lines = re.split(r'[\r\n]{1,2}', diverted_stata_output(locals_code))
    locals_text = '\r\n'.join(output_lines)
    suggestions['locals'] = (
        self.varlist.findall(self.varclean('', locals_text))
        if locals_text.strip() else [])

    return suggestions

In [None]:
#| eval: false
# kernel1 = Mock()
# Build the manager instance that later cells reuse and display the
# suggestions it gathered on construction.
test_instance = CompletionsManager() #kernel1)
# print(test_instance._completions())
test_instance.suggestions

{'varlist': ['var1'],
 'globals': ['S_level',
  'F1',
  'F2',
  'F7',
  'F8',
  'S_ADO',
  'S_StataMP',
  'S_StataSE',
  'S_CONSOLE',
  'S_FLAVOR',
  'S_OS',
  'S_OSDTL',
  'S_MACH'],
 'scalars': [],
 'matrices': [],
 'locals': []}

In [None]:
#| export
@patch_to(CompletionsManager)
def get_file_paths(self, chunk):
    """Get file paths based on chunk

    Args:
        chunk (str): chunk of text after last space. Doesn't include string
            punctuation characters
    Returns:
        (List[str]): sorted folders and files at that location whose names
            start (case-insensitively) with the last path component of
            `chunk`. Each entry is prefixed with the folder part as typed,
            and folders end with a directory separator. Empty when `chunk`
            contains a local macro, references a global that cannot be
            resolved, or the folder cannot be listed.
    """
    # If local exists, return empty list
    if re.search(r'[`\']', chunk):
        return []

    # Define directory separator: backslash only on Windows, and only when
    # the user has not already typed a forward slash.
    dir_sep = '/'
    if platform.system() == 'Windows' and '/' not in chunk:
        dir_sep = '\\'

    # Split into the folder part (kept with its trailing separator) and the
    # partial name being typed.
    ind = max(chunk.rfind('/'), chunk.rfind('\\'))
    if ind >= 0:
        user_folder = chunk[:ind + 1]
        user_starts = chunk[ind + 1:]

        # Replace multiple consecutive separators with a single one
        user_folder = re.sub(r'/+', '/', user_folder)
        user_folder = re.sub(r'\\+', r'\\', user_folder)
    else:
        user_folder = ''
        user_starts = chunk

    # Replace globals with their values. `self.globals` may never have been
    # set on this instance; treat that like "no globals known" instead of
    # raising AttributeError.
    known_globals = getattr(self, 'globals', None) or {}
    globals_re = r'\$\{?((?![0-9_])\w{1,32})\}?'
    try:
        folder = re.sub(
            globals_re, lambda x: known_globals[x.group(1)], user_folder)
    except KeyError:
        # If the global isn't known (e.g. it hasn't been defined in the
        # Stata environment yet), then there are no paths to check
        return []

    # Relative paths are resolved against Stata's working directory
    abspath = re.search(r'^([/~]|[a-zA-Z]:)', folder)
    if not abspath:
        # Imported here because only relative paths need Stata's API.
        from sfi import SFIToolkit
        folder = SFIToolkit.getWorkingDir() + '/' + folder

    try:
        # Only the first os.walk entry (the folder itself) is needed.
        _, dirs, files = next(os.walk(os.path.expanduser(folder)))
    except StopIteration:
        # os.walk yields nothing when the folder cannot be listed.
        return []

    candidates = [x + dir_sep for x in dirs] + files
    results = [
        user_folder + x for x in candidates if not x.startswith('.')
        and re.match(re.escape(user_starts), x, re.I)]

    return sorted(results)

In [None]:
#| eval: false
# The chunk has no folder part, so names starting with "0" are listed.
test_instance.get_file_paths("0")

['00_config.ipynb',
 '01_utils.ipynb',
 '02_helpers.ipynb',
 '03_magics.ipynb',
 '04_completions.ipynb',
 '05_kernel.ipynb']

In [None]:
#| export
class Env(IntEnum):
    """Kinds of completion context distinguished by `get_env`."""
    # varlist and/or file path
    GENERAL = 0
    # `x* completed with `x*'
    LOCAL = 1
    # $x* completed with $x*
    GLOBAL = 2
    # ${x* completed with ${x*}
    GLOBAL_R = 3
    # scalar .* x* completed with x*
    SCALAR = 4
    # scalar(x* completed with scalar(x*)
    SCALAR_R = 5
    # matrix .* x* completed with x*
    MATRIX = 6
    # scalars and varlist, scalar .* = x* completed with x*
    SCALAR_VAR = 7
    # matrices and varlist, matrix .* = x* completed with x*
    MATRIX_VAR = 8
    # inline or in mata environment
    MATA = 9

In [None]:
#| export
@patch_to(CompletionsManager)
def get_env(self, code, r2chars, sc_delimit_mode, mata_mode=False):
    """Returns completions environment

    Inspects the text left of the cursor to decide which kind of name is
    being typed, where the word to complete starts, and which closing
    characters should be appended to a completion.

    Args:
        code (str): Right-truncated to cursor position
        r2chars (str): The two characters immediately after code.
            Will be used to accurately determine rcomp.
        sc_delimit_mode (bool): Whether #delimit ; is on.
        mata_mode (bool): Whether mata is on
    Returns:
        env (Env):
            0: varlist, program names, and/or file path
            1: locals, `x* completed with `x*'
            2: globals, $x* completed with $x*
            3: globals, ${x* completed with ${x*}
            4: scalars, scalar .* x* completed with x*
            5: scalars, scalar(x* completed with scalar(x*)
            6: matrices, matrix .* x* completed with x*
            7: scalars and varlist, scalar .* = x* completed with x*
            8: matrices and varlist, matrix .* = x* completed with x*
            9: mata, inline or in mata environment
            (The magic environments -1 and -2 are currently disabled;
            see the commented-out code below.)
        pos (int):
            Where the completions start. This is set to the start
            of the word to be completed.
        code (str):
            Word to match.
        rcomp (str):
            How to finish the completion. Blank by default.
                locals: '
                globals (if start with ${): }
                scalars: )
                scalars (if start with `): )'
    """

    # NOTE: lcode is only used by the disabled magic handling below.
    lcode = code.lstrip()
#     if self.magic_completion(lcode):
#         pos = code.rfind("%") + 1
#         env = -1
#         rcomp = ""
#         return env, pos, code[pos:], rcomp
#     elif self.set_magic_completion(lcode):
#         pos = max(code.rfind(' '), code.rfind('"')) + 1
#         env = -2
#         rcomp = ""
#         return env, pos, code[pos:], rcomp

    # Detect space-delimited word.
    env = Env.GENERAL
    # Trailing word that is not directly preceded by one of ` $ " { /
    search = re.search(r'(?<![`$"{/])\b\w+\Z', code, flags=re.MULTILINE)
    # Index of the character just before that word (-1 when there is none).
    searchpos = -1 if search is None else search.start() - 1
    # The word to complete starts after whichever delimiter comes last.
    pos = max(code.rfind(' '), code.rfind('"'), searchpos)
    rcomp = ''
    if pos >= 0:
        # Step past the delimiter itself.
        pos += 1

        if mata_mode:
            env = Env.MATA
        else:
            # Figure out if current statement is a matrix or scalar
            # statement. If so, will add them to completions list.
            if sc_delimit_mode:
                linecontext = self.context['delimit_line'](code)
            else:
                linecontext = self.context['line'](code)

            if linecontext:
                context = linecontext.groupdict()['context']
                # True when an "=" occurs anywhere after the first character.
                equals = (code.find('=') > 0)
                if re.match(r'^sca(lar|la|l)?$', context.strip()):
                    env = Env.SCALAR_VAR if equals else Env.SCALAR
                elif re.match(r'^mat(rix|ri|r)?$', context.strip()):
                    env = Env.MATRIX_VAR if equals else Env.MATRIX
#                 elif self.matainline(context.strip()):
#                     env = 9

            # Constructs of the form scalar(x<tab> will be filled only
            # with scalars. This can be preceded by = or `=
            if env is Env.GENERAL:
                lfuncontext = self.context['lfunction'](code)
                if lfuncontext:
                    lfunction = lfuncontext.groupdict()['context']
                    fluff = lfuncontext.groupdict()['fluff']
                    lfluff = 0 if not fluff else len(fluff)
                    if lfunction == 'scalar':
                        env = Env.SCALAR_R
                        # NOTE(review): the 3 presumably covers the "`=" prefix
                        # and the opening "(" — confirm the offset.
                        pos += len(lfunction) + 3 + lfluff
                        # Only append the closing characters that are not
                        # already right after the cursor.
                        if r2chars == ")'":
                            rcomp = ""
                        elif r2chars[0:1] == ")":
                            rcomp = ""
                        elif r2chars[0:1] == "'":
                            rcomp = ")"
                        else:
                            rcomp = ")'"
                else:
                    funcontext = self.context['function'](code)
                    if funcontext:
                        function = funcontext.groupdict()['context']
                        # NOTE(review): presumably 1 for "(" plus 1 for a
                        # leading "=" when present — confirm the offset.
                        extra = 2 if funcontext.groupdict()['equals'] else 1
                        if function == 'scalar':
                            env = Env.SCALAR_R
                            pos += len(function) + extra
                            rcomp = "" if r2chars[0:1] == ")" else ")"
    else:
        # No delimiter found: the whole of `code` is the word to complete.
        pos = 0
        if mata_mode:
            env = Env.MATA

    # Figure out if this is a local or global; env = 0 (default)
    # will suggest variables in memory.
    chunk = code[pos:]
    lfind = chunk.rfind('`')
    gfind = chunk.rfind('$')
    path_chars = any(x in chunk for x in ['/', '\\', '~'])
    # A backtick directly followed by a double quote opens a compound
    # quote (`"), not a local macro.
    chunk_quoted = chunk[lfind:].startswith('`"')

    if lfind >= 0 and (lfind > gfind) and not chunk_quoted:
        # Local macro: complete after the backtick, close with '.
        pos += lfind + 1
        env = Env.LOCAL
        rcomp = "" if r2chars[0:1] == "'" else "'"
    elif gfind >= 0 and not path_chars:
        bfind = chunk.rfind('{')
        if bfind >= 0 and (bfind > gfind):
            # ${name form: complete after the brace, close with }.
            pos += bfind + 1
            env = Env.GLOBAL_R
            rcomp = "" if r2chars[0:1] == "}" else "}"
        else:
            env = Env.GLOBAL
            pos += gfind + 1
    elif chunk.startswith('"'):
        # Skip an opening double quote.
        pos += 1
    elif chunk.startswith('`"'):
        # Skip an opening compound quote.
        pos += 2
    else:
        # Set to matrix or scalar environment, if applicable. Note
        # that matrices and scalars can be set to variable values,
        # so varlist is still a valid completion in a matrix or
        # scalar context.
        pass

#     if env == 9:
#         matacontext = self.matacontext(code)
#         if matacontext:
#             st, context, quote, pre = matacontext.groupdict().values()
#             varlist = [
#                 'data', 'sdata', 'store', 'sstore', 'view', 'sview',
#                 'varindex', 'varrename', 'vartype', 'isnumvar', 'isstrvar',
#                 'vartype', 'varformat', 'varlabel', 'varvaluelabel',
#                 'dropvar', 'keepvar']
#             _globals = ['global', 'global_hcat']
#             _locals = ['local']
#             scalars = ['numscalar', 'strscalar', 'numscalar_hcat']
#             matrices = [
#                 'matrix', 'matrix_hcat', 'matrixrowstripe',
#                 'matrixcolstripe', 'replacematrix']

#             posextra = 0
#             # if st:
#             #     posextra += len(st)
#             # if context:
#             #     posextra += len(context)
#             # if quote:
#             #     posextra += len(quote) + 1

#             if context in varlist:
#                 env = 0
#             elif context in _globals:
#                 env = 2
#                 rcomp = ''
#             elif context in _locals:
#                 env = 1
#                 rcomp = ''
#             elif context in scalars:
#                 env = 4
#                 rcomp = ''
#             elif context in matrices:
#                 env = 6
#                 rcomp = ''
#             else:
#                 posextra = 0

#             pos += posextra

    # Closing symbols are currently always enabled (hard-coded), so the
    # reset below never runs.
    closing_symbol = True #config.get('autocomplete_closing_symbol', 'False')
#     closing_symbol = closing_symbol.lower() == 'true'
    if not closing_symbol:
        rcomp = ''

    return env, pos, code[pos:], rcomp

In [None]:
#| export

# Maps each completion environment to the `suggestions` keys whose names
# are offered in that environment.
relevant_suggestion_keys = {
    Env.GENERAL: ['varlist'],
    Env.LOCAL: ['locals'],
    Env.GLOBAL: ['globals'],
    Env.GLOBAL_R: ['globals'],
    Env.SCALAR: ['scalars'],
    Env.SCALAR_R: ['scalars'],
    Env.MATRIX: ['matrices'],
    Env.SCALAR_VAR: ['scalars', 'varlist'],
    Env.MATRIX_VAR: ['matrices', 'varlist'],
    # No Mata suggestions are collected; an explicit empty entry keeps
    # lookups for this environment from raising KeyError.
    Env.MATA: [],
}

@patch_to(CompletionsManager)
def get(self, starts, env, rcomp):
    """Return environment-aware completions list.

    Args:
        starts (str): Prefix that suggested names must start with.
        env (Env or int): Completion environment.
        rcomp (str): Closing text appended to every suggested name.
    Returns:
        (List[str]): Matching names with `rcomp` appended. In the general
            environment, matching file paths follow (without `rcomp`).
            Environments with no mapped suggestion keys yield no names.
    """
    # Tolerate environments missing from the mapping instead of raising
    # KeyError.
    suggestion_keys = relevant_suggestion_keys.get(env, [])
    relevant_suggestions = [var + rcomp
                            for key in suggestion_keys
                            for var in self.suggestions[key]
                            if var.startswith(starts)]
    # Compare by value so that a plain int 0 is treated like Env.GENERAL.
    if env == Env.GENERAL:
        relevant_suggestions += self.get_file_paths(starts)
    return relevant_suggestions

#     elif env == 9:
#         if len(starts) > 1:
#             builtins = [
#                 var for var in mata_builtins if var.startswith(starts)]
#         else:
#             builtins = []

#         if re.search(r'[/\\]', starts):
#             paths = self.get_file_paths(starts)
#         else:
#             paths = []

#         return [
#             var for var in self.suggestions['mata']
#             if var.startswith(starts)] + builtins + paths

In [None]:
#| eval: False
from fastcore.test import test_eq
from pystata import stata

In [None]:
#| eval: False
def completions_test_setup(code):
    """Clear Stata, run `code`, then refresh the shared `test_instance`."""
    stata.run("clear all", quietly=True)
    run_noecho(code)
    # `test_instance` is only read here, so no `global` declaration is needed.
    test_instance.refresh()
    
    
def _complete(code, cursor_pos):
    """Return the completions `test_instance` offers at `cursor_pos` in `code`."""
    left_of_cursor = code[:cursor_pos]
    next_two_chars = code[cursor_pos:(cursor_pos + 2)]
    env, _pos, chunk, rcomp = test_instance.get_env(
        left_of_cursor, next_two_chars, False)
    return test_instance.get(chunk, env, rcomp)

In [None]:
#| eval: False
# A partially typed variable name after `list` is a general context and
# completes to the existing variable.
completions_test_setup("gen var1 = 1")
code = "list va"
cursor_pos = 7

left_of_cursor = code[:cursor_pos]
next_two_chars = code[cursor_pos:(cursor_pos + 2)]
test_eq(test_instance.get_env(left_of_cursor, next_two_chars, False),
        (0, 5, 'va', ''))
test_eq(_complete(code, cursor_pos), ['var1'])

In [None]:
#| hide
#| eval: False
# With nothing defined in Stata, "00" completes to the matching file name.
completions_test_setup('')
file_matches = _complete("use 00", 6)
test_eq(file_matches, ["00_config.ipynb"])

In [None]:
#| hide
#| eval: False
# Cursor just before an existing closing quote: no extra quote is appended.
completions_test_setup('local test_local "test value"')
local_matches = _complete("list `t'", 7)
test_eq(local_matches, ["test_local"])

In [None]:
#| hide
#| eval: False
# Cursor after the closing quote: nothing is offered.
completions_test_setup('local test_local "test value"')
matches_after_quote = _complete("list `t'", 8)
test_eq(matches_after_quote, [])

In [None]:
#| eval: False
# No closing quote typed yet: the completion supplies it.
completions_test_setup('local test_local "test value"')
open_local_matches = _complete("list `t", 7)
test_eq(open_local_matches, ["test_local'"])

In [None]:
#| hide
#| eval: False
# `completions_test_setup` already refreshes `test_instance`, so no second
# refresh is needed before checking the `$name` completion.
completions_test_setup('global test_global "test value"')
test_eq(
    _complete("list $tes", 9),
    ['test_global'],
)

In [None]:
#| eval: False
# `completions_test_setup` already refreshes `test_instance`. With the
# closing brace already present, none is appended.
completions_test_setup('global test_global "test value"')
test_eq(
    _complete("list ${tes}", 10),
    ['test_global'],
)

In [None]:
#| hide
#| eval: False
# `completions_test_setup` already refreshes `test_instance`. Without a
# closing brace, the completion supplies it.
completions_test_setup('global test_global "test value"')
test_eq(
    _complete("list ${tes", 10),
    ['test_global}'],
)

In [None]:
#| hide
#| eval: False
# `completions_test_setup` already refreshes `test_instance`. Inside
# `scalar(`, the completion supplies the closing parenthesis.
completions_test_setup('scalar test_scalar = 5')
test_eq(
    _complete("disp scalar(tes", 15),
    ['test_scalar)'],
)

In [None]:
#| hide
#| eval: False
# `completions_test_setup` already refreshes `test_instance`. On the
# right-hand side of a matrix statement, matrix names are offered.
completions_test_setup(r'matrix test_matrix = (1,2,3\4,5,6)')
test_eq(
    _complete("matrix A = tes", 14),
    ['test_matrix'],
)

In [None]:
#| hide
# Run nbdev's export step.
import nbdev; nbdev.nbdev_export()