# completion_env

> Autocomplete helper: determine context of the token to be autocompleted

Adapted from the [stata_kernel version](https://github.com/kylebarron/stata_kernel/blob/master/stata_kernel/completions.py), omitting the Mata-specific parts.

In [None]:
#| default_exp completion_env
%load_ext autoreload
%autoreload 2

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
from nbstata.code_utils import ending_sc_delimiter
from fastcore.basics import patch_to
from enum import IntEnum
from typing import Tuple
import re

In [None]:
#| export
from pygments import lexers
from pygments.token import Comment, Keyword, Name, Number, \
    String, Text, Operator

In [None]:
#| export
stata_lexer = lexers.get_lexer_by_name('stata')

def _lex_tokens(code):
    """Lex `code` with the module-level Stata lexer.

    Returns a list of `(index, tokentype, value)` tuples, one per token, in
    the order the lexer produces them.
    """
    token_stream = stata_lexer.get_tokens_unprocessed(code)
    return list(token_stream)

In [None]:
#| export
def _last_token(code):
    """Merge the trailing run of same-typed tokens in `code` into one token.

    The lexer may split what is logically one item into several consecutive
    tokens of the same type (e.g. one `Text` token per character), so the
    final tokens sharing the last token's type are joined together.

    Returns
    -------
    tuple
        `(index, tokentype, value)`: the smallest start index in the run, the
        type of the last token, and the concatenated text of the run.
        When the lexer yields no tokens (e.g. `code` is empty) the result is
        `(0, None, "")`, matching what `_last_token_full_string` returns for
        empty code.
    """
    tokens = _lex_tokens(code)
    if not tokens:
        # Without this guard `tokens[-1]` raises IndexError on empty input.
        return (0, None, "")
    last_tokentype = tokens[-1][1]
    # Walk backwards to the first token of the trailing same-type run.
    run_start = len(tokens) - 1
    while run_start > 0 and tokens[run_start - 1][1] is last_tokentype:
        run_start -= 1
    run = tokens[run_start:]
    index = min(t[0] for t in run)
    value = "".join(t[2] for t in run)
    return (index, last_tokentype, value)

In [None]:
#| export
def _last_token_full_string(code, sc_delimiter=False):
    """Like `_last_token`, but extend a trailing string fragment back to its opening quote.

    A string containing a macro is lexed as several tokens, so the trailing
    `String` run may be only the tail of the literal: for
    `use "$indir/delimit t` the trailing run is `/delimit t`, whereas this
    function returns `"$indir/delimit t`. While the combined value has type
    `String` but does not start with `"` or `` `" ``, earlier tokens are
    prepended.

    Parameters
    ----------
    code : str
        Code to inspect.
    sc_delimiter : bool
        If true, `code` is lexed with `#delimit;` prepended; the returned
        index is still relative to the original `code`.

    Returns
    -------
    tuple
        `(index, tokentype, value)`; `(0, None, "")` when `code` is empty.
    """
    if not code:
        return (0, None, "")
    prefix = ""
    if sc_delimiter:
        # Lex with a leading `#delimit;` line (presumably so the lexer treats
        # `code` as semicolon-delimited); the offset is undone below.
        prefix = "#delimit;\n"
        orig_code = code
        code = prefix + orig_code
    tokens = _lex_tokens(code)
    last_tokentype = tokens[-1][1]
    # Collect the trailing run of tokens that share the last token's type.
    tokens_to_combine = []
    for token in reversed(tokens):
        if token[1] is last_tokentype:
            tokens_to_combine.append(token)
        else:
            break
    tokens_to_combine = list(reversed(tokens_to_combine))
    index = min(tokens_to_combine, key=lambda t: t[0])[0]
    value = "".join([t[2] for t in tokens_to_combine])
    # Keep extending backwards until the string value starts with an opening quote.
    while last_tokentype == String and value[0] != '"' and value[0:2] != '`"':
        # Switch to back-to-front order so earlier tokens can be appended.
        tokens_to_combine = list(reversed(tokens_to_combine))
        # Re-lex only the text preceding the current start, last token first.
        reversed_remaining_tokens = list(reversed(_lex_tokens(code[:index])))
        for i, token in enumerate(reversed_remaining_tokens):
            # Stop at a non-String token that directly precedes (in source
            # order) a String token.
            # NOTE(review): for i == 0, `i-1` is -1 and therefore refers to the
            # first token of `code[:index]`, not to a neighbor -- confirm this
            # is intended.
            if (token[1] is not String 
                and reversed_remaining_tokens[i-1][1] is String):
                break
            # Never extend across a comment token.
            elif token[1] in [Comment.Single, Comment.Multiline, Comment.Special]:
                break
            tokens_to_combine.append(token)
        # Back to source order before recomputing the start index and value.
        tokens_to_combine = list(reversed(tokens_to_combine))
        index = min(tokens_to_combine, key=lambda t: t[0])[0]
        value = "".join([t[2] for t in tokens_to_combine])
    # Translate the index back to the caller's `code` (without the prefix).
    index = index-len(prefix)
    if index < 0:
        # The combined value reaches into the prefix: trim that part off.
        value = value[-index:]
        index = 0
    return (index, last_tokentype, value)

In [None]:
#| hide
from fastcore.test import test_eq

In [None]:
#| hide
for code in ['', ' ', 'di', 'disp ']:
    test_eq(_last_token_full_string(code, sc_delimiter=True), 
            _last_token_full_string(code))

In [None]:
#| hide
_last_token_full_string(' ')

(0, Token.Text, ' ')

In [None]:
#| hide
_last_token_full_string('use "$indir/delimit t')

(4, Token.Literal.String, '"$indir/delimit t')

In [None]:
#| hide
_lex_tokens("""\
sysuse lifeexp
disp scalar(te""")

[(0, Token.Keyword, 'sysuse'),
 (6, Token.Text, ' '),
 (7, Token.Text, 'l'),
 (8, Token.Text, 'i'),
 (9, Token.Text, 'f'),
 (10, Token.Text, 'e'),
 (11, Token.Text, 'e'),
 (12, Token.Text, 'x'),
 (13, Token.Text, 'p'),
 (14, Token.Keyword, '\ndisp'),
 (19, Token.Keyword, ' scalar'),
 (26, Token.Text, '('),
 (27, Token.Text, 't'),
 (28, Token.Text, 'e')]

In [None]:
#| hide
_lex_tokens("""\
sysuse lifeexp
disp scalar(
te""")

[(0, Token.Keyword, 'sysuse'),
 (6, Token.Text, ' '),
 (7, Token.Text, 'l'),
 (8, Token.Text, 'i'),
 (9, Token.Text, 'f'),
 (10, Token.Text, 'e'),
 (11, Token.Text, 'e'),
 (12, Token.Text, 'x'),
 (13, Token.Text, 'p'),
 (14, Token.Keyword, '\ndisp'),
 (19, Token.Keyword, ' scalar'),
 (26, Token.Text, '('),
 (27, Token.Keyword, '\nte')]

In [None]:
#| hide
_last_token("""\
sysuse lifeexp
disp scalar(
te""")

(27, Token.Keyword, '\nte')

In [None]:
#| hide
_last_token("""use  00""")

(5, Token.Literal.Number, '00')

In [None]:
#| hide
_last_token("""use `00""")

(4, Token.Name.Variable, '`00')

In [None]:
#| hide
_last_token("""use $00""")

(4, Token.Name.Variable.Global, '$00')

In [None]:
#| hide
_last_token("""use ${00""")

(4, Token.Name.Variable.Global, '${00')

In [None]:
#| hide
_last_token("""use "00""")

(4, Token.Literal.String, '"00')

In [None]:
#| hide
_last_token("""use `"00""")

(4, Token.Literal.String, '`"00')

In [None]:
#| hide
_last_token('disp " // x')

(5, Token.Literal.String, '" // x')

In [None]:
#| hide
_last_token("sfdf/* fdafd")

(4, Token.Comment.Multiline, '/* fdafd')

In [None]:
#| hide
_last_token('disp " // `')

(10, Token.Name.Variable, '`')

In [None]:
#| hide
_last_token('disp " // $')

(10, Token.Name.Variable.Global, '$')

In [None]:
#| hide
_last_token('use "$indir/delimit t')

(11, Token.Literal.String, '/delimit t')

Note the corner cases discussed at the links below; the commented-out test cases further down are not yet handled:

* [github.com/kylebarron/language-stata/issues/90](https://github.com/kylebarron/language-stata/issues/90)
* [statalist.org/forums/forum/general-stata-discussion/general/1448244](https://statalist.org/forums/forum/general-stata-discussion/general/1448244)

In [None]:
#| export
class CompletionEnv:
    """Holds the compiled regex callables used to classify the code before the cursor."""

    def __init__(self):
        """Compile every pattern once and expose its bound `search`/`match` method."""
        multiline = re.MULTILINE

        # Trailing 'word': the run of characters other than white space, `"`
        # and `=` at the very end, including the delimiter just before it.
        last_chunk_regex = re.compile(r'[\s"=][^\s"=]*?\Z', flags=multiline)
        self.last_chunk = last_chunk_regex.search

        # Path completion: everything up to the last `"`, then the rest.
        path_regex = re.compile(r'^(?P<fluff>.*")(?P<path>[^"]*)\Z')
        self.path_search = path_regex.search

        # Magic completion: `%name` or `*%name` spanning the whole string.
        magic_regex = re.compile(
            r'\A\*?%(?P<magic>\S*)\Z', flags=re.DOTALL | multiline)
        self.magic_completion = magic_regex.match

        # Optional command prefixes (capture / quietly / noisily and their
        # abbreviations) that may precede the first word of a line.
        pre = (
            r'(cap(t|tu|tur|ture)?'
            r'|qui(e|et|etl|etly)?'
            r'|n(o|oi|ois|oisi|oisil|oisily)?)')

        # Function context: an unclosed call such as
        #
        #     (`=)?scalar(
        function_regex = re.compile(
            r"(\s+|\=|`=)\s*(?P<name>\w+?)"
            r"\([^\)]*?(?P<last_word>\w*)\Z", flags=multiline)
        self.fcontext = {'function': function_regex.search}

        # Line context: the last line (or last `;`-delimited statement) and
        # its first word, used to tell whether it starts with matrix or scalar.
        line_regex = re.compile(
            r"^(?P<last_line>\s*({0}\s+)*(?P<first_word>\S+) .*?)\Z".format(pre),
            flags=multiline)
        delimit_line_regex = re.compile(
            r"(?:\A|;)(?P<last_line>\s*({0}\s+)*(?P<first_word>[^\s;]+)\s[^;]*?)\Z".format(pre),
            flags=multiline)
        self.context = {
            'line': line_regex.search,
            'delimit_line': delimit_line_regex.search,
        }
#         self.last_line = {
#             'line':
#                 re.compile(
#                     r"^(?P<last_line>.*)\Z",
#                     **kwargs).search,
#             'delimit_line':
#                 re.compile(
#                     r"(?:\A|;)(?P<last_line>[^;]*)\Z",
#                     **kwargs).search
#         }

#         self.ends_in_a_comment = re.compile(
#             r'('
#             r'(^((\s*\*)|((.*( |\t))?\/\/)).*)'     # last line starting with '*' or containing ' //'
#             r'|(\/\*)([^\*\/]|\*(?!\/)|\/(?<!\*))*' # unfinished /* block
#             r')\Z', flags=re.MULTILINE).search

#         self.ends_in_a_string_literal = re.compile(
#             r'(\`\")' # start of a `" block
#             r'(' 
#             r'([^\"\']|\"(?!\')|\'(?<!\"))*' 
#             r'([^\"\']|\"(?!\')|\'(?<!\"))*' 
#             r'([^\"\']|\"(?!\')|\'(?<!\"))*' 
#             r')*\Z').search

In [None]:
#| export
def _ends_in_string_literal(code, sc_delimiter=False):
    """Return True if the last token of `code` is a `String` token.

    With `sc_delimiter` set, `code` is lexed with `#delimit;` prepended.
    """
    lexed_code = ("#delimit;\n" + code) if sc_delimiter else code
    _, tokentype, _ = _last_token(lexed_code)
    return tokentype is String

In [None]:
#| export
def _ends_in_a_comment(code, sc_delimiter=False):
    """Return True if the last token of `code` is a single-line, multi-line or special comment.

    With `sc_delimiter` set, `code` is lexed with `#delimit;` prepended.
    """
    lexed_code = ("#delimit;\n" + code) if sc_delimiter else code
    comment_types = (Comment.Single, Comment.Multiline, Comment.Special)
    _, tokentype, _ = _last_token(lexed_code)
    return tokentype in comment_types

In [None]:
#| hide
test_instance = CompletionEnv()

`_ends_in_string_literal` tests:

In [None]:
#| hide
no_examples = [
    """disp `"`"fdafd"' "' x""",
    """disp `""fdafd" "' x""",
    """disp `"fdafd" "' x""",
    """disp "fdafd" x""",
    '''disp // " x
    fdfd''',
    'disp "x" ',
]
yes_examples = [
    """disp `"fdafd""",
    """disp "fdafd""",
    'disp " // x',
    """disp `"fdaf`"d"'""",
]
for code in no_examples:
    test_eq(bool(_ends_in_string_literal(code)), False)
for code in yes_examples:
    test_eq(bool(_ends_in_string_literal(code)), True)

`_ends_in_a_comment` tests:

In [None]:
#| hide
no_examples = [
    "fdafd",
    "fdafs *",
    "fdk//  fda * ",
    'disp " // x',
    """\
sfdf/* fdafd
fadfdaf   */dfd""",
    """\
#delimit ;
// This line is ignored, but the line break is not
di x""",
    """\
#delimit ;
// Same for multi-line /*
di x""",
    """\
#delimit ;
* Line continuation ///
// Breaks line continuation ///
di x""",
    """\
#delimit ;
disp "Line continuation" ///
// Breaks line continuation ///
di x""",
]
yes_examples = [
    " \t*fdfd",
    "sfdf/* fdafd",
    "* dfda ///",
    "// fdafdafs ",
    "/// fjda;fjds",
    "/* fdafd /",
    "/* fdafd *",
    " /* fdafd * / x",
#     """\
# #delimit ; 
# * Line continuations do apply
# di x""",
    """\
#delimit ; 
* Same for multi-line /*
di """,
]
for code in no_examples:
    test_eq(bool(_ends_in_a_comment(code)), False)
for code in yes_examples:
    test_eq(bool(_ends_in_a_comment(code)), True)

In [None]:
#| hide
_last_token("""\
#delimit ; 
* Line continuations do apply
di x""")

(44, Token.Text, ' x')

In [None]:
#| export
@patch_to(CompletionEnv)
def _scalar_f_pos_rcomp(self, code, r2chars):
    """Detect whether `code` ends inside an unclosed `scalar(` call.

    Returns `(True, pos, rcomp)` when the function-context regex matches and
    the function name is `scalar`: `pos` is where the trailing word starts
    and `rcomp` is `")"` unless `r2chars` already begins with `)` or equals
    `" )"`. Otherwise returns `(False, None, None)`.
    """
    found = self.fcontext['function'](code)
    if not found or found.group('name') != 'scalar':
        return False, None, None

    word_start = found.start('last_word')
    pos = word_start if word_start else len(code)
    already_closed = r2chars[0:1] == ")" or r2chars == " )"
    rcomp = "" if already_closed else ")"
    return True, pos, rcomp

In [None]:
#| hide
code = """\
sysuse lifeexp
disp scalar("""
test_instance._scalar_f_pos_rcomp(code, "")

(True, 27, ')')

In [None]:
#| hide
code = """\
sysuse lifeexp
disp scalar(te"""
test_instance._scalar_f_pos_rcomp(code, "")

(True, 27, ')')

In [None]:
#| hide
code = """\
sysuse lifeexp
disp scalar( te"""
test_instance._scalar_f_pos_rcomp(code, "")

(True, 28, ')')

In [None]:
#| hide
code = """\
sysuse lifeexp
disp scalar(
te"""
test_instance._scalar_f_pos_rcomp(code, "")

(True, 28, ')')

In [None]:
#| hide
code = """\
sysuse lifeexp
disp
scalar(
te"""
test_instance._scalar_f_pos_rcomp(code, "")

(True, 28, ')')

In [None]:
#| export
@patch_to(CompletionEnv)
def _start_of_last_chunk(self, code):
    """Return the index at which the trailing 'word' of `code` starts.

    The `last_chunk` match begins on the delimiter (white space, `"` or `=`)
    before the word, hence the `+ 1`; without a match the word starts at 0.
    """
    found = self.last_chunk(code)
    if not found:
        return 0
    return found.start() + 1

In [None]:
#| hide
code = """\
sysuse lifeexp
disp
scalar(
te"""
test_instance._start_of_last_chunk(code)

28

In [None]:
#| hide
def _word(code):
    """Test helper: return the trailing chunk of `code` found by `_start_of_last_chunk`."""
    start = test_instance._start_of_last_chunk(code)
    return code[start:]

test_eq(_word("""use  00"""), """00""")

test_eq(_word("""use `00"""), """`00""")
test_eq(_word("""use $00"""), """$00""")
test_eq(_word("""use ${00"""), """${00""")
test_eq(_word("""use {00"""), """{00""")
test_eq(_word("""use /00"""), """/00""")

test_eq(_word("""use "00"""), """00""")
test_eq(_word("""use"00"""), """00""")
test_eq(_word("""use `"00"""), """00""")
test_eq(_word('''use `"00"'''), "")
test_eq(_word("""use `"00"'"""), "'")

test_eq(_word("""use """), "")
test_eq(_word("""use"""), "use")

test_eq(_word("""use `tes"""), """`tes""")
test_eq(_word("""use `tes'"""), """`tes'""")
test_eq(_word("""use ${tes}"""), """${tes}""")

test_eq(_word("\n".join(["use", "${tes}"])), "${tes}")

test_eq(_word("`=tes"), "tes")
test_eq(_word("disp `=tes"), "tes")

In [None]:
#| export
@patch_to(CompletionEnv)
def _last_line_first_word(self, code, sc_delimiter=False):
    """Return `(last_line, first_word)` for the statement `code` ends in.

    Uses the `delimit_line` pattern when `sc_delimiter` is truthy and the
    `line` pattern otherwise. Returns `(None, None)` if the pattern does not
    match.
    """
    pattern_key = 'delimit_line' if sc_delimiter else 'line'
    found = self.context[pattern_key](code)
    if not found:
        return None, None
    return found.group('last_line'), found.group('first_word')

In [None]:
#| hide
code = """\
sysuse lifeexp
list lex"""
test_instance._last_line_first_word(code, False)

('list lex', 'list')

In [None]:
#| hide
code = """\
sysuse lifeexp
list lex"""
test_instance._last_line_first_word(code, True)

('sysuse lifeexp\nlist lex', 'sysuse')

In [None]:
#| hide
code = """\
sysuse lifeexp;list lex"""
test_instance._last_line_first_word(code, True)

('list lex', 'list')

In [None]:
#| hide
code = """\
sysuse lifeexp
lis"""
test_instance._last_line_first_word(code, False)

(None, None)

In [None]:
#| hide
code = """\
#delimit;
sysuse lifeexp
list lex"""
test_instance._last_line_first_word(code, True)

('\nsysuse lifeexp\nlist lex', 'sysuse')

In [None]:
#| export
class Env(IntEnum):
    """Completion environments that `get_env` can report."""
    # no suggestions
    NONE = -9
    # magics, %x*
    MAGIC = -1
    # varlist and/or file path
    GENERAL = 0
    # `x* completed with `x*'
    LOCAL = 1
    # $x* completed with $x* or ${x* completed with ${x*}
    GLOBAL = 2
    # scalar .* x* completed with x* or scalar(x* completed with scalar(x*)
    SCALAR = 4
    # matrix .* x* completed with x*
    MATRIX = 6
    # scalars and varlist, scalar .* = x* completed with x*
    SCALAR_VAR = 7
    # matrices and varlist, matrix .* = x* completed with x*
    MATRIX_VAR = 8
    # inline or in mata environment
    MATA = 9
    # file path
    STRING = 10

In [None]:
#| export
@patch_to(CompletionEnv)
def get_env(self,
            code: str, # Right-truncated to cursor position
            r2chars: str, # The two characters immediately after `code`, used to accurately determine rcomp
            sc_delimiter, # Delimiter state handed to `ending_sc_delimiter` together with `code`
           ) -> Tuple[Env, int, str, str]:
    """Returns completions environment

    Every return path yields the same 4-tuple, including the early exit for
    string tokens that cannot be completed.

    Returns
    -------
    env : Env
    pos : int
        Where the completions start. This is set to the start of the word to be completed.
    out_chunk : str
        Word to match.
    rcomp : str
        How to finish the completion (defaulting to nothing):
        locals: '
        globals (if start with ${): }
        scalars: )
        scalars (if start with `): )'
    """
    rcomp = ""

    # Magics are recognized on the left-stripped code, but `pos` must index
    # into the original `code`.
    lcode = code.lstrip()
    if self.magic_completion(lcode):
        pos = code.rfind("%") + 1
        env = Env.MAGIC
        return env, pos, code[pos:], rcomp

    sc_delimiter = ending_sc_delimiter(code, sc_delimiter)
    env = Env.GENERAL

    # Detect last "word" delimited by white space, a double-quote, or =.
    pos = self._start_of_last_chunk(code)

    if _ends_in_a_comment(code, sc_delimiter):
        return env, pos, code[pos:], rcomp

    last_token_index, last_token_type, last_token_value = _last_token_full_string(code, sc_delimiter)

    if last_token_type is String:
        if (not _ends_in_string_literal(code + " ", sc_delimiter)
            or not (last_token_value.startswith('"')
                    or last_token_value.startswith('`"'))):
            # Nothing to suggest here. The empty out_chunk keeps the result a
            # 4-tuple like every other return path.
            return Env.NONE, len(code)-1, "", rcomp
        if last_token_value.startswith('"'):
            opening_marker_length = 1
            rcomp = "" if r2chars[0:1] == '"' else '"'
        elif last_token_value.startswith('`"'):
            opening_marker_length = 2
            rcomp = "" if r2chars[0:2] == "\"'" else "\"'"
        # Completion starts right after the opening quote marker.
        pos = last_token_index + opening_marker_length
        env = Env.STRING
    else:
        # Figure out if this is a local or global; env = 0 (default)
        # will suggest variables in memory.
        chunk = code[pos:]
        lfind = chunk.rfind('`')
        gfind = chunk.rfind('$')
        path_chars = any(x in chunk for x in ['/', '\\', '~'])

        if lfind >= 0 and (lfind > gfind):
            pos += lfind + 1
            env = Env.LOCAL
            rcomp = "" if r2chars[0:1] == "'" else "'"
        elif gfind >= 0 and not path_chars:
            bfind = chunk.rfind('{')
            if bfind >= 0 and (bfind == gfind+1):
                # `${x` form: complete after the brace and close it.
                pos += bfind + 1
                env = Env.GLOBAL
                rcomp = "" if r2chars[0:1] == "}" else "}"
            else:
                env = Env.GLOBAL
                pos += gfind + 1

    if pos == 0:
        env = Env.NONE # to-do: auto-complete commands here
    else:
        # Figure out if current statement is a matrix or scalar
        # statement. If so, will add them to completions list.
        last_line, first_word = self._last_line_first_word(code, sc_delimiter)
        if first_word:
            equals_present = (last_line.find('=') > 0)
            if re.match(r'^sca(lar|la|l)?$', first_word):
                env = Env.SCALAR_VAR if equals_present else Env.SCALAR
            elif re.match(r'^mat(rix|ri|r)?$', first_word):
                env = Env.MATRIX_VAR if equals_present else Env.MATRIX

        # Constructs of the form scalar(x<tab> will be filled only
        # with scalars. This can be preceded by = or `=
        if env in [Env.GENERAL, Env.STRING]:
            scalar_f, new_pos, new_rcomp = self._scalar_f_pos_rcomp(code, r2chars)
            if scalar_f:
                env = Env.SCALAR
                pos = new_pos
                rcomp = new_rcomp

    out_chunk = code[pos:]
    return env, pos, out_chunk, rcomp

In [None]:
#| hide
test_eq(
    test_instance.get_env("* $", "", None)[0],
    Env.GENERAL)

In [None]:
#| hide
test_instance.get_env("""\
scalar
list x""", "", ";")

(<Env.SCALAR: 4>, 12, 'x', '')

In [None]:
#| hide
test_eq(
    [test_instance.get_env("""\
#delimit;
scalar
list x""", "", None)[i] for i in [0, 2, 3]],
    [test_instance.get_env("""\
scalar
list x""", "", ";")[i] for i in [0, 2, 3]])

In [None]:
test_eq(
    test_instance.get_env("`", "", None)[0:3],
    (Env.LOCAL, 1, ""))

In [None]:
#| hide
test_eq(
    test_instance.get_env("disp 1\n`", "", None)[0],
    Env.LOCAL)

In [None]:
test_eq(
    test_instance.get_env("*%e", "", None)[0:2],
    (Env.MAGIC, 2))

In [None]:
#| hide
test_eq(
    test_instance.get_env("%e", "", None)[0:2],
    (Env.MAGIC, 1))

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()