# browse

> Helpers for browse, head, and tail magics
- order: 7

In [None]:
#| default_exp browse
%load_ext autoreload
%autoreload 2

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
from nbstata.misc_utils import print_red
from nbstata.stata import run_direct, run_single
from nbstata.stata_more import SelectVar, diverted_stata_output_quicker
from nbstata.pandas import better_pdataframe_from_data
from fastcore.basics import patch_to
import re

## Parsing Stata code for browse magic

In [None]:
#| hide
from fastcore.test import test_eq, test_fail
from textwrap import dedent

In [None]:
#| export
def _get_start_end_strs(stata_in_code):
    stata_range_code = stata_in_code.replace('in ','').strip()
    slash_pos = stata_range_code.find('/')
    if slash_pos != -1:
        start_str = stata_range_code[:slash_pos]
        end_str = stata_range_code[slash_pos+1:]
    else:
        start_str = "1"
        end_str = stata_range_code
    return start_str, end_str

In [None]:
#| hide
_get_start_end_strs("in 10")

('1', '10')

In [None]:
#| export
def _get_pos_stata_obs_num(in_obs_str, count):
    temp_str = in_obs_str.strip().upper()
    if temp_str == 'F': 
        in_obs = 1
    elif temp_str == 'L':
        in_obs = count
    else:
        try:
            in_obs = int(in_obs_str)
        except ValueError as e:
            raise ValueError(f"{in_obs_str} invalid observation number")
        if in_obs < 0: in_obs += count + 1
        if in_obs < 1 or in_obs > count:
            raise ValueError(f"{in_obs_str} invalid observation number")
    return in_obs

In [None]:
#| hide
_get_pos_stata_obs_num("10", 100)

10

In [None]:
#| export
def in_range(stata_in_code, count):
    """Return the `in`-qualifier range as a ``(start, stop)`` pair.

    A blank `stata_in_code` yields ``(None, None)``. Otherwise both bounds
    are resolved to positive observation numbers and the result is
    ``(first - 1, last)``, i.e. the start is shifted down by one while the
    end is kept as is.

    Raises:
        ValueError: if a bound is invalid or the start exceeds the end.
    """
    if stata_in_code.strip() == "":
        return (None, None)
    first_str, last_str = _get_start_end_strs(stata_in_code)
    first = _get_pos_stata_obs_num(first_str, count)
    last = _get_pos_stata_obs_num(last_str, count)
    if first > last:
        raise ValueError("observations numbers out of range")
    return (first - 1, last)

In [None]:
test_eq(in_range("", 100), (None, None))
test_eq(in_range("in 10", 100), (0, 10))
test_eq(in_range("in 1/10", 100), (0, 10))
test_eq(in_range("in F/10", 100), (0, 10))
test_eq(in_range("in 3/10", 100), (2, 10))
test_fail(in_range, args=("in 10", 5))
test_fail(in_range, args=("in 10/1", 100))

Negative indices are converted to positive, following Stata syntax:

In [None]:
test_eq(in_range("in -10/-1", 41), (31, 41))

## Head/tail/browse magic helpers

In [None]:
#| export
def _parse_browse_magic_syntax(code):
    """Let Stata's own `syntax` command parse `code`; return the raw output.

    A temporary Stata program is defined whose `syntax` line accepts an
    optional varlist, `if`, `in`, and the options `noLabels` / `noFormat`.
    The program displays each parsed piece after a marker line
    (``%varlist%``, ``%if%``, ``%in%``, ``%nolabels%``, ``%noformat%``)
    and ends with ``%end%``. It is invoked with `code` and dropped within
    the same diverted Stata call.

    If that call raises, the program is dropped with `capture program drop`
    and the exception is re-raised.

    Returns the stripped output with every ``"\\n> "`` sequence removed
    (presumably Stata's continuation marker for wrapped long lines).
    """
    prog = "temp_nbstata_syntax_name"
    program_def = (
        f"program define {prog}\n"
        """ syntax [varlist(default=none)] [if] [in] [, noLabels noFormat]
            disp "%varlist%"
            foreach var in `varlist' {
                disp "`var'"
            }
            disp "%if%"
            disp `"`if'"'
            disp "%in%"
            disp `"`in'"'
            disp "%nolabels%"
            disp "`labels'"
            disp "%noformat%"
            disp "`format'"
            disp "%end%"
        end
        """)
    run_direct(program_def, quietly=True)
    try:
        labelled = diverted_stata_output_quicker(f"""\
            {prog} {code}
            program drop {prog}
            """).strip()
    except Exception as e:
        # The helper program may still be defined; remove it before failing.
        run_single(f"capture program drop {prog}", show_exc_warning=True)
        raise e
    return labelled.replace("\n> ", "")

In [None]:
#| hide
from nbstata.config import launch_stata
from nbstata.stata import obs_count, run_single
import numpy as np

In [None]:
#| hide
#| eval: False
launch_stata(splash=False)
run_single("""\
sysuse uslifeexp2, clear""")

(U.S. life expectancy, 1900-1940)


In [None]:
#| hide
#| eval: False
print(_parse_browse_magic_syntax("y if year, noformat "))

%varlist%
year
%if%
if year
%in%

%nolabels%

%noformat%
noformat
%end%


In [None]:
#| hide
#| eval: False
print(_parse_browse_magic_syntax("y if year & year & year & year & year & year & year & year & year & year & year & year & year & year & year & year & year & year & year & year, noformat "))

%varlist%
year
%if%
if year & year & year & year & year & year & year & year & year & year & year & year & year & year & year & year & year & year & year & year
%in%

%nolabels%

%noformat%
noformat
%end%


In [None]:
#| hide
#| export
# Matcher for the marker-delimited output of `_parse_browse_magic_syntax`.
# Anything before a line starting with `%varlist%` is skipped lazily; each
# named group captures the text between its marker and the next one.
matchparts = re.compile(
    (
        r"\A.*?"
        r"^%varlist%(?P<varlist>.*?)"
        r"%if%(?P<if>.*?)"
        r"%in%(?P<in>.*?)"
        r"%nolabels%(?P<nolabels>.*?)"
        r"%noformat%(?P<noformat>.*?)%end%"
    ),
    flags=re.DOTALL | re.MULTILINE,
).match

In [None]:
#| hide
#| eval: False
matchparts(_parse_browse_magic_syntax("y if year, noformat ")).groupdict()

{'varlist': '\nyear\n',
 'if': '\nif year\n',
 'in': '\n\n',
 'nolabels': '\n\n',
 'noformat': '\nnoformat\n'}

In [None]:
#| export
def parse_browse_magic(code):
    """Parse head/tail/browse magic arguments into their components.

    An optional leading numeric token is taken as `N`; the remaining tokens
    are re-joined with single spaces and parsed by Stata via
    `_parse_browse_magic_syntax`.

    Returns:
        ``(N, var, if_code, in_code, nolabels, noformat)`` where `N` is an
        int or None, `var` is a list of names or None when no varlist was
        given, and the remaining items are stripped strings (empty when the
        corresponding piece is absent).
    """
    # NOTE(review): split()/join collapses every run of whitespace, including
    # inside quoted strings of an `if` expression -- confirm this is intended.
    # NOTE(review): str.isnumeric() also accepts characters such as '²' that
    # int() rejects -- confirm whether isdecimal() is the intended check.
    tokens = code.split()
    N = None
    if tokens and tokens[0].isnumeric():
        N = int(tokens[0])
        tokens = tokens[1:]
    parsed = matchparts(_parse_browse_magic_syntax(" ".join(tokens))).groupdict()
    var = parsed['varlist'].split() or None
    if_code, in_code, nolabels, noformat = (
        parsed[key].strip() for key in ('if', 'in', 'nolabels', 'noformat')
    )
    return N, var, if_code, in_code, nolabels, noformat

In [None]:
#| eval: False
code = "le if year==1920 in 1/10, noformat"
test_eq(parse_browse_magic(code), 
        (None, ['le'], 'if year==1920', 'in 1/10', '', 'noformat')
       )

In [None]:
#| eval: False
run_single("gen date = dofy(year)")
test_eq(parse_browse_magic("10 le* if date==mdy(1, 1, 1910), noformat"),
        (10, ['le'], 'if date==mdy(1, 1, 1910)', '', '', 'noformat')
       )
run_single("drop date")

In [None]:
#| export
def _parse_df_params(code, count, browse=False, tail=False):
    """Turn magic arguments into dataframe-retrieval parameters.

    Args:
        code: argument text of the head/tail/browse magic.
        count: observation count the row limits are compared against
            (callers in this file pass `obs_count()`).
        browse: True for the browse magic, which honours `[in]` and has no
            default row limit; otherwise the default limit is 5 rows.
        tail: for head/tail magics, select the last rows instead of the first.

    Returns:
        ``(obs_range, var, if_code, valuelabel, sformat)``. `obs_range` is a
        `range` or None when no restriction applies; `valuelabel` and
        `sformat` are False when `nolabels` / `noformat` were specified.
    """
    import numpy as np
    N, var, if_code, in_code, nolabels, noformat = parse_browse_magic(code)

    if N is None:
        N_max = np.inf if browse else 5
    else:
        if browse:
            print_red("Warning: '%browse [N]' syntax is deprecated "
                      "and may be removed in v1.0.")
        N_max = N

    obs_range = None
    if browse:
        # An explicit [in] range takes precedence over the row limit.
        start, end = in_range(in_code, count)
        if start is not None and end is not None:
            obs_range = range(start, end)
        elif count > N_max:
            obs_range = range(0, N_max)
    else:
        if in_code:
            print_red(f"Note: [in] not allowed for {'tail' if tail else 'head'} "
                      "magic and is ignored."
                     )
        if count > N_max:
            if tail:
                obs_range = range(count - N_max, count)
            else:
                obs_range = range(0, N_max)

    return obs_range, var, if_code, not nolabels, not noformat

In [None]:
#| export
def get_df(obs_range, var, stata_if_code, missingval, valuelabel, sformat):
    """Build a dataframe for the given observations, variables and `if` code.

    `stata_if_code` is handed to `SelectVar`, whose context value is passed
    on as `selectvar`. When no explicit variable list was requested and that
    selection variable shows up in the result, it is dropped again.
    """
    with SelectVar(stata_if_code) as sel_varname:
        frame = better_pdataframe_from_data(
            obs=obs_range,
            var=var,
            selectvar=sel_varname,
            missingval=missingval,
            valuelabel=valuelabel,
            sformat=sformat,
        )
        no_varlist = not var
        if no_varlist and sel_varname is not None and sel_varname in frame:
            frame = frame.drop([sel_varname], axis=1)
    return frame

## Head/tail magic helpers

In [None]:
#| export
def headtail_df_params(code, count, missing_config, tail=False):
    """Return the `get_df`-style parameter tuple for the head/tail magics.

    Args:
        code: argument text of the magic.
        count: observation count, forwarded to `_parse_df_params`.
        missing_config: value used for missing data; the special value
            ``'pandas'`` selects NaN instead.
        tail: True for the tail magic, False for head.

    Returns:
        ``(obs_range, var, stata_if_code, missingval, valuelabel, sformat)``.
    """
    import numpy as np
    # `np.NaN` was removed in NumPy 2.0; `np.nan` exists in every version.
    missingval = np.nan if missing_config == 'pandas' else missing_config
    obs_range, var, stata_if_code, valuelabel, sformat = (
        _parse_df_params(code, count, tail=tail)
    )
    return obs_range, var, stata_if_code, missingval, valuelabel, sformat

In [None]:
#| eval: False
test_eq(headtail_df_params("", 50, "."),
        ((range(0, 5), None, '', ".", True, True)))

In [None]:
#| eval: False
test_eq(headtail_df_params("10 y le*, noformat", 50, "."),
        ((range(0, 10), ['year', 'le'], '', ".", True, False)))

In [None]:
#| eval: False
test_eq(headtail_df_params("10 y le*, noformat", 50, ".", tail=True),
        ((range(40, 50), ['year', 'le'], '', ".", True, False)))

In [None]:
#| export
def headtail_get_df(obs_range, var, stata_if_code, missingval, valuelabel, sformat):
    """Return the head/tail dataframe, applying the row limit after `if`.

    Without an `if` condition this is just `get_df`. With one, all
    observations are fetched with the condition applied, and only then is
    the result trimmed to ``len(obs_range)`` rows: the last rows when
    `obs_range` does not start at 0 (tail), otherwise the first rows.

    `obs_range` may be None, meaning no row limit applies; the filtered
    data is then returned untrimmed.
    """
    if not stata_if_code or obs_range is None:
        # No condition, or no row limit to apply: get_df already does it all.
        return get_df(obs_range, var, stata_if_code, missingval, valuelabel, sformat)
    N_max = len(obs_range)
    # Guard the index lookup so an empty range (N == 0) yields an empty frame
    # instead of raising IndexError.
    tail = N_max > 0 and obs_range[0] != 0
    df = get_df(None, var, stata_if_code, missingval, valuelabel, sformat)
    return df.tail(N_max) if tail else df.head(N_max)

In [None]:
#| hide
#| eval: False
get_df(*headtail_df_params('', obs_count(), "."))

Unnamed: 0,year,le
1,1900,47.3
2,1901,49.1
3,1902,51.5
4,1903,50.5
5,1904,47.6


In [None]:
#| hide
#| eval: False
test_eq(get_df(*headtail_df_params('', obs_count(), ".")),
        headtail_get_df(*headtail_df_params('', obs_count(), ".")),)

In [None]:
#| eval: False
headtail_get_df(*headtail_df_params('if year<1903', obs_count(), "."))

Unnamed: 0,year,le
1,1900,47.3
2,1901,49.1
3,1902,51.5


In [None]:
#| eval: False
test_eq(headtail_get_df(*headtail_df_params('if year<1903', obs_count(), ".")),
        headtail_get_df(*headtail_df_params('if year<1903', obs_count(), ".", tail=True)),)

In [None]:
#| eval: False
headtail_get_df(*headtail_df_params('if year>1910', obs_count(), "."))

Unnamed: 0,year,le
12,1911,52.6
13,1912,53.5
14,1913,52.5
15,1914,54.2
16,1915,54.5


In [None]:
#| hide
#| eval: False
headtail_get_df(*headtail_df_params('if year>1910', obs_count(), ".", tail=True))

Unnamed: 0,year,le
37,1936,58.5
38,1937,60.0
39,1938,63.5
40,1939,63.7
41,1940,62.9


In [None]:
#| hide
#| eval: False
test_eq(headtail_get_df(*headtail_df_params('if year>1910', obs_count(), ".", tail=True)),
        headtail_get_df(*headtail_df_params('', obs_count(), ".", tail=True)),)

In [None]:
#| hide
#| eval: False
run_single("gen date = dofy(year)")
headtail_get_df(*headtail_df_params('10 le* if date==mdy(1, 1, 1910)', obs_count(), ".", tail=False))

Unnamed: 0,le
11,50


In [None]:
#| hide
#| eval: False
run_single("drop date")

## Browse magic helpers

In [None]:
#| export
def browse_df_params(code, count, browse=True, tail=False):
    """Return the `get_df`-style parameter tuple for the browse magic.

    Missing values are always represented as NaN here.

    NOTE(review): `tail` is accepted but not forwarded to
    `_parse_df_params`, so it currently has no effect -- confirm intent.

    Returns:
        ``(obs_range, var, stata_if_code, missingval, valuelabel, sformat)``.
    """
    import numpy as np
    # `np.NaN` was removed in NumPy 2.0; `np.nan` exists in every version.
    missingval = np.nan
    obs_range, var, stata_if_code, valuelabel, sformat = (
        _parse_df_params(code, count, browse)
    )
    return obs_range, var, stata_if_code, missingval, valuelabel, sformat

In [None]:
#| hide
#| eval: False
# Can't use test_eq on this result: it contains NaN, and NaN never
# compares equal to itself:
(np.nan == np.nan) is False
browse_df_params("y le* if year<1910 in 1/20, noformat", 50)

(range(0, 20), ['year', 'le'], 'if year<1910', nan, True, False)

In [None]:
#| eval: False
browse_df_params("", 10)

(None, None, '', nan, True, True)

In [None]:
#| eval: False
browse_df_params("y le* if year<1910, noformat", 50)

(None, ['year', 'le'], 'if year<1910', nan, True, False)

In [None]:
#| hide
#| eval: False
browse_df_params("5 y le* if year<1910, noformat", 10)



(range(0, 5), ['year', 'le'], 'if year<1910', nan, True, False)

In [None]:
#| eval: False
browse_df_params('in 1/5', 41)

(range(0, 5), None, '', nan, True, True)

In [None]:
#| hide
#| eval: False
print(code)
get_df(*browse_df_params(code, obs_count()))

le if year==1920 in 1/10, noformat


In [None]:
#| eval: False
get_df(*browse_df_params('in 1/5', obs_count()))

Unnamed: 0,year,le
1,1900,47.3
2,1901,49.1
3,1902,51.5
4,1903,50.5
5,1904,47.6


## Browse magic: PerspectiveWidget

In [None]:
#| export
def perspective_not_found():
    """Return True if importing the `perspective` module fails as not found."""
    try:
        import perspective
    except ModuleNotFoundError:
        return True
    return False

In [None]:
#| export
def perspective_is_enabled():
    """Return True when the `perspective` module can be imported."""
    missing = perspective_not_found()
    return not missing
#     import subprocess
#     if perspective_not_found():
#         return False
#     try:
#         output = subprocess.getoutput('jupyter labextension list')
#         enabled = bool(re.search(r'@finos/perspective-jupyterlab v\d\.\d\.\d enabled ok', output))
#         built = not re.search(r'@finos/perspective-jupyterlab needs to be included in build', output)
#         return enabled and built
#     except Exception as e:
#         return False

In [None]:
perspective_is_enabled()

True

In [None]:
#| export
def browse_not_enabled(kernel):
    """Send a markdown notice that browse needs the perspective widget.

    The notice goes out as a `display_data` response on the kernel's
    `iopub_socket`. Returns an empty string.
    """
    notice = (
        "browse requires perspective widget to be "
        "[installed](https://perspective.finos.org/docs/python/#jupyterlab)"
    )
    kernel.send_response(
        kernel.iopub_socket,
        'display_data',
        {'data': {'text/markdown': notice}, 'metadata': {}},
    )
    return ''

In [None]:
#| export
def display_perspective(df, sformat):
    """Display `df` in a PerspectiveWidget.

    When `sformat` is true, the table is created from an explicit schema
    (an int `index` plus every dataframe column typed as `str`) and then
    filled from `df`; otherwise the table is built directly from `df`.
    """
    import perspective
    from IPython.display import display
    if not sformat:
        table = perspective.Table(df)
    else:
        # To prevent perspective from wrongly interpreting numbers as dates
        # See: https://perspective.finos.org/docs/table/#schema-and-types
        schema = {'index': int, **{name: str for name in df.columns}}
        table = perspective.Table(schema)
        table.update(df)
    display(perspective.PerspectiveWidget(table))
    
    # Alternate display code, from attempt to customize alt. mime-type(s)
#     data = {'application/vnd.jupyter.widget-view+json': {
#         'version_major': w.get_manager_state()['version_major'],
#         'version_minor': w.get_manager_state()['version_minor'],
#         'model_id': w.model_id,
#     }}
#     content = {
#         'data': data,
#         'metadata': {},
#     }
#     kernel.send_response(kernel.iopub_socket, 'display_data', content) 

In [None]:
#| hide
#| eval: False
display_perspective(get_df(*browse_df_params('in 1/5, noformat', obs_count())), False)

PerspectiveWidget(columns=['index', 'year', 'le'], theme=None)

In [None]:
#| hide
#| eval: False
display_perspective(get_df(*browse_df_params('in 1/5, noformat', obs_count())), True)

PerspectiveWidget(columns=['index', 'year', 'le'], theme=None)

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()