# browse

> Helpers for browse, head, and tail magics

In [None]:
#| default_exp browse
%load_ext autoreload
%autoreload 2

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
from nbstata.config import launch_stata
from nbstata.helpers import *
from nbstata.utils import *
from nbstata.pandas import better_pdataframe_from_data
from fastcore.basics import patch_to
import re
import numpy as np
import subprocess

## Parsing Stata code for browse magic

In [None]:
#| export
# Splits a Stata command line into three optional parts, in this order:
#   code -- the leading text (it may not itself begin with an if/in keyword)
#   if   -- an `if <expression>` qualifier
#   in   -- an `in <range>` qualifier
# A captured qualifier still contains its keyword and any whitespace in
# front of it; callers are expected to strip the groups themselves.
parse_code_if_in_regex = re.compile(
    r'\A(?P<code>(?!if\s)(?!\sif)(?!in\s)(?!\sin).+?)?(?P<if>\s*if\s+.+?)?(?P<in>\s*in\s.+?)?\Z',
    flags=re.DOTALL | re.MULTILINE,
)

In [None]:
#| export
def parse_code_if_in(code):
    """Parse line of Stata code into code, if, in

    Returns a dict with the keys 'code', 'if' and 'in'. Each value is the
    whitespace-stripped text of that part, or '' when the part is absent.
    The 'if' and 'in' values keep their leading keyword.
    """
    parsed = parse_code_if_in_regex.match(code.strip())
    if not parsed:
        # Fallback: hand the input back untouched as the command part.
        return {'code': code, 'if': '', 'in': ''}
    return {
        part: ('' if text is None else text.strip())
        for part, text in parsed.groupdict().items()
    }

In [None]:
from fastcore.test import test_eq, test_fail
from textwrap import dedent

In [None]:
code = "list var1 if var1==0 in 1/10"
test_eq(parse_code_if_in(code), {'code': 'list var1', 'if': 'if var1==0', 'in': 'in 1/10'})

In [None]:
parse_code_if_in('x in 1/10')

{'code': 'x', 'if': '', 'in': 'in 1/10'}

In [None]:
#| hide
#Stata doesn't allow reversing the if/in order and it doesn't work here either:
parse_code_if_in("list var1 in 1/10 if var1==0")

{'code': 'list var1', 'if': '', 'in': 'in 1/10 if var1==0'}

In [None]:
#| export
def _get_start_end_strs(stata_in_code):
    """Split a Stata `in` qualifier into its start and end strings.

    Parameters:
        stata_in_code: text such as "in 3/10" or "in 10". The leading `in`
            keyword is optional and may be followed by any whitespace.

    Returns:
        A `(start_str, end_str)` tuple of unparsed strings. Without a '/'
        the range is taken to start at "1" and end at the given value.
    """
    range_code = stata_in_code.strip()
    # Drop only a leading `in` keyword, whatever whitespace follows it
    # (the parsing regex accepts `in\s`, so a tab or newline is possible).
    tokens = range_code.split(None, 1)
    if tokens and tokens[0] == 'in':
        range_code = tokens[1] if len(tokens) > 1 else ''
    start_str, slash, end_str = range_code.partition('/')
    if not slash:
        # Single value: treat it as the end of a range starting at 1.
        start_str, end_str = "1", range_code
    return start_str, end_str

In [None]:
#| hide
_get_start_end_strs("in 10")

('1', '10')

In [None]:
#| export
def _get_pos_stata_obs_num(in_obs_str, count):
    """Convert one end of a Stata `in` range to a positive observation number.

    Parameters:
        in_obs_str: 'f'/'F' (first), 'l'/'L' (last), or an integer string;
            negative integers count back from the last observation.
        count: total number of observations.

    Returns:
        The 1-based observation number.

    Raises:
        ValueError: if the string is not an integer, or the resulting
            number falls outside 1..count.
    """
    keyword = in_obs_str.strip().upper()
    if keyword == 'F':
        return 1
    if keyword == 'L':
        return count

    try:
        obs_num = int(in_obs_str)
    except ValueError:
        raise ValueError(f"{in_obs_str} invalid observation number")

    if obs_num < 0:
        # -1 is the last observation, -2 the one before it, and so on.
        obs_num += count + 1
    if not 1 <= obs_num <= count:
        raise ValueError(f"{in_obs_str} invalid observation number")
    return obs_num

In [None]:
#| hide
_get_pos_stata_obs_num("10", 100)

10

In [None]:
#| export
def in_range(stata_in_code, count):
    """Return in-statement range

    An empty (or whitespace-only) qualifier gives `(None, None)`.
    Otherwise the result is `(start - 1, end)` for the 1-based Stata
    observation numbers `start` and `end`. A ValueError is raised when
    `start` comes after `end`.
    """
    if stata_in_code.strip() == '':
        return (None, None)
    start_str, end_str = _get_start_end_strs(stata_in_code)
    first_obs = _get_pos_stata_obs_num(start_str, count)
    last_obs = _get_pos_stata_obs_num(end_str, count)
    if first_obs > last_obs:
        raise ValueError("observations numbers out of range")
    return (first_obs - 1, last_obs)

In [None]:
test_eq(in_range("", 100), (None, None))
test_eq(in_range("in 10", 100), (0, 10))
test_eq(in_range("in 1/10", 100), (0, 10))
test_eq(in_range("in F/10", 100), (0, 10))
test_eq(in_range("in 3/10", 100), (2, 10))
test_fail(in_range, args=("in 10", 5))
test_fail(in_range, args=("in 10/1", 100))

Negative indices are converted to positive, following Stata syntax:

In [None]:
test_eq(in_range("in -10/-1", 41), (31, 41))

## Head/tail magic helpers

In [None]:
#| export
def _split_option_code(code):
    """Split magic arguments at commas into (main code, option code).

    The main code is the text before the first comma. The option code is
    the text between the first and second comma, or "" if there is no comma.
    """
    main_code, *after_comma = code.split(',')
    option_code = after_comma[0] if after_comma else ""
    return main_code, option_code

In [None]:
#| export
def headtail_df_params(code, count, missing_config, tail=False):
    """Turn head/tail magic arguments into the parameters taken by `get_df`.

    Parameters:
        code: magic arguments of the form "[N] [varlist][, options]", where
            the recognized options are `noformat` and `nolabel`.
        count: total number of observations.
        missing_config: 'pandas' to use NaN for missing values; any other
            value is passed through as the missing-value marker itself.
        tail: select the last N observations instead of the first N.

    Returns:
        `(obs_range, varlist, stata_if_code, missingval, valuelabel, sformat)`.
        `obs_range` is None when `count` does not exceed N (default 5), and
        `stata_if_code` is always "".
    """
    # `np.nan` is the canonical name; the `np.NaN` alias was removed in NumPy 2.0.
    missingval = np.nan if missing_config == 'pandas' else missing_config

    main_code, option_code = _split_option_code(code)
    oargs = option_code.split()
    sformat = 'noformat' not in oargs
    valuelabel = 'nolabel' not in oargs

    vargs = main_code.split()
    N_max = 5
    # isdecimal() (unlike isnumeric()) only accepts digits that int() can parse.
    if vargs and vargs[0].isdecimal():
        # 1st argument is obs count
        N_max = int(vargs.pop(0))

    # Specified variables?
    varlist = " ".join(vargs)

    # Obs range
    obs_range = None
    if count > N_max:
        obs_range = range(count - N_max, count) if tail else range(0, N_max)

    stata_if_code = ""
    return obs_range, varlist, stata_if_code, missingval, valuelabel, sformat

In [None]:
test_eq(headtail_df_params("", 50, "."),
        ((range(0, 5), '', '', ".", True, True)))

In [None]:
test_eq(headtail_df_params("10 y s*, noformat", 50, "."),
        ((range(0, 10), 'y s*', '', ".", True, False)))

In [None]:
test_eq(headtail_df_params("10 y s*, noformat", 50, ".", tail=True),
        ((range(40, 50), 'y s*', '', ".", True, False)))

In [None]:
#| export
def get_df(obs_range, varlist, stata_if_code, missingval, valuelabel, sformat):
    """Build a dataframe from the Stata data in memory.

    `stata_if_code` is handed to `Selectvar`, which yields the name of a
    selection variable (or None). All other arguments are forwarded to
    `better_pdataframe_from_data`.
    """
    with Selectvar(stata_if_code) as sel_varname:
        frame_kwargs = dict(
            obs=obs_range,
            varlist=varlist,
            selectvar=sel_varname,
            missingval=missingval,
            valuelabel=valuelabel,
            sformat=sformat,
        )
        df = better_pdataframe_from_data(**frame_kwargs)
        # When no varlist was requested, remove the selection variable's
        # column from the result.
        drop_selectvar = sel_varname is not None and not varlist
        if drop_selectvar:
            df = df.drop([sel_varname], axis=1)
    return df

In [None]:
#| eval: False
launch_stata(splash=False)
run_noecho("""\
sysuse uslifeexp2, clear
""")

(U.S. life expectancy, 1900-1940)


In [None]:
#| eval: False
get_df(*headtail_df_params('', obs_count(), "."))




Unnamed: 0,year,le
1,1900,47.3
2,1901,49.1
3,1902,51.5
4,1903,50.5
5,1904,47.6


## Browse magic helpers

In [None]:
#| export
def parse_browse_magic(code):
    """Split browse magic arguments into parsed code parts and option text.

    Returns `(args, option_code)`: `args` is the dict produced by
    `parse_code_if_in` for the text before the comma, and `option_code`
    is the raw text after it.
    """
    split_code = _split_option_code(code)
    return parse_code_if_in(split_code[0]), split_code[1]

In [None]:
code = "le if year==1920 in 1/10, noformat"
test_eq(parse_browse_magic(code), 
        ({'code': 'le', 'if': 'if year==1920', 'in': 'in 1/10'}, ' noformat')
       )

In [None]:
#| export
def browse_df_params(code, count):
    """Turn browse magic arguments into the parameters taken by `get_df`.

    Parameters:
        code: magic arguments of the form
            "[varlist] [if <exp>] [in <range>][, options]", where the
            recognized options are `noformat` and `nolabel`. A leading
            integer N is still accepted as an observation limit but
            triggers a deprecation warning.
        count: total number of observations.

    Returns:
        `(obs_range, varlist, stata_if_code, missingval, valuelabel, sformat)`.
        `obs_range` comes from the `in` range when one is given, else from
        the deprecated N limit when `count` exceeds it, else None.
        `missingval` is always NaN.

    Raises:
        ValueError: propagated from `in_range` for an invalid `in` range.
    """
    # `np.nan` is the canonical name; the `np.NaN` alias was removed in NumPy 2.0.
    missingval = np.nan

    args, option_code = parse_browse_magic(code)
    oargs = option_code.split()
    sformat = 'noformat' not in oargs
    valuelabel = 'nolabel' not in oargs

    vargs = args['code'].split()
    N_max = np.inf
    # isdecimal() (unlike isnumeric()) only accepts digits that int() can parse.
    if vargs and vargs[0].isdecimal():
        # 1st argument is obs count
        print_red("Warning: '%browse [N]' syntax is deprecated "
                  "and may be removed in v1.0.")
        N_max = int(vargs.pop(0))
    # Specified variables?
    varlist = " ".join(vargs)

    # Obs range
    obs_range = None
    start, end = in_range(args['in'], count)
    if start is not None and end is not None:
        obs_range = range(start, end)
    elif count > N_max:
        obs_range = range(0, N_max)

    stata_if_code = args['if']
    return obs_range, varlist, stata_if_code, missingval, valuelabel, sformat

In [None]:
#| hide
# Can't use test_eq on the result because NaN never compares equal to itself:
(np.nan == np.nan) is False
browse_df_params("y s* if year<1910 in 1/20, noformat", 50)

(range(0, 20), 'y s*', 'if year<1910', nan, True, False)

In [None]:
browse_df_params("", 10)

(None, '', '', nan, True, True)

In [None]:
browse_df_params("y s* if year<1910, noformat", 50)

(None, 'y s*', 'if year<1910', nan, True, False)

In [None]:
#| hide
browse_df_params("5 y s* if year<1910, noformat", 10)



(range(0, 5), 'y s*', 'if year<1910', nan, True, False)

In [None]:
browse_df_params('in 1/5', 41)

(range(0, 5), '', '', nan, True, True)

In [None]:
#| hide
#| eval: False
print(code)
get_df(*browse_df_params(code, obs_count()))

le if year==1920 in 1/10, noformat




In [None]:
#| eval: False
get_df(*browse_df_params('in 1/5', obs_count()))




Unnamed: 0,year,le
1,1900,47.3
2,1901,49.1
3,1902,51.5
4,1903,50.5
5,1904,47.6


In [None]:
#| eval: False
get_df(*browse_df_params('in 1/5, noformat', obs_count()))




Unnamed: 0,year,le
1,1900,47.299999
2,1901,49.099998
3,1902,51.5
4,1903,50.5
5,1904,47.599998


## Browse magic: PerspectiveWidget

In [None]:
#| export
def perspective_not_found():
    """Return True when the `perspective` module cannot be found, else False."""
    try:
        import perspective
    except ModuleNotFoundError:
        return True
    return False

In [None]:
#| export
def perspective_is_enabled():
    """Return True when the `perspective` module is found, else False."""
    if perspective_not_found():
        return False
    return True
# Disabled stricter check (also inspected the JupyterLab extension list),
# kept for reference:
#     if perspective_not_found():
#         return False
#     try:
#         output = subprocess.getoutput('jupyter labextension list')
#         enabled = bool(re.search(r'@finos/perspective-jupyterlab v\d\.\d\.\d enabled ok', output))
#         built = not re.search(r'@finos/perspective-jupyterlab needs to be included in build', output)
#         return enabled and built
#     except Exception as e:
#         return False

In [None]:
perspective_is_enabled()

True

In [None]:
#| export
def browse_not_enabled(kernel):
    """Send a markdown notice that browse needs the perspective widget.

    The notice is sent as a 'display_data' message through
    `kernel.send_response` on `kernel.iopub_socket`. Returns ''.
    """
    notice = (
        "browse requires perspective widget to be "
        "[installed](https://perspective.finos.org/docs/python/#jupyterlab)"
    )
    content = {'data': {'text/markdown': notice}, 'metadata': {}}
    kernel.send_response(kernel.iopub_socket, 'display_data', content)
    return ''

In [None]:
#| export
def display_perspective(df, sformat):
    """Display `df` in a PerspectiveWidget.

    Parameters:
        df: dataframe to show; its `columns` are read when `sformat` is set.
        sformat: when truthy, the table is created from an explicit schema
            (integer 'index', every column as string) and then filled with
            `df`; otherwise the table is built from `df` directly.
    """
    import perspective
    from IPython.display import display

    if not sformat:
        table = perspective.Table(df)
    else:
        # To prevent perspective from wrongly interpreting numbers as dates
        # See: https://perspective.finos.org/docs/table/#schema-and-types
        schema = {'index': int, **{column: str for column in df.columns}}
        table = perspective.Table(schema)
        table.update(df)
    display(perspective.PerspectiveWidget(table))

    # Alternate display code, from attempt to customize alt. mime-type(s)
#     data = {'application/vnd.jupyter.widget-view+json': {
#         'version_major': w.get_manager_state()['version_major'],
#         'version_minor': w.get_manager_state()['version_minor'],
#         'model_id': w.model_id,
#     }}
#     content = {
#         'data': data,
#         'metadata': {},
#     }
#     kernel.send_response(kernel.iopub_socket, 'display_data', content)

In [None]:
#| hide
#| eval: False
display_perspective(get_df(*browse_df_params('in 1/5, noformat', obs_count())), False)




PerspectiveWidget(columns=['index', 'year', 'le'], theme=None)

In [None]:
#| hide
#| eval: False
display_perspective(get_df(*browse_df_params('in 1/5, noformat', obs_count())), True)




PerspectiveWidget(columns=['index', 'year', 'le'], theme=None)

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()