In [None]:
#| export
from __future__ import annotations

In [None]:
#| default_exp pandas_visor

In [None]:
# %reload_ext autoreload
# %autoreload 0

# Pandas Visor
> Simple dataframe widget

## Preamble -

In [None]:
#| export
import asyncio
import math
from types import SimpleNamespace as NS
from typing import cast

import ipywidgets as W
import numpy as np
import pandas as pd
from pandas.io.formats.style import Styler

from vutil.imports import AD
from vutil.pandas.transformer import DataFrameStyler
from vutil.pandas.util import FTStyler
from vutil.pandas.util import PANDAS
from vwidget.base_widget import BaseExplorerApp


In [None]:
import anywidget
import fastcore.all as F
import traitlets
from fastcore.test import *  # type: ignore
from IPython.core.display import HTML

from vutil.async_helper import wait_while
from vutil.pandas.util import FTDataFrame
from vwidget.base_widget import ReflectWidget
from vwidget.base_widget import ThemerWidget


 ----

In [None]:
# if F.IN_IPYTHON:
#     import nest_asyncio
#     nest_asyncio.apply()

In [None]:
#| exporti
# Shortcut to the style definitions exposed by the PANDAS helper.
_STYLE = PANDAS.style

# Settings namespace for this module: the PANDAS helper plus the theme name.
config = NS(PANDAS=PANDAS, THEME='dark')


Silence warnings (mainly the ipywidgets/anywidget deprecation warnings) for the rest of the notebook.


In [None]:
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')


In [None]:
tw = ThemerWidget()
tw

ThemerWidget()

 ----

## DFVisor
> Widget that displays a pandas DataFrame (or a ready-made Styler) inside an output area.

In [None]:
pd.describe_option('display.max_columns')
pd.set_option("display.max_columns", 60)

display.max_columns : int
    If max_cols is exceeded, switch to truncate view. Depending on
    `large_repr`, objects are either centrally truncated or printed as
    a summary view. 'None' value means unlimited.

    In case python/IPython is running in a terminal and `large_repr`
    equals 'truncate' this can be set to 0 or None and pandas will auto-detect
    the width of the terminal and print a truncated object which fits
    the screen width. The IPython notebook, IPython qtconsole, or IDLE
    do not run in a terminal and hence it is not possible to do
    correct auto-detection and defaults to 20.
    [default: 20] [currently: 20]


In [None]:
# Small two-row fixture: a short `filename` column and a long `path` column,
# used by the cells below to show the effect of `display.max_colwidth`.
datafile = {
    "filename": ["filename_01", "filename_02"],
    "path": [
        "volumes/data/media/user_name/storage/folder_01/filename_01",
        "volumes/data/media/user_name/storage/folder_02/filename_02",
    ],
}
# Set the maximum column width option to 50 for the displays that follow.
pd.set_option("display.max_colwidth", 50)
# Same data twice: once as a plain pandas DataFrame, once as the project's
# FTDataFrame, so the type of their `.style` accessors can be compared below.
df0 = pd.DataFrame(datafile)
df = FTDataFrame(datafile)


In [None]:
print(type(df0.style))
isinstance(df0.style, Styler), isinstance(df0.style, FTStyler)

<class 'pandas.io.formats.style.Styler'>


(True, False)

In [None]:
print(type(df.style))
isinstance(df.style, Styler), isinstance(df.style, FTStyler)

<class 'vutil.pandas.util.FTStyler'>


(True, True)

In [None]:
pd.set_option("display.max_colwidth", 30)
df


Unnamed: 0,filename,path
0,filename_01,volumes/data/media/user_na...
1,filename_02,volumes/data/media/user_na...


In [None]:
pd.set_option("display.max_colwidth", 100)
df


Unnamed: 0,filename,path
0,filename_01,volumes/data/media/user_name/storage/folder_01/filename_01
1,filename_02,volumes/data/media/user_name/storage/folder_02/filename_02


In [None]:
pd.set_option("display.max_colwidth", 50)


In [None]:
# dft = DataFrameStyler({
# # column_name  name        width  dtype                            transform  fmt              cell_style col_style
#   'datetime': ('dt',       None,    'datetime64[ns, Europe/Madrid]', None,      date_fmt_simple, None,      'bold'),
#   'order':    ('order_id', '100px', 'float',                         None,      '{:.4f}',        None,      'gray'),
#   'price':    (None,       None,    None,                            round_up4, None,            None,      'mono9'),
#   'total':    (None,       None,    None,                            None,      dot_align4,      neg_red,   'mono9, bold'),
# })

In [None]:
#| export
class DFVisor(BaseExplorerApp):
    """Explorer app that shows a DataFrame (or a ready-made `Styler`) in an `Output` widget.

    Calling the instance with a frame renders it; `d()` renders the frame that
    was given at construction time. Once something has been rendered, the
    styled table is available as `self.stl`.
    """

    # ---------- ---------- display ---------- ----------
    def d(self):
        """Render `self.df`; does nothing when no dataframe was supplied."""
        if self.df is not None:
            self(self.df)

    # ---------- ---------- UI view ---------- ----------
    # def setup_layout(self):
    #     center = W.Box(
    #             [self._w.out],
    #             layout=W.Layout(
    #                 height="100%", width='100%', margin='0px', border='0.1px solid red')
    #         )
    #     return dict(center=center)

    # ---------- ---------- UX ---------- ----------
    # def reflect_changed(self, change):
    #     q = change.new
    #     if 'result' in q:
    #         h = f"{math.ceil(q['result']['width'])}px"
    #         if self.dcons is not None:
    #             self.dcons.log(f"{h = }")
    #         self._w.out.layout.max_width = h

    # def setup_ux(self, state):
    #     super().setup_ux(state)
    #     self.reflect.observe(self.reflect_changed, names='query')  # type: ignore

    def __init__(self,
            df: pd.DataFrame | Styler | None = None,
            height=300,
            # width: int | None = 1000,
            **kwargs
        ):
        """Create the visor.

        df:      data rendered by `d()`; may be omitted and passed to `__call__` later.
        height:  height in pixels; the layout's `max_height` is set to `height + 20` px.
        kwargs:  `stlr` (a `DataFrameStyler`; when missing, one is built with
                 `PANDAS.style.default_table_style`) and `reflect` (default `True`)
                 are consumed here; everything else is forwarded to `BaseExplorerApp`.
        """
        self.df = df
        # Build the default styler only when the caller did not provide one.
        stlr = kwargs.pop('stlr', None)
        if stlr is None:
            stlr = DataFrameStyler(table_styles=PANDAS.style.default_table_style)
        self.stlr: DataFrameStyler = stlr
        reflect = kwargs.pop('reflect', True)
        super().__init__(
            widgets=AD(out=W.Output()),
            layout=AD(max_height=f"{height+20}px"),
            reflect=reflect,
            **kwargs,
        )

    def __call__(self, df: pd.DataFrame | Styler):
        """Clear the output area and display `df` in it.

        A `Styler` is shown as is. A plain DataFrame is styled with `self.stlr`;
        the output width is first estimated from the styler's column widths and,
        when a reflect widget is available, replaced shortly afterwards by the
        measured pixel width of the rendered table.
        """
        self._w.out.clear_output()
        if isinstance(df, Styler):
            self.stl = df
        else:
            stlr = self.stlr.on(df)
            widths = stlr.get_col_widths(df)
            # Provisional width in `ch` units, derived from the column widths.
            total_width = sum(widths.values())
            self._w.out.layout.width = f"{total_width * 0.75}ch"  # type: ignore
            self.stl = stlr()

            def _apply_measured_width(_result):
                # Callback handed to `reflect.get_result`; its argument is not used,
                # the measurement is read back from `reflect.query`.
                reflect = self.reflect
                if reflect is None:
                    return
                q: dict = cast(dict, reflect.query)
                if 'result' in q:
                    width = f"{math.ceil(q['result']['width'])}px"
                    if self.dcons:
                        self.dcons.log(f"{width = }")
                    self._w.out.layout.width = width

            def _request_measure():
                # Ask the reflect widget for the bounding box of the rendered
                # table, which is addressed through the styler's uuid.
                reflect = self.reflect
                if reflect is None:
                    return
                # self._r.query = {'sn':347,
                #     'id':f"T_{self.stl.uuid}", 'kind':'method', 'method':'getBoundingClientRect'}
                uuid = cast(FTStyler, self.stl).uuid
                fut = reflect.get_result(
                    {'selector':f"#T_{uuid}", 'method':'getBoundingClientRect'},
                    _apply_measured_width,
                )
                if self.dcons:
                    self.dcons.log(f"{fut = }")

            # Without a reflect widget there is nothing to measure, so no callback
            # is scheduled (it used to fail on an assert inside the event loop).
            if self.reflect is not None:
                loop = asyncio.get_event_loop()
                # Delay the measurement by 0.1 s, after the table has been appended below.
                loop.call_later(0.1, _request_measure)
        self._w.out.append_display_data(self.stl)


In [None]:
dfv = DFVisor(df, dcons=True)


Box(children=(ReflectWidget(), DFVisor(children=(Box(children=(Output(),), layout=Layout(grid_area='center', h…

DebugWidget(children=(Box(children=(Button(description='clear', layout=Layout(width='2em'), style=ButtonStyle(…

[2m05-23 14:37:21[0m [[31m[1merror    [0m] [1mException in callback wait_while(<function <la...t 0x179324670>, <function <la...t 0x179324700>, <function <la...t 0x179324790>, 0.1, 19) at /Users/vic/dev/repo/project/vutil/vutil/async_helper.py:25
handle: <TimerHandle when=98432.58541016601 wait_while(<function <la...t 0x179324670>, <function <la...t 0x179324700>, <function <la...t 0x179324790>, 0.1, 19) at /Users/vic/dev/repo/project/vutil/vutil/async_helper.py:25>[0m [[34m[1masyncio[0m] [36mexc_info[0m=[35m(<class 'AssertionError'>, AssertionError('==:\n<table id="T_fa333">\n  <thead>\n    <tr>\n      <th class="blank level0">&nbsp;</th>\n      <th id="T_fa333_level0_col0" class="col_heading level0 col0">filename</th>\n      <th id="T_fa333_level0_col1" class="col_heading level0 col1">path</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th id="T_fa333_level0_row0" class="row_heading level0 row0">0</th>\n      <td id="T_fa333_row0_col0" class="data row0 col0">filen

In [None]:
test_eq(dfv.stlr.get_col_widths(), {'index': 4, 'filename': 11, 'path': 20})


In [None]:
dfv.dcons

DebugWidget(children=(Box(children=(Button(description='clear', layout=Layout(width='2em'), style=ButtonStyle(…

In [None]:
# Expected markup of the table rendered by `dfv`.
# Every element id embeds the styler's uuid, which differs between runs, so all
# ids are derived from the live uuid instead of being copied from one
# particular run (previously only the <table> id was dynamic).
_uuid = cast(FTStyler, dfv.stl).uuid
html = f'''<table id="T_{_uuid}">
  <thead>
    <tr>
      <th class="blank level0">&nbsp;</th>
      <th id="T_{_uuid}_level0_col0" class="col_heading level0 col0">filename</th>
      <th id="T_{_uuid}_level0_col1" class="col_heading level0 col1">path</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th id="T_{_uuid}_level0_row0" class="row_heading level0 row0">0</th>
      <td id="T_{_uuid}_row0_col0" class="data row0 col0">filename_01</td>
      <td id="T_{_uuid}_row0_col1" class="data row0 col1">volumes/data/media/user_name/storage/folder_01/filename_01</td>
    </tr>
    <tr>
      <th id="T_{_uuid}_level0_row1" class="row_heading level0 row1">1</th>
      <td id="T_{_uuid}_row1_col0" class="data row1 col0">filename_02</td>
      <td id="T_{_uuid}_row1_col1" class="data row1 col1">volumes/data/media/user_name/storage/folder_02/filename_02</td>
    </tr>
  </tbody>
</table>'''



In [None]:
# Ask the reflect widget for the markup of the element matching the `table`
# selector, then compare the answer with the expected `html` string.
assert dfv.reflect is not None
dfv.reflect.query = {'sn':'0', 'selector':'table', 'kind':'html'}
dfv.dcons.log(dfv.reflect.query)
# NOTE(review): presumably `wait_while` polls while the first callable is true,
# calls the second on every tick and the third once waiting ends — confirm
# against vutil.async_helper.wait_while.
wait_while(
    lambda cnt: cast(ReflectWidget, dfv.reflect).result is None,  # still no result
    lambda cnt: dfv.dcons.log(f".{cnt}"),  # log the tick counter
    # Expected value is `html` when `cnt > 0`, otherwise `None`.
    lambda cnt: test_eq(cast(ReflectWidget, dfv.reflect).result, html if cnt > 0 else None)
)


In [None]:
def make_timeseries(start="2000-01-01", end="2000-12-31", freq="1D", seed=None):
    """Build a random demo frame with one row per timestamp between `start` and `end`.

    Parameters
    ----------
    start, end : passed to `pd.date_range`; both end points are included.
    freq : frequency string passed to `pd.date_range`.
    seed : seed for `np.random.RandomState`; the same seed gives the same frame.

    Returns
    -------
    pd.DataFrame
        Default integer index and the columns `date`, `id`, `name`, `x`, `y`
        (sorted by name). `x` and `y` are uniform in [-1, 1). An empty date
        range yields an empty frame with the same columns.
    """
    index = pd.date_range(start=start, end=end, freq=freq, name="timestamp")
    n = len(index)
    state = np.random.RandomState(seed)
    # The draw order (name, id, x, y) determines the values produced for a seed.
    columns = {
        "date": pd.Series(index),
        "name": state.choice(["Alice", "Bob", "Charlie"], size=n),
        "id": state.poisson(1000, size=n),
        "x": state.rand(n) * 2 - 1,
        "y": state.rand(n) * 2 - 1,
    }
    # The timestamps live in the `date` column, not in the index, so the former
    # `df.index[-1] == end` check compared an integer position with `end`: it
    # never dropped a row and raised IndexError on an empty range. It is removed.
    return pd.DataFrame(columns, columns=sorted(columns))

# Ten frames, one per seed; every column name gets a `_<seed>` suffix so the
# names stay distinct when the frames are placed side by side.
timeseries = [make_timeseries(freq="1D", seed=i).add_suffix(f"_{i}") for i in range(10)]

# Wide frame: the ten frames concatenated column-wise.
ts_wide = pd.concat(timeseries, axis=1)

# ts_wide.to_parquet("timeseries_wide.parquet")

In [None]:
dfv2 = DFVisor(ts_wide)
# dfv(ts_wide)

Box(children=(ReflectWidget(), DFVisor(children=(Box(children=(Output(),), layout=Layout(grid_area='center', h…

In [None]:
assert dfv2.reflect
# dfv2.reflect.query = {'sn':0, 'id':f"T_{dfv2.stl.uuid}", 'kind':'method', 'method':'getBoundingClientRect'}
dfv2.reflect.query = {'sn':0, 'selector':f"#T_{cast(FTStyler, dfv2.stl).uuid}", 'method':'getBoundingClientRect'}


In [None]:
assert dfv2.reflect
dfv2.reflect.result

{'x': 55,
 'y': 11905.5,
 'width': 3238.359375,
 'height': 7634.0546875,
 'top': 11905.5,
 'right': 3293.359375,
 'bottom': 19539.5546875,
 'left': 55}

In [None]:
ww = dfv2.stlr.get_col_widths()
test_eq(sum(ww.values()), 614)

 ----


# Colophon


In [None]:
import fastcore.all as F
if F.IN_NOTEBOOK:
    import nbdev; nbdev.nbdev_export('03_pandas_visor.ipynb')
