In [1]:
from icecap import Project
import icecap as ice
import polars as pl 

In [3]:
# Build the GDP pipeline: load the CSV, reshape it to one row per
# (Country, Date), and register each intermediate stage on the project.
proj = Project() 

raw_input = proj.scan_csv("world_bank_gdp_data.csv")

proj['raw'] = raw_input 

# NOTE(review): `input` shadows Python's builtin `input()` for the rest of the
# kernel session; consider a stage-specific name if no other cell relies on it.
# Keep the country code (renamed to "Country") plus every column whose name
# starts with a digit. Those columns are cast to Float32 with strict=False and
# renamed to the first four characters of their original name
# (presumably the year — TODO confirm against the CSV header).
input = proj['raw'].select(
    pl.col("Country Code").alias("Country"),
    pl.col("^[0-9]+.*$").cast(pl.Float32, strict=False).map_alias(lambda s: s[:4]))

proj['country'] = input

# Wide -> long: former column names go into "Date", cell values into "GDP".
input = input.melt(id_vars=["Country"], variable_name="Date", value_name="GDP") #, "pivot tall"

# "Date" holds the former column names; cast it to Int32 and keep the remaining
# columns unchanged.
input = input.select(
    pl.col("Country"),
    pl.col("Date").cast(pl.Int32),
    pl.col("*").exclude("Country", "Date")
) #, "cast dates to number"

input = input.drop_nulls().sort(by=["Country", "Date"]) #, "drop nulls and sort"

# Register the cleaned long-format frame; later cells read it via proj['input'].
proj['input'] = input

proj['input'].inspect()

HBox(children=(VBox(children=(IntSlider(value=5, max=5), Output()), layout=Layout(min_width='320px')), Output(…

In [5]:
# ISO 3166-1 alpha-3 codes of the economies of interest. Brazil is "BRA";
# the previous "BRZ" is not a valid code and would never match a row.
# Currently unused: the Country filter below is narrowed to JPN/USA.
largest_economies = ["USA","CHN","DEU","JPN","IND","GBR","BRA","FRA","CAN"]

# Filter expressions keyed by (column name, dtype); passed to ice.adjust below.
filters = {
    ("Country",pl.Utf8): pl.col('Country').is_in(["JPN","USA"]), #largest_economies),
    ("Date", pl.Int32) : pl.col('Date').is_between(2010, 2024)
}


def finalize(df):
    """Collect `df`, convert it to pandas and return a Styler.

    The Styler formats numbers with zero decimals and "," as the thousands
    separator.
    """
    return df.collect().to_pandas().style \
        .format(precision=0, thousands=",", decimal=".")


proj['input'].inspect(ice.adjust(filters, finalize=finalize, head=10))

# TODO: Illustrate which adjustments are applied in the visuals.


HBox(children=(VBox(children=(IntSlider(value=5, max=5), Output()), layout=Layout(min_width='320px')), Output(…

In [6]:
# Work on a copy so the registered 'input' stage is not modified by the
# column assignments below.
calc = proj['input'].copy()

# # No, not going to support this. If you do this, just write the 'over' expr directly.
# with calc.over("Country"):
#     calc = calc.with_columns(diff = (pl.col("GDP") - pl.col("GDP").shift(1)).over("Country"))

# Presumably every expression assigned inside this context is evaluated per
# "Country" group — TODO confirm against icecap's `over` semantics.
with calc.over("Country"):
    # Change in GDP versus the previous row.
    calc['diff']     = pl.col("GDP") - pl.col("GDP").shift(1)
    # 5-row rolling mean and standard deviation of that change; both refer to
    # the 'diff' column assigned on the line above.
    calc['diff_avg'] = pl.col('diff').rolling_mean(5)  
    calc['diff_std'] = pl.col('diff').rolling_std(5)

proj['calc'] = calc 

# NOTE(review): `until='input'` looks like it limits the inspected steps to
# those back to the 'input' stage — confirm against icecap.
proj['calc'].inspect(until='input')



HBox(children=(VBox(children=(IntSlider(value=4, max=4), Output()), layout=Layout(min_width='320px')), Output(…

In [None]:
# disp = proj['calc']
# min = ice.param(2015)
# max = ice.param(2020)
# disp = disp.filter(pl.col("Date").is_between(min,max))
# disp = disp.melt(["Country","Date"],[], "Field")
# disp = disp.collect().pivot("value",["Country","Field"],"Date").sort(by=["Country","Field"])
# # calc.save_to('diags_pivot', disp)
# disp.inspect(until=lambda df:df.parent_attr=="copy")

In [7]:
"""
How does this work. 
I want to be able to update a parameter and that flows through to wherever it's used. That means that 
I don't want the literal to be fixed. I want it to be post-hoc adjustable. 

I'm probably going to abandon this experiment, at least for now. 
"""

class Param:
    """Minimal box around a single value, exposed as `.wrapped`.

    Experimental: intended as a placeholder that could be swapped out after an
    expression has been built.
    """

    def __init__(self, wrapped):
        # Store the value as-is; no copying or validation.
        self.wrapped = wrapped

# Experiment: use a Param instance as the lower bound by embedding it as an
# object literal, then print the expression tree to see how it is represented.
# In the output below the Param shows up as `lit(Series)`.
x = pl.col("Date").is_between(pl.lit(Param(2015), allow_object=True),2020)
x.meta.tree_format()
# Alternative tried: an aliased plain literal instead of a Param object.
# y = pl.lit(2015).alias('y')
# x = pl.col("Date").is_between(y,2020)


    binary: &     

       |                                        |         

    binary: <=                                 binary: >=     

       |              |                         |                 |        

    lit(2020)         col(Date)                lit(Series)        col(Date)    



* Create a data frame for visualization |Done|
* Create a rule where it applies
* Augment inspect_df so that if more than one rule applies, there's a selector for choosing. 
* Augment (1) so that it can take parameters and inspect_df knows how to show them.

```
class Viewer:
    applies = {
        'dates_on_cols': lambda df: 'Date' in df.schema,
        'last_date': lambda df: 'Date' in df.schema,
        'specific_date': lambda df: 'Date' in df.schema
    }
    def dates_on_cols(df):

```

In [None]:
# Ad-hoc "dates on columns" view of the calc stage: one row per
# (Country, Field), one column per Date between min_ and max_.
# The same steps are packaged as the `dates_on_cols` function further down.
min_, max_ = 2015, 2020
disp = proj['calc']
disp = disp.filter(pl.col("Date").is_between(min_, max_))
# Melt every non-id column into ("Field", "value") pairs.
disp = disp.melt(["Country","Date"],[], "Field")
# Collect first, then pivot so each Date becomes its own column.
disp = disp.collect().pivot("value",["Country","Field"],"Date").sort(by=["Country","Field"])
# NOTE(review): assumes the collected/pivoted object still exposes icecap's
# `inspect` — confirm; a plain polars DataFrame would not.
disp.inspect(until='calc')

In [None]:
# viewer.apply('dates_on_cols', proj['calc'], 2017, 2022, head=5)

In [None]:
def dates_on_cols(df, min_, max_, head=5):
    """Pivot `df` so each Date in the requested window becomes a column.

    Args:
        df: frame with "Country" and "Date" columns; must support
            `filter`, `melt` and `collect`.
        min_: lower bound passed to `is_between` on "Date".
        max_: upper bound passed to `is_between` on "Date".
        head: number of leading rows to keep; a falsy value (0, None)
            returns every row.

    Returns:
        The collected frame with one row per (Country, Field), sorted by
        those two columns, and one column per Date.
    """
    in_window = pl.col("Date").is_between(min_, max_)
    long_form = df.filter(in_window).melt(["Country","Date"],[], "Field")
    wide = long_form.collect().pivot("value",["Country","Field"],"Date")
    wide = wide.sort(by=["Country","Field"])
    return wide.head(head) if head else wide
# dates_on_cols(proj['calc'], 2015, 2020).inspect(until="calc")



In [None]:
def last(df, head=5):
    """Return the last row of each "Country" group, limited to `head` rows.

    Groups keep their order of first appearance (`maintain_order=True`); the
    result is collected before being returned.
    """
    per_country = df.groupby("Country", maintain_order=True)
    final_rows = per_country.last()
    return final_rows.head(head).collect()

In [None]:
def on_date(df, dte:int, head=5):
    """Collect `df` and return the first `head` rows whose "Date" equals `dte`."""
    materialized = df.collect()
    matches_date = pl.col("Date") == dte
    return materialized.filter(matches_date).head(head)

#disp3.inspect(until='calc')
# calc.add_to("diags", disp3)

In [None]:
class ViewerRule:
    """Pairs a view callable with the predicate deciding when it applies."""

    def __init__(self, view, applies_to):
        # view: callable that produces the display object for a frame.
        self.view = view
        # applies_to: callable taking a frame, truthy when `view` is usable.
        self.applies_to = applies_to

def raw(df):
    """Identity view: hand the frame back untouched."""
    return df

class Viewer:
    """Registry of named views and the predicates that decide where they apply.

    `self.views` maps a view name to a rule object exposing `.view` (the
    callable) and `.applies_to` (a predicate on the frame). The mapping key is
    the authoritative name of a view.
    """

    def __init__(self, lf_to_df=lambda lf: lf.collect()):
        """Create a viewer pre-populated with the always-applicable 'raw' view.

        Args:
            lf_to_df: callable used by `apply` to materialize a lazy frame;
                defaults to calling `.collect()` on it.
        """
        # Kept for backward compatibility with code that reads `raw.name`.
        raw.name = 'raw'
        self.views = {
            'raw': ViewerRule(raw, lambda x:True)
        }
        self.lf_to_df = lf_to_df

    def add(self, func, applies_to, name=None):
        """Register `func` as a view under `name` (default: `func.__name__`).

        Args:
            func: callable taking the frame as its first argument.
            applies_to: predicate on the frame; truthy when `func` is usable.
            name: registry key; falls back to `func.__name__` when falsy.
        """
        assert callable(func)
        assert callable(applies_to)
        name = name if name else func.__name__
        try:
            # Legacy convenience attribute; the registry key is what counts.
            func.name = name
        except (AttributeError, TypeError):
            # Builtins and bound methods reject new attributes; that must not
            # prevent registration.
            pass
        self.views[name] = ViewerRule(func,applies_to)

    def applicable_views(self, df):
        """Return the view callables whose predicate accepts `df`, in registration order."""
        return [rule.view for rule in self.views.values() if rule.applies_to(df)]
    
    def applicable_names(self, df):
        """Return the registered names whose predicate accepts `df`.

        Names come from the registry keys rather than a `name` attribute on
        the callable, so a function registered under several names is reported
        under each of them, and callables without that attribute still work.
        """
        return [name for name, rule in self.views.items() if rule.applies_to(df)]

    def rule(self, name):
        """Return the rule registered under `name`; raises KeyError if unknown."""
        return self.views[name]

    def apply(self, name, df, *args, **kwargs):
        """Run the view `name` on `df`, materializing lazy frames first.

        A frame counts as lazy when it, or its `.wrapped` attribute if present,
        is a `pl.LazyFrame`; it is then passed through `self.lf_to_df`.
        Extra positional and keyword arguments are forwarded to the view.
        """
        # Frames without a `.wrapped` attribute are checked directly.
        underlying = getattr(df, 'wrapped', df)
        if isinstance(underlying, pl.LazyFrame):
            df = self.lf_to_df(df)
        return self.views[name].view(df, *args, **kwargs)

In [None]:
from ipywidgets import interactive, interact 
import ipywidgets as widgets 
from IPython.display import display, HTML, clear_output
from functools import wraps

# Drop whatever this cell rendered previously before rebuilding the UI.
clear_output(wait=True)

# Default values for the view parameters.
min_, max_ = 2017, 2022
head = 5
on   = 2020

int_layout = widgets.Layout(width='20ch')

# Keyword arguments handed to `interactive` for each view, keyed by view name.
# Widget instances are used as given; plain ints are passed through unchanged.
params = {
    'dates_on_cols': {
        'min_': widgets.IntText(min_, layout=int_layout),
        'max_': widgets.IntText(max_, layout=int_layout),
        'head': widgets.IntText(head, layout=int_layout),
    },
    'last': {'head': head},
    'on_date': {'dte': widgets.IntText(on), 'head': head},
}

# Every custom view is offered only for frames that have a 'Date' column.
viewer = Viewer()
for view_func in (dates_on_cols, last, on_date):
    viewer.add(view_func, applies_to=lambda df: 'Date' in df.schema)

# Dropdown of applicable views stacked above the output area they render into.
my_df = proj['calc']
df_out   = widgets.Output()
selector = widgets.Dropdown(options=viewer.applicable_names(my_df))
layout = widgets.VBox([selector, df_out])

def wrapper(df, func):
    """Bind `df` as the first argument of `func` and display each call's result.

    Returns a callable accepting the remaining positional and keyword
    arguments; it returns None and shows the result via `display`.
    """
    def render(*args, **kwargs):
        display(func(df, *args, **kwargs))
    return render

def update_output(event):
    """Rebuild the interactive widget for the newly selected view.

    `event` is a mapping whose 'new' entry is the selected view name, as
    delivered by the dropdown's 'value' observer.
    """
    selected = event['new']
    df_out.clear_output(wait=True)
    view_func = viewer.rule(selected).view
    # Views without an entry in `params` get no extra controls.
    view_kwargs = params.get(selected, {})
    widget = interactive(wrapper(my_df, view_func), **view_kwargs)
    with df_out:
        display(widget)

# Re-render whenever the dropdown's value changes.
selector.observe(update_output, 'value')

# Render once for the initially selected view; the observer only fires on
# changes, so it is called here with a hand-built event dict.
update_output({'new':selector.value})

# Last expression of the cell: displays the dropdown + output container.
layout

# TODOs

## Active

1. save_to and add_to 
2. add diags to inspect
3. parameterized diags

## Backlog

1. 'until' on traverse
2. track graphs (joins)
3. Diagnostics for Period Series like tables. 
4. Table formats