In [2]:
# Uncomment to install the pinned PersIst release used by this notebook.
# `%pip` (rather than `!pip`) installs into the running kernel's environment.
# %pip install persist_ext==1.5.1

## PersIst: Persistent Interactions in Computational Notebooks

At the Visualization Design Lab (VDL), we are working on a JupyterLab extension called `PersIst`, which lets us persist and replay the interactions performed in a notebook by tracking their provenance. It supports tracking interactions on Vega-Altair charts as well as on an interactive data table used to manipulate data.

The provenance tracking in PersIst is powered by another VDL project called [Trrack](https://apps.vdl.sci.utah.edu/trrack), which allows us to track the interaction history and reproduce it later.

After transforming the data using interactions, we can directly create new Python variables to use the resulting dataframe in our code.

All the interactions and created variables are saved. PersIst goes beyond Vega-Altair and supports multiple interactions such as __filtering__, __annotating__, __assigning categories__, __renaming & dropping columns__, etc.

Let's look at a quick demo.

In [3]:
import persist_ext as PR
from vega_datasets import data
import altair as alt

In [4]:
# Load the iris dataset from vega_datasets and display it as the cell output.
iris = data.iris()
iris

Unnamed: 0,sepalLength,sepalWidth,petalLength,petalWidth,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


In [9]:
# Build a PersIst scatterplot of sepal length against sepal width, colored by
# species, on a 300x300 canvas. The bare name on the last line renders the
# returned widget as the cell output.
scatterplot = PR.vis.scatterplot(
    iris,
    x="sepalLength:Q",
    y="sepalWidth:Q",
    color="species",
    height=300,
    width=300,
)
scatterplot

OutputWithTrrackWidget(body_widget=VegaLiteChartWidget(debounce_wait=250.0, df_columns=['sepalLength', 'sepalW…

In [23]:
# Fetch the dataframe that PersIst exposes under the name "processed_df_dyn"
# and display it.
# NOTE(review): presumably this name was created through the scatterplot
# widget's UI above, so the lookup depends on those interactions having been
# performed or replayed before this cell runs — confirm.
processed_df_dyn = PR.df.get("processed_df_dyn")
processed_df_dyn

Unnamed: 0,sepal_length,sepalWidth,petalLength,petalWidth,species,__annotations
0,4.9,3.0,1.4,0.2,setosa,23-10-23 (13:36) - Interesting
1,5.4,3.9,1.7,0.4,setosa,No Annotation
2,4.4,2.9,1.4,0.2,setosa,23-10-23 (13:36) - Interesting
3,5.4,3.7,1.5,0.2,setosa,No Annotation
4,4.8,3.0,1.4,0.1,setosa,23-10-23 (13:36) - Interesting
...,...,...,...,...,...,...
113,6.7,3.0,5.2,2.3,virginica,No Annotation
114,6.3,2.5,5.0,1.9,virginica,No Annotation
115,6.5,3.0,5.2,2.0,virginica,No Annotation
116,6.2,3.4,5.4,2.3,virginica,23-10-23 (13:36) - Interesting


In [21]:
# Fetch the dataframe that PersIst exposes under the name "filtered_dataset"
# and display it.
# NOTE(review): presumably created from a filter interaction in the scatterplot
# widget; the name must already exist in PersIst when this cell runs — confirm.
filtered_dataset = PR.df.get("filtered_dataset")
filtered_dataset

Unnamed: 0,sepalLength,sepalWidth,petalLength,petalWidth,species,__annotations
0,4.9,3.0,1.4,0.2,setosa,No Annotation
1,5.4,3.9,1.7,0.4,setosa,No Annotation
2,4.4,2.9,1.4,0.2,setosa,No Annotation
3,5.4,3.7,1.5,0.2,setosa,No Annotation
4,4.8,3.0,1.4,0.1,setosa,No Annotation
...,...,...,...,...,...,...
113,6.7,3.0,5.2,2.3,virginica,No Annotation
114,6.3,2.5,5.0,1.9,virginica,No Annotation
115,6.5,3.0,5.2,2.0,virginica,No Annotation
116,6.2,3.4,5.4,2.3,virginica,No Annotation


In [30]:
# Load the cars dataset from vega_datasets; later cells reuse `cars`.
cars = data.cars()

# Interval selection named "selection", bound to the x encoding only.
selection = alt.selection_interval(name="selection", encodings=["x"])

# Marks use "steelblue" where the selection condition holds and "gray" otherwise.
bar_color = alt.condition(selection, alt.value("steelblue"), alt.value("gray"))

# Bar chart counting records per year of the `Year` field.
bars = alt.Chart(cars).mark_bar()
chart = bars.encode(
    x="utcyear(Year):O",
    y="count()",
    color=bar_color,
).add_params(selection)

# Wrap the Altair chart with PersIst; the returned widget is the cell output.
PR.vis.TrrackableChart(chart)

OutputWithTrrackWidget(body_widget=VegaLiteChartWidget(debounce_wait=250.0, df_columns=['Name', 'Miles_per_Gal…

In [32]:
# Fetch the dataframe that PersIst exposes under the name "era_df" and display it.
# NOTE(review): presumably created by assigning an "Era" category through the
# chart widget above; the name must already exist in PersIst when this cell
# runs — confirm.
era_df = PR.df.get("era_df")
era_df

Unnamed: 0,Name,Miles_per_Gallon,Cylinders,Displacement,Horsepower,Weight_in_lbs,Acceleration,Year,Origin,__annotations,Era
0,chevrolet chevelle malibu,18.0,8,307.0,130.0,3504,12.0,1970-01-01,USA,No Annotation,Early 70s
1,buick skylark 320,15.0,8,350.0,165.0,3693,11.5,1970-01-01,USA,No Annotation,Early 70s
2,plymouth satellite,18.0,8,318.0,150.0,3436,11.0,1970-01-01,USA,No Annotation,Early 70s
3,amc rebel sst,16.0,8,304.0,150.0,3433,12.0,1970-01-01,USA,No Annotation,Early 70s
4,ford torino,17.0,8,302.0,140.0,3449,10.5,1970-01-01,USA,No Annotation,Early 70s
...,...,...,...,...,...,...,...,...,...,...,...
401,ford mustang gl,27.0,4,140.0,86.0,2790,15.6,1982-01-01,USA,No Annotation,80s
402,vw pickup,44.0,4,97.0,52.0,2130,24.6,1982-01-01,Europe,No Annotation,80s
403,dodge rampage,32.0,4,135.0,84.0,2295,11.6,1982-01-01,USA,No Annotation,80s
404,ford ranger,28.0,4,120.0,79.0,2625,18.6,1982-01-01,USA,No Annotation,80s


In [31]:
# Columns to aggregate with "mean" for each "Era" group.
mean_columns = [
    "Miles_per_Gallon",
    "Cylinders",
    "Displacement",
    "Horsepower",
    "Weight_in_lbs",
    "Acceleration",
]

# Fetch the dataframe stored under "categorized_df_grouped", asking PersIst to
# group it by "Era" and aggregate the listed columns, then display the result.
categorized_df_grouped = PR.df.get(
    "categorized_df_grouped",
    groupby="Era",
    aggregate={column: "mean" for column in mean_columns},
)
categorized_df_grouped

Unnamed: 0_level_0,Miles_per_Gallon,Cylinders,Displacement,Horsepower,Weight_in_lbs,Acceleration
Era,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
80s,31.908989,4.311111,126.655556,80.406977,2474.311111,16.613333
Early 70s,18.528,6.19697,245.450758,127.725191,3285.704545,14.280303
Late 70s,24.176344,5.537634,190.903226,101.784946,2962.967742,15.696774
Mid-70s,21.478022,5.516484,192.615385,99.111111,3051.483516,16.054945


In [33]:
# Open `cars` (loaded in the chart cell above) in PersIst's interactive data
# table; the returned widget is rendered as the cell output.
PR.vis.interactive_table(cars)

OutputWithTrrackWidget(body_widget=InteractiveTableWidget(df_columns=['Name', 'Miles_per_Gallon', 'Cylinders',…

In [38]:
# Fetch the dataframe that PersIst exposes under the name "data_table_df_dyn"
# and display it.
# NOTE(review): presumably created through the interactive table above (the
# displayed result has an `mpg` column and a different row order); the name
# must already exist in PersIst when this cell runs — confirm.
data_table_df_dyn = PR.df.get("data_table_df_dyn")
data_table_df_dyn

Unnamed: 0,Name,mpg,Cylinders,Displacement,Horsepower,Weight_in_lbs,Acceleration,Year,Origin,__annotations
19,buick estate wagon (sw),14.0,8,455.0,225.0,3086,10.0,1970-01-01,USA,No Annotation
8,pontiac catalina,14.0,8,455.0,225.0,4425,10.0,1970-01-01,USA,No Annotation
102,buick electra 225 custom,12.0,8,455.0,225.0,4951,11.0,1973-01-01,USA,No Annotation
6,chevrolet impala,14.0,8,454.0,220.0,4354,9.0,1970-01-01,USA,No Annotation
101,chrysler new yorker brougham,13.0,8,440.0,215.0,4735,11.0,1973-01-01,USA,No Annotation
...,...,...,...,...,...,...,...,...,...,...
60,toyota corolla 1200,31.0,4,71.0,65.0,1773,19.0,1971-01-01,Japan,No Annotation
118,maxda rx3,18.0,3,70.0,90.0,2124,13.5,1973-01-01,Japan,No Annotation
78,mazda rx2 coupe,19.0,3,70.0,97.0,2330,13.5,1972-01-01,Japan,No Annotation
341,mazda rx-7 gs,23.7,3,70.0,100.0,2420,12.5,1980-01-01,Japan,No Annotation


In [None]:
# Scratch cell intentionally left without executable code: the bare name `h`
# that used to be here is not defined anywhere in this notebook and raised
# NameError under Restart Kernel -> Run All.