## Introduction
This notebook runs all the processing steps one by one for several models and renders the output. Each section is individually runnable after a kernel restart.

## Observations
* Symbolic tracing fails out of the box on every BERT-style model tried here, because it creates proxies for mutually exclusive inputs (e.g. `input_ids` and `inputs_embeds` of `DistilBertModel.forward`)
  * This was fixed by making the `concrete_args` input to `fx.symbolic_trace` available to the `MAV` and `MavTracer` objects
  * For BERT models, `concrete_args={'inputs_embeds':None}` gets around this issue
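* Even so, most of the NLP models tried here branch on values that become proxies during tracing, and `torch.fx` does not support proxies as inputs to control flow (hence the `TraceError` outputs in the sections below)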
  * Perhaps fixing more arguments via `concrete_args` could work around this. To be investigated.

## DistilBERT

In [1]:
import sys
sys.path.append('..')
from transformers import DistilBertModel, DistilBertTokenizer
import torch
from idlmav import MAV, plotly_renderer

# Load the pretrained DistilBERT checkpoint and its tokenizer, then put the model in eval mode.
model = DistilBertModel.from_pretrained("distilbert-base-uncased")
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
model.eval()

# One short sentence, tokenised to PyTorch tensors, is used as the example input.
inputs = tokenizer("Hello world", return_tensors="pt")
device = 'cpu'

# `inputs_embeds` is fixed to None via concrete_args so that symbolic tracing does not
# create proxies for the mutually exclusive inputs of `forward` (see Observations).
# The device selected above is passed explicitly, matching the T5 section.
mav = MAV(model, inputs, device=device, concrete_args={'inputs_embeds':None})
with plotly_renderer('notebook_connected'):
    mav.show_figure()

TraceError: symbolically traced variables cannot be used as inputs to control flow


## T5-small encoder

In [2]:
import sys
sys.path.append('..')
from transformers import T5Model, T5Tokenizer
import torch
from idlmav import MAV, plotly_renderer

# Device on which the visualisation run is performed.
device = 'cpu'

# Pretrained T5-small checkpoint; `.eval()` returns the module itself, so it can be chained.
model = T5Model.from_pretrained("t5-small").eval()
tokenizer = T5Tokenizer.from_pretrained("t5-small")

# Tokenise one translation-style prompt into PyTorch tensors.
inputs = tokenizer(
    "translate English to French: Hello, how are you?",
    return_tensors="pt",
)

# Only the encoder is handed to MAV; `inputs_embeds` is pinned to None for tracing.
mav = MAV(
    model.encoder,
    inputs,
    device=device,
    concrete_args={'inputs_embeds': None},
)
with plotly_renderer('notebook_connected'):
    mav.show_figure()

TypeError: finfo(): argument 'type' (position 1) must be torch.dtype, not Attribute

## BERT mini

In [1]:
import sys
sys.path.append('..')
from transformers import BertModel, BertTokenizer
import torch
from idlmav import MAV, plotly_renderer

# Load the 2-layer, 128-hidden BERT checkpoint and its tokenizer, then put the model in eval mode.
model = BertModel.from_pretrained("google/bert_uncased_L-2_H-128_A-2")
tokenizer = BertTokenizer.from_pretrained("google/bert_uncased_L-2_H-128_A-2")
model.eval()

# One short sentence, tokenised to PyTorch tensors, is used as the example input.
inputs = tokenizer("This is a test sentence.", return_tensors="pt")
device = 'cpu'

# `inputs_embeds` is fixed to None via concrete_args so that symbolic tracing does not
# create proxies for the mutually exclusive inputs of `forward` (see Observations).
# The device selected above is passed explicitly, matching the T5 section.
mav = MAV(model, inputs, device=device, concrete_args={'inputs_embeds':None})
with plotly_renderer('notebook_connected'):
    mav.show_figure()

TraceError: symbolically traced variables cannot be used as inputs to control flow

In [6]:
# Render the interactive widget view (with the overview panel enabled) for the `mav`
# object built in the previous cell.
# NOTE(review): the recorded output of the previous cell is a TraceError and this cell's
# execution count is 6, so `mav` presumably came from an earlier successful run --
# confirm this cell still works after Restart Kernel -> Run All.
mav.show_widget(add_overview=True)

HBox(children=(Box(children=(FloatRangeSlider(value=(-9.5, 0.5), layout=Layout(height='400px'), max=0.5, min=-…

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## ALBERT Lite

In [1]:
import sys
sys.path.append('..')
from transformers import AlbertModel, AlbertTokenizer
import torch
from idlmav import MAV, plotly_renderer

# Load the pretrained ALBERT base v2 checkpoint and its tokenizer, then put the model in eval mode.
model = AlbertModel.from_pretrained("albert-base-v2")
tokenizer = AlbertTokenizer.from_pretrained("albert-base-v2")
model.eval()

# One short sentence, tokenised to PyTorch tensors, is used as the example input.
inputs = tokenizer("The quick brown fox jumps over the lazy dog.", return_tensors="pt")
device = 'cpu'

# `inputs_embeds` is fixed to None via concrete_args so that symbolic tracing does not
# create proxies for the mutually exclusive inputs of `forward` (see Observations).
# The device selected above is passed explicitly, matching the T5 section.
mav = MAV(model, inputs, device=device, concrete_args={'inputs_embeds':None})
with plotly_renderer('notebook_connected'):
    mav.show_figure()

TraceError: symbolically traced variables cannot be used as inputs to control flow

## ModernBERT

In [1]:
import sys
sys.path.append('..')
from transformers import AutoTokenizer, AutoModelForMaskedLM
import torch
from idlmav import MAV, plotly_renderer

# Checkpoint name shared by the tokenizer and the masked-LM model.
model_id = "answerdotai/ModernBERT-base"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForMaskedLM.from_pretrained(model_id)
# Explicit eval mode, as in the other sections of this notebook.
model.eval()

# One masked sentence, tokenised to PyTorch tensors, is used as the example input.
inputs = tokenizer("The capital of France is [MASK].", return_tensors="pt")

# `inputs_embeds` is fixed to None via concrete_args so that symbolic tracing does not
# create proxies for the mutually exclusive inputs of `forward` (see Observations).
mav = MAV(model, inputs, concrete_args={'inputs_embeds':None})
with plotly_renderer('notebook_connected'):
    mav.show_figure()

TraceError: symbolically traced variables cannot be used as inputs to control flow