In [15]:
# Install every third-party package the notebook imports. captum, scikit-learn,
# tensorflow-datasets and tensorflow are used by later cells, so they are listed too.
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install --quiet transformers gradio matplotlib torch captum scikit-learn tensorflow-datasets tensorflow

[0m

In [16]:
import tensorflow_datasets as tfds
import random
import matplotlib.pyplot as plt
from transformers import pipeline


# Load the IMDB test split as (text, label) pairs.
ds = tfds.load("imdb_reviews", split="test", as_supervised=True)

N_PER_CLASS = 100  # size of each class in the balanced evaluation subset
SEED = 42          # fixes the shuffle so the sample order (and few-shot picks) are reproducible

# Stream the dataset once, filling one bucket per label (0 = negative, 1 = positive)
# and stopping as soon as both buckets are full. Bucket sizes are tracked directly
# instead of re-filtering the whole collected list on every iteration.
by_label = {0: [], 1: []}
for text, label in ds:
    lbl = int(label.numpy())
    bucket = by_label.get(lbl)  # any label other than 0/1 is ignored
    if bucket is not None and len(bucket) < N_PER_CLASS:
        bucket.append({"text": text.numpy().decode("utf-8"), "label": lbl})
    if all(len(b) >= N_PER_CLASS for b in by_label.values()):
        break

pos = by_label[1]
neg = by_label[0]
# `data` is kept for any later cell that reads it; it now holds only the selected reviews.
data = pos + neg
samples = pos + neg
# A dedicated Random instance shuffles deterministically without touching global RNG state.
random.Random(SEED).shuffle(samples)

In [17]:

# Chain-of-Thought style prompt: the review followed by a "think step by step" cue.
cot_tpl = (
    "Review:\n{text}\n"
    "Let's think step by step whether this is Positive or Negative.\nAnswer:"
)


# The first three shuffled samples serve as labelled demonstrations for the few-shot prompt.
# NOTE(review): these reviews also remain in `samples`, so an evaluation over `samples`
# scores the model on its own demonstrations — confirm whether that is intended.
# NOTE(review): the classifier is created with truncation=True; three full reviews may
# exceed the model's input limit and crowd out the review being classified — verify.
examples = samples[:3]
few_shot_header = "\n\n".join(
    f"Review:\n{ex['text']}\nLabel: {'Positive' if ex['label']==1 else 'Negative'}"
    for ex in examples
)

# The header contains raw review text and becomes part of a str.format template below,
# so literal braces must be doubled; otherwise a "{" or "}" inside a review would raise
# or be treated as a replacement field when the template is formatted.
few_shot_header_escaped = few_shot_header.replace("{", "{{").replace("}", "}}")

# Prompt style name -> template with a single `{text}` placeholder for the review.
templates = {
    "Direct":       "Review:\n{text}\nSentiment?",
    "Few-Shot":     few_shot_header_escaped + "\n\nReview:\n{text}\nLabel:",
    "Chain-of-Thought": cot_tpl
}

In [18]:

# Sentiment pipeline used for all prompt styles. truncation=True lets over-long
# prompts be cut to the model's input limit instead of raising.
classifier = pipeline(
    task="sentiment-analysis",
    model="nlptown/bert-base-multilingual-uncased-sentiment",
    truncation=True,
)

Device set to use cuda:0


In [19]:
def predict_label(prompt: str):
    """Classify `prompt` and collapse the star rating into a binary sentiment.

    The first whitespace-separated token of the predicted label is parsed as an
    integer star count; 4 stars or more maps to "Positive", anything lower to
    "Negative".

    Returns:
        A `(sentiment, raw)` tuple where `raw` is the first result entry returned
        by the classifier.
    """
    top = classifier(prompt)[0]
    star_count = int(top["label"].split(maxsplit=1)[0])
    if star_count < 4:
        sentiment = "Negative"
    else:
        sentiment = "Positive"
    return sentiment, top

def run_style(style: str, example):
    """Predict the sentiment of one example using the prompt template named `style`.

    The example's "text" value is substituted into `templates[style]` and the
    resulting prompt is classified; only the sentiment string is returned.
    """
    template = templates[style]
    sentiment, _ = predict_label(template.format(text=example["text"]))
    return sentiment

In [20]:
from sklearn.metrics import accuracy_score

# Ground-truth label strings do not depend on the prompt style, so build them once
# instead of once per style.
truths = ["Positive" if ex["label"] == 1 else "Negative" for ex in samples]

# Accuracy of every prompt style over the same samples.
results = {}
for style in templates:
    preds = [run_style(style, ex) for ex in samples]
    results[style] = accuracy_score(truths, preds)

# Display
for style, acc in results.items():
    print(f"{style:20s}: {acc*100:.1f}%")

Direct              : 70.0%
Few-Shot            : 50.0%
Chain-of-Thought    : 73.0%


In [21]:
from sklearn.metrics import accuracy_score

# This cell previously repeated the evaluation loop of the cell above verbatim,
# re-running the classifier on every sample for every style. The accuracies are
# already stored in `results`, so reuse them and only display the table.
for style, acc in results.items():
    print(f"{style:20s}: {acc*100:.1f}%")

Direct              : 70.0%
Few-Shot            : 50.0%
Chain-of-Thought    : 73.0%


In [24]:


import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from captum.attr import IntegratedGradients
import matplotlib.pyplot as plt
import html


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m32.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.3/18.3 MB[0m [31m107.3 MB/s[0m eta [36m0:00:00[0m
[0m[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
thinc 8.3.6 requires numpy<3.0.0,>=2.0.0, but you have numpy 1.26.4 which is incompatible.[0m[31m
[0m

In [33]:


# Tokenizer and sequence-classification model loaded from the same checkpoint.
model_name = "nlptown/bert-base-multilingual-uncased-sentiment"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# output_attentions=True is requested so that model outputs include attention weights.
model = AutoModelForSequenceClassification.from_pretrained(model_name, output_attentions=True)


In [34]:


def forward_fn(input_ids, attention_mask=None):
    """Run `model` on token ids (with an optional attention mask) and return its logits."""
    outputs = model(input_ids=input_ids, attention_mask=attention_mask)
    return outputs.logits

# Integrated Gradients explainer wrapped around forward_fn.
# NOTE(review): forward_fn consumes integer token ids, whereas Integrated Gradients
# interpolates and differentiates its inputs; attributing directly over input_ids is
# not expected to work — confirm. A later cell rebinds `ig` to forward_emb, which
# takes embeddings instead.
ig = IntegratedGradients(forward_fn)


def make_prob_figure(logits):
    """Plot the softmax probabilities of the first row of `logits` as a bar chart.

    Args:
        logits: 2-D tensor of class logits; softmax is taken over dim 1 and only
            row 0 is plotted. It may still be attached to the autograd graph.

    Returns:
        The matplotlib figure, with one bar per class labelled "1★", "2★", ...
    """
    # detach() first: .numpy() raises on tensors that require grad, which is the
    # case whenever the logits were produced outside torch.no_grad().
    probs  = torch.softmax(logits, dim=1)[0].detach().cpu().numpy()
    labels = [f"{i+1}★" for i in range(len(probs))]
    fig, ax = plt.subplots(figsize=(4,2.5))
    ax.bar(labels, probs)
    ax.set_ylim(0,1)  # probabilities: fixed axis keeps figures comparable across inputs
    ax.set_title("Confidence per Rating")
    return fig


In [35]:


def make_attn_heatmap(attns, inputs, layer=0, head=0):
    """Draw one attention matrix as a token-by-token heatmap.

    Args:
        attns: indexable of per-layer attention tensors; the matrix shown is
            `attns[layer][0][head]` (the middle index is presumably the batch
            dimension — confirm against the model output).
        inputs: mapping with an "input_ids" entry; row 0 supplies the tick labels.
        layer: zero-based layer index (default 0, the first layer).
        head: zero-based head index (default 0, the first head).

    Returns:
        The matplotlib figure.
    """
    # detach() first: .numpy() raises on tensors that still require grad.
    attn   = attns[layer][0][head].detach().cpu().numpy()
    tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
    fig, ax = plt.subplots(figsize=(4,4))
    ax.imshow(attn, aspect='auto')
    ax.set_xticks(range(len(tokens)))
    ax.set_xticklabels(tokens, rotation=90, fontsize=6)
    ax.set_yticks(range(len(tokens)))
    ax.set_yticklabels(tokens, fontsize=6)
    # The title is 1-based, so the defaults still read "Layer 1 Head 1 Attention".
    ax.set_title(f"Layer {layer + 1} Head {head + 1} Attention")
    return fig


In [36]:


def explain_with_ig(text):
    """Render per-token Integrated Gradients attributions for `text` as HTML.

    The text is tokenized, the model's predicted class is used as the attribution
    target, and each token is wrapped in a <span> whose red background opacity is
    its min-max-normalised attribution score.

    Returns:
        A string of space-separated <span> elements, one per token.
    """
    enc = tokenizer(text, return_tensors="pt", truncation=True)
    input_ids = enc["input_ids"]
    mask      = enc.get("attention_mask")

    # Integrated Gradients interpolates between a baseline and the input and takes
    # gradients along that path. Integer token ids can be neither interpolated nor
    # differentiated, so attribute over the input embedding vectors instead.
    embeddings = model.get_input_embeddings()(input_ids).detach()

    def _logits_from_embeddings(inputs_embeds, attention_mask=None):
        # Forward pass that starts from embeddings rather than token ids.
        return model(inputs_embeds=inputs_embeds, attention_mask=attention_mask).logits

    with torch.no_grad():
        logits = _logits_from_embeddings(embeddings, mask)
    pred_idx  = torch.argmax(logits, dim=1).item()

    # With return_convergence_delta=False only the attributions are returned,
    # so there is nothing to unpack.
    atts = IntegratedGradients(_logits_from_embeddings).attribute(
        inputs=embeddings,
        baselines=torch.zeros_like(embeddings),  # all-zero embedding as the reference input
        additional_forward_args=(mask,),
        target=pred_idx,
        return_convergence_delta=False,
    )
    # Collapse the embedding dimension to get one score per token.
    scores    = atts.sum(dim=-1).squeeze(0).detach().cpu().numpy()

    # Min-max normalise to [0, 1]; the epsilon avoids division by zero when all scores are equal.
    norm      = (scores - scores.min()) / (scores.max() - scores.min() + 1e-8)
    tokens    = tokenizer.convert_ids_to_tokens(input_ids.squeeze(0).tolist())
    spans     = []
    for tok, sc in zip(tokens, norm):
        color = f"rgba(255,0,0,{sc:.2f})"
        # Tokens come from user text, so escape them before embedding in HTML.
        spans.append(f"<span style='background:{color}'>{html.escape(tok)}</span>")
    return " ".join(spans)


In [46]:
from captum.attr import IntegratedGradients


def forward_emb(inputs_embeds, attention_mask):
    """Run `model` on pre-computed input embeddings and return its logits."""
    return model(inputs_embeds=inputs_embeds, attention_mask=attention_mask).logits


# Integrated Gradients explainer over forward_emb, i.e. attributions are computed
# with respect to input embeddings. This replaces any earlier binding of `ig`.
ig = IntegratedGradients(forward_emb)

def explain_with_ig(text):
    """Render per-token Integrated Gradients attributions for `text` as HTML.

    The text is tokenized (truncated/padded to 128 tokens), embedded, and
    attributed against an all-zero embedding baseline with the model's predicted
    class as target. Each real (non-padding) token becomes a <span> whose red
    background opacity is its min-max-normalised attribution score.

    Returns:
        A string of space-separated <span> elements, one per non-padding token
        (empty string if there are none).
    """
    enc = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding="max_length",
        max_length=128
    )
    input_ids      = enc["input_ids"]
    attention_mask = enc["attention_mask"]

    # Attribute over embedding vectors: they are continuous, unlike token ids.
    embed_layer   = model.get_input_embeddings()
    embeddings    = embed_layer(input_ids)
    baseline_embs = torch.zeros_like(embeddings)

    # Prediction only picks the attribution target, so no gradients are needed here.
    with torch.no_grad():
        logits = model(inputs_embeds=embeddings, attention_mask=attention_mask).logits
    pred_idx = torch.argmax(logits, dim=1).item()

    # The convergence delta was never used, so it is no longer requested.
    attributions = ig.attribute(
        inputs=embeddings,
        baselines=baseline_embs,
        additional_forward_args=(attention_mask,),
        target=pred_idx,
        return_convergence_delta=False
    )

    # Collapse the embedding dimension to get one score per token position.
    scores = attributions.sum(dim=-1).squeeze(0).detach().cpu().numpy()
    tokens = tokenizer.convert_ids_to_tokens(input_ids.squeeze(0).tolist())
    is_real = attention_mask.squeeze(0).bool().tolist()

    # Keep only positions the attention mask marks as real: padding tokens would
    # otherwise be rendered and would distort the min/max used for normalisation.
    kept = [(tok, float(sc)) for tok, sc, real in zip(tokens, scores, is_real) if real]
    if not kept:
        return ""

    values = [sc for _, sc in kept]
    lowest = min(values)
    spread = max(values) - lowest + 1e-8  # epsilon avoids division by zero

    spans  = []
    for tok, sc in kept:
        color = f"rgba(255,0,0,{(sc - lowest) / spread:.2f})"
        # Tokens come from user text, so escape them before embedding in HTML.
        spans.append(f"<span style='background:{color}'>{html.escape(tok)}</span>")

    return " ".join(spans)

In [39]:

# Example inputs offered in the Gradio UI. Each entry is [review text, prompt style],
# matching the order of the interface inputs; the style must be a key of `templates`.
gr_examples = [
    ["I absolutely loved this movie—brilliant acting and a great story!", "Direct"],
    ["What a waste of time. The plot was dull and the characters were uninteresting.", "Direct"],
    ["I wouldn’t say it was bad.", "Few-Shot"],
    ["The cinematography was breathtaking, but the story felt painfully predictable.", "Chain-of-Thought"],
    ["Oh great, another “groundbreaking” sequel—just what we needed.", "Chain-of-Thought"],
    ["This movie is so atrocious, it became my guilty-pleasure highlight of the year.", "Few-Shot"],
    ["I laughed, I cried, and I frankly questioned my life choices afterward.", "Chain-of-Thought"],
]

In [None]:


# NOTE(review): `explain_predict_with_ig` is not defined in any cell visible in this
# notebook, so a fresh "Restart & Run All" would stop here with a NameError. The
# fallback below is only used when no such function exists yet; it is assembled from
# the helpers defined above to match the five outputs declared on the interface.
# Confirm it against the original implementation if that is recovered.
if "explain_predict_with_ig" not in globals():
    def explain_predict_with_ig(text, style):
        """Classify `text` under prompt `style` and build all explanation outputs.

        Returns a 5-tuple in the order of the interface outputs: sentiment label,
        raw classifier result, probability bar chart, attention heatmap, and the
        Integrated Gradients token-attribution HTML.
        """
        # An unknown or unselected style falls back to the bare review text.
        prompt = templates.get(style, "{text}").format(text=text)
        label, raw = predict_label(prompt)

        enc = tokenizer(prompt, return_tensors="pt", truncation=True)
        with torch.no_grad():  # plots only need values, not gradients
            outputs = model(**enc)

        return (
            label,
            raw,
            make_prob_figure(outputs.logits),
            make_attn_heatmap(outputs.attentions, enc),
            explain_with_ig(prompt),
        )


# Gradio UI: a review text box and a prompt-style dropdown in, five explanation views out.
demo = gr.Interface(
    fn=explain_predict_with_ig,
    inputs=[
        gr.Textbox(lines=5, label="Movie Review"),
        gr.Dropdown(choices=list(templates.keys()), label="Prompt Style"),
    ],
    outputs=[
        gr.Textbox(label="Predicted Label"),
        gr.JSON(label="Raw Model Output"),
        gr.Plot(label="Probability Distribution"),
        gr.Plot(label="Attention Heatmap"),
        gr.HTML(label="Token Attributions"),
    ],
    title="Explainable Sentiment Analysis Playground",
    description="Label + Confidence + Attention + Integrated Gradients",
    examples=gr_examples,
)

# share=True publishes a temporary public URL; debug=True keeps the cell running so
# errors and logs stay visible in the notebook.
demo.launch(share=True, debug=True)


Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://b88a6f792716bde205.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
