In [None]:

# import numpy as np # linear algebra
# import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))


In [3]:
!pip install -q gradio

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.9/46.9 MB[0m [31m27.4 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m322.2/322.2 kB[0m [31m13.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m95.2/95.2 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.3/11.3 MB[0m [31m77.2 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.0/72.0 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.3/62.3 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25h

In [4]:
import gradio as gr
# Show which Gradio version the kernel actually imported.
print(gr.__version__)

5.24.0


In [None]:
import gradio as gr
from transformers import pipeline

# Load the sentiment analysis model.
# The checkpoint is named explicitly (the same one the later cells load) instead
# of relying on the pipeline's implicit default for the task.
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)

# Prediction function
def classify_sentiment(text):
    """Run `text` through the module-level `sentiment_pipeline` and summarise it.

    Only the first entry of the pipeline output is used. The returned string
    contains that entry's label and its score rounded to two decimals.
    """
    top_prediction = sentiment_pipeline(text)[0]
    return (
        f"Prediction: {top_prediction['label']} "
        f"(Confidence: {top_prediction['score']:.2f})"
    )

# Build the Gradio UI: a three-line textbox feeds classify_sentiment and the
# returned summary string is shown as plain text.
interface = gr.Interface(
    title="Sentiment Classifier Challenge",
    description="Try to fool this Hugging Face sentiment model using sarcasm, ambiguity, or emotional tricks!",
    fn=classify_sentiment,
    inputs=gr.Textbox(
        lines=3,
        placeholder="Enter a tricky sentence to fool the model...",
    ),
    outputs="text",
)

# Start the app; share=True asks Gradio for a public link as well.
interface.launch(share=True)

In [None]:
import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
import torch
import numpy as np

# Load model + tokenizer
# One checkpoint name is used for both, so the tokenizer and the model match.
model_name = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
# Wrap the already-loaded model/tokenizer pair in a sentiment-analysis pipeline.
classifier = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)

# Attribution function
def interpret(text):
    """Predict the sentiment of `text` and score each word by gradient saliency.

    The saliency of a token is the L1 norm of the gradient of the predicted
    class logit with respect to that token's word embedding, normalised so the
    largest value is (approximately) 1.

    Args:
        text: The sentence to classify.

    Returns:
        A tuple ``(result, summary)`` where ``result`` is a list of
        ``(word, score)`` pairs (word-piece continuations starting with "##"
        are merged into the preceding word, keeping the larger score) and
        ``summary`` is a string of the form ``"<LABEL> (Confidence: 0.97)"``.
    """
    # Tokenize
    encoded = tokenizer(text, return_tensors="pt", truncation=True)
    input_ids = encoded["input_ids"]

    # Token ids are integers and cannot carry gradients, so differentiate with
    # respect to the word embeddings instead: look them up, turn them into a
    # leaf tensor that requires grad, and feed them to the model directly.
    embedding_layer = model.base_model.embeddings.word_embeddings
    input_embeds = embedding_layer(input_ids).detach().requires_grad_(True)

    # Forward pass
    model.zero_grad()  # do not accumulate parameter grads across calls
    outputs = model(
        inputs_embeds=input_embeds,
        attention_mask=encoded["attention_mask"],
    )
    logits = outputs.logits[0]
    probs = torch.nn.functional.softmax(logits, dim=-1)
    pred_label = torch.argmax(probs, dim=-1).item()
    confidence = probs[pred_label].item()

    # Backward pass from the predicted class logit
    logits[pred_label].backward()

    # Per-token importance: L1 norm of the embedding gradient
    importances = input_embeds.grad[0].abs().sum(dim=-1).detach().numpy()

    # Normalize importances (epsilon guards against an all-zero gradient)
    importances = importances / (importances.max() + 1e-9)

    # Decode tokens and merge word pieces back into words
    tokens = tokenizer.convert_ids_to_tokens(input_ids[0])
    words = []
    word_scores = []
    for token, score in zip(tokens, importances):
        score = float(score)  # plain Python float for the UI payload
        if token.startswith("##") and words:
            words[-1] += token[2:]
            word_scores[-1] = max(word_scores[-1], score)  # merge token scores
        else:
            words.append(token)
            word_scores.append(score)

    result = list(zip(words, word_scores))

    # Label and confidence both come from the single forward pass above.
    label = model.config.id2label[pred_label]
    return result, f"{label} (Confidence: {confidence:.2f})"

# Gradio app: interpret() returns (word/score pairs, summary string), which map
# onto the highlighted-text and plain-text outputs in that order.
demo = gr.Interface(
    title="🧠 Sentiment Classifier + Word Attribution",
    description="Try to trick the model, and see how much each word contributes to the prediction.",
    fn=interpret,
    inputs=gr.Textbox(
        lines=3,
        placeholder="Try something sarcastic or tricky...",
    ),
    outputs=[
        gr.HighlightedText(label="Token Attribution"),
        gr.Text(label="Prediction"),
    ],
)

# Start the app; share=True asks Gradio for a public link as well.
demo.launch(share=True)


In [5]:
import torch
import numpy as np
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch.nn.functional as F

# Load model & tokenizer
# One checkpoint name is used for both, so the tokenizer and the model match.
model_name = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
# Switch the model to evaluation mode before it is used for inference below.
model.eval()

# Mapping from class index to label name, read from the model config.
labels = model.config.id2label  # {0: 'NEGATIVE', 1: 'POSITIVE'}

def interpret_and_score(text):
    """Classify `text`, attribute the prediction to words, and list class scores.

    Token importance is the L1 norm of the gradient of the predicted class
    logit with respect to that token's word embedding, divided by the largest
    such value in the sentence.

    Args:
        text: The sentence to classify.

    Returns:
        A 3-tuple:
        - list of ``(word, score)`` pairs, with "##" word pieces merged into
          the preceding word (keeping the larger score);
        - ``"<LABEL> (Confidence: 0.97)"`` for the predicted class;
        - a newline-separated listing of every class's logit and probability.
    """
    # Tokenize
    inputs = tokenizer(text, return_tensors="pt", truncation=True)
    input_ids = inputs["input_ids"]
    attention_mask = inputs["attention_mask"]

    # Look the word embeddings up ourselves and keep their gradient. This is
    # the only gradient read below, so no module hook is needed (and nothing
    # is left registered on the model if the forward/backward pass raises).
    embeddings = model.distilbert.embeddings.word_embeddings(input_ids)
    embeddings.retain_grad()

    outputs = model(inputs_embeds=embeddings, attention_mask=attention_mask)
    logits = outputs.logits.squeeze(0)  # drop the batch dimension
    probs = F.softmax(logits, dim=-1).detach().numpy()
    pred_idx = torch.argmax(logits).item()
    pred_label = labels[pred_idx]
    confidence = probs[pred_idx]

    # Backward pass from the predicted class logit
    model.zero_grad()  # do not accumulate parameter grads across calls
    logits[pred_idx].backward()

    grads = embeddings.grad[0]  # shape: [seq_len, hidden_dim]
    token_importance = grads.abs().sum(dim=1)  # shape: [seq_len]
    token_importance = token_importance / (token_importance.max() + 1e-10)
    token_importance = token_importance.detach().numpy()

    # Convert tokens to words
    tokens = tokenizer.convert_ids_to_tokens(input_ids[0])
    words = []
    scores = []

    for token, score in zip(tokens, token_importance):
        score = float(score)  # plain Python float for the UI payload
        if token.startswith("##") and words:
            words[-1] += token[2:]
            scores[-1] = max(scores[-1], score)
        else:
            words.append(token)
            scores.append(score)

    highlighted = list(zip(words, scores))

    # Detailed logit/prob table, one line per class
    class_scores = "\n".join(
        f"{labels[i]}: Logit = {logits[i].item():.3f}, Probability = {probs[i]*100:.2f}%"
        for i in range(len(labels))
    )

    return highlighted, f"{pred_label} (Confidence: {confidence:.2f})", class_scores

# Gradio Interface: the three values returned by interpret_and_score() map onto
# the three output components in order.
demo = gr.Interface(
    title="🧪 Sentiment Classifier + Word Impact + Class Scores",
    description="Enter a sentence. See prediction, word contributions, and detailed logits/probabilities.",
    fn=interpret_and_score,
    inputs=gr.Textbox(
        lines=3,
        placeholder="Try something sarcastic...",
    ),
    outputs=[
        gr.HighlightedText(label="🧠 Word Attribution"),
        gr.Text(label="🎯 Prediction"),
        gr.Text(label="📊 Logits & Probabilities"),
    ],
)

# Start the app; share=True asks Gradio for a public link as well.
demo.launch(share=True)


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://4ecdc4e209aa7ceb12.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


