# Prompt-based text summarization (QA) trial 1

Can long-t5 perform GPT-like text extraction? 

In [1]:
from datasets import load_dataset
from backend import t5

# Take one training example (index 4) from the "ccdv/pubmed-summarization"
# dataset and pair its article body with the question we want answered.
dataset = load_dataset("ccdv/pubmed-summarization")["train"][4]
question = "What are the symptoms of syncope?"
text = dataset["article"]


No config specified, defaulting to: pubmed-summarization/section
Found cached dataset pubmed-summarization (/root/.cache/huggingface/datasets/ccdv___pubmed-summarization/section/1.0.0/f765ec606c790e8c5694b226814a13f1974ba4ea98280989edaffb152ded5e2b)


  0%|          | 0/3 [00:00<?, ?it/s]

In [2]:
# Keep the placeholders unformatted here. An f-string would interpolate the
# article immediately, and the later .format() call would then try to parse
# any literal "{" or "}" inside the article text as replacement fields.
TEMPLATE = (
    "Based on the following context answer this question: {question} Context: {text}"
)
# Fill the template exactly once; the keyword names must match the placeholders.
t5(TEMPLATE.format(question=question, text=text))




'<pad> cough syncope, a rare form of syncope, may be a result of transient failure of the cerebral autoregulatory mechanism to cope with sudden decrease in cerebral blood flow. this case highlights the fact that cough syncope, a rare form of syncope, may be associated with intracranial mass lesions that indirectly exaggerate the increase in icp in response to cough.</s>'

### Notes for further development

- To get a model that performs extremely well on a single task, we can fine-tune long-t5 by:
    - distilling from data generated by GPT-4? (Stanford's Alpaca approach)
    - fine-tuning on open datasets focused on long-text QA

## Visualize saliency in T5

In [3]:
from transformers import AutoTokenizer, LongT5ForConditionalGeneration
from captum.attr import Saliency
from captum.attr import visualization as viz

# Checkpoint shared by the tokenizer and the model.
model_name = "google/long-t5-tglobal-base"

# Build the prompt, then encode it as PyTorch tensors, truncated and padded
# to a fixed window of 16384 tokens.
tokenizer = AutoTokenizer.from_pretrained(model_name)
prompt = f"answer_me: {question} context: {text}"
inputs_dict = tokenizer(
    prompt,
    return_tensors="pt",
    truncation=True,
    padding="max_length",
    max_length=16384,
)

# Load the model and generate from the encoded prompt (generation is capped
# with max_length=512).
model = LongT5ForConditionalGeneration.from_pretrained(model_name)
y = model.generate(
    inputs_dict["input_ids"],
    attention_mask=inputs_dict["attention_mask"],
    max_length=512,
)



In [4]:
# Put the module in evaluation mode and clear any parameter gradients that may
# have accumulated, so later gradient-based attribution starts from a clean state.
model.eval()
model.zero_grad()

In [5]:
# An encoder-decoder forward pass needs decoder inputs as well as encoder
# inputs; with neither `decoder_input_ids` nor `labels` the model raises a
# ValueError. Seed the decoder with its start token (one per batch row) so the
# call returns the logits for the first generated position.
decoder_input_ids = inputs_dict.input_ids.new_full(
    (inputs_dict.input_ids.shape[0], 1), model.config.decoder_start_token_id
)
y = model(**inputs_dict, decoder_input_ids=decoder_input_ids)

: 

: 

In [1]:
# Wrap the model in Captum's Saliency attribution method; the attributions
# themselves are computed by a later `saliency.attribute(...)` call.
saliency = Saliency(model)

NameError: name 'Saliency' is not defined

In [None]:
# Compute saliency attributions for the encoded prompt, selecting index 0 of
# the model output as the attribution target.
# NOTE(review): `input_ids` is presumably an integer tensor, which cannot carry
# gradients -- attribution likely has to be taken w.r.t. the input embeddings
# instead; confirm.
# NOTE(review): no decoder inputs are forwarded to the model here -- confirm
# whether the seq2seq forward pass can run without them.
saliency.attribute(inputs_dict.input_ids, target=0)

In [None]:
# An encoder-decoder forward pass needs decoder inputs as well as encoder
# inputs; with neither `decoder_input_ids` nor `labels` the model raises a
# ValueError. Seed the decoder with its start token (one per batch row) so the
# call returns the logits for the first generated position.
decoder_input_ids = inputs_dict.input_ids.new_full(
    (inputs_dict.input_ids.shape[0], 1), model.config.decoder_start_token_id
)
y = model(
    inputs_dict.input_ids,
    attention_mask=inputs_dict.attention_mask,
    decoder_input_ids=decoder_input_ids,
)

In [None]:
import torch
from captum.attr import Saliency
from captum.attr import visualization as viz
from transformers import (
    AutoTokenizer,
    LongT5ForConditionalGeneration,
    T5ForConditionalGeneration,
    T5Tokenizer,
)

# Load the LongT5 model and tokenizer. The checkpoint is a LongT5 one, so it is
# loaded with the LongT5 model class (and AutoTokenizer) rather than the plain
# T5 classes, whose architecture does not match these weights.
model_name = "google/long-t5-tglobal-base"
model = LongT5ForConditionalGeneration.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Define the context and the question
context = "The Eiffel Tower is a wrought-iron lattice tower on the Champ de Mars in Paris, France. It is named after the engineer Gustave Eiffel, whose company designed and built the tower."
question = "Who designed the Eiffel Tower?"

# Prepare the input for the model
input_text = f"answer_me: {question} context: {context}"
encoded = tokenizer(input_text, return_tensors="pt")
input_tokens = encoded.input_ids
attention_mask = encoded.attention_mask

# Ensure the model is in evaluation mode and starts with clean gradients
model.eval()
model.zero_grad()

# Token ids are integers and cannot require gradients, so saliency is computed
# with respect to the (floating point) input embeddings instead.
input_embeds = model.get_input_embeddings()(input_tokens).detach()

# A seq2seq forward pass needs decoder inputs; a single start token per batch
# row yields the logits for the first generated (answer) position.
decoder_input_ids = torch.full(
    (input_tokens.shape[0], 1),
    model.config.decoder_start_token_id,
    dtype=torch.long,
)


def first_token_logits(inputs_embeds, attention_mask, decoder_input_ids):
    """Return the vocabulary logits of the first decoded position.

    Args:
        inputs_embeds: Encoder input embeddings, one row per input token.
        attention_mask: Encoder attention mask matching ``inputs_embeds``.
        decoder_input_ids: Decoder start token ids, one per batch row.

    Returns:
        Tensor of shape (batch, vocab_size) that Captum can index with ``target``.
    """
    outputs = model(
        inputs_embeds=inputs_embeds,
        attention_mask=attention_mask,
        decoder_input_ids=decoder_input_ids,
    )
    return outputs.logits[:, 0, :]


# Forward pass: the attribution target is the token the model actually predicts
# first, not a token taken from the question.
with torch.no_grad():
    logits = first_token_logits(input_embeds, attention_mask, decoder_input_ids)
probabilities = torch.softmax(logits, dim=-1)
target_id = int(logits[0].argmax())
target_token = tokenizer.convert_ids_to_tokens(target_id)

# Use Captum's Saliency algorithm for attributions; Captum runs the backward
# pass itself, so no manual loss.backward() is needed.
saliency = Saliency(first_token_logits)
attributions = saliency.attribute(
    input_embeds,
    target=target_id,
    additional_forward_args=(attention_mask, decoder_input_ids),
)

# Collapse the embedding dimension to one score per input token and normalise
# so the colour scale is comparable across inputs.
token_attributions = attributions.sum(dim=-1).squeeze(0)
token_attributions = token_attributions / token_attributions.norm().clamp_min(1e-12)

# Visualize the attributions
tokens = tokenizer.convert_ids_to_tokens(input_tokens[0].tolist())
viz.visualize_text(
    [
        viz.VisualizationDataRecord(
            token_attributions.tolist(),  # word_attributions
            probabilities[0, target_id].item(),  # pred_prob
            target_token,  # pred_class
            "n/a",  # true_class: no ground-truth label for this demo
            target_token,  # attr_class
            token_attributions.sum().item(),  # attr_score
            tokens,  # raw_input_ids
            0.0,  # convergence_score: not produced by Saliency
        )
    ]
)
