In [None]:
!pip install --upgrade datasets huggingface_hub


In [None]:
pip install transformers accelerate datasets

In [None]:
from datasets import load_dataset
# Load only the "test" split of multi_news; a later cell re-splits this subset
# into the train/test partitions used for fine-tuning.
multinews = load_dataset("multi_news", split="test")

In [None]:
# Preview only the first rows rather than rendering the whole split.
multinews.to_pandas().head()


In [None]:
from transformers import AutoTokenizer
# Tokenizer of the 't5-small' checkpoint (the same checkpoint name is used for the model below).
tokenizer = AutoTokenizer.from_pretrained('t5-small')

In [None]:
# 80/20 split of the loaded subset. The fixed seed keeps the partition identical
# across kernel restarts, so training and evaluation results are comparable.
multi_news = multinews.train_test_split(test_size=0.2, seed=42)

In [None]:
# Task prefix prepended to every source document before tokenization.
prefix = "summarize: "

def process_function(examples):
    """Tokenize a batch of documents and their reference summaries.

    Args:
        examples: Batch mapping with a 'document' list (source texts) and a
            'summary' list (target texts), as supplied by a batched `map`.

    Returns:
        The tokenizer output for the prefixed documents, extended with a
        'labels' entry that holds the token ids of the summaries.
    """
    inputs = [prefix + doc for doc in examples['document']]
    # Documents are truncated to 1024 tokens, summaries to 128.
    model_inputs = tokenizer(inputs, max_length=1024, truncation=True)
    # `text_target` is the documented way to tokenize label text: it tells the
    # tokenizer this is target-side input, which stays correct even if the
    # checkpoint is swapped for one with distinct source/target processing.
    labels = tokenizer(text_target=examples['summary'], max_length=128, truncation=True)
    model_inputs['labels'] = labels['input_ids']

    return model_inputs


In [None]:
# Run process_function over the train/test splits in batches; the result is
# indexed by 'train' and 'test' when the trainer is built.
tokenizer_multi_news = multi_news.map(process_function, batched=True)

In [None]:
from transformers import DataCollatorForSeq2Seq, AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments, Seq2SeqTrainer
# Load the model first: the collator's `model` argument expects the model
# object (used to prepare decoder inputs from the labels), not a checkpoint name.
model = AutoModelForSeq2SeqLM.from_pretrained('t5-small')
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)

In [None]:
import torch

trainings_args = Seq2SeqTrainingArguments(
    output_dir="./results",
    do_eval=True,
    # `do_eval` on its own does not schedule evaluation during training; an
    # explicit strategy is required for the eval dataset to actually be used.
    # (The argument is called `evaluation_strategy` in transformers < 4.41.)
    eval_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=10,
    per_device_eval_batch_size=10,
    weight_decay=0.01,
    save_total_limit=3,
    num_train_epochs=10,
    # Mixed precision requires a CUDA device; fall back to full precision otherwise
    # instead of failing when the arguments are constructed.
    fp16=torch.cuda.is_available(),
)


In [None]:
# Assemble the trainer: model and hyper-parameters first, then the two
# tokenized splits, then the collator that batches them.
trainer = Seq2SeqTrainer(
    model=model,
    args=trainings_args,
    train_dataset=tokenizer_multi_news['train'],
    eval_dataset=tokenizer_multi_news['test'],
    data_collator=data_collator,
)

In [None]:
# Start fine-tuning with the model, arguments and datasets configured on the trainer.
trainer.train()

In [None]:
# Sample document (a film plot synopsis) that the fine-tuned model is asked to summarize below.
text = '''In 1954, U.S. Marshal Edward "Teddy" Daniels and his new partner Chuck Aule travel to Ashecliffe Hospital for the criminally insane on the inhospitable Shutter Island, Boston Harbor, to investigate the disappearance of Rachel Solando, a patient of the hospital who had previously drowned her three children.

The staff, led by psychiatrist Dr. John Cawley and his colleague Dr. Jerimiah Naehring, appear uncooperative. The marshals learn that Dr. Lester Sheehan, who was treating Solando, had left the island on vacation immediately after Solando disappeared. Teddy experiences migraine headaches, flashbacks of his experiences as a U.S. Army soldier during the liberation of Dachau, and also vivid dreams of his wife Dolores, who was killed in a fire set by arsonist Andrew Laeddis. Teddy explains to Chuck that he took the case to find Laeddis, believing he is on the island. Solando suddenly resurfaces and believes Teddy is her husband. Teddy later breaks into the restricted Ward C to find Laeddis, where he meets patient George Noyce who appears to know Teddy. He tells Teddy that the doctors experiment on patients and some are taken to a lighthouse to be lobotomized. He warns Teddy that everyone is deceiving him and tells him not to trust Chuck.

Teddy regroups with Chuck and they climb the cliffs toward the lighthouse but become separated. Believing he saw Chuck's body on the rocks below, Teddy climbs down but finds only a cave where a woman claiming to be the real Solando is hiding. She states that she is a former psychiatrist who discovered clandestine experiments to develop mind control but was forcibly committed. She says that Cawley and Dr. Naehring will use Teddy's war trauma to feign a psychotic break, allowing them to have him also committed. Teddy returns to the hospital and is greeted by Cawley. When Teddy asks about Chuck's whereabouts, Cawley insists that Teddy does not have a partner and that he arrived on the island alone.

Convinced Chuck was taken to the lighthouse, Teddy heads there but runs into Naehring, who attempts to sedate him. Teddy overpowers him and breaks into the lighthouse, only to discover Cawley waiting for him. Teddy confronts Cawley and reveals his encounter with Solando, saying he believes Cawley is experimenting on him. Cawley denies that Solando ever existed, and insists that Teddy has not been drugged, explaining the tremors as withdrawals from chlorpromazine, a neuroleptic medication that Teddy has been taking for two years. Chuck arrives and reveals he is, in fact, Dr. Sheehan. Cawley explains that "Teddy" is Andrew Laeddis, a U.S. Marshal incarcerated at Ashecliffe for murdering his manic depressive wife after she drowned their three children. Andrew did not seek treatment for Dolores when she burned down their apartment and instead moved his family to a lake house, where Dolores carried out the killings. Cawley explains that Andrew's delusion is a result of his guilt, that his migraines and hallucinations are withdrawal symptoms, and that he had created the alternate persona of Edward Daniels,[a] also a Marshal, who acted violently and espoused conspiracy theories about the facility. The "investigation" is an elaborate role-play to regain his true persona. Overwhelmed by his sudden recall, Andrew faints.

Awakening later, Andrew calmly recounts the truth, satisfying the doctors that he is lucid. Cawley notes that they had achieved this state nine months before, but that Andrew had quickly regressed. He warns that this will be Andrew's last chance and if he lapses again he will be lobotomized due to his very violent conduct towards other patients such as Noyce, and towards the guards. Sometime later, Andrew relaxes on the hospital grounds with Sheehan. Appearing delusional, Andrew again refers to Sheehan as "Chuck" and says they must leave the island. Sheehan signals to Cawley, who orders that Andrew be lobotomized. Andrew then asks Sheehan if it would be worse "to live as a monster, or to die as a good man". A stunned Sheehan calls Andrew "Teddy" but the latter does not respond and leaves peacefully with the orderlies for his operation.'''

In [None]:
# Training inputs were built as `prefix + document`, so the same task prefix
# has to be present at inference time for the input to match what the model saw.
input_ids = tokenizer(prefix + text, max_length=1024, truncation=True, return_tensors='pt').input_ids
# Follow whatever device the model lives on instead of hard-coding 'cuda'.
input_ids = input_ids.to(model.device)

In [None]:
import torch

# The model may still be in training mode after fine-tuning; eval mode turns
# dropout off so generation is deterministic.
model.eval()
with torch.no_grad():
    # Generate unconditionally: the former `if model.device.type == 'cuda'`
    # guard left `output` undefined (NameError below) on any non-CUDA model.
    # Moving the ids to the model's device makes the call work on CPU and GPU.
    output = model.generate(input_ids.to(model.device), max_length=128, num_beams=5)

# Only one sequence was passed in, so the first row is the generated summary.
summary_ids = output[0].tolist()

summary = tokenizer.decode(summary_ids, skip_special_tokens=True)
print(summary)

In [None]:
# Reference summary of `text`; the generated summary is scored against it with ROUGE below.
ref_summary = '''In 1954, U.S. Marshal Edward "Teddy" Daniels and his partner Chuck Aule investigate Rachel Solando's disappearance at Ashecliffe Hospital on Shutter Island. They encounter Dr. John Cawley and Dr. Jerimiah Naehring, who seem uncooperative. Teddy experiences migraine headaches, flashbacks, and vivid dreams of his wife Dolores, who was killed by arsonist Andrew Laeddis. He meets George Noyce, who warns him not to trust Chuck. Teddy overpowers Naehring and breaks into the lighthouse, where he discovers Dr. Sheehan is Andrew Laeddis, a U.S. Marshal incarcerated for murdering his wife. Andrew regains lucidity, but Cawley warns him of lobotomization if he lapses again.'''

In [None]:
pip install rouge

In [None]:
from rouge import Rouge
# Score the generated summary (hypothesis) against the reference summary.
rouge = Rouge()
scores = rouge.get_scores(hyps=summary, refs=ref_summary)
scores

In [None]:
# Persist the fine-tuned model through the trainer. No path is given, so the
# destination is presumably the trainer's configured output_dir ("./results") — confirm.
trainer.save_model()

In [None]:
# Save the tokenizer next to the weights so the "summarizer" directory can be
# reloaded on its own (model and tokenizer from the same path).
model.save_pretrained("summarizer")
tokenizer.save_pretrained("summarizer")