# Abstractive Text Summarization with BART

Model doc: https://huggingface.co/transformers/model_doc/bart.html

In [3]:
# Import the BART model, tokenizer and config classes, plus pandas for data loading
from transformers import BartForConditionalGeneration, BartTokenizer, BartConfig
import pandas as pd

In [4]:
# Read the news dataset from the local CSV file; the `text` and `summary`
# columns are used by the cells further down.
data = pd.read_csv(
    "./data/data_news.csv",
    encoding="utf-8",
)

# Bare last expression so the notebook renders the frame.
data

Unnamed: 0,category,summary,text
0,entertainment,Nigel McCune from the Musicians' Union said Br...,Musicians to tackle US red tape\n\nMusicians' ...
1,entertainment,But they still want more.They have to want to ...,"U2's desire to be number one\n\nU2, who have w..."
2,entertainment,"Babyshambles, which he formed after his acrimo...",Rocker Doherty in on-stage fight\n\nRock singe...
3,entertainment,A Series of Unfortunate Events also stars Scot...,Snicket tops US box office chart\n\nThe film a...
4,entertainment,"Ocean's Twelve, the crime caper sequel starrin...",Ocean's Twelve raids box office\n\nOcean's Twe...
...,...,...,...
2219,tech,Writing a Microsoft Word document can be a dan...,Warning over Windows Word files\n\nWriting a M...
2220,tech,The lifts take only 30 seconds to whisk passen...,Fast lifts rise into record books\n\nTwo high-...
2221,tech,Nintendo is releasing an adapter for its DS ha...,Nintendo adds media playing to DS\n\nNintendo ...
2222,tech,The new strains of the Cabir mobile phone viru...,Fast moving phone viruses appear\n\nSecurity f...


In [2]:
# Load the pretrained tokenizer and model from one checkpoint name.

# A single constant feeds both from_pretrained calls, so the tokenizer and
# the model can never be loaded from two different checkpoints by accident.
# Set it to 'facebook/bart-base' to try the base model instead.
MODEL_NAME = 'facebook/bart-large-cnn'

tokenizer = BartTokenizer.from_pretrained(MODEL_NAME)
model = BartForConditionalGeneration.from_pretrained(MODEL_NAME)

In [5]:
# Choose which article to summarize by its row position in `data`.
n = 15

# Pull out the article body and its reference summary for that row.
original_text = data["text"].values[n]
original_summary = data["summary"].values[n]


In [6]:
# Tokenize the article and generate the summary token ids.

# Call the tokenizer directly rather than the deprecated batch_encode_plus.
# truncation=True clips the article to the tokenizer's maximum length;
# without it an over-long article would reach the model unclipped.
inputs = tokenizer(
    [original_text],
    truncation=True,
    return_tensors='pt',
)

# Pass the attention mask explicitly so generate() does not have to infer
# which positions are real tokens.
summary_ids = model.generate(
    inputs['input_ids'],
    attention_mask=inputs['attention_mask'],
    early_stopping=True,
)

In [7]:
# Convert the first generated sequence of token ids back into plain text,
# leaving out the special tokens. Nothing is printed here; the summary is
# only stored for the report cell.
bart_summary = tokenizer.decode(
    token_ids=summary_ids[0],
    skip_special_tokens=True,
)

In [8]:
# Print the source article, its reference summary and the BART-generated
# summary one after another for a side-by-side read.

print(f"""
      Original Text: 
      
      {original_text}
      
      Original Summary: 
      
      {original_summary}
      
      Generated Summary: 
      
      {bart_summary}
      
      """)


      Original Text: 
      
      Rapper Snoop Dogg sued for 'rape'

US rapper Snoop Dogg has been sued for $25m (£13m) by a make-up artist who claimed he and his entourage drugged and raped her two years ago.

The woman said she was assaulted after a recording of the Jimmy Kimmel Live TV show on the ABC network in 2003. The rapper's spokesman said the allegations were "untrue" and the woman was "misusing the legal system as a means of extracting financial gain". ABC said the claims had "no merit". The star has not been charged by police.

The lawsuit, filed in Los Angeles on Friday, says the woman's champagne was spiked and she was then assaulted. The rapper's spokesperson said: "Snoop will have the opportunity to prove in a court of law that [the alleged victim] is opportunistic and deceitful. "We are confident that in this case, [the alleged victim's] claims against Snoop Dogg will be rejected." The lawsuit names Snoop Dogg - real name Calvin Broadus - plus three associates, The W

In [9]:
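# Observation: the generated summary reads really well for this example (qualitative impression only; no metric was computed)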