In [1]:
from src.models import (Summarizer, 
                        AnswerSelector,
                        EntityExtractor,
                        QuestionGenerator,
                        QuestionAnswerer, 
                        SyntaxEvaluator,
                        Linker,
                        Highlighter)

In [2]:
# Sample passage analyzed by every demo cell in this notebook.
# It consists of three sentences separated by single newline characters.
text = "\n".join(
    [
        "CBS broadcast Super Bowl 50 in the U.S., and charged an average of $5 million for a 30-second commercial during the game.",
        "The Super Bowl 50 halftime show was headlined by the British rock group Coldplay with special guest performers Beyoncé and Bruno Mars, who headlined the Super Bowl XLVII and Super Bowl XLVIII halftime shows, respectively.",
        "It was the third-most watched U.S. broadcast ever.",
    ]
)

# Summarization

In [3]:
# Build a Summarizer with its default arguments and apply it to the sample passage.
# The call is the last expression of the cell, so its return value is what the
# notebook displays; in the recorded run that is a list holding one summary string.
# NOTE(review): the "models/final/en/sumup" line in the output looks like a
# model-path message emitted while loading — confirm where it is printed.
model = Summarizer()
model(text)

models/final/en/sumup


['CBS broadcast Super Bowl 50 in the U.S., charged an average of $5 million for a 30-second commercial during the game. Beyoncé and Bruno Mars were headlined by the British rock group Coldplay.']

# AnswerSelector

In [4]:
# Rebind `model` to an AnswerSelector and run it on the sample passage.
# In the recorded output the result is a list of dicts, each with a 'text' key
# (a candidate answer span) and a 'verification_type' key
# ('semantic' or 'orthographic' in this run).
model = AnswerSelector()
model(text)



[{'text': '30-second commercial', 'verification_type': 'semantic'},
 {'text': 'Super Bowl 50 halftime show', 'verification_type': 'semantic'},
 {'text': 'CBS', 'verification_type': 'orthographic'},
 {'text': 'U.S.', 'verification_type': 'orthographic'},
 {'text': '$5 million', 'verification_type': 'orthographic'},
 {'text': 'Coldplay', 'verification_type': 'orthographic'},
 {'text': 'Beyoncé', 'verification_type': 'orthographic'},
 {'text': 'Super Bowl XLVIII', 'verification_type': 'orthographic'}]

# EntityExtractor

In [5]:
# Rebind `model` to an EntityExtractor and run it on the sample passage.
# The recorded output is a list of dicts with 'text', 'ner_label', 'start' and 'end'.
# 'start'/'end' are character offsets into `text` with an exclusive end
# (e.g. text[0:3] == 'CBS', text[67:77] == '$5 million').
model = EntityExtractor()
model(text)

[{'text': 'CBS', 'ner_label': 'ORG', 'start': 0, 'end': 3},
 {'text': '$5 million', 'ner_label': 'MONEY', 'start': 67, 'end': 77},
 {'text': '30-second', 'ner_label': 'TIME', 'start': 84, 'end': 93},
 {'text': 'British', 'ner_label': 'NORP', 'start': 175, 'end': 182},
 {'text': 'Coldplay', 'ner_label': 'ORG', 'start': 194, 'end': 202},
 {'text': 'Beyoncé', 'ner_label': 'PERSON', 'start': 233, 'end': 240},
 {'text': 'Bruno Mars', 'ner_label': 'PERSON', 'start': 245, 'end': 255},
 {'text': 'Super Bowl XLVIII', 'ner_label': 'EVENT', 'start': 296, 'end': 313},
 {'text': 'third', 'ner_label': 'ORDINAL', 'start': 355, 'end': 360}]

# QuestionGenerator

In [6]:
# Answer spans (each one a verbatim substring of `text`) to generate questions for.
answers = [
    '$5 million',
    '30-second',
    'third',
]

# Generator instance; it is called with `text` and `answers` in the next cell.
model = QuestionGenerator()

models/final/en/qg


In [7]:
# Generate one question per entry of `answers` (both names come from earlier cells).
# In the recorded output each result dict carries 'answer', 'verification_type',
# 'answer_start', 'answer_end', 'question', 'explanation', 'sentence_id' and 'type'.
# NOTE(review): 'answer_start'/'answer_end' appear to be offsets within the
# sentence given in 'explanation', not within `text` — 'third' is reported at
# 11-16 here but at 355-360 by the other models. Confirm before mixing offsets.
model(text, answers=answers)

[{'answer': '$5 million',
  'verification_type': None,
  'answer_start': 67,
  'answer_end': 77,
  'question': 'How much did CBS charge for a 30-second commercial during Super Bowl 50?',
  'explanation': 'CBS broadcast Super Bowl 50 in the U.S., and charged an average of $5 million for a 30-second commercial during the game.',
  'sentence_id': 0,
  'type': 'open'},
 {'answer': '30-second',
  'verification_type': None,
  'answer_start': 84,
  'answer_end': 93,
  'question': 'How long was the commercial during Super Bowl 50?',
  'explanation': 'CBS broadcast Super Bowl 50 in the U.S., and charged an average of $5 million for a 30-second commercial during the game.',
  'sentence_id': 0,
  'type': 'open'},
 {'answer': 'third',
  'verification_type': None,
  'answer_start': 11,
  'answer_end': 16,
  'question': 'What was the highest-watched U.S. broadcast ever?',
  'explanation': 'It was the third-most watched U.S. broadcast ever.',
  'sentence_id': 2,
  'type': 'open'}]

# QuestionAnswerer

In [8]:
# Questions to answer against `text`; they match the questions shown in the
# QuestionGenerator output above.
questions = [
    'How much did CBS charge for a 30-second commercial during Super Bowl 50?',
    'How long was the commercial during Super Bowl 50?',
    'What was the highest-watched U.S. broadcast ever?',
]

# Answerer instance built with default arguments; it is called in the next cell.
model = QuestionAnswerer()

No model was supplied, defaulted to distilbert-base-cased-distilled-squad (https://huggingface.co/distilbert-base-cased-distilled-squad)


In [9]:
# Answer every entry of `questions` using the sample passage as context.
# The recorded output holds one dict per question with 'score', 'start', 'end'
# and 'answer'; here 'start'/'end' index into the full `text`
# (e.g. 'third' at 355-360).
model(question=questions, context=text)

[{'score': 0.800715982913971, 'start': 67, 'end': 77, 'answer': '$5 million'},
 {'score': 0.9657741785049438, 'start': 84, 'end': 93, 'answer': '30-second'},
 {'score': 0.21026048064231873, 'start': 355, 'end': 360, 'answer': 'third'}]

# SyntaxEvaluator

In [10]:
# Questions to score. This is the same list as in the QuestionAnswerer cell,
# redefined here so this section can be run without that cell.
questions = [
    'How much did CBS charge for a 30-second commercial during Super Bowl 50?',
    'How long was the commercial during Super Bowl 50?',
    'What was the highest-watched U.S. broadcast ever?',
]

# Evaluator instance built with default arguments; it is called in the next cell.
model = SyntaxEvaluator()

Some weights of the model checkpoint at salesken/query_wellformedness_score were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [11]:
# Score each entry of `questions`; the recorded output is a list with one float per question.
# NOTE(review): the first recorded score is slightly above 1.0, so the values
# are apparently not clamped to [0, 1] — confirm before treating them as probabilities.
model(questions)

[1.0007057189941406, 0.9883835911750793, 0.9966148734092712]

# Linker

In [12]:
# Rebind `model` to a Linker and run it on the sample passage.
# The recorded output is a dict with an 'entities' list; each entry has 'entity',
# 'kb_id', 'positions' (dicts with 'start', 'end', 'text'), 'enable',
# 'relevance' and 'confidence'. The saved output is truncated in this export.
# NOTE(review): the 'kb_id' values look like Wikidata Q-identifiers — confirm.
model = Linker()
model(text)

{'entities': [{'entity': 'CBS',
   'kb_id': 'Q43380',
   'positions': [{'start': 0, 'end': 3, 'text': 'CBS'}],
   'enable': True,
   'relevance': 0.201,
   'confidence': 19.61},
  {'entity': 'Coldplay',
   'kb_id': 'Q45188',
   'positions': [{'start': 194, 'end': 202, 'text': 'Coldplay'}],
   'enable': True,
   'relevance': 0.2417,
   'confidence': 10.72},
  {'entity': 'Beyoncé',
   'kb_id': 'Q36153',
   'positions': [{'start': 233, 'end': 240, 'text': 'Beyoncé'}],
   'enable': True,
   'relevance': 0.2033,
   'confidence': 16.43},
  {'entity': 'Bruno Mars',
   'kb_id': 'Q1450',
   'positions': [{'start': 245, 'end': 255, 'text': 'Bruno Mars'}],
   'enable': True,
   'relevance': 0.3803,
   'confidence': 20.0},
  {'entity': 'Super Bowl XLVIII halftime show',
   'kb_id': 'Q17026444',
   'positions': [{'start': 296,
     'end': 328,
     'text': 'Super Bowl XLVIII halftime shows'}],
   'enable': False,
   'relevance': 0.6836,
   'confidence': 3.878},
  {'entity': 'Super Bowl 50',
   'kb_

# Highlighter

In [13]:
# Rebind `model` to a Highlighter and run it on the sample passage.
# The recorded output is a dict with 'sentences' (the passage split into its three
# sentences) plus 'highlight_25' and 'highlight_40'.
# NOTE(review): the highlight lists presumably hold indices into 'sentences',
# with 25/40 being some selection budget — confirm against the Highlighter source.
model = Highlighter()
model(text)

{'sentences': ['CBS broadcast Super Bowl 50 in the U.S., and charged an average of $5 million for a 30-second commercial during the game.',
  'The Super Bowl 50 halftime show was headlined by the British rock group Coldplay with special guest performers Beyoncé and Bruno Mars, who headlined the Super Bowl XLVII and Super Bowl XLVIII halftime shows, respectively.',
  'It was the third-most watched U.S. broadcast ever.'],
 'highlight_25': [0],
 'highlight_40': [0]}