In [None]:
# Import external resources
import json
from allennlp.common.util import import_submodules
from allennlp.models.archival import load_archive
from allennlp.predictors import Predictor
from collections import defaultdict
from typing import List

In [None]:
# Change the working directory to be the root of the Github repo
# so that the module's code can be found by AllenNLP
import os

# Only move up when the `summarize` package directory is not already visible
# from the current directory. An unconditional chdir climbs two more levels
# every time this cell is re-run, which leaves the kernel outside the repo.
if not os.path.isdir('summarize'):
    os.chdir('../..')
import_submodules('summarize')

# Last expression of the cell, so the resulting working directory is displayed
os.getcwd()

In [None]:
# The extractive model and the extractive step are restored from the same
# archive; only the configuration overrides passed to `load_archive` differ.
extractive_model_url = 'https://danieldeutsch.s3.amazonaws.com/summarize/experiments/deutsch2019/v1.1/extractive-step/extractive-model/model/topics/context/model.tar.gz'
abstractive_model_url = 'https://danieldeutsch.s3.amazonaws.com/summarize/experiments/deutsch2019/v1.1/abstractive-step/coverage/model/extractive-model/context/model.tar.gz'

# Extractive model (selects exactly 1 sentence): the word limit is nulled out
# and the sentence limit is set to 1 through the overrides.
overrides = '{"model.metrics": [], "model.initializer": null, "dataset_reader.max_num_sentences": null, "model.max_words": null, "model.max_sents": 1}'
extractive_archive = load_archive(extractive_model_url, overrides=overrides)

# Extractive step model (selects 200 words as a preprocessing step): same
# archive, without the length-limit overrides used above.
overrides = '{"model.metrics": [], "model.initializer": null, "dataset_reader.max_num_sentences": null}'
extractive_step_archive = load_archive(extractive_model_url, overrides=overrides)

# Abstractive step model
overrides = '{"model.metrics": [], "model.initializer": null}'
abstractive_archive = load_archive(abstractive_model_url, overrides=overrides)

# Build one predictor per archive, selecting the predictor by name
extractive_predictor = Predictor.from_archive(extractive_archive, 'cloze-extractive-predictor')
extractive_step_predictor = Predictor.from_archive(extractive_step_archive, 'cloze-extractive-predictor')
abstractive_predictor = Predictor.from_archive(abstractive_archive, 'cloze-abstractive-predictor')

In [None]:
# Define the functions that run each model and return its output
def _run_extractive_model(predictor: Predictor, document: List[str], topics: List[str], context: List[str]) -> List[str]:
    """
    Run an extractive predictor and return the sentences it selected.

    The inputs are sent to ``predictor.predict_json`` under the keys
    ``document``, ``topics`` and ``context``. The selected sentences are looked
    up in the document returned under ``output['metadata']['document']``, not
    in the ``document`` argument.

    Returns the sentences at ``output['predicted_indices']``, in that order.
    """
    payload = {'document': document, 'topics': topics, 'context': context}
    prediction = predictor.predict_json(payload)

    selected_positions = prediction['predicted_indices']
    returned_sentences = prediction['metadata']['document']

    cloze = []
    for position in selected_positions:
        cloze.append(returned_sentences[position])
    return cloze

def run_extractive_model(document: List[str], topics: List[str], context: List[str]) -> List[str]:
    """Return the sentences that ``extractive_predictor`` selects from ``document``."""
    return _run_extractive_model(
        predictor=extractive_predictor,
        document=document,
        topics=topics,
        context=context,
    )

def run_extractive_step(document: List[str], topics: List[str], context: List[str]) -> List[str]:
    """Return the sentences that ``extractive_step_predictor`` selects from ``document``."""
    return _run_extractive_model(
        predictor=extractive_step_predictor,
        document=document,
        topics=topics,
        context=context,
    )

def run_abstractive_step(document: List[str], topics: List[str], context: List[str]) -> str:
    """
    Run ``abstractive_predictor`` and return the ``'cloze'`` field of its output.

    The inputs are sent to ``predict_json`` under the keys ``document``,
    ``topics`` and ``context``.
    """
    request = {'document': document, 'topics': topics, 'context': context}
    response = abstractive_predictor.predict_json(request)
    return response['cloze']

In [None]:
# Define the input data. The text should be pretokenized
# `topics`, `document` and `context` are each a list of strings; they are the
# three arguments passed to the run_* functions in the cells below.
topics = ['Barack Obama', 'Early life and career', 'Family and personal life']

# One sentence per list element, with tokens separated by single spaces
# (punctuation and clitics such as "n't" and "'s" are split off as their own tokens).
document = [
    "Michelle Robinson and Barack Obama had been dating for a couple of years , and she was tired of his endless debates about whether marriage still meant anything as an institution .",
    "So when Obama launched into one of those discussions yet again over dinner at a fancy restaurant in 1991 , Robinson lit into her boyfriend , lecturing him on the need to get serious in their relationship .",
    "Then dessert came .",
    "On the plate was a box .",
    "Inside was an engagement ring .",
    "`` He said , 'That kind of shuts you up , does n't it ? ' `` Michelle Obama recounted years later .",
    "The couple married the following year .",
    "And today , Michelle , 43 , and Democratic presidential hopeful Barack Obama , 46 , will celebrate their 15th wedding anniversary .",
    "The marriage might never have happened .",
    "They met in 1989 when Obama spent his summer as a first-year law student at the Chicago law firm of Sidley & Austin , and Michelle Robinson was the lawyer assigned to be his adviser .",
    "Everybody at the firm had been buzzing about the smart , first-year Harvard Law School student , so she was expecting him to be `` nerdy , strange , off-putting . ''",
    "`` But I was charmed , '' she said .",
    "`` I was pleasantly surprised by who he turned out to be . ''",
    "Still , because of their professional relationship , Michelle Robinson tried to fix Obama up with her friends .",
    "Then , halfway through the summer , Obama asked her out .",
    "On their first date , they went to the Art Institute , strolled down Michigan Avenue and caught Spike Lee 's `` Do the Right Thing . ''",
    "`` It was fantastic , '' Michelle Obama said in 2004 .",
    "`` He was definitely putting on the charm . ... It worked .",
    "He swept me off my feet . ''",
    "Two years later came the proposal dinner at Gordon 's on Clark Street .",
    "And what was the dessert next to that engagement ring ?",
    "`` I do n't even remember , '' Michelle Obama conceded in 2004 .",
    "`` I do n't think I even ate it .",
    "I was so shocked and sort of a little embarrassed because he did sort of shut me up . ''",
]

# Two sentences, one per list element.
# NOTE(review): unlike `document`, these strings are only partly tokenized
# (e.g. "1989," and "Obama's" keep their punctuation attached) -- confirm
# whether the models expect the context to be pretokenized as well.
context = [
    'In June 1989, Obama met Michelle Robinson when he was employed as a summer associate at the Chicago law firm of Sidley Austin .',
    'Robinson was assigned for three months as Obama\'s adviser at the firm, and she joined him at several group social functions but declined his initial requests to date.'
]

In [None]:
# Run the extractive model, which selects 1 sentence from the input document.
# The call is the cell's last expression, so the returned list is displayed.
run_extractive_model(document=document, topics=topics, context=context)

In [None]:
# Run the extractive preprocessing step, which takes ~200 tokens from the
# document, and keep the selected sentences in `preprocessed_data`.
preprocessed_data = run_extractive_step(
    document=document,
    topics=topics,
    context=context,
)
# Pretty-print the selected sentences as an indented JSON list
print(json.dumps(preprocessed_data, indent=2))

In [None]:
# Generate the cloze by running the abstractive step on the sentences chosen
# by the extractive preprocessing step, which take the place of the full document.
run_abstractive_step(document=preprocessed_data, topics=topics, context=context)