In [2]:
# imports
import pandas as pd
df = pd.read_csv("../data/dummy.csv")
from src.data import DoceeForInference
dataset = DoceeForInference(df, use_title=True)
from transformers.utils import PaddingStrategy
from transformers import AutoTokenizer, pipeline

pretrained_model_name_or_path = "ainize/bart-base-cnn"
max_sequence_lenght=512

tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path=pretrained_model_name_or_path,
    padding=PaddingStrategy.LONGEST,
    model_max_length=max_sequence_lenght,
    use_fast=True
)
summarizer = pipeline(
    "summarization",
    model=pretrained_model_name_or_path,
    tokenizer=tokenizer,
    device=0,
    framework="pt"
)


In [5]:
from functools import partial


# okay, so now lets do wrapper functions which invoke different encoding methods
def get_default_decoder(
        summarizer,
        truncation: bool,
        batch_size: int,
        num_workers: int,
        min_length: int,
        max_length: int,
        early_stopping: bool,
        temperature: float,
        num_return_sequences: int
):
    return partial(summarizer,
        truncation=truncation,
        batch_size=batch_size,
        num_workers=num_workers,
        min_length=min_length,
        max_length=max_length,
        early_stopping=early_stopping,
        temperature=temperature,
        num_return_sequences=num_return_sequences
    )


In [27]:
def get_beam_search_decoder(
    partial_decoder,
    num_beams: int,
):
    return partial(
        partial_decoder,
        num_beams=num_beams,
        top_k=0,
        top_p=1,
        do_sample=True,
        penalty_alpha=0.0
    )

In [13]:
default_decoder = get_default_decoder(
    summarizer,
    truncation=True,
    batch_size=1,
    num_workers=1,
    min_length=20,
    max_length=200,
    early_stopping=True,
    temperature=1.0,
    num_return_sequences=3
)

In [22]:
example = dataset[0]
print(example)

Five Iraqi Shia, Sunni and Kurdish political parties sign a deal forming a new "Moderates Front" supporting Prime Minister  Nouri al–Maliki. The accord was the second step towards rebuilding Iraq's political process, Mr Maliki said, after four Kurdish and Shia parties formed a new alliance.
A committee formed by the parties had "accomplished some solutions", he said.
Mr Maliki has been criticised in recent days by US politicians, but the White House hailed Sunday's deal as "an important symbol" of unity in Iraq.
"We congratulate Iraq's leaders on the important agreement reached today in Baghdad... [and] their commitment to work together for the benefit of all Iraqis," said a statement issued from President George W Bush's Texas ranch. 'Sense of movement'
Issues under discussion between Iraqi politicians include holding provincial elections and easing a ban on former Baath party members in the civil service and military.
After the meeting, Mr Maliki appeared at a news conference alongsi

In [28]:
beam_search_decoder = get_beam_search_decoder(default_decoder, num_beams=5)

In [29]:
outputs = beam_search_decoder(example)
outputs

[{'summary_text': "Five Iraqi Shia, Sunni and Kurdish political parties sign deal .\nThe accord is the second step towards rebuilding Iraq's political process .\nIt comes after four Kurdish and Shia parties formed a new alliance .\nMr Maliki has been criticised in recent days by US politicians .\nBut the White House hailed Sunday's deal as an important symbol of unity ."},
 {'summary_text': "Five Iraqi Shia, Sunni and Kurdish political parties sign deal .\nThey form a new 'Moderates Front' supporting Prime Minister Nouri al–Maliki .\nThe accord is the second step towards rebuilding Iraq's political process .\nMr Maliki has been criticised in recent days by US politicians ."},
 {'summary_text': 'Five Iraqi Shia, Sunni and Kurdish political parties sign a deal .\nThey form a new "Moderates Front" supporting Prime Minister Nouri al–Maliki .\nThe accord is the second step towards rebuilding Iraq\'s political process .\nIssues under discussion include holding provincial elections and easing

In [31]:
# works like a charm

def get_top_p_decoder(
    partial_decoder,
    top_p: float
):
    return partial(
        partial_decoder,
        top_p=top_p,
        top_k=0,
        num_beams=1,
        penalty_alpha=0.0,
        do_sample=True
    )

In [32]:
top_p_decoder = get_top_p_decoder(default_decoder, top_p=0.95)
outputs = top_p_decoder(example)
outputs

[{'summary_text': 'Five Iraqi Shia, Sunni and Kurdish political parties form new coalition .\nNew agreement also signed with four Kurdish and Shia parties .\nThis comes after key new deal came to power in Iraq .'},
 {'summary_text': "Northern Iraqi parties form a new deal with Prime Minister Nouri al-Maliki .\nFive Iraqi Shia, Sunni and Kurdish parties sign agreement on unity .\nDeal has been pushed by the US as a key step towards national reconciliation .\nMr Maliki says African states were acting like Iraq was their property .\nHe hits back at senators who say Iraq was 'their property'"},
 {'summary_text': "The accord was the second step towards rebuilding Iraq's political process .\nNegotiations will over holding provincial elections and easing a ban on ex-Baath party members .\nIssues under discussion between Iraqi politicians include holding provincial election and easing the ban on former Baath party member .\nMr Maliki hit back at top US politicians who have called for him to be

In [33]:
def get_top_k_decoder(
        partial_decoder,
        top_k: int
):
    return partial(
        partial_decoder,
        top_k=top_k,
        top_p=1.0,
        penalty_alpha=0.0,
        do_sample=True,
        num_beams=1
    )

top_k_decoder = get_top_k_decoder(default_decoder, top_k=5)
outputs_top_k = top_k_decoder(example)
outputs_top_k

[{'summary_text': 'Five Iraqi Shia, Sunni and Kurdish parties sign deal with PM .\nThey form new "Moderates Front," supporting Prime Minister Nouri al–Maliki .\nMr Maliki has been criticised by US politicians over Iraq .'},
 {'summary_text': 'Five Iraqi Shia, Sunni and Kurdish political parties sign new deal .\nThey form an alliance supporting Prime Minister Nouri al–Maliki .\nDeal has been pushed by U.S. as a key step in Iraqi reconciliation .'},
 {'summary_text': 'Five Iraqi Shia, Sunni and Kurdish political parties sign a deal .\nThe accord was the second step towards rebuilding Iraq\'s political process .\nIt comes after four Iraqi Kurdish and Shia parties formed a new alliance .\nPresident George W Bush hails the deal as "an important symbol"'}]

In [38]:
def get_contrastive_decoder(
        partial_decoder,
        penalty_alpha: float,
        top_k: int
):
    return partial(
        partial_decoder,
        num_beams=1,
        penalty_alpha=penalty_alpha,
        do_sample=True,
        top_k=top_k,
        top_p=1.0
    )

In [39]:
contrastive_decoder = get_contrastive_decoder(
    default_decoder,
    penalty_alpha=0.6,
    top_k=5
)

contrastive_output = contrastive_decoder(example)
contrastive_output



[{'summary_text': 'Five parties form a new \'Moderates Front\' supporting Prime Minister Nouri al–Maliki .\nA committee formed by the parties has "accomplished some solutions", he said .\nMr Maliki has been criticised in recent days by US politicians .\nThe deal has been pushed by the US as a key step on the path to national reconciliation .'},
 {'summary_text': "Iraqi Prime Minister Nouri al-Maliki signed an accord with five political parties .\nThe agreement comes after four Kurdish, Shia and Sunni parties formed a new alliance .\nMr Maliki hit back at senators for calling for him to be removed .\nHe said they were acting as if Iraq was 'their property'\nUS commander in Iraq is expected to report to Congress in mid-September .\nA report by Gen David Petraeus is supposed give Iraq time to make political progress .\nUS troops are supposed to arrive in Baghdad in September ."},
 {'summary_text': "Five Iraqi Shia, Sunni and Kurdish political parties signed deal .\nIt was the second step 

In [None]:
# works like a charm tbh
# TODO - a way to compose different decoding methods

# for now, this is enough