In [12]:
import torch
from transformers import BertTokenizer, BertModel
import numpy as np
from scipy.spatial.distance import cosine
from typing import List
import re

class MoverScore:
    """Sentence-similarity helper built on mean-pooled BERT embeddings.

    NOTE: despite the class name, the score produced by `compute_moverscore`
    is the cosine similarity of two pooled sentence vectors. No transport /
    Earth Mover's Distance step is performed and IDF weights are not applied.
    """

    def __init__(self, model_name='bert-base-uncased'):
        """Load the tokenizer and encoder named *model_name*; set the model to eval mode."""
        self.tokenizer = BertTokenizer.from_pretrained(model_name)
        self.model = BertModel.from_pretrained(model_name)
        self.model.eval()

    def get_bert_embedding(self, text):
        """Return the mean-pooled last-layer embedding of *text* as a NumPy array.

        Pooling is weighted by the tokenizer's attention mask so that padding
        positions do not dilute the average.

        Args:
            text: A string, or a list of strings to encode as one batch.

        Returns:
            A 1-D array for a single input. For a batch of several inputs, a
            2-D array with one pooled vector per input.
        """
        tokens = self.tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)
        with torch.no_grad():
            outputs = self.model(**tokens)
        # Per the Transformers docs last_hidden_state is (batch, seq_len, hidden).
        hidden = outputs.last_hidden_state
        # Broadcast the (batch, seq_len) mask over the hidden axis.
        mask = tokens['attention_mask'].unsqueeze(-1).to(device=hidden.device, dtype=hidden.dtype)
        # Masked mean over the token axis; clamp guards against an all-zero mask.
        pooled = (hidden * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1)
        if pooled.shape[0] == 1:
            # Single input: drop the batch axis so callers get a plain vector.
            pooled = pooled.squeeze(0)
        return pooled.cpu().numpy()

    def compute_idf(self, corpus):
        """Compute inverse document frequency for every token seen in *corpus*.

        Args:
            corpus: A sized collection of document strings.

        Returns:
            Dict mapping each token to ``log(num_docs / document_frequency)``.
            A token that occurs in every document therefore maps to 0. An
            empty corpus yields an empty dict.
        """
        num_docs = len(corpus)
        doc_freq = {}
        for doc in corpus:
            # set(): count each token at most once per document.
            for token in set(self.tokenizer.tokenize(doc)):
                doc_freq[token] = doc_freq.get(token, 0) + 1
        return {token: np.log(num_docs / count) for token, count in doc_freq.items()}

    def compute_moverscore(self, hypothesis, reference, idf_dict):
        """Return the cosine similarity between the pooled embeddings of two texts.

        Args:
            hypothesis: Candidate text.
            reference: Reference text.
            idf_dict: Accepted for interface compatibility; the current
                implementation does not read it.

        Returns:
            ``1 - cosine_distance`` of the two embeddings.
        """
        hyp_embedding = self.get_bert_embedding(hypothesis)
        ref_embedding = self.get_bert_embedding(reference)
        similarity = 1 - cosine(hyp_embedding, ref_embedding)
        return similarity


# Matches either a comma-grouped integer ("1,000", "12,345,678") or a plain
# run of digits. The grouped alternative comes first so that "1,000" is read
# as one number instead of being split into 1 and 000.
_NUMBER_PATTERN = re.compile(r'\b\d{1,3}(?:,\d{3})+\b|\b\d+\b')


def extract_key_numbers(article: str) -> List[int]:
    """Return every standalone integer in *article*, in order of appearance.

    Digits glued to letters (e.g. "4th", "abc123") are not matched because of
    the word-boundary anchors. Thousands separators are accepted and removed,
    so "1,000" yields 1000. Decimal points get no special treatment: the
    digit runs on either side of a "." are returned as separate integers.
    """
    return [int(match.replace(',', '')) for match in _NUMBER_PATTERN.findall(article)]


def add_numbers_from_article(article: str) -> int:
    """Return the sum of all numbers `extract_key_numbers` finds in *article*.

    An article without any numbers sums to 0.
    """
    return sum(extract_key_numbers(article))


def summarize_with_addition(article: str) -> str:
    """Build a fixed-template summary sentence containing the article's numeric total.

    The total is the sum of the numbers returned by `extract_key_numbers`.
    When fewer than two numbers are found, a fallback message is returned
    instead of the template.
    """
    found = extract_key_numbers(article)
    # Zero or one number: nothing to add together.
    if len(found) <= 1:
        return "Not enough numerical data to calculate a total."
    total = sum(found)
    return f"Millions of Necrons were awankend in a tomb world, while only {total} Salamanders helped liberate the planet and deafeating our enemies."


def fill_cloze_with_total(article: str, cloze_sentence: str, ground_truth: str) -> str:
    """Replace the first ``___`` blank in *cloze_sentence* with the article's numeric total.

    The total comes from `add_numbers_from_article`. *ground_truth* is part
    of the signature but is not read by this function.
    """
    replacement = str(add_numbers_from_article(article))
    # count=1: only the first blank is filled; any later blanks are left as-is.
    return cloze_sentence.replace("___", replacement, 1)


# Example usage: run both numeral tasks on a short sample article.
# The adjacent string literals below are concatenated into one string; the
# standalone integers in the text are 600, 30, 200 and 10.
article_text = (
    "June of this year 600 Salamander space marines joined the fight against the Necrons as they awaken from a tomb world. "
    "I a proud Salamander from the fourth company lead my men through the lands, killing any Necrons that came our way. "
    "As the continue our liberation of the planet, other Salamanders answered our calls for backup and came in droves first it was 30, then 200, and during the final push to the planets liberation we awakened 10 dreadnoughts for the final days of battle. As we chared to meet our enemy, we screamed out from the fires of battle into the anvil of war."
    "As we have liberated this planet I looked around and saw my brothers bodies as well as the little sparks that came with us slumped dead. I then helped with giving each of our brothers funeral rights and awaited my orders for the next battle."

)

# Cloze sentence with a "___" blank to fill, plus a reference sentence.
# NOTE: fill_cloze_with_total accepts the reference but does not read it.
cloze_sentence = "The Salamanders who came to liberate the planet were ___ ."
cloze_gt = "The Salamanders defended a world with only 600 space marines from different companies."

# Task 1: Add numbers and fill in the blank
filled_cloze = fill_cloze_with_total(article_text, cloze_sentence, cloze_gt)
print(f"Filled Cloze: {filled_cloze}")

# Task 2: Numeral-aware headline with addition
numeral_aware_summary = summarize_with_addition(article_text)
print(f"Numeral-aware Summary: {numeral_aware_summary}")


Filled Cloze: The Salamanders who came to liberate the planet were 840 .
Numeral-aware Summary: Millions of Necrons were awankend in a tomb world, while only 840 Salamanders helped liberate the planet and pushed them back.
