In [1]:
import time
import torch
import itertools as it
import more_itertools as mit
from transformers import pipeline, PegasusForConditionalGeneration, PegasusTokenizer

In [2]:
# Hugging Face checkpoint identifier shared by the model and its tokenizer.
model_name = "google/pegasus-xsum"

# Both objects are loaded from the same checkpoint so that the vocabulary the
# tokenizer produces matches what the model expects.
model = PegasusForConditionalGeneration.from_pretrained(model_name)
tokenizer = PegasusTokenizer.from_pretrained(model_name)

In [3]:
def summarize(text: str | list[str]) -> str | list[str]:
    # Avoid CUDA out of memory errors by chunking (should only use ~8GB)
    if type(text) is list and len(text) > 5:
        return list(it.chain(*map(summarize, mit.chunked(text, 5))))

    # Tokenize input
    tokens = tokenizer(text, truncation=True, padding="longest", return_tensors="pt")
    
    # Use CUDA to accelerate computations
    tokens = tokens.to("cuda")
    txt_summary = model.to("cuda").generate(**tokens)
    
    # Decode the output
    response = tokenizer.batch_decode(txt_summary, skip_special_tokens=True)
    return response if type(text) is not str else response[0]

In [4]:
# Plain-text attachment whose paragraphs are summarized in the cells below.
filename = "/../../24212003_requirements_for_artificial_intelligence/attachments/2662901.txt"

# Read the whole document into memory as UTF-8 text (read mode is the default).
with open(filename, encoding="utf-8") as source_file:
    text = source_file.read()

In [5]:
# GPU run. In the recorded outputs this took ~6s versus ~47s for the CPU cell
# below, i.e. CUDA is roughly 8x faster on this document.
# The measured span also covers whatever device transfers summarize() performs.
# perf_counter() is used because it is the monotonic, high-resolution clock
# meant for measuring elapsed intervals; time.time() can jump with the wall clock.
start = time.perf_counter()
response = summarize(text.split("\n\n"))
print(f"Total time: {time.perf_counter() - start}")
response



Total time: 5.972177267074585


['The European Association of Co-operative Banks (EACB) represents, promotes and defends the common interests of its 27 member institutions and of cooperative banks with regard to banking as well as to co-operative legislation.',
 'The European Association of Co-operative Banks (EACB) has released the following statement on the European Commission’s Artificial Intelligence (AI) legislative proposal.',
 'The European Association of Credit Banks (EACB) has given its reaction to the European Commission’s proposal on artificial intelligence (AI) in the financial sector.',
 "Recital 80 of the European Parliament and the Council's Capital Requirements Directive (CRR) on prudential requirements for credit institutions and investment firms contains the following:",
 'Co-operative banks have been exploring the possibility offered by AI systems with the GDPR in mind.',
 'We would welcome a more targeted approach, limited to rule-based techniques and approaches built on AI in the sense that such 

In [6]:
# CPU baseline for the same paragraphs. Unlike summarize(), every paragraph is
# sent through the model as one single batch (no chunking).
# NOTE: this leaves the global model on the CPU once the cell has run.
# perf_counter() is the monotonic clock intended for timing intervals.
start = time.perf_counter()
tokens = tokenizer(
    text.split("\n\n"), padding="longest", truncation=True, return_tensors="pt"
).to("cpu")
txt_summary = model.to("cpu").generate(**tokens)
response = tokenizer.batch_decode(txt_summary, skip_special_tokens=True)
print(time.perf_counter() - start)
response

46.69537925720215


['The European Association of Co-operative Banks (EACB) represents, promotes and defends the common interests of its 27 member institutions and of cooperative banks with regard to banking as well as to co-operative legislation.',
 'The European Association of Co-operative Banks (EACB) has released the following statement on the European Commission’s Artificial Intelligence (AI) legislative proposal.',
 'The European Association of Credit Banks (EACB) has given its reaction to the European Commission’s proposal on artificial intelligence (AI) in the financial sector.',
 "Recital 80 of the European Parliament and the Council's Capital Requirements Directive (CRR) on prudential requirements for credit institutions and investment firms contains the following:",
 'Co-operative banks have been exploring the possibility offered by AI systems with the GDPR in mind.',
 'We would welcome a more targeted approach, limited to rule-based techniques and approaches built on AI in the sense that such 