In [8]:
import torch
from transformers import BartForConditionalGeneration, BartTokenizer

# Load the model and tokenizer
model = BartForConditionalGeneration.from_pretrained("facebook/bart-large", forced_bos_token_id=0).cuda()
tok = BartTokenizer.from_pretrained("facebook/bart-large")

# Example phrase
example_english_phrase = "UN Chief Says There Is No <mask> in Syria"
batch = tok(example_english_phrase, return_tensors="pt").to('cuda')

# Generate output
generated_ids = model.generate(batch["input_ids"])

# Decode the output
decoded_output = tok.batch_decode(generated_ids, skip_special_tokens=True)
print(decoded_output)


memory_allocated_mib = torch.cuda.memory_allocated() / 1024 ** 2
memory_reserved_mib = torch.cuda.memory_reserved() / 1024 ** 2

print(f"Memory Allocated: {memory_allocated_mib:.2f} MiB")
print(f"Memory Reserved: {memory_reserved_mib:.2f} MiB")

['UN Chief Says There Is No Plan to Stop Chemical Weapons in Syria']
Memory Allocated: 1558.20 MiB
Memory Reserved: 3162.00 MiB


# Bert

In [4]:
import torch
from transformers import BartForConditionalGeneration, BartTokenizer

# Measure memory usage before loading the model
initial_memory_allocated = torch.cuda.memory_allocated() / 1024 ** 2
initial_memory_reserved = torch.cuda.memory_reserved() / 1024 ** 2

# Load the model and tokenizer
model = BartForConditionalGeneration.from_pretrained("facebook/bart-large", forced_bos_token_id=0).cuda()
tok = BartTokenizer.from_pretrained("facebook/bart-large")


final_memory_allocated = torch.cuda.memory_allocated() / 1024 ** 2
final_memory_reserved = torch.cuda.memory_reserved() / 1024 ** 2

allocated_diff = final_memory_allocated - initial_memory_allocated
reserved_diff = final_memory_reserved - initial_memory_reserved

print(f"Memory Allocated by the Model: {allocated_diff:.2f} MiB")
print(f"Memory Reserved by the Model: {reserved_diff:.2f} MiB")

# Example phrase and generation
example_english_phrase = "UN Chief Says There Is No <mask> in Syria"
batch = tok(example_english_phrase, return_tensors="pt").to('cuda')

# Generate output
generated_ids = model.generate(batch["input_ids"])

# Decode the output
decoded_output = tok.batch_decode(generated_ids, skip_special_tokens=True)
print(decoded_output)


Memory Allocated by the Model: 0.00 MiB
Memory Reserved by the Model: 1554.00 MiB
['UN Chief Says There Is No Plan to Stop Chemical Weapons in Syria']


In [10]:
from transformers import pipeline

summarizer = pipeline("summarization", model="Falconsai/text_summarization")

ARTICLE = """ 
UN Chief Says There Is No Plan to Stop Chemical Weapons in Syria
"""
print(summarizer(ARTICLE, max_length=15, min_length=10, do_sample=False))



[{'summary_text': 'UN Chief Says There Is No Plan to Stop Chemical Weapons'}]
