In [1]:
from transformers.utils import logging
logging.set_verbosity_error()

In [2]:
from transformers import pipeline 
import torch

In [3]:
translator = pipeline(task="translation",
                      model="facebook/nllb-200-distilled-600M",
                      torch_dtype=torch.bfloat16)     #bfloat16 to compress

In [4]:
text = """\
My puppy is adorable, \
His name is Leo.
Leo is friendly.
He is also thoughtful. \
We all have nice pets!"""

In [5]:
text_translated = translator(text,
                             src_lang="eng_Latn",
                             tgt_lang="hin_Deva")

In [6]:
print(text_translated)

[{'translation_text': '‡§Æ‡•á‡§∞‡§æ ‡§™‡§ø‡§≤‡•ç‡§≤‡§æ ‡§™‡•ç‡§Ø‡§æ‡§∞‡§æ ‡§π‡•à, ‡§â‡§∏‡§ï‡§æ ‡§®‡§æ‡§Æ ‡§≤‡§ø‡§Ø‡•ã ‡§π‡•à ‡§≤‡§ø‡§Ø‡•ã ‡§¶‡•ã‡§∏‡•ç‡§§‡§æ‡§®‡§æ ‡§π‡•à, ‡§µ‡§π ‡§≠‡•Ä ‡§µ‡§ø‡§ö‡§æ‡§∞‡§∂‡•Ä‡§≤ ‡§π‡•à ‡§π‡§Æ‡§æ‡§∞‡•á ‡§™‡§æ‡§∏ ‡§∏‡§≠‡•Ä ‡§Ö‡§ö‡•ç‡§õ‡•á ‡§™‡§æ‡§≤‡§§‡•Ç ‡§ú‡§æ‡§®‡§µ‡§∞ ‡§π‡•à‡§Ç'}]


<h3> Free up memory by calling garbage collector</h3>

In [7]:
import gc
del translator
gc.collect()

8

<h2>Build the summarization pipeline using ü§ó Transformers Library</h2>

In [8]:
summarizer = pipeline(task="summarization",
                      model="facebook/bart-large-cnn",
                      torch_dtype=torch.bfloat16)

In [9]:
text= """ Large Language Models (LLMs) have significantly advanced natural language processing, but tokenization-based architectures bring notable limitations. These models depend on fixed-vocabulary tokenizers like Byte Pair Encoding (BPE) to segment text into predefined tokens before training. While functional, tokenization can introduce inefficiencies and biases, particularly when dealing with multilingual data, noisy inputs, or long-tail distributions. Additionally, tokenization enforces uniform compute allocation across tokens, regardless of their complexity, limiting scalability and generalization for diverse data types.Training on byte-level sequences has traditionally been computationally intensive due to the long sequence lengths required. Even with improvements in self-attention mechanisms, tokenization continues to be a bottleneck, reducing robustness and adaptability in high-entropy tasks. These challenges highlight the need for a more flexible and efficient approach.Meta AI‚Äôs Byte Latent Transformer (BLT) seeks to address these issues by eliminating tokenization altogether. \
BLT is a tokenizer-free architecture that processes raw byte sequences and dynamically groups them into patches based on data complexity. \
This approach enables efficient scaling, matching, or exceeding the performance of tokenization-based LLMs while improving robustness and inference efficiency."""

In [10]:
summary = summarizer(text,
                     min_length=100,
                     max_length=1000)

In [11]:
print(summary)

[{'summary_text': 'Large Language Models (LLMs) have significantly advanced natural language processing. These models depend on fixed-vocabulary tokenizers to segment text into predefined tokens. Tokenization can introduce inefficiencies and biases, particularly when dealing with multilingual data, noisy inputs, or long-tail distributions.Meta AI‚Äôs Byte Latent Transformer (BLT) seeks to address these issues by eliminating tokenization altogether. BLT is a tokenizer-free architecture that processes raw byte sequences and dynamically groups them into patches based on data complexity.'}]
