# Machine Translation

### Downloading and Saving the Model

In [None]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

model_name = "facebook/nllb-200-distilled-1.3B"
save_directory = "./nllb-200-distilled-1.3B"

# Resolve the tokenizer and the seq2seq model from the same identifier.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Write both artifacts into the same target directory, tokenizer first.
for artifact in (tokenizer, model):
    artifact.save_pretrained(save_directory)

print(f"Model and tokenizer saved to {save_directory}")


### Translation Framework

### For longer texts

In [26]:
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Use the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")


# Identifier handed to from_pretrained below.
model_id = "facebook/nllb-200-distilled-1.3B"
# NOTE(review): despite its name, this is the model identifier above, not the
# "./nllb-200-distilled-1.3B" directory written by the download cell.
# Confirm whether loading from that saved copy was intended.
local_model_path = model_id

# Load the tokenizer and model, then move the model onto the selected device.
tokenizer = AutoTokenizer.from_pretrained(local_model_path)
model = AutoModelForSeq2SeqLM.from_pretrained(local_model_path).to(device)

Using device: cuda




In [38]:
def split_text(text, max_length):
    """Split ``text`` into whitespace-delimited chunks of bounded length.

    Words are packed greedily, in order, into chunks joined by single spaces.
    Each word is budgeted as ``len(word) + 1`` characters (the word plus one
    separator), and a chunk is closed as soon as adding the next word would
    push that budget past ``max_length``.

    Args:
        text: Input string. It is tokenised with ``str.split()``, so any run
            of whitespace (including newlines) acts as one separator.
        max_length: Character budget per chunk.

    Returns:
        A list of non-empty chunk strings. Empty or whitespace-only input
        yields ``[]``. A word longer than the budget is never cut; it is
        returned as a chunk of its own.
    """
    chunks = []
    current_chunk = []
    current_length = 0

    for word in text.split():
        cost = len(word) + 1
        # Only flush when there is something to flush: an oversized first word
        # must not produce an empty leading chunk.
        if current_chunk and current_length + cost > max_length:
            chunks.append(" ".join(current_chunk))
            current_chunk = []
            current_length = 0
        current_chunk.append(word)
        current_length += cost

    if current_chunk:
        chunks.append(" ".join(current_chunk))

    return chunks

def translate_text(text, src_lang, tgt_lang, max_length=512):
    """Translate ``text`` from ``src_lang`` to ``tgt_lang`` chunk by chunk.

    The text is cut into pieces with ``split_text``; each piece is translated
    independently with the module-level ``tokenizer``, ``model`` and
    ``device``, and the translated pieces are joined with single spaces.

    Args:
        text: Text to translate.
        src_lang: Language code of ``text`` (e.g. ``"eng_Latn"``). It is
            assigned to ``tokenizer.src_lang``, so the shared tokenizer keeps
            this setting after the call.
        tgt_lang: Language code to translate into (e.g. ``"tel_Telu"``).
        max_length: Per-chunk budget forwarded to ``split_text``; it is
            counted in characters, not tokens.

    Returns:
        The translated text as one string; ``""`` when ``text`` has no words.

    Raises:
        KeyError: If ``tgt_lang`` is not a token known to the tokenizer.
    """
    # The source language was previously ignored, so the tokenizer silently
    # used whatever language it was last configured with.
    tokenizer.src_lang = src_lang

    # Language codes are vocabulary tokens; look the id up through the generic
    # token API instead of the deprecated ``lang_code_to_id`` mapping.
    tgt_lang_id = tokenizer.convert_tokens_to_ids(tgt_lang)
    if tgt_lang_id is None or tgt_lang_id == tokenizer.unk_token_id:
        # Keep the exception type the old mapping lookup raised.
        raise KeyError(tgt_lang)

    translated_chunks = []
    for chunk in split_text(text, max_length):
        if not chunk:
            # Nothing to translate in an empty piece.
            continue
        inputs = tokenizer(
            chunk, return_tensors="pt", truncation=True, padding="longest"
        ).to(device)
        # Pass the whole encoding so the attention mask reaches generate().
        translated_tokens = model.generate(
            **inputs,
            forced_bos_token_id=tgt_lang_id,
            num_beams=4,
            early_stopping=True,
        )
        translated_chunks.append(
            tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
        )

    return " ".join(translated_chunks)

In [39]:
# Language codes (e.g. "eng_Latn", "tel_Telu") are listed at https://github.com/facebookresearch/flores/tree/main/flores200#languages-in-flores-200
src_text = """ 
"Atomic Habits" by James Clear is a self-help book that offers a comprehensive guide to building good habits and 
breaking bad ones, providing actionable strategies and techniques to help readers create lasting changes in their lives. 
The book introduces the concept of "atomic habits," which refers to small habits that, when practiced consistently, can lead 
to significant improvements in our lives, emphasizing that small changes can add up over time, much like the way atoms combine
to form molecules. Clear illustrates the concept of the "aggregation of marginal gains" using the example of the British cycling 
team, which dominated the Tour de France by making small improvements in nutrition, training, and equipment, and applies this 
concept to habit formation, where small, incremental changes can lead to significant gains over time. The book outlines the 4 Laws 
of Behavior Change, which are: make it obvious, make it attractive, make it easy, and make it satisfying, and provides strategies 
for building good habits, such as starting small, creating an implementation intention, using visual cues, and celebrating milestones.
 Clear also provides strategies for breaking bad habits, including reframing your identity, finding alternative behaviors, and 
 using the 4 Laws of Behavior Change to reverse engineer the bad habit. He emphasizes the importance of tracking progress, being 
 patient, and staying consistent in order to achieve success, and provides examples of how small changes can lead to significant 
 improvements in various areas of life, such as fitness, productivity, and relationships. The book also explores the role of 
 identity and motivation in shaping our habits, and provides strategies for creating an environment that supports good habits, 
 such as eliminating distractions, creating a schedule, and using implementation intentions. Clear also discusses the importance 
 of community and accountability in maintaining good habits, and provides strategies for overcoming obstacles and setbacks, such 
 as using the "2-minute rule" and creating a "habit scorecard." Throughout the book, Clear draws on a wide range of sources, 
 including psychology, neuroscience, and real-life examples, to provide a comprehensive and accessible guide to building good 
 habits and breaking bad ones. By applying the strategies and techniques outlined in the book, readers can create lasting changes 
 in their lives and achieve their goals, and the book provides a valuable resource for anyone looking to improve their habits and 
 achieve success. The book is divided into four main sections, the first section focuses on the fundamentals of habits, the second 
 section focuses on how to build good habits, the third section focuses on how to break bad habits, and the fourth section focuses
on how to create an environment that supports good habits. Each section is filled with actionable strategies and techniques that 
readers can apply to their own lives, and the book provides a comprehensive and accessible guide to building good habits and 
breaking bad ones. Overall, "Atomic Habits" is a valuable resource for anyone looking to improve their habits and achieve success,
and provides a comprehensive and accessible guide to building good habits and breaking bad ones.
"""
# NOTE: split(" ") breaks on single space characters only, so words separated
# just by a newline count as one item and adjacent spaces yield empty items;
# the printed figure is a rough size indicator, not an exact word count.
print(len(src_text.split(" ")))

511


In [40]:
# Source is English (Latin script); target is Telugu.
# Use "hin_Deva" as the target instead to translate into Hindi.
src_lang, tgt_lang = "eng_Latn", "tel_Telu"

translated_text = translate_text(src_text, src_lang, tgt_lang)
print("Translated Text:", translated_text)

Translated Text: జేమ్స్ క్లీర్ రాసిన "అటామిక్ హబిట్స్" అనేది మంచి అలవాట్లను నిర్మించడానికి మరియు చెడు అలవాట్లను విచ్ఛిన్నం చేయడానికి సమగ్ర మార్గదర్శిని అందించే స్వయం సహాయక పుస్తకం, పాఠకులకు వారి జీవితాల్లో శాశ్వత మార్పులను సృష్టించడానికి సహాయపడే చర్య తీసుకోగల వ్యూహాలు మరియు పద్ధతులను అందిస్తుంది. ఈ పుస్తకం "అటామిక్ అలవాట్లు" అనే భావనను పరిచయం చేస్తుంది, ఇది నిరంతరం సాధించినప్పుడు మన జీవితాల్లో గణనీయమైన మెరుగుదలలకు దారితీసే చిన్న అలవాట్లను సూచిస్తుంది, అణువులు అణువులను ఏర్పరచడానికి మిళితమైన విధంగానే చిన్న మార్పులు కాలక్రమేణా జోడించవచ్చని నొక్కి చెబుతుంది. పోషకాహారం, శిక్షణ మరియు పరికరాలలో చిన్న మెరుగుదలలు చేయడం ద్వారా టూర్ డి ఫ్రాన్స్లో ఆధిపత్యం చెలాయించిన బ్రిటిష్ సైక్లింగ్ జట్టు ఉదాహరణను ఉపయోగించి "పరిమితి లాభాల కలయిక" అనే భావనను వివరిస్తుంది మరియు ఈ భావనను అలవాటు ఏర్పడటానికి వర్తిస్తుంది, ఇక్కడ చిన్న, ఇంక్రిమెంటల్ మార్పులు కాలక్రమేణా గణనీయమైన లాభాలకు దారితీస్తాయి. చిన్నదిగా ప్రారంభించడం, అమలు ఉద్దేశాన్ని సృష్టించడం, దృశ్యమాన సూచనలను ఉపయోగించడం మరియు మైలురాళ్లను జరుపుకోవడం వంటి అలవాట్

In [41]:
import gc

import torch

# Drop this notebook's reference to the model and collect garbage so the
# weights become unreachable.
del model
gc.collect()
# PyTorch's caching allocator keeps freed GPU blocks reserved for reuse;
# release them so the device memory is actually returned.
if torch.cuda.is_available():
    torch.cuda.empty_cache()
print("Model has been removed from the device and memory is freed.")

Model has been removed from the device and memory is freed.
