<a href="https://colab.research.google.com/github/bisht16sumit/Multi-Agent-Channel/blob/main/huggingface_interface/colab_inference.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# IndicTrans2 HF Inference

We provide an example notebook on how to use our IndicTrans2 models which were originally trained with the fairseq to HuggingFace transformers for inference purpose.


## Setup

Please run the cells below to install the necessary dependencies.


In [1]:
%%capture
!git clone https://github.com/AI4Bharat/IndicTrans2.git

In [2]:
%%capture
%cd /content/IndicTrans2/huggingface_interface

In [3]:
%%capture
!python3 -m pip install nltk sacremoses pandas regex mock transformers==4.53.2 mosestokenizer
!python3 -c "import nltk; nltk.download('punkt')"
!python3 -m pip install bitsandbytes scipy accelerate datasets
!python3 -m pip install sentencepiece

!git clone https://github.com/VarunGumma/IndicTransToolkit.git
%cd IndicTransToolkit
!python3 -m pip install --editable ./
%cd ..

**IMPORTANT : Restart your run-time first and then run the cells below.**

## Inference


In [1]:
import torch
from transformers import AutoModelForSeq2SeqLM, BitsAndBytesConfig, AutoTokenizer
from IndicTransToolkit.processor import IndicProcessor

BATCH_SIZE = 4
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
quantization = None

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Using device: {device}")

if device == 'cuda':
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
else:
    print("CPU mode - slower but will work")

Using device: cuda
GPU: Tesla T4
VRAM: 15.6 GB


In [3]:
import json
import torch
from pathlib import Path
from typing import List
from datetime import datetime
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

In [4]:
COMMENTARY_PATH = Path("commentary.json")
OUTPUT_PATH = Path("commentary_translated.json")
CHECKPOINT_DIR = Path("./checkpoints")
CHECKPOINT_DIR.mkdir(exist_ok=True)

print(f"Loading from: {COMMENTARY_PATH}")

with open(COMMENTARY_PATH, 'r', encoding='utf-8') as f:
    all_data = json.load(f)

swami_entries = [e for e in all_data if e.get('author_id') == 1]
swami_entries.sort(key=lambda x: x.get('verseNumber', 0))

print(f"Total entries: {len(all_data)}")
print(f"Swami Ramsukhdas: {len(swami_entries)}")
print(f"Language: {swami_entries[0].get('lang')}")

Loading from: commentary.json
Total entries: 11186
Swami Ramsukhdas: 701
Language: hindi


In [5]:
# Login to HuggingFace
from huggingface_hub import login

login()  # This will prompt you to paste a token


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
MODEL_NAME = "ai4bharat/indictrans2-indic-indic-1B"
SRC_LANG = "san_Deva"
TGT_LANG = "hin_Deva"
# MODEL_NAME = "ai4bharat/indictrans2-indic-en-1B"
# SRC_LANG = "hin_Deva"
# TGT_LANG = "eng_Latn"

print(f"Loading model: {MODEL_NAME}")
print("This takes 2-3 minutes...\n")

# tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
# model = AutoModelForSeq2SeqLM.from_pretrained(
#     MODEL_NAME,
#     trust_remote_code=True,
#     torch_dtype=torch.float16 if device == 'cuda' else torch.float32
# )
# model = model.to(device)
# model.eval()

# ip = IndicProcessor(inference=True)

print("✓ Model ready!")

In [6]:
def translate_text(text: str) -> str:
    """Translate Hindi to English without IndicProcessor preprocessing."""
    if not text or not text.strip():
        return ""

    try:
        # Direct tokenization without preprocessing
        inputs = tokenizer(
            text,
            truncation=True,
            padding=True,
            return_tensors="pt",
            max_length=512
        ).to(device)

        # Generate translation
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_length=256,
                num_beams=3,
                early_stopping=True
            )

        # Decode directly
        translated = tokenizer.decode(outputs[0], skip_special_tokens=True)
        return translated.strip()

    except Exception as e:
        print(f"Translation error: {e}")
        return ""

print("✓ Translation function ready")


✓ Translation function ready


In [6]:
def initialize_model_and_tokenizer(ckpt_dir, quantization):
    if quantization == "4-bit":
        qconfig = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_compute_dtype=torch.bfloat16,
        )
    elif quantization == "8-bit":
        qconfig = BitsAndBytesConfig(
            load_in_8bit=True,
            bnb_8bit_use_double_quant=True,
            bnb_8bit_compute_dtype=torch.bfloat16,
        )
    else:
        qconfig = None

    tokenizer = AutoTokenizer.from_pretrained(ckpt_dir, trust_remote_code=True)
    model = AutoModelForSeq2SeqLM.from_pretrained(
        ckpt_dir,
        trust_remote_code=True,
        low_cpu_mem_usage=True,
        quantization_config=qconfig,
    )

    if qconfig == None:
        model = model.to(DEVICE)
        if DEVICE == "cuda":
            model.half()

    model.eval()

    return tokenizer, model


def batch_translate(input_sentences, src_lang, tgt_lang, model, tokenizer, ip):
    translations = []
    for i in range(0, len(input_sentences), BATCH_SIZE):
        batch = input_sentences[i : i + BATCH_SIZE]

        # Preprocess the batch and extract entity mappings
        batch = ip.preprocess_batch(batch, src_lang=src_lang, tgt_lang=tgt_lang)

        # Tokenize the batch and generate input encodings
        inputs = tokenizer(
            batch,
            truncation=True,
            padding="longest",
            return_tensors="pt",
            return_attention_mask=True,
        ).to(DEVICE)

        # Generate translations using the model
        with torch.no_grad():
            generated_tokens = model.generate(
                **inputs,
                use_cache=True,
                min_length=0,
                max_length=256,
                num_beams=5,
                num_return_sequences=1,
            )

        # Decode the generated tokens into text
        generated_tokens = tokenizer.batch_decode(
            generated_tokens,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=True,
        )

        # Postprocess the translations, including entity replacement
        translations += ip.postprocess_batch(generated_tokens, lang=tgt_lang)

        del inputs
        torch.cuda.empty_cache()

    return translations

### English to Indic Example


In [7]:
import re

def wrap_quoted_entities(text):
    """
    Finds all text between single quotes and wraps it for IndicTrans2.
    Example: 'तु' becomes <register>'तु'</register>
    """
    if not text:
        return ""

    # This regex looks for: ' followed by any characters (non-greedy) followed by '
    # It wraps the entire thing, including the quotes, to keep the output clean.
    protected_text = re.sub(r"('(.*?)')", r"<register>\1</register>", text)

    return protected_text

In [8]:
def translate_long_text(text, src_lang, tgt_lang, model, tokenizer, ip, batch_size=1):
    # Ensure text isn't empty
    if not text or len(text.strip()) == 0:
        return ""

    # 1. Manual Clean: NLTK sometimes chokes on '।।'
    clean_text = text.replace('।।', '।').strip()
    sentences = nltk.sent_tokenize(clean_text)

    translated_sentences = []

    for i in range(0, len(sentences), batch_size):
        batch = sentences[i : i + batch_size]

        # 2. Debug: See what is actually being sent to the model
        # If this is empty, the issue is in the 'ip.preprocess_batch' logic
        batch_prepped = ip.preprocess_batch(batch, src_lang=src_lang, tgt_lang=tgt_lang)

        if not batch_prepped:
            continue

        inputs = tokenizer(
            batch_prepped,
            truncation=True,
            padding="longest",
            return_tensors="pt",
        ).to(DEVICE)

        with torch.no_grad():
            generated_tokens = model.generate(
                **inputs,
                use_cache=True,
                max_length=256,
                num_beams=5,
            )

        decoded = tokenizer.batch_decode(
            generated_tokens,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=True
        )

        post_processed = ip.postprocess_batch(decoded, lang=tgt_lang)
        translated_sentences.extend(post_processed)

        # 3. Aggressive Memory Release
        del inputs, generated_tokens
        torch.cuda.empty_cache()
        if 'batch_prepped' in locals(): del batch_prepped

    return " ".join(translated_sentences)

In [9]:
import nltk
nltk.download('punkt')
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [33]:
# Replace 'model' and 'tokenizer' with your variable names
import torch
import gc

# 1. Garbage collect Python objects
gc.collect()

# 2. Clear the actual GPU cache
torch.cuda.empty_cache()

# 3. (Optional) Check memory again
print(f"Memory allocated: {torch.cuda.memory_allocated() / 1024**2:.2f} MB")
print(f"Memory reserved: {torch.cuda.memory_reserved() / 1024**2:.2f} MB")


Memory allocated: 9.12 MB
Memory reserved: 22.00 MB


In [None]:
en_indic_ckpt_dir = "ai4bharat/indictrans2-indic-indic-1B"  # ai4bharat/indictrans2-en-indic-dist-200M
en_indic_tokenizer, en_indic_model = initialize_model_and_tokenizer(en_indic_ckpt_dir, quantization)
print(en_indic_tokenizer.model_max_length)
print("Testing on first 3 entries...\n")
ip = IndicProcessor(inference=True)
src_lang, tgt_lang = "san_Deva", "hin_Deva"
test_entries = swami_entries[:3]
sans_texts = [e.get('description', '') for e in test_entries]

translated_texts = []

for idx, text in enumerate(sans_texts):
    print(f"Processing Verse {idx+1}...")
    if not text.strip():
        translated_texts.append("[Empty Input]")
        continue

    try:
        result = translate_long_text(text, src_lang, tgt_lang, en_indic_model, en_indic_tokenizer, ip)
        translated_texts.append(result)
    except Exception as e:
        print(f"Error in Verse {idx+1}: {e}")
        translated_texts.append("[Translation Error]")

# Display results
for sans, hindi in zip(sans_texts, translated_texts):
    print("============================================")
    print(f"sans {sans}:")
    print(f"hindi: {hindi}")
    print(f"\n")

del en_indic_tokenizer, en_indic_model
torch.cuda.empty_cache()



In [17]:
torch.cuda.empty_cache()

In [None]:
en_indic_ckpt_dir = "ai4bharat/indictrans2-indic-indic-1B"  # ai4bharat/indictrans2-en-indic-dist-200M
en_indic_tokenizer, en_indic_model = initialize_model_and_tokenizer(en_indic_ckpt_dir, quantization)
ip = IndicProcessor(inference=True)
src_lang, tgt_lang = "san_Deva", "hin_Deva"

print(en_indic_tokenizer.model_max_length)

def save_checkpoint(data, idx):
    """Save checkpoint."""
    path = CHECKPOINT_DIR / f"progress_{idx:04d}.json"
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=2)
    print(f"  ✓ Checkpoint: {len(data)} entries saved")

batch_size = 16 if device == 'cuda' else 1
checkpoint_interval = 50

print(f"Batch size: {batch_size}")
print(f"Checkpoint interval: {checkpoint_interval}\n")

results = []
start_time = datetime.now()

for batch_idx in tqdm(range(0, len(swami_entries), batch_size), desc="Translating"):
    batch_entries = swami_entries[batch_idx:batch_idx + batch_size]
    hindi_texts = [e.get('description', '') for e in batch_entries]

    # Translate batch
    for entry, hindi_text in zip(batch_entries, hindi_texts):
        try:
            english_text = translate_long_text(hindi_text, src_lang, tgt_lang, en_indic_model, en_indic_tokenizer, ip)

            result = {
                'id': entry.get('id'),
                'verse_id': entry.get('verse_id'),
                'verseNumber': entry.get('verseNumber'),
                'authorName': entry.get('authorName'),
                'author_id': entry.get('author_id'),
                'lang': 'english',
                'language_id': 1,
                'description': english_text,
                'source_lang': 'hindi',
                'translation_model': 'indictrans2-indic-en-1B',
                'translation_date': start_time.isoformat()
            }
            results.append(result)

        except Exception as e:
            print(f"Failed verse {entry.get('verseNumber')}: {e}")

    # Save checkpoint
    if (batch_idx + batch_size) % checkpoint_interval == 0:
        save_checkpoint(results, batch_idx + batch_size)

end_time = datetime.now()
elapsed = end_time - start_time

# ============================================
# SAVE FINAL OUTPUT
# ============================================
with open('commentary_translated.json', 'w', encoding='utf-8') as f:
    json.dump(results, f, ensure_ascii=False, indent=2)

print(f"\n✓ Translation Complete!")
print(f"  Total entries: {len(swami_entries)}")
print(f"  Successfully translated: {len(results)}")
print(f"  Time elapsed: {elapsed}")
print(f"  Avg per entry: {elapsed.total_seconds() / len(swami_entries):.2f}s")
print(f"  File saved: commentary_translated.json")

# Cleanup
del en_indic_tokenizer, en_indic_model
torch.cuda.empty_cache()
print("\n✓ Memory cleaned up!")



256
Batch size: 16
Checkpoint interval: 50



Translating:  57%|█████▋    | 25/44 [39:02<24:45, 78.19s/it]

  ✓ Checkpoint: 400 entries saved


Translating:  80%|███████▉  | 35/44 [1:26:13<1:07:56, 452.96s/it]

In [None]:
# Check how many translations are actually done
if 'results' in dir():
    print(f"Total results in memory: {len(results)}")
    if len(results) > 0:
        print(f"First: Verse {results[0]['verseNumber']}")
        print(f"Last: Verse {results[-1]['verseNumber']}")
        print(f"Sample: {results[0]['description'][:100]}")
else:
    print("results variable not found")


In [None]:
import nltk
from nltk.tokenize import sent_tokenize

# Download punkt tokenizer for sentence splitting
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')

def translate_long_text(text, src_lang, tgt_lang, model, tokenizer, ip, batch_size=1):
    """
    Translate long text by splitting into sentences and translating in batches.
    Handles variable-length inputs efficiently.
    """
    # Ensure text isn't empty
    if not text or len(text.strip()) == 0:
        return ""

    try:
        # 1. Clean: Remove '।।' and normalize
        clean_text = text.replace('।।', '।').strip()
        sentences = sent_tokenize(clean_text)

        if not sentences:
            return ""

        translated_sentences = []

        # 2. Process sentences in batches
        for i in range(0, len(sentences), batch_size):
            batch = sentences[i : i + batch_size]

            # 3. Preprocess batch with IndicProcessor
            batch_prepped = ip.preprocess_batch(batch, src_lang=src_lang, tgt_lang=tgt_lang)

            # 4. Tokenize
            inputs = tokenizer(
                batch_prepped,
                truncation=True,
                padding=True,
                return_tensors="pt",
                max_length=512
            ).to(device)

            # 5. Generate translations
            with torch.no_grad():
                outputs = model.generate(
                    **inputs,
                    max_length=256,
                    num_beams=3,
                    early_stopping=True
                )

            # 6. Decode and postprocess
            for output in outputs:
                translated = tokenizer.decode(output, skip_special_tokens=True)
                translated = ip.postprocess(translated, lang=tgt_lang)
                translated_sentences.append(translated.strip())

        # Join sentences back together
        result = " ".join(translated_sentences)
        return result

    except Exception as e:
        print(f"Error translating text: {e}")
        return "[Translation Error]"

def translate_batch(texts, src_lang, tgt_lang, model, tokenizer, ip, batch_size=8):
    """Translate multiple long texts, each handling sentence-level batching."""
    translations = []

    for text in texts:
        translated = translate_long_text(
            text,
            src_lang,
            tgt_lang,
            model,
            tokenizer,
            ip,
            batch_size=batch_size
        )
        translations.append(translated)

    return translations




In [None]:
# ============================================
# FULL TRANSLATION WITH SMART BATCHING
# ============================================
batch_size = 32  # Number of entries to process together
sentence_batch = 4  # Number of sentences to process together within each entry

results = []
start_time = datetime.now()

for batch_idx in tqdm(range(0, len(swami_entries), batch_size), desc="Translating"):
    batch_entries = swami_entries[batch_idx:batch_idx + batch_size]
    hindi_texts = [e.get('description', '') for e in batch_entries]

    # Translate entire batch with sentence-level batching
    english_texts = translate_batch(
        hindi_texts,
        src_lang,
        tgt_lang,
        model,
        tokenizer,
        ip,
        batch_size=sentence_batch
    )

    # Save results
    for entry, english_text in zip(batch_entries, english_texts):
        result = {
            'id': entry.get('id'),
            'verse_id': entry.get('verse_id'),
            'verseNumber': entry.get('verseNumber'),
            'authorName': entry.get('authorName'),
            'author_id': entry.get('author_id'),
            'lang': 'english',
            'language_id': 1,
            'description': english_text,
            'source_lang': 'hindi',
            'translation_model': 'indictrans2-indic-en-1B',
            'translation_date': start_time.isoformat()
        }
        results.append(result)

    # Checkpoint
    if (batch_idx + batch_size) % 50 == 0:
        save_checkpoint(results, batch_idx + batch_size)

end_time = datetime.now()
elapsed = end_time - start_time

# Save final output
with open('commentary_translated.json', 'w', encoding='utf-8') as f:
    json.dump(results, f, ensure_ascii=False, indent=2)

print(f"\n✓ Translation Complete!")
print(f"  Total entries: {len(results)}")
print(f"  Time: {elapsed}")
print(f"  Avg per entry: {elapsed.total_seconds() / len(results):.2f}s")

# Cleanup
del tokenizer, model
torch.cuda.empty_cache()

In [18]:
del en_indic_tokenizer, en_indic_model

In [None]:
en_indic_ckpt_dir = "ai4bharat/indictrans2-en-indic-1B"  # ai4bharat/indictrans2-en-indic-dist-200M
en_indic_tokenizer, en_indic_model = initialize_model_and_tokenizer(en_indic_ckpt_dir, quantization)

ip = IndicProcessor(inference=True)

en_sents = [
    "When I was young, I used to go to the park every day.",
    "He has many old books, which he inherited from his ancestors.",
    "I can't figure out how to solve my problem.",
    "She is very hardworking and intelligent, which is why she got all the good marks.",
    "We watched a new movie last week, which was very inspiring.",
    "If you had met me at that time, we would have gone out to eat.",
    "She went to the market with her sister to buy a new sari.",
    "Raj told me that he is going to his grandmother's house next month.",
    "All the kids were having fun at the party and were eating lots of sweets.",
    "My friend has invited me to his birthday party, and I will give him a gift.",
]

src_lang, tgt_lang = "eng_Latn", "hin_Deva"
hi_translations = batch_translate(en_sents, src_lang, tgt_lang, en_indic_model, en_indic_tokenizer, ip)

print(f"\n{src_lang} - {tgt_lang}")
for input_sentence, translation in zip(en_sents, hi_translations):
    print(f"{src_lang}: {input_sentence}")
    print(f"{tgt_lang}: {translation}")

# flush the models to free the GPU memory
del en_indic_tokenizer, en_indic_model

### Indic to English Example

In [None]:
indic_en_ckpt_dir = "ai4bharat/indictrans2-indic-en-1B"  # ai4bharat/indictrans2-indic-en-dist-200M
indic_en_tokenizer, indic_en_model = initialize_model_and_tokenizer(indic_en_ckpt_dir, quantization)

ip = IndicProcessor(inference=True)

hi_sents = [
    "जब मैं छोटा था, मैं हर रोज़ पार्क जाता था।",
    "उसके पास बहुत सारी पुरानी किताबें हैं, जिन्हें उसने अपने दादा-परदादा से विरासत में पाया।",
    "मुझे समझ में नहीं आ रहा कि मैं अपनी समस्या का समाधान कैसे ढूंढूं।",
    "वह बहुत मेहनती और समझदार है, इसलिए उसे सभी अच्छे मार्क्स मिले।",
    "हमने पिछले सप्ताह एक नई फिल्म देखी जो कि बहुत प्रेरणादायक थी।",
    "अगर तुम मुझे उस समय पास मिलते, तो हम बाहर खाना खाने चलते।",
    "वह अपनी दीदी के साथ बाजार गयी थी ताकि वह नई साड़ी खरीद सके।",
    "राज ने मुझसे कहा कि वह अगले महीने अपनी नानी के घर जा रहा है।",
    "सभी बच्चे पार्टी में मज़ा कर रहे थे और खूब सारी मिठाइयाँ खा रहे थे।",
    "मेरे मित्र ने मुझे उसके जन्मदिन की पार्टी में बुलाया है, और मैं उसे एक तोहफा दूंगा।",
]
src_lang, tgt_lang = "hin_Deva", "eng_Latn"
en_translations = batch_translate(hi_sents, src_lang, tgt_lang, indic_en_model, indic_en_tokenizer, ip)


print(f"\n{src_lang} - {tgt_lang}")
for input_sentence, translation in zip(hi_sents, en_translations):
    print(f"{src_lang}: {input_sentence}")
    print(f"{tgt_lang}: {translation}")

# flush the models to free the GPU memory
del indic_en_tokenizer, indic_en_model


hin_Deva - eng_Latn
hin_Deva: जब मैं छोटा था, मैं हर रोज़ पार्क जाता था।
eng_Latn: When I was young, I used to go to the park every day.
hin_Deva: उसके पास बहुत सारी पुरानी किताबें हैं, जिन्हें उसने अपने दादा-परदादा से विरासत में पाया।
eng_Latn: She has a lot of old books, which she inherited from her grandparents.
hin_Deva: मुझे समझ में नहीं आ रहा कि मैं अपनी समस्या का समाधान कैसे ढूंढूं।
eng_Latn: I don't know how to find a solution to my problem.
hin_Deva: वह बहुत मेहनती और समझदार है, इसलिए उसे सभी अच्छे मार्क्स मिले।
eng_Latn: He is very hardworking and understanding, so he got all the good marks.
hin_Deva: हमने पिछले सप्ताह एक नई फिल्म देखी जो कि बहुत प्रेरणादायक थी।
eng_Latn: We saw a new movie last week that was very inspiring.
hin_Deva: अगर तुम मुझे उस समय पास मिलते, तो हम बाहर खाना खाने चलते।
eng_Latn: If you'd given me a pass at that time, we'd have gone out to eat.
hin_Deva: वह अपनी दीदी के साथ बाजार गयी थी ताकि वह नई साड़ी खरीद सके।
eng_Latn: She had gone to the market wit

### Indic to Indic Example


In [None]:
indic_indic_ckpt_dir = "ai4bharat/indictrans2-indic-indic-1B"  # ai4bharat/indictrans2-indic-indic-dist-320M
indic_indic_tokenizer, indic_indic_model = initialize_model_and_tokenizer(indic_indic_ckpt_dir, quantization)

ip = IndicProcessor(inference=True)

hi_sents = [
    "जब मैं छोटा था, मैं हर रोज़ पार्क जाता था।",
    "उसके पास बहुत सारी पुरानी किताबें हैं, जिन्हें उसने अपने दादा-परदादा से विरासत में पाया।",
    "मुझे समझ में नहीं आ रहा कि मैं अपनी समस्या का समाधान कैसे ढूंढूं।",
    "वह बहुत मेहनती और समझदार है, इसलिए उसे सभी अच्छे मार्क्स मिले।",
    "हमने पिछले सप्ताह एक नई फिल्म देखी जो कि बहुत प्रेरणादायक थी।",
    "अगर तुम मुझे उस समय पास मिलते, तो हम बाहर खाना खाने चलते।",
    "वह अपनी दीदी के साथ बाजार गयी थी ताकि वह नई साड़ी खरीद सके।",
    "राज ने मुझसे कहा कि वह अगले महीने अपनी नानी के घर जा रहा है।",
    "सभी बच्चे पार्टी में मज़ा कर रहे थे और खूब सारी मिठाइयाँ खा रहे थे।",
    "मेरे मित्र ने मुझे उसके जन्मदिन की पार्टी में बुलाया है, और मैं उसे एक तोहफा दूंगा।",
]
src_lang, tgt_lang = "hin_Deva", "mar_Deva"
mr_translations = batch_translate(hi_sents, src_lang, tgt_lang, indic_indic_model, indic_indic_tokenizer, ip)

print(f"\n{src_lang} - {tgt_lang}")
for input_sentence, translation in zip(hi_sents, mr_translations):
    print(f"{src_lang}: {input_sentence}")
    print(f"{tgt_lang}: {translation}")

# flush the models to free the GPU memory
del indic_indic_tokenizer, indic_indic_model


hin_Deva - mar_Deva
hin_Deva: जब मैं छोटा था, मैं हर रोज़ पार्क जाता था।
mar_Deva: मी लहान होतो तेव्हा मी दररोज उद्यानाला जायचे.
hin_Deva: उसके पास बहुत सारी पुरानी किताबें हैं, जिन्हें उसने अपने दादा-परदादा से विरासत में पाया।
mar_Deva: तिच्याकडे बरीच जुनी पुस्तके आहेत, जी तिला तिच्या आजोबांकडून वारशाने मिळाली आहेत.
hin_Deva: मुझे समझ में नहीं आ रहा कि मैं अपनी समस्या का समाधान कैसे ढूंढूं।
mar_Deva: माझ्या समस्येवर तोडगा कसा काढायचा हे मला समजत नाही.
hin_Deva: वह बहुत मेहनती और समझदार है, इसलिए उसे सभी अच्छे मार्क्स मिले।
mar_Deva: तो खूप मेहनती आणि बुद्धिमान आहे, त्यामुळे त्याला सर्व चांगले गुण मिळाले.
hin_Deva: हमने पिछले सप्ताह एक नई फिल्म देखी जो कि बहुत प्रेरणादायक थी।
mar_Deva: आम्ही गेल्या आठवड्यात एक नवीन चित्रपट पाहिला जो खूप प्रेरणादायी होता.
hin_Deva: अगर तुम मुझे उस समय पास मिलते, तो हम बाहर खाना खाने चलते।
mar_Deva: जर तुम्हाला त्या वेळी मला पास मिळाला तर आम्ही बाहेर जेवायला जाऊ.
hin_Deva: वह अपनी दीदी के साथ बाजार गयी थी ताकि वह नई साड़ी खरीद सके।
mar_Deva: ती तिच्या ब