In [1]:
# !pip install accelerate
# !pip install nltk
# !pip install mosestokenizer
# !pip install evaluate
# !pip install rouge_score
# !pip install  nltk
# !git clone https://github.com/VarunGumma/IndicTransTokenizer
# !cd IndicTransTokenizer
# !pip install --editable /kaggle/working/IndicTransTokenizer
from datetime import datetime
import os

In [2]:
def loadSentences(path):
    with open(path, 'r') as f:
        return f.readlines()
    

def writeToFile(sentences, path):
  with open (path, 'w') as f:
    for sentence in sentences:
      f.write(sentence + '\n')

## Load the sentenses

In [3]:
englishSentences = loadSentences('/kaggle/input/sentence/English.txt')
hindiSentences = loadSentences('/kaggle/input/sentence/Hindi.txt')
banglaSentences = loadSentences('/kaggle/input/sentence/Bangla.txt')

In [4]:
print(englishSentences[:10])

['We now experience how something grows naturally.\n', 'They believe that the simultaneous emergence of India and China as two large economies and major powers with strategic and decisional autonomy, has implications of regional and global significance.\n', 'We are aware of the power of the collective resolve, hard work, sacrifice and devotion of our 125 crore citizens.\n', 'We have also decided to start an annual exchange of bilateral visits by 100 young people from science-related educational streams.\n', 'With their collective efforts, they have not only changed their own fortunes but have changed fortunes of the whole region.\n', '101 of these satellites were owned by countries such as USA, Israel, Switzerland, Netherlands, Kazakhstan and UAE.\n', 'We need to support such initiative and leadership through policies and practices.\n', 'India is one of the most investor-friendly economies in the world.\n', 'We agreed that there is an urgent need for all countries to decisively act aga

# IndicTrans


In [5]:
import sys
import torch
from IndicTransTokenizer import IndicTransTokenizer, IndicProcessor
from transformers import AutoModelForSeq2SeqLM, BitsAndBytesConfig
from mosestokenizer import MosesSentenceSplitter
from nltk import sent_tokenize
from indicnlp.tokenize.sentence_tokenize import sentence_split, DELIM_PAT_NO_DANDA


en_indic_ckpt_dir = "ai4bharat/indictrans2-en-indic-dist-200M"  # ai4bharat/indictrans2-en-indic-dist-200M
indic_en_ckpt_dir = "ai4bharat/indictrans2-indic-en-dist-200M"  # ai4bharat/indictrans2-indic-en-dist-200M
indic_indic_ckpt_dir = "ai4bharat/indictrans2-indic-indic-dist-320M"  # ai4bharat/indictrans2-indic-indic-dist-320M
BATCH_SIZE = 4
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

if len(sys.argv) > 1:
    quantization = sys.argv[1]
else:
    quantization = ""


# FLORES language code mapping to 2 letter ISO language code for compatibility
# with Indic NLP Library (https://github.com/anoopkunchukuttan/indic_nlp_library)
flores_codes = {
    "asm_Beng": "as",
    "awa_Deva": "hi",
    "ben_Beng": "bn",
    "bho_Deva": "hi",
    "brx_Deva": "hi",
    "doi_Deva": "hi",
    "eng_Latn": "en",
    "gom_Deva": "kK",
    "guj_Gujr": "gu",
    "hin_Deva": "hi",
    "hne_Deva": "hi",
    "kan_Knda": "kn",
    "kas_Arab": "ur",
    "kas_Deva": "hi",
    "kha_Latn": "en",
    "lus_Latn": "en",
    "mag_Deva": "hi",
    "mai_Deva": "hi",
    "mal_Mlym": "ml",
    "mar_Deva": "mr",
    "mni_Beng": "bn",
    "mni_Mtei": "hi",
    "npi_Deva": "ne",
    "ory_Orya": "or",
    "pan_Guru": "pa",
    "san_Deva": "hi",
    "sat_Olck": "or",
    "snd_Arab": "ur",
    "snd_Deva": "hi",
    "tam_Taml": "ta",
    "tel_Telu": "te",
    "urd_Arab": "ur",
}


def split_sentences(input_text, lang):
    if lang == "eng_Latn":
        input_sentences = sent_tokenize(input_text)
        with MosesSentenceSplitter(flores_codes[lang]) as splitter:
            sents_moses = splitter([input_text])
        sents_nltk = sent_tokenize(input_text)
        if len(sents_nltk) < len(sents_moses):
            input_sentences = sents_nltk
        else:
            input_sentences = sents_moses
        input_sentences = [sent.replace("\xad", "") for sent in input_sentences]
    else:
        input_sentences = sentence_split(input_text, lang=flores_codes[lang], delim_pat=DELIM_PAT_NO_DANDA)
    return input_sentences


def initialize_model_and_tokenizer(ckpt_dir, direction, quantization):
    if quantization == "4-bit":
        qconfig = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_compute_dtype=torch.bfloat16,
        )
    elif quantization == "8-bit":
        qconfig = BitsAndBytesConfig(
            load_in_8bit=True,
            bnb_8bit_use_double_quant=True,
            bnb_8bit_compute_dtype=torch.bfloat16,
        )
    else:
        qconfig = None

    tokenizer = IndicTransTokenizer(direction=direction)
    model = AutoModelForSeq2SeqLM.from_pretrained(
        ckpt_dir,
        trust_remote_code=True,
        low_cpu_mem_usage=True,
        quantization_config=qconfig,
    )

    if qconfig == None:
        model = model.to(DEVICE)
        # model.half() gpu

    model.eval()

    return tokenizer, model


def batch_translate(input_sentences, src_lang, tgt_lang, model, tokenizer, ip):
    translations = []
    for i in range(0, len(input_sentences), BATCH_SIZE):
        batch = input_sentences[i : i + BATCH_SIZE]

        # Preprocess the batch and extract entity mappings
        batch = ip.preprocess_batch(batch, src_lang=src_lang, tgt_lang=tgt_lang)

        # Tokenize the batch and generate input encodings
        inputs = tokenizer(
            batch,
            src=True,
            truncation=True,
            padding="longest",
            return_tensors="pt",
            return_attention_mask=True,
        ).to(DEVICE)

        # Generate translations using the model
        with torch.no_grad():
            generated_tokens = model.generate(
                **inputs,
                use_cache=True,
                min_length=0,
                max_length=256,
                num_beams=5,
                num_return_sequences=1,
            )

        # Decode the generated tokens into text
        generated_tokens = tokenizer.batch_decode(generated_tokens.detach().cpu().tolist(), src=False)

        # Postprocess the translations, including entity replacement
        translations += ip.postprocess_batch(generated_tokens, lang=tgt_lang)

        del inputs
        torch.cuda.empty_cache()

    return translations


ip = IndicProcessor(inference=True)

en_indic_tokenizer, en_indic_model = initialize_model_and_tokenizer(en_indic_ckpt_dir, "en-indic", quantization)
indic_en_tokenizer, indic_en_model = initialize_model_and_tokenizer(indic_en_ckpt_dir, "indic-en", quantization)
indic_indic_tokenizer, indic_indic_model = initialize_model_and_tokenizer(
    indic_indic_ckpt_dir, "indic-indic", quantization
)

config.json:   0%|          | 0.00/1.27k [00:00<?, ?B/s]

configuration_indictrans.py:   0%|          | 0.00/14.1k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/ai4bharat/indictrans2-en-indic-dist-200M:
- configuration_indictrans.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling_indictrans.py:   0%|          | 0.00/61.3k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/ai4bharat/indictrans2-en-indic-dist-200M:
- modeling_indictrans.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors:   0%|          | 0.00/1.10G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/163 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.27k [00:00<?, ?B/s]

configuration_indictrans.py:   0%|          | 0.00/14.1k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/ai4bharat/indictrans2-indic-en-dist-200M:
- configuration_indictrans.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling_indictrans.py:   0%|          | 0.00/61.3k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/ai4bharat/indictrans2-indic-en-dist-200M:
- modeling_indictrans.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


pytorch_model.bin:   0%|          | 0.00/914M [00:00<?, ?B/s]

  return self.fget.__get__(instance, owner)()


generation_config.json:   0%|          | 0.00/163 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.28k [00:00<?, ?B/s]

configuration_indictrans.py:   0%|          | 0.00/14.1k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/ai4bharat/indictrans2-indic-indic-dist-320M:
- configuration_indictrans.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling_indictrans.py:   0%|          | 0.00/61.3k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/ai4bharat/indictrans2-indic-indic-dist-320M:
- modeling_indictrans.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


pytorch_model.bin:   0%|          | 0.00/1.28G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/168 [00:00<?, ?B/s]

In [6]:
%%time

import torch
from IndicTransTokenizer import IndicTransTokenizer, IndicProcessor
from transformers import AutoModelForSeq2SeqLM


CPU times: user 19 µs, sys: 8 µs, total: 27 µs
Wall time: 32.4 µs


# English to Hindi

In [7]:
%%time
# ---------------------------------------------------------------------------
#                              Hindi to English
# ---------------------------------------------------------------------------
hindiToEnglish = batch_translate(hindiSentences, "hin_Deva", "eng_Latn", indic_en_model, indic_en_tokenizer, ip)
writeToFile(hindiToEnglish, "hindiToEnglish.txt")

CPU times: user 2min 37s, sys: 5.69 s, total: 2min 43s
Wall time: 2min 43s


In [38]:
from nltk.translate.bleu_score import sentence_bleu 
from nltk.translate.bleu_score import SmoothingFunction

def calculateBLEU(originalSentences, translatedSentences):
    cc = SmoothingFunction()
    score=0
    for i in range(len(originalSentences)):
        reference = originalSentences[i].split()
        candidate = translatedSentences[i].split()
        score += sentence_bleu([reference], candidate)
    return score/len(originalSentences)


In [39]:
print(calculateBLEU(englishSentences,hindiToEnglish))

0.4381767179595883


In [19]:
%%time
# ---------------------------------------------------------------------------
#                              English to Hindi
# ---------------------------------------------------------------------------
englishToHindi = batch_translate(englishSentences, "eng_Latn", "hin_Deva", en_indic_model, en_indic_tokenizer, ip)
writeToFile(englishToHindi, "englishToHindi.txt")

CPU times: user 2min 58s, sys: 5.92 s, total: 3min 4s
Wall time: 3min 4s


In [20]:
%%time
# ---------------------------------------------------------------------------
#                              Hindi to Bangla
# ---------------------------------------------------------------------------
hindiToBangla = batch_translate(hindiSentences, "hin_Deva", "ben_Beng", indic_indic_model, indic_indic_tokenizer, ip)
writeToFile(hindiToBangla, "hindiToBangla.txt")

CPU times: user 2min 47s, sys: 5.71 s, total: 2min 52s
Wall time: 2min 52s


In [21]:
%%time
# # ---------------------------------------------------------------------------
# #                             Bangla to Hindi
# # ---------------------------------------------------------------------------
banglaToHindi = batch_translate(banglaSentences, "ben_Beng", "hin_Deva", indic_indic_model, indic_indic_tokenizer, ip)
writeToFile(banglaToHindi, 'banglaToHindi.txt')

CPU times: user 3min 9s, sys: 6.18 s, total: 3min 15s
Wall time: 3min 15s


In [32]:
# writeToFile(englishToHindi, 'IndicTrans/EnglishToHindi.txt')
# writeToFile(hindiToEnglish, 'IndicTrans/HindiToEnglish.txt')
# writeToFile(hindiToBangla, 'IndicTrans/HindiToBangla.txt')
# writeToFile(banglaToHindi, 'IndicTrans/BanglaToHindi.txt')

## BLEU Scores and ROUGH Scores 

In [36]:
import evaluate
# bleu = evaluate.load("bleu")
from nltk.tokenize import word_tokenize
rogue = evaluate.load('rouge')

# import nltk
# from nltk.translate.bleu_score import sentence_bleu 
# from nltk.translate.bleu_score import SmoothingFunction

# def calculateBLEU(originalSentences, translatedSentences):
#     score = []
#     for i in range(len(originalSentences)):
#         reference = originalSentences[i].split()
#         candidate = translatedSentences[i].split()
#         score += sentence_bleu([reference], candidate)
#     return score/len(originalSentences)

In [40]:
# def printScore(translatedSentences, originalSentences):
#     print("BLEU Score: ", calculateBLEU(originalSentences, translatedSentences))
#     print("BLEU with tokenizer Score: ", bleu.compute(predictions=translatedSentences, references=originalSentences, tokenizer=word_tokenize))
#     print("ROUGE Score: ", rogue.compute(predictions = translatedSentences, references=originalSentences))




# # Hindi to English
# print("Hindi to English")
# printScore(hindiToEnglish,englishSentences )

# # English to Hindi
# print("English to Hindi")
# printScore(englishToHindi, hindiSentences)

# # Hindi to Bangla
# print("Hindi to Bangla")
# printScore(hindiToBangla, banglaSentences)

# # Bangla to Hindi
# print("Bangla to Hindi")
# printScore(banglaToHindi,hindiSentences )

Hindi to English
BLEU Score:  0.4381767179595883
BLEU with tokenizer Score:  {'bleu': 0.39825033494705087, 'precisions': [0.6847119726842164, 0.45704627578971907, 0.32968189132318354, 0.2438155551157728], 'brevity_penalty': 1.0, 'length_ratio': 1.0058719255484156, 'translation_length': 18158, 'reference_length': 18052}
ROUGE Score:  {'rouge1': 0.710477203501947, 'rouge2': 0.501047244567161, 'rougeL': 0.6622810299232798, 'rougeLsum': 0.6624845473122015}


In [47]:
def printBeutifully(blueScore, blueScoreWithTokenizer, rougeScore):
    print("Scores")
#     print("BLUE Score\n")
    
    print("BLUE Score(Sentence BLEU)" + " : " +  str(blueScore)+ "\n")
    print("\nBLUE Score with Tokenizer\n")
    for i in blueScoreWithTokenizer:
        print(str(i)+ " : " +  str(blueScoreWithTokenizer[i]))
    print("\nROUGE Score\n")
    for i in rougeScore:
        print(str(i)+ " : " +  str(rougeScore[i]))



def printScore(translatedSentences, originalSentences):
    bleuScore = calculateBLEU(originalSentences, translatedSentences)
    blueScoreWithTokenizer =  bleu.compute(predictions=translatedSentences, references=originalSentences, tokenizer=word_tokenize)
    rougheScore = rogue.compute(predictions = translatedSentences, references=originalSentences)

    printBeutifully(bleuScore, blueScoreWithTokenizer, rougheScore)



print("==========================================================")
# Hindi to English
print("Hindi to English")
printScore(hindiToEnglish,englishSentences )
print("==========================================================")
# English to Hindi
print("English to Hindi")
printScore(englishToHindi, hindiSentences)
print("==========================================================")
# Hindi to Bangla
print("Hindi to Bangla")
printScore(hindiToBangla, banglaSentences)
print("==========================================================")
# Bangla to Hindi
print("Bangla to Hindi")
printScore(banglaToHindi,hindiSentences )
print("==========================================================")



Hindi to English
Scores
BLUE Score(Sentence BLEU) : 0.4381767179595883


BLUE Score with Tokenizer

bleu : 0.39825033494705087
precisions : [0.6847119726842164, 0.45704627578971907, 0.32968189132318354, 0.2438155551157728]
brevity_penalty : 1.0
length_ratio : 1.0058719255484156
translation_length : 18158
reference_length : 18052

ROUGE Score

rouge1 : 0.710477203501947
rouge2 : 0.501047244567161
rougeL : 0.6622810299232798
rougeLsum : 0.6624845473122015
English to Hindi
Scores
BLUE Score(Sentence BLEU) : 0.41951860706645017


BLUE Score with Tokenizer

bleu : 0.34882455158383746
precisions : [0.6316470017779214, 0.4124480382666135, 0.28428235009963165, 0.19991003727027373]
brevity_penalty : 1.0
length_ratio : 1.0137637227592986
translation_length : 18561
reference_length : 18309

ROUGE Score

rouge1 : 0.12013809523809527
rouge2 : 0.017047619047619048
rougeL : 0.11931785714285714
rougeLsum : 0.11951904761904764
Hindi to Bangla
Scores
BLUE Score(Sentence BLEU) : 0.40468429658451016


BLU

# NLLB 

In [49]:
# !mkdir nllb
# !pwd

/kaggle/working


In [50]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline

tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")

tokenizer_config.json:   0%|          | 0.00/564 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/4.85M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.3M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/3.55k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/846 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/2.46G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/189 [00:00<?, ?B/s]

In [51]:
%%time
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)
model=model.to(device)

Using device: cuda
CPU times: user 574 ms, sys: 74 ms, total: 648 ms
Wall time: 645 ms


In [52]:

def translate_sentence(sentence, src_lang, tgt_lang):
    """Translates a single sentence."""
    inputs = tokenizer(sentence, return_tensors="pt").to(device)
    translated_ids = model.generate(**inputs, forced_bos_token_id=tokenizer.lang_code_to_id[tgt_lang])
    translated_sentence = tokenizer.batch_decode(translated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True)
    return translated_sentence[0]


def translate_batch(sentences, src_lang, tgt_lang, batch_size=100):
    """Translates a batch of sentences with progress updates."""
    translations = []
    for i in range(0, len(sentences), batch_size):
        batch = sentences[i: i + batch_size]
        translations += [translate_sentence(sentence, src_lang, tgt_lang) for sentence in batch]

        if (i + batch_size) % 50 == 0:
            now = datetime.now()
            current_time = now.strftime("%H:%M:%S")
            print(f"Batch Translation Progress ({src_lang} -> {tgt_lang}): {i + batch_size}", current_time)

    return translations


In [53]:

def calculate_metrics(predictions, references):
    """Calculates BLEU and ROUGE metrics."""
    bleu_score = calculateBLEU(references, predictions)
    rouge_score = evaluate.load('rouge').compute(predictions=predictions, references=references)
    return bleu_score, rouge_score

def evaluate_translation(predictions, references):
    """Evaluates the translation using BLEU and ROUGE metrics."""
    bleu_score, rouge_score = calculate_metrics(predictions, references)
    print("Scores")
    print("\nBLUE Score" + " : " +  str(bleu_score))
    print("\nROUGE Score\n")
    for i in rouge_score:
        print(str(i) + " : " +  str(rouge_score[i]))

In [54]:

#enlish to hindi
srcSentences = englishSentences
tgtSentences = []

tgtSentences = translate_batch(srcSentences, "eng_Latn", "hin_Deva")
writeToFile(tgtSentences, 'nllb/englishToHindi.txt')


evaluate_translation(tgtSentences, hindiSentences)

the `lang_code_to_id` attribute is deprecated. The logic is natively handled in the `tokenizer.adder_tokens_decoder` this attribute will be removed in `transformers` v4.38


Batch Translation Progress (eng_Latn -> hin_Deva): 100 07:32:39
Batch Translation Progress (eng_Latn -> hin_Deva): 200 07:33:24
Batch Translation Progress (eng_Latn -> hin_Deva): 300 07:34:09
Batch Translation Progress (eng_Latn -> hin_Deva): 400 07:34:52
Batch Translation Progress (eng_Latn -> hin_Deva): 700 07:37:10
Batch Translation Progress (eng_Latn -> hin_Deva): 800 07:37:54
Batch Translation Progress (eng_Latn -> hin_Deva): 900 07:38:37
Batch Translation Progress (eng_Latn -> hin_Deva): 1000 07:39:20
Scores

BLUE Score : 0.39649608439150036

ROUGE Score

rouge1 : 0.12606428571428574
rouge2 : 0.022523809523809526
rougeL : 0.1260547619047619
rougeLsum : 0.12648214285714288


In [55]:

#hindi to english
srcSentences = hindiSentences
tgtSentences = []

tgtSentences = translate_batch(srcSentences, "hin_Deva", "eng_Latn")
writeToFile(tgtSentences, 'nllb/hindiToEnglish.txt')
evaluate_translation(tgtSentences, englishSentences)

    

Batch Translation Progress (hin_Deva -> eng_Latn): 100 07:41:54
Batch Translation Progress (hin_Deva -> eng_Latn): 200 07:42:36
Batch Translation Progress (hin_Deva -> eng_Latn): 300 07:43:18
Batch Translation Progress (hin_Deva -> eng_Latn): 400 07:43:59
Batch Translation Progress (hin_Deva -> eng_Latn): 500 07:44:43
Batch Translation Progress (hin_Deva -> eng_Latn): 600 07:45:27
Batch Translation Progress (hin_Deva -> eng_Latn): 700 07:46:11
Batch Translation Progress (hin_Deva -> eng_Latn): 800 07:46:52
Batch Translation Progress (hin_Deva -> eng_Latn): 900 07:47:34
Batch Translation Progress (hin_Deva -> eng_Latn): 1000 07:48:14
Scores

BLUE Score : 0.4006749182856718

ROUGE Score

rouge1 : 0.6632913008000563
rouge2 : 0.4363122996990892
rougeL : 0.6154112365033515
rougeLsum : 0.6156424321620952


In [56]:
#hindi to bangla
srcSentences = hindiSentences
tgtSentences = []

tgtSentences = translate_batch(srcSentences, "hin_Deva", "ben_Beng")
writeToFile(tgtSentences, 'nllb/hindiToBangla.txt')
evaluate_translation(tgtSentences, banglaSentences)

Batch Translation Progress (hin_Deva -> ben_Beng): 100 07:49:01
Batch Translation Progress (hin_Deva -> ben_Beng): 200 07:49:47
Batch Translation Progress (hin_Deva -> ben_Beng): 300 07:50:32
Batch Translation Progress (hin_Deva -> ben_Beng): 400 07:51:17
Batch Translation Progress (hin_Deva -> ben_Beng): 500 07:52:05
Batch Translation Progress (hin_Deva -> ben_Beng): 600 07:52:52
Batch Translation Progress (hin_Deva -> ben_Beng): 700 07:53:39
Batch Translation Progress (hin_Deva -> ben_Beng): 800 07:54:24
Batch Translation Progress (hin_Deva -> ben_Beng): 900 07:55:08
Batch Translation Progress (hin_Deva -> ben_Beng): 1000 07:55:53
Scores

BLUE Score : 0.4042740409800657

ROUGE Score

rouge1 : 0.003
rouge2 : 0.0
rougeL : 0.003
rougeLsum : 0.003


In [57]:
#bangla to hindi
srcSentences = banglaSentences
tgtSentences = []

tgtSentences = translate_batch(srcSentences, "ben_Beng", "hin_Deva")
writeToFile(tgtSentences, 'nllb/banglaToHindi.txt')
evaluate_translation(tgtSentences, hindiSentences)

Batch Translation Progress (ben_Beng -> hin_Deva): 100 07:56:38
Batch Translation Progress (ben_Beng -> hin_Deva): 200 07:57:23
Batch Translation Progress (ben_Beng -> hin_Deva): 300 07:58:07
Batch Translation Progress (ben_Beng -> hin_Deva): 400 07:58:52
Batch Translation Progress (ben_Beng -> hin_Deva): 500 07:59:38
Batch Translation Progress (ben_Beng -> hin_Deva): 600 08:00:25
Batch Translation Progress (ben_Beng -> hin_Deva): 900 08:02:40
Batch Translation Progress (ben_Beng -> hin_Deva): 1000 08:03:23
Scores

BLUE Score : 0.3530685342038333

ROUGE Score

rouge1 : 0.10510238095238096
rouge2 : 0.0141
rougeL : 0.10499880952380954
rougeLsum : 0.10570714285714287
