In [1]:
from indic_transliteration import sanscript
from indic_transliteration.sanscript import transliterate
import spacy
import random
from googletrans import Translator
from nltk.translate.bleu_score import sentence_bleu

def extract_top_keywords(sentence, num_keywords=15):
    # Load English language model
    nlp = spacy.load("en_core_web_sm")

    # Process the input sentence
    doc = nlp(sentence)

    # Extract keywords (nouns and adjectives)
    keywords = [token.text.lower() for token in doc if token.pos_ in ["NOUN", "ADJ"]]

    return keywords

def translate_to_hindi(number):
    hindi_numerals = {
        '0': '०',
        '1': '१',
        '2': '२',
        '3': '३',
        '4': '४',
        '5': '५',
        '6': '६',
        '7': '७',
        '8': '८',
        '9': '९',
    }

    # Check if the character is a digit, otherwise keep it unchanged
    return ''.join(hindi_numerals[digit] if digit.isdigit() else digit for digit in str(number))




def translate_to_hinglish(sentence):
    keywords = extract_top_keywords(sentence)
    words = sentence.split()
    translated_words = []

    for word in words:
        if word.lower() in keywords:
            translated_words.append(word)
        elif word.isdigit():
            hindi_number = translate_to_hindi(word)
            translated_words.append(hindi_number)
        else:
            transliterated_word = transliterate(word, sanscript.ITRANS, sanscript.DEVANAGARI)
            if random.random() > 0.5:
                translated_word = translate(transliterated_word, 'hi')
            else:
                translated_word = transliterated_word

            # Check for None values in the translation
            if translated_word is not None:
                # Handle the case where translation result is a list
                if isinstance(translated_word, list):
                    translated_word = translated_word[0].text
                translated_words.append(translated_word)

    return ' '.join(translated_words)

def translate(text, target_language='hi'):
    translator = Translator()
    translation = translator.translate(text, dest=target_language)
    # Check for None values in the translation
    return translation.text if translation is not None else text

def calculate_bleu(reference, candidate):
    reference = [reference.split()]
    candidate = candidate.split()
    return sentence_bleu(reference, candidate)

def main():
    sentences = ["I had about a 30 minute demo just using this new headset",
                 "Definitely share your feedback in the comment section.",
                 "So even if it's a big video, I will clearly mention all the products.",
                 "I was waiting for my bag."
                 ]

    total_bleu_score = 0.0

    for sentence in sentences:
        hinglish_sentence = translate_to_hinglish(sentence)
        reference_sentence = translate_to_hindi(sentence)

        print(f"Input Sentence: {sentence}")
        print(f"Hinglish Translation: {hinglish_sentence}")

        bleu_score = calculate_bleu(reference_sentence, hinglish_sentence)
        total_bleu_score += bleu_score

        print(f"Bleu Score: {bleu_score}")
        print()

    average_bleu_score = total_bleu_score / len(sentences)
    print(f"Average Bleu Score: {average_bleu_score}")

if __name__ == "__main__":
    main()

Input Sentence: I had about a 30 minute demo just using this new headset
Hinglish Translation: ई हद् अबोउत् ए ३० minute demo अभी उसिन्ग् थिस् new headset
Bleu Score: 3.9876353728947065e-78



The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


Input Sentence: Definitely share your feedback in the comment section.
Hinglish Translation: डेफ़िनितेल्य् शेयर करना योउर् feedback इन् थे comment सेच्तिओन्।
Bleu Score: 1.2508498911928379e-231



The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


Input Sentence: So even if it's a big video, I will clearly mention all the products.
Hinglish Translation: दिखाओ एवेन् इफ़् इत्'स् ए big विदेओ, ई विल्ल् च्लेअर्ल्य् मेन्तिओन् अल्ल् थे प्रोदुच्त्स्।
Bleu Score: 9.418382295637229e-232

Input Sentence: I was waiting for my bag.
Hinglish Translation: ई वस् वैतिन्ग् फ़ोर् म्य् बग्।
Bleu Score: 0

Average Bleu Score: 9.969088432236766e-79
