In [1]:
from transformers import MarianMTModel, MarianTokenizer
import nltk
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag
import re

In [2]:
model = MarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-en-hi")
tokenizer = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-hi")



In [3]:
def spot_nouns(en_article):
    words = word_tokenize(en_article)
    tagged_words = pos_tag(words)
    nouns = [words for words, pos in tagged_words if pos.startswith('NN')]
    translated_nouns = {} # dictionary to store english and HINGLISH nouns
    for noun in nouns:
        translated_noun = hi_translation(noun)
        hinglish_noun = hinglish_translate(translated_noun)
        translated_nouns[noun] = hinglish_noun        
    return translated_nouns

In [4]:
# English to Hindi Translation
def hi_translation(en_article):
    inputs = tokenizer.encode(en_article, return_tensors="pt")
    translated_id = model.generate(inputs, max_length=150, num_return_sequences=1, num_beams=4)
    translated_output=tokenizer.decode(translated_id[0], skip_special_tokens=True)  
    translated_output = translated_output.replace('\u200d', '') # handle ZWJ characters
    return translated_output

In [14]:
# Hindi to English Transliteration 
def hinglish_translate(hi_text):
    consonant = {'क':['ka','k'],'ख':['kha','kh'],'ग':['ga','g'],'घ':['gha','gh'],'च':['cha','ch'],'छ':['chha','chh'],'ज':['ja','j'],'झ':['jha','jh'],'ट':['ta','t'],'ठ':['tha','th'],'ड':['da','d'],'ढ':['dha','dh'],'ण':['nda', 'nd'],'त':['ta','t'],'थ':['tha','th'],'द':['da','d'],'ध':['dha','dh'],'न':['na','n'],'प':['pa','p'],'फ':['pha','ph'],'ब':['ba','b'],'भ':['bha','bh'],'म':['ma','m'],'य':['ya','y'],'र':['ra','r'],'ल':['la','l'],'व':['va','v'],'श':['sha','sh'],'ष':['shha','shh'],'स':['sa','s'],'ह':['ha','h'],'ञ':['gya','gy'],'ण':['da','d']}
    vowel = {'अ':['a'],'आ':['aa'],'इ':['e'],'ई':['e'],'उ':['u'],'ऊ':['u'],'ए':['e'],'ऐ':['ae'],'ओ':['o'],'औ':['ao'],'ा':['a'],'ि':['i'],'ी':['ee'],'ु':['u'],'ू':['u'],'े':['e'],'ै':['ae'],'ो':['o'],'ौ':['ao'],'ं':['n'],'ँ':['n'],'्':['a'], '़':[''],'ः':['ah'], '।': ['.']}
    words = hi_text.split(' ')
    hinglish = ''
    for j in range(len(words)):
        w = words[j]
        for i in range(len(w)):
            char = w[i]            
            if char in vowel:
                hinglish += vowel[char][0]
            elif char in consonant:
                if i+1 == len(w) or i+1 < len(w) and w[i+1] in vowel:
                    hinglish += consonant[char][1]
                else:
                    hinglish += consonant[char][0]
            else:
                hinglish += char
        hinglish += ' '
    hinglish = hinglish.replace('\u200d', '') # handle ZWJ characters
    return hinglish.strip()

In [12]:
# Switching Hindi nouns to English nouns to keep certain words in English
def noun_switch(nouns, hinglish_text):
    for key, value in nouns.items():
        matches = re.findall(r'\b' + re.escape(value) + r'\b', hinglish_text)    
        for match in matches:
            hinglish_text = hinglish_text.replace(match, key)
    return hinglish_text

In [20]:
# SENTENCE 1
en_article = input("INPUT : ")
nouns = spot_nouns(en_article)
hi_text = hi_translation(en_article)
hinglish_text = hinglish_translate(hi_text)
output = noun_switch(nouns, hinglish_text)

print("\nOUTPUT : ", output)

INPUT : Definitely share your feedback in the comment section.

OUTPUT :  comment khadad men apanee paratikariya ko nishachit hee share karen .


In [21]:
# SENTENCE 2
en_article = input("INPUT : ")
nouns = spot_nouns(en_article)
hi_text = hi_translation(en_article)
hinglish_text = hinglish_translate(hi_text)
output = noun_switch(nouns, hinglish_text)

print("\nOUTPUT : ", output)

INPUT : So even if it's a big video, I will clearly mention all the products. 

OUTPUT :  to yah ek bada video hae, to bhee maen sapashhat rup se sabhee utapadon ka ulalekh karenge.


In [22]:
# SENTENCE 3
en_article = input("INPUT : ")
nouns = spot_nouns(en_article)
hi_text = hi_translation(en_article)
hinglish_text = hinglish_translate(hi_text)
output = noun_switch(nouns, hinglish_text)

print("\nOUTPUT : ", output)

INPUT : I was waiting for my bag.

OUTPUT :  maen apane baeg ke lie entajar kar raha tha.
