In [None]:
from transformers import MarianMTModel, MarianTokenizer
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.tag import pos_tag
import re

In [None]:
# Module-level WordNet lemmatizer, created once and reused by spot_nouns_verbs
# to reduce verbs to their base form.
lemmatizer = WordNetLemmatizer()

In [None]:
# English -> Hindi MarianMT checkpoint. The model weights and the tokenizer must
# come from the same checkpoint, so the id is written once and shared.
MODEL_NAME = "Helsinki-NLP/opus-mt-en-hi"
model = MarianMTModel.from_pretrained(MODEL_NAME)
tokenizer = MarianTokenizer.from_pretrained(MODEL_NAME)

Downloading (…)olve/main/source.spm:   0%|          | 0.00/812k [00:00<?, ?B/s]

Downloading (…)olve/main/target.spm:   0%|          | 0.00/1.07M [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/2.10M [00:00<?, ?B/s]



In [None]:
def spot_nouns_verbs(en_article):
    """Map the nouns and main verbs of an English sentence to Hindi.

    The sentence is tokenized and POS-tagged. Every token tagged ``NN*`` is
    collected as a noun; every token tagged ``VB*`` that is not one of the
    listed auxiliaries is lemmatized to its base verb form. Each distinct word
    is then translated on its own with ``hi_translation``.

    Args:
        en_article: English text to analyse.

    Returns:
        dict mapping English word -> Hindi string. It always starts from three
        hard-coded entries ('feedback', 'definitely', 'section'); translated
        nouns and then verbs are added on top and overwrite an existing entry
        with the same key. For verbs only the first whitespace-separated token
        of the translation is kept. Words whose translation comes back empty
        are left out, because an empty value would corrupt any later
        ``str.replace(value, key)`` pass over the translated sentence.
    """
    tagged_words = pos_tag(word_tokenize(en_article))
    auxiliary_verbs = {'am', 'is', 'are', 'was', 'were', 'has', 'had'}

    nouns = [word for word, pos in tagged_words if pos.startswith('NN')]
    # Skip auxiliaries case-insensitively (so a sentence-initial "Is"/"Was" is
    # dropped too) and bring the remaining verbs to their base form.
    verbs = [
        lemmatizer.lemmatize(word, pos='v')
        for word, pos in tagged_words
        if pos.startswith('VB') and word.lower() not in auxiliary_verbs
    ]

    # Seed dictionary of English words and their Hindi counterparts.
    translated_words = {'feedback':'प्रतिक्रिया', 'definitely':'निश्चित', 'section':'खण्ड'}

    # dict.fromkeys drops repeats while keeping first-seen order, so each
    # distinct word costs a single model call.
    for noun in dict.fromkeys(nouns):
        hi_noun = hi_translation(noun)
        if hi_noun.strip():
            translated_words[noun] = hi_noun
    for verb in dict.fromkeys(verbs):
        # Keep only the first token of the verb translation; split() with no
        # argument also copes with leading or repeated whitespace.
        first_token = hi_translation(verb).split()[:1]
        if first_token:
            translated_words[verb] = first_token[0]
    return translated_words

In [None]:
# English to Hindi Translation
def hi_translation(en_article):
    """Translate ``en_article`` with the module-level MarianMT model.

    The text is encoded with ``tokenizer``, decoded by ``model.generate`` using
    beam search (4 beams, one returned sequence, ``max_length`` of 150), and the
    first returned sequence is turned back into text with special tokens
    skipped.

    Args:
        en_article: text to translate.

    Returns:
        The decoded translation with every zero-width joiner (U+200D) removed.
    """
    token_ids = tokenizer.encode(en_article, return_tensors="pt")
    generated = model.generate(
        token_ids,
        num_beams=4,
        num_return_sequences=1,
        max_length=150,
    )
    best_sequence = generated[0]
    hindi = tokenizer.decode(best_sequence, skip_special_tokens=True)
    # Strip ZWJ characters from the decoded text.
    return hindi.replace('\u200d', '')

In [None]:
# Switching Hindi nouns to English nouns to keep certain words in English
def noun_switch(nouns, hinglish_text):
    """Replace Hindi words in ``hinglish_text`` with their English originals.

    Args:
        nouns: dict mapping English word -> Hindi word. Entries with an empty
            Hindi value are ignored.
        hinglish_text: text in which whole-word occurrences of each Hindi
            value are swapped for the corresponding English key.

    Returns:
        The text after all replacements, applied in the dict's iteration order.
    """
    # ``\b`` is unreliable for Devanagari: vowel signs and other combining
    # marks are not ``\w``, so a word ending in a matra never sees a trailing
    # boundary, while a stem followed by a matra wrongly does. Treat the
    # Devanagari block (minus danda/double danda, U+0964/U+0965) and ZWNJ/ZWJ
    # as word characters when deciding where a word starts and ends.
    word_char = r'[\w\u0900-\u0963\u0966-\u097F\u200c\u200d]'
    for key, value in nouns.items():
        if not value:
            # An empty search string would match everywhere.
            continue
        pattern = r'(?<!' + word_char + r')' + re.escape(value) + r'(?!' + word_char + r')'
        # Substitute only the whole-word matches; a callable replacement keeps
        # backslashes in the English key literal.
        hinglish_text = re.sub(pattern, lambda _match, repl=key: repl, hinglish_text)
    return hinglish_text

In [None]:
import nltk
# Fetch the 'punkt' tokenizer data; NLTK's word_tokenize (called in
# spot_nouns_verbs) depends on it.
nltk.download('punkt')


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [None]:
import nltk
# Fetch the perceptron POS-tagger model; NLTK's pos_tag (called in
# spot_nouns_verbs) depends on it.
nltk.download('averaged_perceptron_tagger')


[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


True

In [None]:
import nltk
# Fetch the WordNet corpus; WordNetLemmatizer (used to lemmatize verbs in
# spot_nouns_verbs) depends on it.
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [None]:
# SENTENCE 1
en_article = input("INPUT : ")
# English word -> Hindi translation for the sentence's nouns and verbs.
nouns = spot_nouns_verbs(en_article)
hi_text = hi_translation(en_article)
# Put the English word back wherever its Hindi translation appears.
for eng_word, hin_word in nouns.items():
    # str.replace with an empty search string inserts the replacement between
    # every character, so skip entries whose translation is empty.
    if hin_word:
        hi_text = hi_text.replace(hin_word, eng_word)

print(hi_text)

INPUT : Definitely share your feedback in the comment section.
comment खण्ड में अपनी प्रतिक्रिया को definitely ही share करें ।


In [None]:
# SENTENCE 2
en_article = input("INPUT : ")
# English word -> Hindi translation for the sentence's nouns and verbs.
nouns = spot_nouns_verbs(en_article)
hi_text = hi_translation(en_article)
# Put the English word back wherever its Hindi translation appears.
for eng_word, hin_word in nouns.items():
    # str.replace with an empty search string inserts the replacement between
    # every character, so skip entries whose translation is empty.
    if hin_word:
        hi_text = hi_text.replace(hin_word, eng_word)

print(hi_text)

INPUT : So even if it's a big video, I will clearly mention all the products.
तो यह एक बड़ा video है, तो भी मैं स्पष्ट रूप से सभी productsों का mention करेंगे।


In [None]:
# SENTENCE 3
en_article = input("INPUT : ")
# English word -> Hindi translation for the sentence's nouns and verbs.
nouns = spot_nouns_verbs(en_article)
hi_text = hi_translation(en_article)

# Put the English word back wherever its Hindi translation appears.
for eng_word, hin_word in nouns.items():
    # str.replace with an empty search string inserts the replacement between
    # every character, so skip entries whose translation is empty.
    if hin_word:
        hi_text = hi_text.replace(hin_word, eng_word)

print(hi_text)

INPUT : I was waiting for my bag.
मैं अपने बैग के लिए wait कर रहा था.
