In [21]:
# English / French / Hindi translator chatbot built on pre-trained models from the Hugging Face Transformers library

# Import required libraries
from transformers import pipeline, AutoModelForSeq2SeqLM
from transformers import AutoTokenizer
#import torch
#import random

In [22]:
# Helsinki-NLP OPUS-MT checkpoint identifiers, one per translation direction.
# English <-> French
en_fr_model_path, fr_en_model_path = "Helsinki-NLP/opus-mt-en-fr", "Helsinki-NLP/opus-mt-fr-en"
# English <-> Hindi
en_hi_model_path, hi_en_model_path = "Helsinki-NLP/opus-mt-en-hi", "Helsinki-NLP/opus-mt-hi-en"

In [23]:
# Load the pre-trained tokenizer for each translation direction.
# The paths are visited in the order en->fr, fr->en, en->hi, hi->en and the
# resulting tokenizers are unpacked into the matching names.
en_fr_tokenizer, fr_en_tokenizer, en_hi_tokenizer, hi_en_tokenizer = (
    AutoTokenizer.from_pretrained(checkpoint_path)
    for checkpoint_path in (
        en_fr_model_path,
        fr_en_model_path,
        en_hi_model_path,
        hi_en_model_path,
    )
)

In [24]:
# Load the pre-trained sequence-to-sequence model for each translation direction.
# The paths are visited in the order en->fr, fr->en, en->hi, hi->en and the
# resulting models are unpacked into the matching names.
en_fr_model, fr_en_model, en_hi_model, hi_en_model = (
    AutoModelForSeq2SeqLM.from_pretrained(checkpoint_path)
    for checkpoint_path in (
        en_fr_model_path,
        fr_en_model_path,
        en_hi_model_path,
        hi_en_model_path,
    )
)

In [25]:
# Create translation class

class TranslatorBot:
    """Translate text between English, French and Hindi.

    One Transformers ``pipeline`` is created per supported direction and kept
    in ``self.translator`` under the key ``'<source>_<target>'``
    (``'en_fr'``, ``'fr_en'``, ``'en_hi'``, ``'hi_en'``).

    The pipelines are built from the module-level ``*_model`` and
    ``*_tokenizer`` variables, so the cells that load them must have been run
    before this class is instantiated.
    """

    def __init__(self, device=None):
        """Build the four translation pipelines.

        Args:
            device: Optional device forwarded to every ``pipeline(...)`` call
                (for example ``0`` to use a GPU). When ``None`` (the default)
                the argument is not passed at all, so the library's own
                default device is used.
        """
        # Only forward `device` when the caller asked for one, so the default
        # construction path is exactly the same call as before.
        extra_kwargs = {} if device is None else {'device': device}

        self.translator = {
            'en_fr': pipeline("translation_en_to_fr", model=en_fr_model, tokenizer=en_fr_tokenizer, **extra_kwargs),
            'fr_en': pipeline("translation_fr_to_en", model=fr_en_model, tokenizer=fr_en_tokenizer, **extra_kwargs),
            'en_hi': pipeline("translation_en_to_hi", model=en_hi_model, tokenizer=en_hi_tokenizer, **extra_kwargs),
            'hi_en': pipeline("translation_hi_to_en", model=hi_en_model, tokenizer=hi_en_tokenizer, **extra_kwargs),
        }

    def translate(self, text, source_lang, target_lang, max_length=400):
        """Translate ``text`` from ``source_lang`` to ``target_lang``.

        Args:
            text: Text to translate.
            source_lang: Source language code, e.g. ``'en'``. Matching is
                case-insensitive and ignores surrounding whitespace.
            target_lang: Target language code, matched the same way.
            max_length: Forwarded to the pipeline as ``max_length``.
                Defaults to 400.

        Returns:
            The translated string. ``text`` is returned unchanged when both
            language codes are the same or when ``text`` is a blank string.
            For an unsupported language pair a "Sorry, ..." message string is
            returned instead of raising.
        """
        # Normalise the codes so 'EN', ' en ' and 'en' all select the same pipeline.
        src = str(source_lang).strip().lower()
        tgt = str(target_lang).strip().lower()

        if src == tgt:
            return text

        translator = self.translator.get(f'{src}_{tgt}')
        if translator is None:
            # Echo the codes exactly as the caller supplied them.
            return f"Sorry, I don't support {source_lang}_{target_lang} translation."

        # Nothing to translate: skip the model call for blank input.
        if isinstance(text, str) and not text.strip():
            return text

        return translator(text, max_length=max_length)[0]['translation_text']

# Pass device=0 when creating the bot, e.g. TranslatorBot(device=0), to run the pipelines on a GPU

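# Alternatively, the following lines create the pipelines directly, relying on the library's default pre-trained model for each task
# NOTE(review): a default model may not exist for every language pair -- confirm before enabling these lines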
#en_fr_translator = pipeline("translation_en_to_fr")
#fr_en_translator = pipeline("translation_fr_to_en")
#en_hi_translator = pipeline("translation_en_to_hi")
#hi_en_translator = pipeline("translation_hi_to_en")

In [26]:
# Define the greeting messages for the chatbot
#greetings = ["Hello! I'm a language translator chatbot. How can I help you today?",
#             "Hi there! I can translate between English, French, and Hindi. What do you need translated?",
#             "Greetings! What can I do for you today?"]

# Define the goodbye messages for the chatbot
#goodbyes = ["Goodbye! Have a great day!",
#            "See you later!",
#            "Take care!"]

# Define a function to generate a random greeting message
#def get_greeting():
#    return random.choice(greetings)

# Define a function to generate a random goodbye message
#def get_goodbye():
#    return random.choice(goodbyes)

In [27]:
# Define a function to translate text from one language to another
#def translate(text, source_lang, target_lang):
#    if source_lang == "en" and target_lang == "fr":
#        return en_fr_translator(text, max_length=100)[0]['translation_text']
#    elif source_lang == "fr" and target_lang == "en":
#        return fr_en_translator(text, max_length=100)[0]['translation_text']
#    elif source_lang == "en" and target_lang == "hi":
#        return en_hi_translator(text, max_length=100)[0]['translation_text']
#    elif source_lang == "hi" and target_lang == "en":
#        return hi_en_translator(text, max_length=100)[0]['translation_text']
#    else:
#        return "Sorry, I don't support that translation."

In [28]:
# Define the main function for the chatbot
#def main():
#    print(get_greeting())
#    while True:
        # Get input from the user
#        user_input = input("You: ")

        # Check if the user wants to quit
#        if user_input.lower() in ["bye", "goodbye", "exit"]:
#            print(get_goodbye())
#            break

        # Parse the user input for the source and target languages and the text to translate
#        try:
#            source_lang, target_lang, text = user_input.split(maxsplit=2)
#        except ValueError:
#            print("Sorry, I didn't understand that. Please try again.")
#            continue

        # Translate the text
#        translated_text = translate(text, source_lang, target_lang)

        # Print the translated text
#        print(f"Bot: {translated_text}")
        
#if __name__ == "__main__":
#    main()

In [29]:
# Custom-built NLP model (disabled). NOTE: the code below is an IMDB sentiment classifier, not a translator

#import torch
#import torch.nn as nn
#import torch.optim as optim
#from torchtext.datasets import IMDB
#from torchtext.data import Field, LabelField, BucketIterator

# Define the text and label fields
#TEXT = Field(tokenize='spacy', lower=True)
#LABEL = LabelField(dtype=torch.float)

# Load the IMDB dataset
#train_data, test_data = IMDB.splits(TEXT, LABEL)

# Build the vocabulary
#TEXT.build_vocab(train_data, max_size=10000, vectors='glove.6B.100d')
#LABEL.build_vocab(train_data)

# Define the model architecture
#class SentimentClassifier(nn.Module):
#    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim):
#        super().__init__()
#        self.embedding = nn.Embedding(vocab_size, embedding_dim)
#        self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=2, bidirectional=True, dropout=0.5)
#        self.fc = nn.Linear(hidden_dim * 2, output_dim)
#        self.dropout = nn.Dropout(0.5)
        
#    def forward(self, text):
#        embedded = self.dropout(self.embedding(text))
#        output, (hidden, cell) = self.lstm(embedded)
#        hidden = self.dropout(torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim=1))
#        return self.fc(hidden)

# Instantiate the model
#model = SentimentClassifier(len(TEXT.vocab), 100, 256, 1)

# Define the loss function and optimizer
#criterion = nn.BCEWithLogitsLoss()
#optimizer = optim.Adam(model.parameters())

# Split the data into batches
#BATCH_SIZE = 64
#train_iterator, test_iterator = BucketIterator.splits((train_data, test_data), batch_size=BATCH_SIZE)

# Train the model
#N_EPOCHS = 10
#for epoch in range(N_EPOCHS):
#    train_loss = 0.0
#    train_acc = 0.0
#    model.train()
#    for batch in train_iterator:
#        optimizer.zero_grad()
#        text = batch.text
#        label = batch.label
#        predictions = model(text).squeeze(1)
#        loss = criterion(predictions, label)
#        loss.backward()
#        optimizer.step()
#        train_loss += loss.item()
#        train_acc += ((predictions > 0.5).float() == label).float().mean().item()
#    train_loss /= len(train_iterator)
#    train_acc /= len(train_iterator)
#    print(f'Epoch: {epoch+1:02}, Train Loss: {train_loss:.3f}, Train Acc: {train_acc:.3f}')
    
    # Evaluate the model on the test set
#    test_loss = 0.0
#    test_acc = 0.0
#    model.eval()
#    with torch.no_grad():
#        for batch in test_iterator:
#            text = batch.text
#            label = batch.label
#            predictions = model(text).squeeze(1)
#            loss = criterion(predictions, label)
#            test_loss += loss.item()
#            test_acc += ((predictions > 0.5).float() == label).float().mean().item()
#    test_loss /= len(test_iterator)
#    test_acc /= len(test_iterator)
#    print(f'Epoch: {epoch+1:02}, Test Loss: {test_loss:.3f}, Test Acc: {test_acc:.3f}')
