<a href="https://colab.research.google.com/github/ebhawana/multilingual-chatbot/blob/main/Multilingual_chatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install transformers torch sentencepiece langid



In [None]:
from langdetect import detect
from transformers import (AutoTokenizer, AutoModelForSeq2SeqLM,AutoModelForCausalLM,
                          XLMRobertaTokenizer, XLMRobertaForSequenceClassification,
                          AutoModelForSequenceClassification,
                          MBartForConditionalGeneration, MBart50TokenizerFast)
import torch

# Display names for the two-letter language codes used in this notebook.
LANG_CODES = dict(
    fr='French',
    de='German',
    es='Spanish',
    hi='Hindi',
    en='English',
)

In [None]:
# Disabled code: MarianMT-based translation helpers wrapped in a bare string
# literal, so nothing below is executed or defined. The cell simply evaluates
# to the string. A `translate(text, src_lang, tgt_lang)` function is defined
# further down in the notebook.
'''
# Load MarianMT models dynamically
def get_translation_model(src_lang, tgt_lang):
    model_name = f"Helsinki-NLP/opus-mt-{src_lang}-{tgt_lang}"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    return (tokenizer, model)


# Translate text
def translate(text, tokenizer, model):
    inputs = tokenizer(text, return_tensors="pt", padding=True,truncation=True)
    translated = model.generate(**inputs)
    return tokenizer.batch_decode(translated, skip_special_tokens=True)[0]

'''

'\n# Load MarianMT models dynamically\ndef get_translation_model(src_lang, tgt_lang):\n    model_name = f"Helsinki-NLP/opus-mt-{src_lang}-{tgt_lang}"\n    tokenizer = AutoTokenizer.from_pretrained(model_name)\n    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)\n    return (tokenizer, model)\n\n\n# Translate text\ndef translate(text, tokenizer, model):\n    inputs = tokenizer(text, return_tensors="pt", padding=True,truncation=True)\n    translated = model.generate(**inputs)\n    return tokenizer.batch_decode(translated, skip_special_tokens=True)[0]\n  \n'

In [None]:
# MBART for translation: tokenizer and model come from the same
# many-to-many mBART-50 checkpoint.
mbart_tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50-many-to-many-mmt")
mbart_model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50-many-to-many-mmt")

# Short language code -> language tag used with the mBART tokenizer.
# The chatbot pipeline also uses the keys as its list of supported languages.
lang_map = dict(
    en="en_XX",
    hi="hi_IN",
    fr="fr_XX",
    es="es_XX",
    de="de_DE",
    bn="bn_IN",
    mr="mr_IN",
    ta="ta_IN",
    te="te_IN",
)

In [None]:
def translate(text, src_lang, tgt_lang):
    """Translate ``text`` from ``src_lang`` into ``tgt_lang`` with mBART.

    Args:
        text: The string to translate.
        src_lang: Short source-language code; must be a key of ``lang_map``.
        tgt_lang: Short target-language code; must be a key of ``lang_map``.

    Returns:
        The decoded translation with special tokens stripped.

    Raises:
        KeyError: If ``src_lang`` or ``tgt_lang`` is not in ``lang_map``.
    """
    mbart_tokenizer.src_lang = lang_map[src_lang]
    target_tag = lang_map[tgt_lang]
    encoded = mbart_tokenizer(text, return_tensors="pt")
    # The many-to-many checkpoint needs the target language tag forced as the
    # first generated token; without it the requested target is ignored.
    generated_tokens = mbart_model.generate(
        **encoded,
        forced_bos_token_id=mbart_tokenizer.convert_tokens_to_ids(target_tag),
    )
    return mbart_tokenizer.decode(generated_tokens[0], skip_special_tokens=True)

In [None]:
# Load Intent Classifier: model and tokenizer share one checkpoint.
intent_model = AutoModelForSequenceClassification.from_pretrained("Falconsai/intent_classification")
intent_tokenizer = AutoTokenizer.from_pretrained("Falconsai/intent_classification")

# Predicted class index -> intent name (indices 0..4, in list order).
# NOTE(review): this mapping is hard-coded in the notebook; confirm it matches
# the checkpoint's own label ids (e.g. intent_model.config.id2label).
intent_labels = dict(enumerate(["weather", "greeting", "goodbye", "thanks", "joke"]))

In [None]:
def classify_intent(text, min_confidence=0.6):
    """Classify a single utterance into one of the ``intent_labels`` names.

    Args:
        text: The utterance to classify (a single string).
        min_confidence: Minimum softmax probability the top class must reach.
            Predictions below it are reported as ``"unknown"``. Defaults to
            0.6, the threshold this function used before it was configurable.

    Returns:
        The intent name for the top class, or ``"unknown"`` when the top
        probability is below ``min_confidence`` or the predicted index has no
        entry in ``intent_labels``.
    """
    inputs = intent_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    # Inference only: skip gradient tracking.
    with torch.no_grad():
        logits = intent_model(**inputs).logits
    probs = torch.nn.functional.softmax(logits, dim=1)
    conf, pred = torch.max(probs, dim=1)
    if conf.item() < min_confidence:
        return "unknown"
    return intent_labels.get(pred.item(), "unknown")


In [None]:
# DialoGPT for dialogue: causal language model plus its tokenizer,
# both loaded from the "microsoft/DialoGPT-medium" checkpoint.
dialogue_model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-medium")
dialogue_tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-medium")

In [None]:
# Chat history memory: token ids of the conversation so far (None until the
# first turn). Re-run this cell to reset the conversation.
chat_history_ids = None

def generate_dialogue_response(user_input_en, max_new_tokens=128):
    """Generate the bot's next reply and extend the stored conversation.

    The user turn (terminated by the tokenizer's EOS token) is appended to the
    global ``chat_history_ids``. The model continues from there and the newly
    generated tokens are decoded as the reply.

    Args:
        user_input_en: The user's message, in English.
        max_new_tokens: Upper bound on the number of tokens generated for the
            reply.

    Returns:
        The decoded reply with special tokens stripped.
    """
    global chat_history_ids
    new_input_ids = dialogue_tokenizer.encode(user_input_en + dialogue_tokenizer.eos_token, return_tensors='pt')

    if chat_history_ids is None:
        bot_input_ids = new_input_ids
    else:
        bot_input_ids = torch.cat([chat_history_ids, new_input_ids], dim=-1)

    # The history grows every turn, but the model can only attend to a fixed
    # number of positions. Keep only the most recent tokens so that prompt
    # plus reply always fits, instead of failing once the chat gets long.
    context_limit = getattr(dialogue_model.config, "max_position_embeddings", 1024)
    max_prompt_len = max(1, context_limit - max_new_tokens)
    if bot_input_ids.shape[-1] > max_prompt_len:
        bot_input_ids = bot_input_ids[:, -max_prompt_len:]

    chat_history_ids = dialogue_model.generate(
        bot_input_ids,
        max_new_tokens=max_new_tokens,
        pad_token_id=dialogue_tokenizer.eos_token_id,
    )

    # Everything after the prompt is the newly generated reply.
    prompt_len = bot_input_ids.shape[-1]
    response = dialogue_tokenizer.decode(chat_history_ids[0, prompt_len:], skip_special_tokens=True)
    return response

In [None]:
# Rule-based chatbot responses in English.
# Disabled: everything below is a bare string literal, so `responses` and
# `get_response` are never defined; the cell simply evaluates to the string.
'''responses = {
    "hello": "Hello! How can I help you?",
    "how are you": "I'm just a bot, but I'm doing fine!",
    "what is your name": "I'm a multilingual chatbot.",
    "bye": "Goodbye! Have a great day!",
    "default": "I'm not sure how to respond to that.",
    "thanks": "You're welcome!",
    "sorry": "It's okay.",
    }

# Response generator
def get_response(user_input):
    user_input = user_input.lower()
    for key in responses:
        if key in user_input:
            return responses[key]
    return "Sorry, I didn’t understand that."
'''


'responses = {\n    "hello": "Hello! How can I help you?",\n    "how are you": "I\'m just a bot, but I\'m doing fine!",\n    "what is your name": "I\'m a multilingual chatbot.",\n    "bye": "Goodbye! Have a great day!",\n    "default": "I\'m not sure how to respond to that.",\n    "thanks": "You\'re welcome!",\n    "sorry": "It\'s okay.",\n    }\n\n# Response generator\ndef get_response(user_input):\n    user_input = user_input.lower()\n    for key in responses:\n        if key in user_input:\n            return responses[key]\n    return "Sorry, I didn’t understand that."\n'

In [None]:
import langid

def detect_language(text):
    """Return the language code that ``langid`` assigns to ``text``.

    ``langid.classify`` yields a pair; only its first element (the language
    code) is returned and the second element is discarded.
    """
    return langid.classify(text)[0]

In [None]:
# Chatbot pipeline
def multilingual_chatbot(user_input):
    """Answer ``user_input`` in the language it was written in.

    Steps: detect the language, translate the message to English if needed,
    classify its intent (printed for inspection only), generate an English
    reply with the dialogue model, then translate the reply back into the
    user's language.

    Args:
        user_input: The raw message typed by the user.

    Returns:
        The reply in the detected language, or a fixed apology string when the
        detected language is not a key of ``lang_map``.
    """
    detected_lang = detect_language(user_input)
    print("Detected language: ",detected_lang)
    if detected_lang not in lang_map:
        return "Sorry, I don't support this language yet."

    needs_translation = detected_lang != "en"

    # Translate the incoming message to English first.
    if needs_translation:
        input_en = translate(user_input, detected_lang, "en")
    else:
        input_en = user_input

    # Classify intent (currently informational only).
    intent = classify_intent(input_en)
    print("Intent: ",intent)

    # Generate the response in English.
    response_en = generate_dialogue_response(input_en)
    # Show the generated reply here, not the translated input.
    print("Response in English: ",response_en)

    # Translate the reply back into the user's language: the source is English
    # and the target is the detected language.
    if needs_translation:
        return translate(response_en, "en", detected_lang)
    return response_en



In [None]:
# Example interaction: read messages from the user until they type
# "exit" or "quit" (case-insensitive), printing the bot's reply each turn.
print("Multilingual Chatbot\n")
print("Hi I am MBot.(Type 'quit' to exit)")
while True:
    user_input = input("\nYou: ")
    wants_to_leave = user_input.lower() in ("exit", "quit")
    if wants_to_leave:
        break
    response = multilingual_chatbot(user_input)
    print("Bot:", response)

Multilingual Chatbot

Hi I am MBot.(Type 'quit' to exit)

You: आप कैसे हैं
Detected language:  hi
Intent:  unknown
Response in English:  How are you
Bot: I 'm good, how are you?

You: Quit
