In [None]:
pip install nltk textblob langdetect


Collecting langdetect
  Downloading langdetect-1.0.9.tar.gz (981 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m981.5/981.5 kB[0m [31m13.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: langdetect
  Building wheel for langdetect (setup.py) ... [?25l[?25hdone
  Created wheel for langdetect: filename=langdetect-1.0.9-py3-none-any.whl size=993222 sha256=71d6bfdf269f7e04ff360c3cbfb7d79b485c136b2ad344037f845b8437748d42
  Stored in directory: /root/.cache/pip/wheels/95/03/7d/59ea870c70ce4e5a370638b5462a7711ab78fba2f655d05106
Successfully built langdetect
Installing collected packages: langdetect
Successfully installed langdetect-1.0.9


In [None]:
import nltk

# Fetch the 'punkt' tokenizer package into the local nltk_data directory.
# Kept as the cell's last expression so the call's return value is displayed.
nltk.download("punkt")


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [None]:
from nltk.translate.bleu_score import sentence_bleu
from textblob import TextBlob
from langdetect import detect

# Sample Inputs and Bot Responses
# Each record keeps one user input, its expected response and the bot response
# together, so the three parallel lists derived below cannot drift out of step.
_SAMPLES = [
    {
        "input": "मेरे परिवार में मानसिक स्वास्थ्य के बारे में बात करना एक टैबू है।",
        "expected": "यह समझना मुश्किल है, लेकिन किसी प्रोफेशनल से बात करना फायदेमंद हो सकता है।",
        "bot": "Oh, I see! It can be challenging to open up to family members, especially when it comes to mental health. Have you considered talking to a professional?",
    },
    {
        "input": "त्योहारों के दौरान मुझे हमेशा अपने परिवार के लिए खुश रहना पड़ता है, लेकिन अंदर से मैं तनाव में रहता हूँ।",
        "expected": "आपके खुद के लिए समय निकालना महत्वपूर्ण है और अपने परिवार के साथ अपनी भावनाओं को साझा करना भी सहायक हो सकता है।",
        "bot": "It's important to prioritize your own happiness while also taking care of your family's needs. Take time for yourself each day.",
    },
]

inputs = [sample["input"] for sample in _SAMPLES]
expected_responses = [sample["expected"] for sample in _SAMPLES]
bot_responses = [sample["bot"] for sample in _SAMPLES]

# Cultural Keywords for Sensitivity Check (Example for Hindi context)
cultural_keywords = {
    "Hindi": [
        "परिवार",
        "त्योहार",
        "टैबू",
        "प्रोफेशनल",
        "खुश रहना",
    ],
}

# Function to Calculate BLEU Score
def calculate_bleu(reference, response):
    """Return the sentence-level BLEU score of ``response`` against ``reference``.

    Both strings are tokenized on whitespace. Smoothing (method1) is applied so
    that a missing higher-order n-gram overlap does not force the whole score to
    zero, which is what NLTK warns about for short sentences. This matches the
    smoothed ``calculate_bleu`` defined later in this notebook.

    Args:
        reference: The expected response text.
        response: The response text being scored.

    Returns:
        The score returned by ``sentence_bleu``.
    """
    # Imported here because this cell's import line only brings in sentence_bleu.
    from nltk.translate.bleu_score import SmoothingFunction

    reference_tokens = [reference.split()]  # sentence_bleu expects a list of references
    response_tokens = response.split()
    return sentence_bleu(
        reference_tokens,
        response_tokens,
        smoothing_function=SmoothingFunction().method1,
    )

# Function for Sentiment Analysis
def analyze_sentiment(response):
    """Return the sentiment polarity TextBlob reports for ``response``."""
    return TextBlob(response).sentiment.polarity

# Function for Language Detection
def detect_language(response):
    """Return the language code langdetect assigns to ``response``.

    Detection is best-effort: if ``detect`` raises, the fixed sentinel
    ``"unknown"`` is returned. Previously the exception message itself was
    returned, so it could end up in the results as if it were a language code.

    Args:
        response: Text whose language should be detected.

    Returns:
        The value returned by ``detect``, or ``"unknown"`` if detection failed.
    """
    try:
        return detect(response)
    except Exception:
        # Deliberately broad: any detection failure is reported as "unknown"
        # so one bad sample does not abort an evaluation run.
        return "unknown"

# Function for Cultural Sensitivity Check
def check_cultural_sensitivity(language, response):
    """Score how many of a language's cultural keywords appear in ``response``.

    Keywords are looked up in the module-level ``cultural_keywords`` mapping
    and matched as plain substrings of ``response``.

    Returns:
        A ``(score, hits)`` tuple: the fraction of keywords found (``0`` when
        the language has no keywords) and the list of keywords that matched.
    """
    keywords = cultural_keywords.get(language, [])

    hits = []
    for keyword in keywords:
        if keyword in response:
            hits.append(keyword)

    if not keywords:
        return 0, hits
    return len(hits) / len(keywords), hits

# Evaluation
def evaluate_responses(inputs, expected_responses, bot_responses, language_code):
    """Evaluate each bot response against its input and expected response.

    For every sample this computes a BLEU score, a sentiment value, the
    detected language of the bot response, whether that language matches the
    detected language of the input, and a cultural keyword score.

    Args:
        inputs: User input texts.
        expected_responses: Expected response texts, aligned with ``inputs``.
        bot_responses: Bot response texts, aligned with ``inputs``.
        language_code: Key passed to ``check_cultural_sensitivity`` to select
            the keyword list (called with ``"Hindi"`` in this notebook).

    Returns:
        A list with one result dict per sample.

    Raises:
        ValueError: If the three lists do not have the same length.
    """
    if not (len(inputs) == len(expected_responses) == len(bot_responses)):
        raise ValueError(
            "inputs, expected_responses and bot_responses must have the same length"
        )

    results = []

    for input_text, expected, bot_response in zip(inputs, expected_responses, bot_responses):
        # BLEU Score
        bleu_score = calculate_bleu(expected, bot_response)

        # Sentiment Analysis
        sentiment = analyze_sentiment(bot_response)

        # Language Detection
        detected_language = detect_language(bot_response)

        # Cultural Sensitivity Check.
        # Relies on the (language, response) signature defined in this cell; a
        # later cell redefines the same name with (response, cultural_context).
        sensitivity_score, matched_keywords = check_cultural_sensitivity(language_code, bot_response)

        # Check if the response language matches the input language. Input
        # detection is guarded like response detection, so an undetectable
        # input yields "no match" instead of aborting the whole run.
        try:
            input_language = detect(input_text)
        except Exception:
            input_language = None
        language_match = input_language is not None and detected_language == input_language

        results.append({
            "Input": input_text,
            "Expected Response": expected,
            "Bot Response": bot_response,
            "BLEU Score": round(bleu_score, 2),
            "Sentiment": round(sentiment, 2),
            "Detected Language": detected_language,
            "Language Match": language_match,
            "Cultural Sensitivity Score": round(sensitivity_score, 2),
            "Matched Cultural Keywords": matched_keywords
        })

    return results

# Run Evaluation for Hindi
results = evaluate_responses(inputs, expected_responses, bot_responses, "Hindi")

# Display Results: one "label: value" line per field, then a separator per sample
report_fields = (
    "Input",
    "Expected Response",
    "Bot Response",
    "BLEU Score",
    "Sentiment",
    "Detected Language",
    "Language Match",
    "Cultural Sensitivity Score",
    "Matched Cultural Keywords",
)
for result in results:
    for field in report_fields:
        print(f"{field}:", result[field])
    print("-" * 50)


Input: मेरे परिवार में मानसिक स्वास्थ्य के बारे में बात करना एक टैबू है।
Expected Response: यह समझना मुश्किल है, लेकिन किसी प्रोफेशनल से बात करना फायदेमंद हो सकता है।
Bot Response: Oh, I see! It can be challenging to open up to family members, especially when it comes to mental health. Have you considered talking to a professional?
BLEU Score: 0
Sentiment: 0.1
Detected Language: en
Language Match: False
Cultural Sensitivity Score: 0.0
Matched Cultural Keywords: []
--------------------------------------------------
Input: त्योहारों के दौरान मुझे हमेशा अपने परिवार के लिए खुश रहना पड़ता है, लेकिन अंदर से मैं तनाव में रहता हूँ।
Expected Response: आपके खुद के लिए समय निकालना महत्वपूर्ण है और अपने परिवार के साथ अपनी भावनाओं को साझा करना भी सहायक हो सकता है।
Bot Response: It's important to prioritize your own happiness while also taking care of your family's needs. Take time for yourself each day.
BLEU Score: 0
Sentiment: 0.57
Detected Language: en
Language Match: False
Cultural Sensitivity S

In [None]:
# Function for Cultural Sensitivity Check (Language Agnostic)
def check_cultural_sensitivity(response, cultural_context):
    """Score how many cultural-context keywords appear in ``response``.

    Matching is a case-insensitive substring test: both the response and each
    keyword are lower-cased before comparing. Previously only the response was
    lower-cased, so a keyword containing an uppercase letter could never match.

    Note: this reuses the name of the earlier
    ``check_cultural_sensitivity(language, response)`` with a different
    parameter list, so running this cell replaces that definition.

    Args:
        response: The text to inspect.
        cultural_context: Keywords or phrases to look for.

    Returns:
        A ``(sensitivity_score, matched_keywords)`` tuple: the fraction of
        keywords found (``0`` when ``cultural_context`` is empty) and the
        matched keywords exactly as given in ``cultural_context``.
    """
    response_lower = response.lower()  # lower-case once, not once per keyword
    matched_keywords = [word for word in cultural_context if word.lower() in response_lower]
    sensitivity_score = len(matched_keywords) / len(cultural_context) if cultural_context else 0
    return sensitivity_score, matched_keywords

# Example Cultural Context for Hindi Input
cultural_context = [
    "family",
    "festival",
    "obligations",
    "stress",
    "sharing feelings",
]

# Bot Response (in English)
bot_response = "It's important to prioritize your own happiness while also taking care of your family's needs. Take time for yourself each day to relax."

# Test Cultural Sensitivity: score the response, then report score and matches
sensitivity_score, matched_keywords = check_cultural_sensitivity(bot_response, cultural_context)

print(
    f"Cultural Sensitivity Score: {sensitivity_score}",
    f"Matched Cultural Keywords: {matched_keywords}",
    sep="\n",
)


Cultural Sensitivity Score: 0.2
Matched Cultural Keywords: ['family']


## Zero-Shot Prompting Technique

In [None]:
import nltk
import pandas as pd
from langdetect import detect
from nltk.translate.bleu_score import SmoothingFunction, sentence_bleu
from textblob import TextBlob
from transformers import pipeline

# Download necessary NLTK data (each package once; 'punkt' used to be requested twice)
for nltk_resource in ("punkt", "punkt_tab"):
    nltk.download(nltk_resource)

# Initialize sentiment analysis pipeline. The model and revision are the ones the
# library reported defaulting to when none was supplied; naming them explicitly
# keeps the results tied to a known model.
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
    revision="714eb0f",
)

# detect() reports short language codes (the recorded output shows "en"), so the
# language names used in test_data are mapped to codes before comparing.
LANGUAGE_CODES = {
    "Arabic": "ar",
    "French": "fr",
    "Spanish": "es",
}

# Keywords for the basic cultural sensitivity check, keyed by input language.
# Defined once here instead of being rebuilt on every loop iteration.
# NOTE(review): the keywords are in the input language while every sample
# response below is English, so this check cannot match -- confirm the intent.
cultural_keywords = {
    "Arabic": ["عائلة", "احتفال", "حزن"],
    "French": ["fêtes", "sentiments", "famille"],
    "Spanish": ["padres", "responsabilidad", "abrumador"],
}

# Smoothing keeps a missing higher-order n-gram overlap from collapsing the
# whole BLEU score to ~0, as the NLTK warning recommends for short sentences.
bleu_smoothing = SmoothingFunction().method1

# Sample data (replace this with your actual input and bot responses)
test_data = [
    {"language": "Arabic", "input": "أشعر بالحزن عندما أرى الاحتفالات العائلية.", "response": "It can be challenging to deal with the emotional aftermath of family gatherings.", "reference": "It’s hard for me to see family gatherings without feeling sad."},
    {"language": "French", "input": "Je trouve difficile de parler de mes sentiments pendant les fêtes.", "response": "During family gatherings, it's common to feel overwhelmed or drained.", "reference": "I find it hard to express my feelings during the holidays."},
    {"language": "Spanish", "input": "Me siento responsable de cuidar a mis padres ancianos, pero es abrumador.", "response": "Caring for elderly parents can be emotionally demanding.", "reference": "I feel overwhelmed by the responsibility of caring for my elderly parents."},
]

# Initialize results list
results = []

# Evaluate every sample
for data in test_data:
    input_text = data["input"]
    response = data["response"]
    reference = data["reference"]
    language = data["language"]

    # 1. BLEU Score Calculation (case-insensitive, word-tokenized, smoothed)
    reference_tokens = [nltk.word_tokenize(reference.lower())]
    response_tokens = nltk.word_tokenize(response.lower())
    bleu_score = sentence_bleu(reference_tokens, response_tokens, smoothing_function=bleu_smoothing)

    # 2. Sentiment Analysis
    response_sentiment = sentiment_pipeline(response)[0]
    sentiment_score = response_sentiment['score']
    sentiment_label = response_sentiment['label']

    # 3. Language Detection
    detected_language = detect(response)

    # 4. Cultural Sensitivity Check (Basic Keyword Analysis)
    cultural_match = any(keyword in response.lower() for keyword in cultural_keywords.get(language, []))

    # Store results
    result = {
        "Language": language,
        "BLEU Score": bleu_score,
        "Sentiment Score": sentiment_score,
        "Sentiment Label": sentiment_label,
        "Detected Language": detected_language,
        "Cultural Sensitivity": cultural_match,
        # Compare code with code; comparing "en" with "arabic" could never be True.
        "Language Match": detected_language == LANGUAGE_CODES.get(language)
    }
    results.append(result)

# Display results
df_results = pd.DataFrame(results)
print(df_results)


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still o

config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


  Language     BLEU Score  Sentiment Score Sentiment Label Detected Language  \
0   Arabic  6.072910e-155         0.998888        POSITIVE                en   
1   French  1.356868e-231         0.997213        NEGATIVE                en   
2  Spanish  5.838742e-155         0.997374        NEGATIVE                en   

   Cultural Sensitivity  Language Match  
0                 False           False  
1                 False           False  
2                 False           False  


In [None]:
import nltk
from nltk.translate.bleu_score import SmoothingFunction, sentence_bleu
from textblob import TextBlob
from langdetect import detect
from transformers import pipeline
import pandas as pd

# Download necessary NLTK data. 'punkt_tab' is fetched here as well (the earlier
# zero-shot cell downloads it) so this cell does not depend on that cell having run.
for nltk_resource in ("punkt", "punkt_tab"):
    nltk.download(nltk_resource)

# Initialize sentiment analysis pipeline. The model and revision are the ones the
# library reported defaulting to when none was supplied; naming them explicitly
# keeps the results tied to a known model.
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
    revision="714eb0f",
)

# detect() reports short language codes (the recorded output shows "en"), so the
# language names used in test_data are mapped to codes before comparing.
LANGUAGE_CODES = {
    "Arabic": "ar",
    "French": "fr",
    "Spanish": "es",
    "Hindi": "hi",
    "Korean": "ko",
    "Russian": "ru",
    "Japanese": "ja",
}

# Smoothing keeps a missing higher-order n-gram overlap from collapsing the
# whole BLEU score to ~0, as the NLTK warning recommends for short sentences.
bleu_smoothing = SmoothingFunction().method1

# Sample data for all languages
test_data = [
    {"language": "Arabic", "input": "أشعر بالحزن عندما أرى الاحتفالات العائلية.",
     "response": "It can be challenging to deal with the emotional aftermath of family gatherings.",
     "reference": "It’s hard for me to see family gatherings without feeling sad."},

    {"language": "French", "input": "Je trouve difficile de parler de mes sentiments pendant les fêtes.",
     "response": "During family gatherings, it's common to feel overwhelmed or drained.",
     "reference": "I find it hard to express my feelings during the holidays."},

    {"language": "Spanish", "input": "Me siento responsable de cuidar a mis padres ancianos, pero es abrumador.",
     "response": "Caring for elderly parents can be emotionally demanding.",
     "reference": "I feel overwhelmed by the responsibility of caring for my elderly parents."},

    {"language": "Hindi", "input": "मेरे परिवार में मानसिक स्वास्थ्य के बारे में बात करना एक टैबू है।",
     "response": "It can be difficult to talk about mental health in families where it's considered taboo.",
     "reference": "In my family, talking about mental health is a taboo."},

    {"language": "Korean", "input": "가족 모임에서 항상 행복해 보여야 한다는 압박감을 느낍니다.",
     "response": "Feeling pressure to always be happy around family is common.",
     "reference": "I feel pressured to look happy during family gatherings."},

    {"language": "Russian", "input": "Я испытываю тревогу, видя конфликты и новости о войне.",
     "response": "It's understandable to feel anxious when exposed to conflict or war news.",
     "reference": "I feel anxious seeing conflicts and hearing news about the war."},

    {"language": "Japanese", "input": "私は感情を見せない方がいいと言われますが、それが苦しいです。",
     "response": "It's hard when you're told to hide your emotions, but it’s important to express them.",
     "reference": "I’m often told not to show my emotions, but it feels painful."}
]

# Define cultural keywords for sensitivity check
# NOTE(review): the keywords are in the input language while every sample
# response above is English, so this check cannot match -- confirm the intent.
cultural_keywords = {
    "Arabic": ["عائلة", "احتفال", "حزن"],
    "French": ["fêtes", "sentiments", "famille"],
    "Spanish": ["padres", "responsabilidad", "abrumador"],
    "Hindi": ["परिवार", "स्वास्थ्य", "मानसिक"],
    "Korean": ["가족", "행복", "압박감"],
    "Russian": ["тревога", "война", "конфликт"],
    "Japanese": ["感情", "見せない", "苦しい"]
}

# Initialize results list
results = []

# Evaluate every sample
for data in test_data:
    input_text = data["input"]
    response = data["response"]
    reference = data["reference"]
    language = data["language"]

    # 1. BLEU Score Calculation (case-insensitive, word-tokenized, smoothed)
    reference_tokens = [nltk.word_tokenize(reference.lower())]
    response_tokens = nltk.word_tokenize(response.lower())
    bleu_score = sentence_bleu(reference_tokens, response_tokens, smoothing_function=bleu_smoothing)

    # 2. Sentiment Analysis
    response_sentiment = sentiment_pipeline(response)[0]
    sentiment_score = response_sentiment['score']
    sentiment_label = response_sentiment['label']

    # 3. Language Detection
    detected_language = detect(response)

    # 4. Cultural Sensitivity Check (Basic Keyword Analysis)
    cultural_match = any(keyword in response.lower() for keyword in cultural_keywords.get(language, []))

    # Store results
    result = {
        "Language": language,
        "BLEU Score": bleu_score,
        "Sentiment Score": sentiment_score,
        "Sentiment Label": sentiment_label,
        "Detected Language": detected_language,
        "Cultural Sensitivity": cultural_match,
        # Compare code with code; comparing "en" with "arabic" could never be True.
        "Language Match": detected_language == LANGUAGE_CODES.get(language)
    }
    results.append(result)

# Display results
df_results = pd.DataFrame(results)
print(df_results)


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of low

   Language     BLEU Score  Sentiment Score Sentiment Label Detected Language  \
0    Arabic  6.072910e-155         0.998888        POSITIVE                en   
1    French  1.356868e-231         0.997213        NEGATIVE                en   
2   Spanish  5.838742e-155         0.997374        NEGATIVE                en   
3     Hindi   3.147670e-78         0.996663        NEGATIVE                en   
4    Korean  1.414735e-231         0.955125        NEGATIVE                en   
5   Russian  6.072910e-155         0.964894        POSITIVE                en   
6  Japanese   1.352046e-01         0.998227        POSITIVE                en   

   Cultural Sensitivity  Language Match  
0                 False           False  
1                 False           False  
2                 False           False  
3                 False           False  
4                 False           False  
5                 False           False  
6                 False           False  


## One-Shot Prompting Technique

In [None]:
import nltk
import pandas as pd
from nltk.translate.bleu_score import SmoothingFunction, sentence_bleu
from textblob import TextBlob
from langdetect import detect
from transformers import pipeline

# Download necessary NLTK data. 'punkt_tab' is fetched here as well (the earlier
# zero-shot cell downloads it) so this cell does not depend on that cell having run.
for nltk_resource in ("punkt", "punkt_tab"):
    nltk.download(nltk_resource)

# Initialize sentiment analysis pipeline. The model and revision are the ones the
# library reported defaulting to when none was supplied; naming them explicitly
# keeps the results tied to a known model.
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
    revision="714eb0f",
)

# Keywords for the basic cultural sensitivity check. Defined once here instead
# of being rebuilt on every loop iteration.
# NOTE(review): the keywords are Hindi while both sample responses below are
# English, so this check cannot match -- confirm the intent.
cultural_keywords = {
    "Hindi": ["परिवार", "भावनाओं", "कमजोरी", "तनाव", "अपेक्षाओं"],
}

# Smoothing keeps a missing higher-order n-gram overlap from collapsing the
# whole BLEU score to ~0, as the NLTK warning recommends for short sentences.
bleu_smoothing = SmoothingFunction().method1

# Example data for evaluation
test_data = [
    {
        "language": "Hindi",
        "input": "मेरे लिए अपनी भावनाओं को परिवार के सामने व्यक्त करना कठिन होता है, क्योंकि इसे कमजोरी समझा जाता है। क्या आप मुझे कोई सुझाव दे सकते हैं?",
        "response": "To navigate feelings of uncertainty and overwhelm when expressing emotions, practice self-compassion, identify your emotions, find a safe space, express yourself creatively, and challenge societal expectations. These strategies can help you gain clarity and understanding about your emotions.",
        "reference": "It can be difficult to express feelings in front of family as it is seen as a weakness. How can I handle this situation?"
    },
    {
        "language": "Hindi",
        "input": "मुझे अपने परिवार की अपेक्षाओं को पूरा करने में बहुत तनाव महसूस होता है, खासकर जब वे मुझसे हमेशा खुश रहने की उम्मीद करते हैं। मुझे इससे कैसे निपटना चाहिए?",
        "response": "Ah, I see. It can be challenging to deal with the constant pressure to conform to societal expectations, especially when it comes to expressing yourself and being true to who you are. Can you tell me more about what you mean by 'constantly feeling like I'm not good enough'?",
        "reference": "I feel stressed meeting my family’s expectations, especially when they always expect me to be happy. How should I manage this?"
    }
]

results = []

for data in test_data:
    input_text = data["input"]
    response = data["response"]
    reference = data["reference"]
    language = data["language"]

    # BLEU score (case-insensitive, word-tokenized, smoothed)
    reference_tokens = [nltk.word_tokenize(reference.lower())]
    response_tokens = nltk.word_tokenize(response.lower())
    bleu_score = sentence_bleu(reference_tokens, response_tokens, smoothing_function=bleu_smoothing)

    # Sentiment of the response
    response_sentiment = sentiment_pipeline(response)[0]
    sentiment_score = response_sentiment['score']
    sentiment_label = response_sentiment['label']

    # Language of the response
    detected_language = detect(response)

    # Basic keyword check
    cultural_match = any(keyword in response.lower() for keyword in cultural_keywords.get(language, []))

    result = {
        "Language": language,
        "BLEU Score": bleu_score,
        "Sentiment Score": sentiment_score,
        "Sentiment Label": sentiment_label,
        "Detected Language": detected_language,
        "Cultural Sensitivity": cultural_match,
        "Language Match": detected_language == "hi"  # "hi" is the language code for Hindi
    }
    results.append(result)

# Display results
df_results = pd.DataFrame(results)
print(df_results)


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of low

  Language     BLEU Score  Sentiment Score Sentiment Label Detected Language  \
0    Hindi  1.144141e-231         0.999449        POSITIVE                en   
1    Hindi   5.230954e-02         0.827303        POSITIVE                en   

   Cultural Sensitivity  Language Match  
0                 False           False  
1                 False           False  


In [None]:
import nltk
import pandas as pd
from nltk.translate.bleu_score import SmoothingFunction, sentence_bleu
from langdetect import detect
from transformers import pipeline

# Download NLTK tokenizer data. 'punkt_tab' is fetched here as well (the earlier
# zero-shot cell downloads it) so this cell does not depend on that cell having run.
for nltk_resource in ("punkt", "punkt_tab"):
    nltk.download(nltk_resource)

# Initialize sentiment analysis pipeline. The model and revision are the ones the
# library reported defaulting to when none was supplied; naming them explicitly
# keeps the results tied to a known model.
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
    revision="714eb0f",
)

# Keywords for the basic cultural sensitivity check, keyed by input language.
# Defined once here instead of being rebuilt on every loop iteration.
# NOTE(review): the keywords are in the input language while every sample
# response below is English, so this check cannot match -- confirm the intent.
cultural_keywords = {
    "Arabic": ["عائلة", "نصائح", "مشاكل"],
    "French": ["attentes", "submergé", "société"],
    "Hindi": ["भावनाओं", "परिवार", "व्यक्त"]
}

# Smoothing keeps a missing higher-order n-gram overlap from collapsing the
# whole BLEU score to ~0, as the NLTK warning recommends for short sentences.
bleu_smoothing = SmoothingFunction().method1

# Example test data for one-shot prompting (replace with actual responses)
test_data = [
    {
        "language": "Arabic",
        "input": "أشعر بالقلق عندما أتحدث عن مشاكلي العائلية. هل يمكن أن تعطيني بعض النصائح؟",
        "response": "Dealing with family issues can be tough, but there are ways to stay motivated and focused.",
        "reference": "I feel stressed when I talk about my family problems. Can you give me some advice?"
    },
    {
        "language": "French",
        "input": "Parfois, je me sens submergé par les attentes sociales.",
        "response": "Feeling overwhelmed by pressure to succeed professionally during holidays?",
        "reference": "Sometimes I feel overwhelmed by societal expectations."
    },
    {
        "language": "Hindi",
        "input": "मेरे लिए अपनी भावनाओं को परिवार के सामने व्यक्त करना कठिन होता है।",
        "response": "To navigate feelings of uncertainty and overwhelm when expressing emotions, practice self-compassion.",
        "reference": "It is difficult for me to express my emotions in front of my family."
    }
]

# Initialize results list
results = []

# Evaluate every sample
for data in test_data:
    input_text = data["input"]
    response = data["response"]
    reference = data["reference"]
    language = data["language"]

    # 1. BLEU Score Calculation (case-insensitive, word-tokenized, smoothed)
    reference_tokens = [nltk.word_tokenize(reference.lower())]
    response_tokens = nltk.word_tokenize(response.lower())
    bleu_score = sentence_bleu(reference_tokens, response_tokens, smoothing_function=bleu_smoothing)

    # 2. Sentiment Analysis
    sentiment = sentiment_pipeline(response)[0]
    sentiment_score = sentiment['score']
    sentiment_label = sentiment['label']

    # 3. Language Detection
    detected_language = detect(response)

    # 4. Cultural Sensitivity Check (Basic Keyword Match)
    cultural_match = any(keyword in response.lower() for keyword in cultural_keywords.get(language, []))

    # Store results
    result = {
        "Language": language,
        "BLEU Score": bleu_score,
        "Sentiment Score": sentiment_score,
        "Sentiment Label": sentiment_label,
        "Detected Language": detected_language,
        "Cultural Sensitivity": cultural_match
    }
    results.append(result)

# Display results in a table format
df_results = pd.DataFrame(results)
print(df_results)

# Save results to a CSV file (optional)
df_results.to_csv("one_shot_prompting_evaluation.csv", index=False)


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of low

  Language     BLEU Score  Sentiment Score Sentiment Label Detected Language  \
0   Arabic  1.164047e-231         0.990799        POSITIVE                en   
1   French  5.759292e-155         0.998487        NEGATIVE                en   
2    Hindi  1.240139e-231         0.859822        POSITIVE                en   

   Cultural Sensitivity  
0                 False  
1                 False  
2                 False  


In [None]:
# Import necessary libraries
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from transformers import pipeline
from langdetect import detect
import pandas as pd

# Example data
# NOTE(review): the "reference" below is Arabic while the response is English,
# so a whitespace-token BLEU between them has no overlap (the recorded output
# shows 0). Confirm whether an English reference was intended.
data = [
    {
        "input_language": "ar",
        "reference": "أشعر بالخجل عند التعبير عن مشاعري أمام عائلتي. كيف يمكنني التغلب على هذا الشعور؟",
        "response": "You're going through a tough time with a family member's mental health issues. It's okay to feel overwhelmed and alone, but know you're not. Many people understand what you're going through and there are resources available for support. Consider reaching out to a therapist or counselor for a safe space to talk and get guidance. Take things one step at a time and focus on self-care. Feel free to share more or ask questions, I'm here to listen and help."
    }
]

# Initialize sentiment analysis pipeline. The model and revision are the ones the
# library reported defaulting to when none was supplied; naming them explicitly
# keeps the results tied to a known model.
sentiment_analyzer = pipeline(
    "sentiment-analysis",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
    revision="714eb0f",
)

# Initialize smoothing function for BLEU
smoothing = SmoothingFunction().method1

# Function to calculate BLEU score
def calculate_bleu(reference, response):
    """Return the smoothed sentence-level BLEU of ``response`` against ``reference``.

    Both strings are split on whitespace; the single reference is wrapped in a
    list because ``sentence_bleu`` takes a list of references. Smoothing comes
    from the module-level ``smoothing`` function.
    """
    references = [reference.split()]
    hypothesis = response.split()
    return sentence_bleu(references, hypothesis, smoothing_function=smoothing)

# Function to detect language match
def detect_language_match(input_language, response):
    """Detect the language of ``response`` and compare it with ``input_language``.

    Args:
        input_language: Expected language code (e.g. ``"ar"``).
        response: Text whose language should be detected.

    Returns:
        A ``(detected_language, language_match)`` tuple. If ``detect`` raises,
        ``("unknown", False)`` is returned so that one undetectable response
        does not abort the whole evaluation loop.
    """
    try:
        detected_language = detect(response)
    except Exception:
        # Deliberately broad: any detection failure counts as "no match".
        return "unknown", False
    return detected_language, detected_language == input_language

# Initialize results list
results = []

# Score every entry: BLEU first, then sentiment, then the language check
for entry in data:
    reference, response, input_language = (
        entry[key] for key in ("reference", "response", "input_language")
    )

    bleu_score = calculate_bleu(reference, response)

    sentiment = sentiment_analyzer(response)[0]
    sentiment_score, sentiment_label = sentiment["score"], sentiment["label"]

    detected_language, language_match = detect_language_match(input_language, response)

    record = {
        "Input Language": input_language,
        "Reference": reference,
        "Response": response,
        "BLEU Score": bleu_score,
        "Sentiment Score": sentiment_score,
        "Sentiment Label": sentiment_label,
        "Detected Language": detected_language,
        "Language Match": language_match,
    }
    results.append(record)

# Tabulate the per-entry metrics and show them
results_df = pd.DataFrame(results)
print(results_df)

# Optionally, save results to a CSV
results_df.to_csv("multilingual_response_metrics.csv", index=False)


No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


  Input Language                                          Reference  \
0             ar  أشعر بالخجل عند التعبير عن مشاعري أمام عائلتي....   

                                            Response  BLEU Score  \
0  You're going through a tough time with a famil...           0   

   Sentiment Score Sentiment Label Detected Language  Language Match  
0         0.997258        POSITIVE                en           False  
