In [None]:
import re

def detect_language(response, threshold=0.9):
    """
    Detect if the response is primarily in Hindi.

    The check is script based: Devanagari characters are compared with the
    number of letters in the response. Whitespace, ASCII digits and
    punctuation are left out of the comparison, so an ordinary Hindi sentence
    (which contains spaces) can actually reach the threshold.

    Args:
        response (str): The text to classify.
        threshold (float): Share of letters that must be Devanagari for the
            text to count as Hindi. The share has to be strictly greater
            than this value. Defaults to 0.9.
    Returns:
        str: "Hindi" or "Mixed/English".
    """
    hindi_chars = re.findall(r'[\u0900-\u097F]', response)  # Devanagari Unicode range

    # Letters of every other script (Latin, Hangul, ...). Devanagari is counted
    # with the regex above because its vowel signs are combining marks, which
    # str.isalpha() does not report as letters.
    other_letters = [
        ch for ch in response
        if ch.isalpha() and not '\u0900' <= ch <= '\u097F'
    ]

    total_letters = len(hindi_chars) + len(other_letters)
    if total_letters == 0:
        # Empty text or text made only of digits/punctuation: nothing to classify.
        return "Mixed/English"

    if len(hindi_chars) / total_letters > threshold:
        return "Hindi"
    return "Mixed/English"

def _prompt_rating(prompt):
    """Keep asking with ``prompt`` until an integer from 1 to 5 is entered."""
    while True:
        raw = input(prompt)
        try:
            score = int(raw)
        except ValueError:
            print("Please enter a whole number from 1 to 5.")
            continue
        if 1 <= score <= 5:
            return score
        print("Please enter a whole number from 1 to 5.")


def evaluate_response(query, response):
    """
    Evaluate the model's response based on language, coherence, and relevance.

    The language label comes from ``detect_language``. Coherence and relevance
    are entered interactively by a human rater; invalid entries (non-numeric
    text or values outside 1-5) are rejected and the question is asked again
    instead of crashing or storing an out-of-scale score.

    Args:
        query (str): The prompt that was given to the model.
        response (str): The model's answer.
    Returns:
        dict: Keys "Language", "Coherence" and "Relevance".
    """
    metrics = {}

    metrics["Language"] = detect_language(response)

    # Show the rater what is being scored before asking for the ratings.
    print(f"Query: {query}")
    print(f"Response: {response}")
    metrics["Coherence"] = _prompt_rating("Rate coherence (1-5): ")
    metrics["Relevance"] = _prompt_rating("Rate relevance (1-5): ")

    return metrics

def main():
    """
    Run one manual evaluation on a hard-coded Hindi query/response pair.

    Calls ``evaluate_response`` (which prompts for ratings) and prints every
    metric it returns.
    """
    query = "बच्चे की नींद की समस्या को कैसे हल करें?"
    response = """बच्चे की नींद की समस्या को हल करने के लिए कुछ벤िफिट करें:
    1. शेकड़ता का निर्माण करें: एक सुनिश्चित नींद के लिए Needed अनुसूचित टाइम टेबल बनाएं।
    2. नियमित दिनचर्या का निर्माण करें: नियमित दिनचर्याKids की नींद को नियमित करें।
    3. शोर और हिंसा का निर्माण करें: शोर और हिंसाFree एंटेना में काम आयें। Kids के टीम में निद्रा का माहौल बनाएं।
    4. स्वतन्त्र नींद के लिए: Kids की नींद के लिए स्वतन्त्र शेकड़ता दें।
    5. नींद के लिए मदद: बच्चे के डॉक्टर से नींद के लिए मदद लें।
    6. हेल्पफुल टिप्स: Kids की नींद के लिए हेल्पफुल टिप्स जैसे "सुनिश्चित नींद", "टाइम टेबल", "शोर मुक्त माहौल", "स्वतन्त्र निद्रा" पालन करें।"""

    metrics = evaluate_response(query, response)

    print("\nEvaluation Metrics:")
    for key, value in metrics.items():
        print(f"{key}: {value}")

if __name__ == "__main__":
    main()


Query: बच्चे की नींद की समस्या को कैसे हल करें?
Response: बच्चे की नींद की समस्या को हल करने के लिए कुछ벤िफिट करें:
    1. शेकड़ता का निर्माण करें: एक सुनिश्चित नींद के लिए Needed अनुसूचित टाइम टेबल बनाएं।
    2. नियमित दिनचर्या का निर्माण करें: नियमित दिनचर्याKids की नींद को नियमित करें।
    3. शोर और हिंसा का निर्माण करें: शोर और हिंसाFree एंटेना में काम आयें। Kids के टीम में निद्रा का माहौल बनाएं।
    4. स्वतन्त्र नींद के लिए: Kids की नींद के लिए स्वतन्त्र शेकड़ता दें।
    5. नींद के लिए मदद: बच्चे के डॉक्टर से नींद के लिए मदद लें।
    6. हेल्पफुल टिप्स: Kids की नींद के लिए हेल्पफुल टिप्स जैसे "सुनिश्चित नींद", "टाइम टेबल", "शोर मुक्त माहौल", "स्वतन्त्र निद्रा" पालन करें।
Rate coherence (1-5): 3
Rate relevance (1-5): 4

Evaluation Metrics:
Language: Mixed/English
Coherence: 3
Relevance: 4


In [None]:
# Hand-labelled evaluation records: one dict per query/response pair.
responses = [
    dict(Query="बच्चे की नींद की समस्या को कैसे हल करें?", Response="Response Text", Language="Hindi", Coherence=4, Relevance=5),
    dict(Query="बच्चे को सामाजिक गतिविधियों में कैसे प्रोत्साहित करें?", Response="Response Text", Language="English", Coherence=3, Relevance=3),
]

# Aggregate the labels: counts first, then the two mean ratings and the Hindi share.
total_responses = len(responses)
hindi_responses = [entry["Language"] for entry in responses].count("Hindi")
average_coherence = sum(entry["Coherence"] for entry in responses) / total_responses
average_relevance = sum(entry["Relevance"] for entry in responses) / total_responses
hindi_success_rate = hindi_responses / total_responses * 100

for summary_line in (
    f"Average Coherence: {average_coherence}",
    f"Average Relevance: {average_relevance}",
    f"Percentage of Responses in Hindi: {hindi_success_rate}%",
):
    print(summary_line)


Average Coherence: 3.5
Average Relevance: 4.0
Percentage of Responses in Hindi: 50.0%


In [None]:
!pip install nltk pandas

import re
import pandas as pd
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

def calculate_bleu(reference, candidate):
    """
    Score a generated response against one reference with sentence-level BLEU.

    Both texts are tokenized by splitting on whitespace, and NLTK's
    ``SmoothingFunction().method1`` is passed as the smoothing function.

    Args:
        reference (str): The expected (ground-truth) response.
        candidate (str): The model's generated response.
    Returns:
        float: BLEU score as returned by ``sentence_bleu``.
    """
    references = [reference.split()]  # sentence_bleu expects a list of references
    hypothesis = candidate.split()
    return sentence_bleu(
        references,
        hypothesis,
        smoothing_function=SmoothingFunction().method1,
    )

def calculate_hindi_percentage_in_response(response):
    """
    Return how much of a response is written in Devanagari, as a percentage.

    Every character of the response (spaces and punctuation included) counts
    towards the total.

    Args:
        response (str): The model's generated response.
    Returns:
        float: Percentage of Hindi characters; 0 for an empty response.
    """
    devanagari_hits = re.findall(r'[\u0900-\u097F]', response)  # Devanagari Unicode range
    length = len(response)
    return (len(devanagari_hits) / length) * 100 if length else 0

def evaluate_model(data):
    """
    Score every sample and return the scores as a table.

    Each result row holds the query, reference and candidate texts, the BLEU
    score, the Hindi character percentage of the candidate, and any
    human-assigned scores supplied under the optional ``human_scores`` key.
    The table is also printed.

    Args:
        data (list of dict): Samples with 'query', 'reference' and 'candidate'
            keys, plus an optional 'human_scores' dict.
    Returns:
        pd.DataFrame: One row per sample.
    """
    rows = [
        {
            "Query": sample['query'],
            "Reference": sample['reference'],
            "Candidate": sample['candidate'],
            "BLEU": calculate_bleu(sample['reference'], sample['candidate']),
            "Hindi_Percentage": calculate_hindi_percentage_in_response(sample['candidate']),
            **sample.get('human_scores', {}),  # human evaluation scores, if present
        }
        for sample in data
    ]

    results_df = pd.DataFrame(rows)

    print("\nEvaluation Results:")
    print(results_df)
    return results_df


# Single Hindi sample: prompt, ground-truth answer, model output and human ratings.
data = [
    dict(
        query="बच्चे की नींद की समस्या को कैसे हल करें?",
        reference="बच्चे की नींद की समस्या को हल करने के लिए सुनिश्चित दिनचर्या बनाएं, शांत और अंधेरा माहौल दें, और सोने से पहले स्क्रीन का उपयोग न करें। अगर समस्या बनी रहती है, तो डॉक्टर से सलाह लें।",
        candidate="बच्चे की नींद की समस्या को हल करने के लिए कुछ벤िफिट करें: 1. शेकड़ता का निर्माण करें: एक सुनिश्चित नींद के लिए Needed अनुसूचित टाइम टेबल बनाएं।",
        human_scores=dict(fluency=2, coherence=3, relevance=2),
    ),
]

# Score the sample, then persist the table next to the notebook.
results_df = evaluate_model(data)
results_df.to_csv("evaluation_results.csv", index=False)
print("\nResults saved to evaluation_results.csv")



Evaluation Results:
                                      Query  \
0  बच्चे की नींद की समस्या को कैसे हल करें?   

                                           Reference  \
0  बच्चे की नींद की समस्या को हल करने के लिए सुनि...   

                                           Candidate      BLEU  \
0  बच्चे की नींद की समस्या को हल करने के लिए कुछ벤...  0.239018   

   Hindi_Percentage  fluency  coherence  relevance  
0         73.943662        2          3          2  

Results saved to evaluation_results.csv


In [None]:
!pip install nltk pandas

import re
import pandas as pd
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

def calculate_bleu(reference, candidate):
    """
    Compute sentence-level BLEU for one candidate against one reference.

    Tokens are the whitespace-separated pieces of each string; smoothing is
    NLTK's ``SmoothingFunction().method1``.

    Args:
        reference (str): The expected (ground-truth) response.
        candidate (str): The model's generated response.
    Returns:
        float: BLEU score as returned by ``sentence_bleu``.
    """
    gold_tokens = reference.split()
    predicted_tokens = candidate.split()
    smoothing = SmoothingFunction().method1
    # The reference is wrapped in a list because sentence_bleu accepts several references.
    return sentence_bleu([gold_tokens], predicted_tokens, smoothing_function=smoothing)

def calculate_arabic_percentage_in_response(response):
    """
    Return the share of a response that lies in the Arabic Unicode block.

    The share is taken over all characters of the response, whitespace and
    punctuation included.

    Args:
        response (str): The model's generated response.
    Returns:
        float: Percentage of Arabic characters; 0 for an empty response.
    """
    arabic_count = len(re.findall(r'[\u0600-\u06FF]', response))  # Arabic Unicode range
    char_count = len(response)
    if not char_count:
        return 0
    ratio = arabic_count / char_count
    return ratio * 100

def evaluate_model(data):
    """
    Build a results table with automatic and human scores for each sample.

    For every sample the BLEU score and the Arabic character percentage of
    the candidate are computed; scores found under the optional
    ``human_scores`` key are appended as extra columns. The table is printed
    before it is returned.

    Args:
        data (list of dict): Samples with 'query', 'reference' and 'candidate'
            keys, plus an optional 'human_scores' dict.
    Returns:
        pd.DataFrame: One row per sample.
    """
    rows = []
    for sample in data:
        question, gold, answer = sample['query'], sample['reference'], sample['candidate']
        extra_scores = sample.get('human_scores', {})

        row = {"Query": question, "Reference": gold, "Candidate": answer}
        row["BLEU"] = calculate_bleu(gold, answer)
        row["Arabic_Percentage"] = calculate_arabic_percentage_in_response(answer)

        # Human evaluation scores, if present, become additional columns.
        rows.append({**row, **extra_scores})

    results_df = pd.DataFrame(rows)

    print("\nEvaluation Results:")
    print(results_df)
    return results_df

# Single Arabic sample: prompt, ground-truth answer, model output and human ratings.
data = [
    dict(
        query="كيف يمكنني تحسين جودة نوم طفلي؟",
        reference="لتحسين جودة نوم الطفل، يمكن اتباع النصائح التالية: 1. وضع روتين ثابت للنوم. 2. إنشاء بيئة مريحة للنوم مثل غرفة مظلمة ودرجة حرارة مناسبة. 3. تقليل وقت الشاشة قبل النوم. 4. تشجيع النشاط البدني أثناء النهار. 5. تقليل تناول السكريات والمشروبات قبل النوم. 6. تجنب الأنشطة المثيرة قبل النوم. 7. اتباع جدول نوم منتظم.",
        candidate="تخ损 جودة نوم الطفل يمكن أن تعوق نموه.Buffered تھریو أی فیمینا أن تکرر خفیفات نوم. لتحسین جودة نوم الطفل،Followات النصائح التالیة: 1. Establish a bedtime routine: اعتاد الطفل على مساسه الحادیة قبل النوم بفعلات مثل القراءة أو الأغانی.",
        human_scores=dict(fluency=2, coherence=2, relevance=1),
    ),
]

# Score the sample, then persist the table next to the notebook.
results_df = evaluate_model(data)
results_df.to_csv("arabic_evaluation_results.csv", index=False)
print("\nResults saved to arabic_evaluation_results.csv")



Evaluation Results:
                             Query  \
0  كيف يمكنني تحسين جودة نوم طفلي؟   

                                           Reference  \
0  لتحسين جودة نوم الطفل، يمكن اتباع النصائح التا...   

                                           Candidate      BLEU  \
0  تخ损 جودة نوم الطفل يمكن أن تعوق نموه.Buffered ...  0.009568   

   Arabic_Percentage  fluency  coherence  relevance  
0          64.224138        2          2          1  

Results saved to arabic_evaluation_results.csv


In [None]:
!pip install nltk pandas langdetect

import re
import pandas as pd
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from langdetect import detect

def calculate_bleu(reference, candidate):
    """
    Return the smoothed sentence-level BLEU of ``candidate`` versus ``reference``.

    Whitespace splitting is used as the tokenizer for both strings, and
    ``SmoothingFunction().method1`` from NLTK is used for smoothing.

    Args:
        reference (str): The expected (ground-truth) response.
        candidate (str): The model's generated response.
    Returns:
        float: BLEU score as returned by ``sentence_bleu``.
    """
    tokenized = {
        "references": [reference.split()],  # a list, since several references are allowed
        "hypothesis": candidate.split(),
    }
    return sentence_bleu(
        tokenized["references"],
        tokenized["hypothesis"],
        smoothing_function=SmoothingFunction().method1,
    )

def calculate_french_percentage_in_response(response):
    """
    Calculate the percentage of French (Latin-alphabet) letters in a response.

    Counted characters are the ASCII letters, the accented Latin-1 letters
    and the French letters that live outside Latin-1 (Œ, œ, Ÿ). The Latin-1
    range is split around U+00D7 and U+00F7 so that the multiplication and
    division signs are not counted as letters. The total is every character
    of the response, whitespace and punctuation included.

    Args:
        response (str): The model's generated response.
    Returns:
        float: Percentage of French characters; 0 for an empty response.
    """
    # À-Ö, Ø-ö, ø-ÿ skip "×" (U+00D7) and "÷" (U+00F7), which sit inside À-ÿ.
    french_chars = re.findall(r'[a-zA-ZÀ-ÖØ-öø-ÿŒœŸ]', response)
    total_chars = len(response)
    if total_chars == 0:
        return 0
    return (len(french_chars) / total_chars) * 100

def calculate_language_match(response, target_language):
    """
    Check if the response matches the target language.

    Detection is delegated to ``langdetect.detect``. Text that langdetect
    cannot classify (for example an empty string, or digits/punctuation
    only) is reported as a mismatch instead of raising, so one bad response
    does not abort a whole evaluation run.

    Args:
        response (str): The model's generated response.
        target_language (str): Expected language code (e.g., "fr" for French).
    Returns:
        bool: True if the response is in the target language, False otherwise.
    """
    # Local import keeps this function usable with the cell's existing import lines.
    from langdetect import LangDetectException

    try:
        detected_language = detect(response)
    except LangDetectException:
        # No detectable language features in the text.
        return False
    return detected_language == target_language

def evaluate_model(data):
    """
    Score every sample for BLEU, French character share and language match.

    The language check compares the detected language of the candidate with
    "fr". The resulting table is printed and returned.

    Args:
        data (list of dict): Samples with 'query', 'reference' and 'candidate' keys.
    Returns:
        pd.DataFrame: One row per sample with the texts and the three metrics.
    """
    rows = [
        {
            "Query": sample['query'],
            "Reference": sample['reference'],
            "Candidate": sample['candidate'],
            "BLEU Score": calculate_bleu(sample['reference'], sample['candidate']),
            "French_Percentage": calculate_french_percentage_in_response(sample['candidate']),
            "Language Match": calculate_language_match(sample['candidate'], "fr"),
        }
        for sample in data
    ]

    results_df = pd.DataFrame(rows)

    print("\nEvaluation Results:")
    print(results_df)
    return results_df

# Single French sample: prompt, ground-truth answer and model output.
data = [
    dict(
        query="Comment aider un enfant qui a des problèmes de concentration à l'école ?",
        reference="Pour aider un enfant ayant des problèmes de concentration à l'école, commencez par établir une routine claire, limitez les distractions pendant les devoirs, et assurez-vous qu'il dort suffisamment. Encouragez une communication ouverte avec les enseignants pour identifier les besoins spécifiques. Si nécessaire, envisagez de consulter un spécialiste.",
        candidate="Il est important de mettre en place des mesures pour aider l'enfant à gérer ses problèmes de concentration à l'école. Voici quelques conseils :\n\n* Créez un environnement de travail calme et organisé : évitez les bruits forts, les distractions et les emballages.\n* Établissez des objectifs clairs et soutenables : découvrez avec l'enfant ce qu'il souhaite accomplir et aidez-le à rompre les tâches en petits pas.\n* Encouragez la pratique de la méditation et la respiration profonde : ces techniques peuvent aider l'enfant à se concentrer et à se calmer.\n* Favorisez les activités physiques régulières : l'exercice peut améliorer la concentration et la focus.\n* N'oubliez pas de partager vos préoccupations avec les enseignants et les parents : ensemble, vous pouvez mettre en place des stratégies pour aider l'enfant à gérer ses problèmes de concentration.\n\nIl est important de noter que chaque enfant est unique, il est donc important de adapter ces conseils aux besoins spécifiques de l'enfant. Consultez un professionnel de santé si les problèmes de concentration persistent.",
    ),
]

# Score the sample, then persist the table next to the notebook.
results_df = evaluate_model(data)
results_df.to_csv("french_evaluation_results.csv", index=False)
print("\nResults saved to french_evaluation_results.csv")


Collecting langdetect
  Downloading langdetect-1.0.9.tar.gz (981 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/981.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m972.8/981.5 kB[0m [31m33.4 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m981.5/981.5 kB[0m [31m17.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: langdetect
  Building wheel for langdetect (setup.py) ... [?25l[?25hdone
  Created wheel for langdetect: filename=langdetect-1.0.9-py3-none-any.whl size=993222 sha256=bf7b8394ca7691b5e4a0d142ebe4850204f55a4ef33381bfe9f42916256fb1ef
  Stored in directory: /root/.cache/pip/wheels/95/03/7d/59ea870c70ce4e5a370638b5462a7711ab78fba2f655d05106
Successfully built langdetect
Installing collected packages: langdetect
Successfully installed langdetect-1.0.9

Evaluati

In [None]:
!pip install nltk pandas langdetect

import re
import pandas as pd
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from langdetect import detect

def calculate_bleu(reference, candidate):
    """
    Measure n-gram overlap between a candidate and its reference using BLEU.

    The strings are split on whitespace to obtain tokens, and the score is
    smoothed with NLTK's ``SmoothingFunction().method1``.

    Args:
        reference (str): The expected (ground-truth) response.
        candidate (str): The model's generated response.
    Returns:
        float: BLEU score as returned by ``sentence_bleu``.
    """
    ref_words, cand_words = reference.split(), candidate.split()
    method1 = SmoothingFunction().method1
    # A single reference is still passed as a list of references.
    score = sentence_bleu([ref_words], cand_words, smoothing_function=method1)
    return score

def calculate_spanish_percentage_in_response(response):
    """
    Return the share of Spanish-alphabet letters in a response, in percent.

    ASCII letters plus the accented vowels, Ü/ü and Ñ/ñ are counted; the
    total is every character of the response, whitespace included.

    Args:
        response (str): The model's generated response.
    Returns:
        float: Percentage of Spanish characters; 0 for an empty response.
    """
    letters = re.findall(r'[a-zA-ZÁÉÍÓÚÜÑáéíóúüñ]', response)  # Includes accented characters
    size = len(response)
    return 0 if size == 0 else (len(letters) / size) * 100

def calculate_language_match(response, target_language):
    """
    Check if the response matches the target language.

    Detection is delegated to ``langdetect.detect``. Text that langdetect
    cannot classify (for example an empty string, or digits/punctuation
    only) is reported as a mismatch instead of raising, so one bad response
    does not abort a whole evaluation run.

    Args:
        response (str): The model's generated response.
        target_language (str): Expected language code (e.g., "es" for Spanish).
    Returns:
        bool: True if the response is in the target language, False otherwise.
    """
    # Local import keeps this function usable with the cell's existing import lines.
    from langdetect import LangDetectException

    try:
        detected_language = detect(response)
    except LangDetectException:
        # No detectable language features in the text.
        return False
    return detected_language == target_language

def evaluate_model(data):
    """
    Compute BLEU, Spanish character share and language match for each sample.

    The language check compares the detected language of the candidate with
    "es". The resulting table is printed and returned.

    Args:
        data (list of dict): Samples with 'query', 'reference' and 'candidate' keys.
    Returns:
        pd.DataFrame: One row per sample with the texts and the three metrics.
    """
    rows = []
    for sample in data:
        question, gold, answer = sample['query'], sample['reference'], sample['candidate']

        row = {"Query": question, "Reference": gold, "Candidate": answer}
        row["BLEU Score"] = calculate_bleu(gold, answer)
        row["Spanish_Percentage"] = calculate_spanish_percentage_in_response(answer)
        row["Language Match"] = calculate_language_match(answer, "es")
        rows.append(row)

    results_df = pd.DataFrame(rows)

    print("\nEvaluation Results:")
    print(results_df)
    return results_df


# Single Spanish sample: prompt, ground-truth answer and model output.
data = [
    dict(
        query="Me cuesta mucho dormir por la noche porque no puedo dejar de pensar en el trabajo. ¿Qué puedo hacer para descansar mejor?",
        reference="El insomnio relacionado con el estrés laboral es muy común. Intenta establecer una rutina relajante antes de dormir, como leer un libro o tomar un baño caliente. Evita las pantallas al menos una hora antes de acostarte y prueba técnicas de relajación como la meditación.",
        candidate="Entiendo que el estrés laboral puede ser un gran problema. Para dormir mejor, puedes intentar algunas estrategias. Primero, establece una rutina de sueño estable y mantén una hora de dormir regular. No te acuestes demasiado tarde, ya que eso puede influir en la calidad de tu sueño. Después de acostarte, intenta relajarte con técnicas de respiración profunda o meditación. Evita pantallas y electrodomésticos al menos 30 minutos antes de dormir. También puedes intentar hacer una lista de tareas pendientes antes de acostarte para que no tengas ese sentimiento de inquietud en la noche. Recuerda que la relajación es clave para dormir bien. ¡Espero que estas sugerencias te ayuden!",
    ),
]

# Score the sample, then persist the table next to the notebook.
results_df = evaluate_model(data)
results_df.to_csv("spanish_evaluation_results.csv", index=False)
print("\nResults saved to spanish_evaluation_results.csv")



Evaluation Results:
                                               Query  \
0  Me cuesta mucho dormir por la noche porque no ...   

                                           Reference  \
0  El insomnio relacionado con el estrés laboral ...   

                                           Candidate  BLEU Score  \
0  Entiendo que el estrés laboral puede ser un gr...     0.02456   

   Spanish_Percentage  Language Match  
0           81.964809            True  

Results saved to spanish_evaluation_results.csv


In [None]:
import re
import pandas as pd
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from langdetect import detect

def calculate_bleu(reference, candidate):
    """
    Compare a model response with its reference using sentence-level BLEU.

    Each string is tokenized with ``str.split()``; NLTK's
    ``SmoothingFunction().method1`` is supplied as the smoothing function.

    Args:
        reference (str): The expected (ground-truth) response.
        candidate (str): The model's generated response.
    Returns:
        float: BLEU score as returned by ``sentence_bleu``.
    """
    reference_list = [reference.split()]  # list of references, here with one entry
    hypothesis = candidate.split()
    bleu_kwargs = {"smoothing_function": SmoothingFunction().method1}
    return sentence_bleu(reference_list, hypothesis, **bleu_kwargs)

def calculate_language_match(response, target_language):
    """
    Check if the response matches the target language.

    Detection is delegated to ``langdetect.detect``. Text that langdetect
    cannot classify (for example an empty string, or digits/punctuation
    only) is reported as a mismatch instead of raising, so one bad response
    does not abort a whole evaluation run.

    Args:
        response (str): The model's generated response.
        target_language (str): Expected language code (e.g., "es" for Spanish).
    Returns:
        bool: True if the response is in the target language, False otherwise.
    """
    # Local import keeps this function usable with the cell's existing import lines.
    from langdetect import LangDetectException

    try:
        detected_language = detect(response)
    except LangDetectException:
        # No detectable language features in the text.
        return False
    return detected_language == target_language

def calculate_spanish_percentage_in_response(response):
    """
    Return the share of Spanish-alphabet letters among non-space characters.

    ASCII letters plus the accented vowels, Ü/ü and Ñ/ñ are counted. Plain
    space characters are removed from the total; other characters
    (punctuation, digits, newlines) still count towards it.

    Args:
        response (str): The model's generated response.
    Returns:
        float: Percentage of Spanish characters; 0 when nothing but spaces remains.
    """
    letter_count = len(re.findall(r'[a-zA-ZÁÉÍÓÚÜÑáéíóúüñ]', response))  # Spanish characters and accents
    non_space_count = len(response) - response.count(" ")  # Ignore spaces
    if not non_space_count:
        return 0
    return (letter_count / non_space_count) * 100

def evaluate_model(data):
    """
    Produce a per-sample table of BLEU, Spanish character share and language match.

    The language check compares the detected language of the candidate with
    "es". The table is printed before it is returned.

    Args:
        data (list of dict): Samples with 'query', 'reference' and 'candidate' keys.
    Returns:
        pd.DataFrame: One row per sample with the texts and the three metrics.
    """
    def score_sample(sample):
        # Turn one input record into one result row.
        question = sample['query']
        gold = sample['reference']
        answer = sample['candidate']
        return {
            "Query": question,
            "Reference": gold,
            "Candidate": answer,
            "BLEU Score": calculate_bleu(gold, answer),
            "Spanish_Percentage": calculate_spanish_percentage_in_response(answer),
            "Language Match": calculate_language_match(answer, "es"),
        }

    results_df = pd.DataFrame([score_sample(sample) for sample in data])

    print("\nEvaluation Results:")
    print(results_df)
    return results_df

# Single Spanish sample: prompt, ground-truth answer and model output.
# NOTE(review): the candidate text is about work-related stress while the query
# asks about a child's sleep; it looks carried over from another sample - confirm.
data = [
    dict(
        query="¿Cómo puedo mejorar la calidad del sueño de mi hijo?",
        reference="Para mejorar la calidad del sueño de tu hijo, sigue estos consejos: crea una rutina de sueño, asegúrate de que el ambiente sea tranquilo y oscuro, limita el uso de pantallas antes de dormir, y promueve el ejercicio físico durante el día.",
        candidate="Entiendo que el estrés laboral puede ser un gran problema. Para dormir mejor, puedes intentar algunas estrategias. Primero, establece una rutina de sueño estable y mantén una hora de dormir regular. No te acuestes demasiado tarde, ya que eso puede influir en la calidad de tu sueño. Después de acostarte, intenta relajarte con técnicas de respiración profunda o meditación. Evita pantallas y electrodomésticos al menos 30 minutos antes de dormir. También puedes intentar hacer una lista de tareas pendientes antes de acostarte para que no tengas ese sentimiento de inquietud en la noche. Recuerda que la relajación es clave para dormir bien. ¡Espero que estas sugerencias te ayuden!",
    ),
]

# Score the sample, then persist the table next to the notebook.
# NOTE(review): any existing spanish_evaluation_results.csv is overwritten here.
results_df = evaluate_model(data)
results_df.to_csv("spanish_evaluation_results.csv", index=False)
print("\nResults saved to spanish_evaluation_results.csv")



Evaluation Results:
                                               Query  \
0  ¿Cómo puedo mejorar la calidad del sueño de mi...   

                                           Reference  \
0  Para mejorar la calidad del sueño de tu hijo, ...   

                                           Candidate  BLEU Score  \
0  Entiendo que el estrés laboral puede ser un gr...    0.017023   

   Spanish_Percentage  Language Match  
0           97.217391            True  

Results saved to spanish_evaluation_results.csv


In [None]:

!pip install nltk pandas langdetect


import re
import pandas as pd
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from langdetect import detect

def calculate_bleu(reference, candidate):
    """
    Calculate BLEU score for a single response.

    Japanese is written without spaces, so splitting on whitespace turns a
    whole sentence into one token and BLEU collapses to zero. Kana and CJK
    ideographs are therefore emitted as one token per character, while any
    other run of non-space characters (Latin words, numbers, punctuation)
    stays a single token. Text containing no kana/CJK characters is
    tokenized the same way as a plain whitespace split.

    Args:
        reference (str): The expected (ground-truth) response.
        candidate (str): The model's generated response.
    Returns:
        float: BLEU score.
    """
    # First alternative: a single kana/CJK character.
    # Second alternative: a maximal run of other non-whitespace characters.
    token_pattern = re.compile(
        r'[\u3005-\u3007\u3040-\u30FF\u3400-\u4DBF\u4E00-\u9FFF]'
        r'|[^\s\u3005-\u3007\u3040-\u30FF\u3400-\u4DBF\u4E00-\u9FFF]+'
    )
    reference_tokens = [token_pattern.findall(reference)]
    candidate_tokens = token_pattern.findall(candidate)
    smooth_fn = SmoothingFunction().method1
    return sentence_bleu(reference_tokens, candidate_tokens, smoothing_function=smooth_fn)

def calculate_japanese_percentage_in_response(response):
    """
    Calculate the percentage of Japanese characters in a response.

    Counted characters: Hiragana, Katakana, CJK ideographs (including
    Extension A), the ideographic marks 々 〆 〇 (U+3005-U+3007) and
    half-width Katakana. The iteration mark 々 in particular appears in
    everyday words such as 日々 and would otherwise be treated as
    non-Japanese. Plain spaces are excluded from the total.

    Args:
        response (str): The model's generated response.
    Returns:
        float: Percentage of Japanese characters; 0 when nothing but spaces remains.
    """
    japanese_chars = re.findall(
        r'[\u3005-\u3007\u3040-\u30FF\u3400-\u4DBF\u4E00-\u9FFF\uFF66-\uFF9F]',
        response,
    )
    total_chars = len(response.replace(" ", ""))  # Ignore spaces
    if total_chars == 0:
        return 0
    return (len(japanese_chars) / total_chars) * 100

def calculate_language_match(response, target_language):
    """
    Check if the response matches the target language.

    Detection is delegated to ``langdetect.detect``. Text that langdetect
    cannot classify (for example an empty string, or digits/punctuation
    only) is reported as a mismatch instead of raising, so one bad response
    does not abort a whole evaluation run.

    Args:
        response (str): The model's generated response.
        target_language (str): Expected language code (e.g., "ja" for Japanese).
    Returns:
        bool: True if the response is in the target language, False otherwise.
    """
    # Local import keeps this function usable with the cell's existing import lines.
    from langdetect import LangDetectException

    try:
        detected_language = detect(response)
    except LangDetectException:
        # No detectable language features in the text.
        return False
    return detected_language == target_language

def evaluate_model(data):
    """
    Score every sample for BLEU, Japanese character share and language match.

    The language check compares the detected language of the candidate with
    "ja". The resulting table is printed and returned.

    Args:
        data (list of dict): Samples with 'query', 'reference' and 'candidate' keys.
    Returns:
        pd.DataFrame: One row per sample with the texts and the three metrics.
    """
    rows = [
        {
            "Query": sample['query'],
            "Reference": sample['reference'],
            "Candidate": sample['candidate'],
            "BLEU Score": calculate_bleu(sample['reference'], sample['candidate']),
            "Japanese_Percentage": calculate_japanese_percentage_in_response(sample['candidate']),
            "Language Match": calculate_language_match(sample['candidate'], "ja"),
        }
        for sample in data
    ]

    results_df = pd.DataFrame(rows)

    print("\nEvaluation Results:")
    print(results_df)
    return results_df


# Single Japanese sample: prompt, ground-truth answer and model output.
data = [
    dict(
        query="子どもの集中力を向上させるにはどうすればよいですか？",
        reference="子どもの集中力を向上させるためには、日々の生活リズムを確立し、適切な休憩時間を設定してください。また、勉強環境を整え、注意散漫を防ぐことが効果的です。さらに、運動やバランスの取れた食事も集中力を高める助けになります。",
        candidate="子どもの集中力を向上させるために、まず子どもの興味や関心を持つ活動に参加してもらうことが大切です。特に、静かにすることができるゲームや趣味、或いは創造的な活動を推奨します。また、子どもの目を離さないようにして、子どもの行動を指導することが重要です。最後に、子どもの成長に応じて、適切な指導や面談を実施することが大切です。",
    ),
]

# Score the sample, then persist the table next to the notebook.
results_df = evaluate_model(data)
results_df.to_csv("japanese_evaluation_results.csv", index=False)
print("\nResults saved to japanese_evaluation_results.csv")



Evaluation Results:
                        Query  \
0  子どもの集中力を向上させるにはどうすればよいですか？   

                                           Reference  \
0  子どもの集中力を向上させるためには、日々の生活リズムを確立し、適切な休憩時間を設定してくださ...   

                                           Candidate  BLEU Score  \
0  子どもの集中力を向上させるために、まず子どもの興味や関心を持つ活動に参加してもらうことが大切...           0   

   Japanese_Percentage  Language Match  
0               93.125            True  

Results saved to japanese_evaluation_results.csv


In [None]:
!pip install nltk pandas langdetect

import re
import pandas as pd
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from langdetect import detect

def calculate_bleu(reference, candidate):
    """
    Return smoothed sentence-level BLEU for a candidate/reference pair.

    Tokenization is a plain whitespace split of each string, and
    ``SmoothingFunction().method1`` from NLTK provides the smoothing.

    Args:
        reference (str): The expected (ground-truth) response.
        candidate (str): The model's generated response.
    Returns:
        float: BLEU score as returned by ``sentence_bleu``.
    """
    # sentence_bleu takes a list of tokenized references and one tokenized hypothesis.
    bleu_inputs = ([reference.split()], candidate.split())
    return sentence_bleu(*bleu_inputs, smoothing_function=SmoothingFunction().method1)

def calculate_korean_percentage_in_response(response):
    """
    Calculate the percentage of Korean characters in a response.

    Counted characters: precomposed Hangul syllables, plus standalone Hangul
    Jamo (U+1100-U+11FF) and Hangul Compatibility Jamo (U+3130-U+318F) so
    that text written with bare consonants/vowels (e.g. "ㅋㅋ") is also
    recognized as Korean. The total is every character of the response,
    whitespace and punctuation included.

    Args:
        response (str): The model's generated response.
    Returns:
        float: Percentage of Korean characters; 0 for an empty response.
    """
    korean_chars = re.findall(r'[\u1100-\u11FF\u3130-\u318F\uAC00-\uD7AF]', response)
    total_chars = len(response)
    if total_chars == 0:
        return 0
    return (len(korean_chars) / total_chars) * 100

def calculate_language_match(response, target_language):
    """
    Check if the response matches the target language.

    Detection is delegated to ``langdetect.detect``. Text that langdetect
    cannot classify (for example an empty string, or digits/punctuation
    only) is reported as a mismatch instead of raising, so one bad response
    does not abort a whole evaluation run.

    Args:
        response (str): The model's generated response.
        target_language (str): Expected language code (e.g., "ko" for Korean).
    Returns:
        bool: True if the response is in the target language, False otherwise.
    """
    # Local import keeps this function usable with the cell's existing import lines.
    from langdetect import LangDetectException

    try:
        detected_language = detect(response)
    except LangDetectException:
        # No detectable language features in the text.
        return False
    return detected_language == target_language

def evaluate_model(data):
    """
    Compute BLEU, Korean character share and language match for each sample.

    The language check compares the detected language of the candidate with
    "ko". The resulting table is printed and returned.

    Args:
        data (list of dict): Samples with 'query', 'reference' and 'candidate' keys.
    Returns:
        pd.DataFrame: One row per sample with the texts and the three metrics.
    """
    rows = []
    for sample in data:
        question, gold, answer = sample['query'], sample['reference'], sample['candidate']

        row = {"Query": question, "Reference": gold, "Candidate": answer}
        row["BLEU Score"] = calculate_bleu(gold, answer)
        row["Korean_Percentage"] = calculate_korean_percentage_in_response(answer)
        row["Language Match"] = calculate_language_match(answer, "ko")
        rows.append(row)

    results_df = pd.DataFrame(rows)

    print("\nEvaluation Results:")
    print(results_df)
    return results_df

# Single Korean sample: prompt, ground-truth answer and model output.
data = [
    dict(
        query="부모님 기대하는 역할을 다하지 못하고 있는 죄책감을 느낄 때는 어떻게 대처해야 하나요?",
        reference="부모님의 기대를 모두 충족하려고 하지 않아도 된다는 점을 이해하세요. 자신을 돌보는 것이 중요하며, 자신의 감정과 생각을 정직하게 표현하는 연습을 해보세요. 또한, 필요하다면 상담사와 이야기하여 도움을 받는 것이 좋습니다.",
        candidate="부모님 기대하는 역할을 다하지 못하고 있는 죄책감을 느낄 때는 먼저Self-Reflection을 통해 정확하게 문제를 파악하세요. Після того, 다음과 같은 방법을ลอง해볼 수 있습니다. 처음에는 부모님의 기대를 다른 가족 구성원들과 비교하지 마세요. 대신, 부모님의 기대를 내가 최선을 다해 자원하는 실제적인 요구에 초점을 맞추세요. 둘째로, 부모님의 기대를 매우 엄격하게 잘 받치지 마세요. 인간은 실수를 하지만, 중요한 것은 실수를 통해 경험이 날 것이고, 이를 통해 더 나은 가정을 세울 수 있습니다. 셋째로, 부모님의 기대를 잘 수행하지 못하는 것에 대해 부정하는 것과는 달리,이를 인정하여 오히rique하게 자원하여 대처하세요. 부정 것은 현실을 인정하는 것이 아니라, 현실을 부정하는 것과 같습니다. 넷째로, 부모님의 기대를 수행하는 데 어려움을感じ하는 경우,에는 social support를 찾을 수 있습니다. 이러한 support들은 당신에게서 하는 것을 제공할 수 있습니다. 마지막으로, 부모님의 기대를 다하지 못하고 있는 죄책감을 느낄 때,에는 Healthylife-style과 self-care를 이행하세요. 이러한 것은 당신을 더 나은 정신 건강을 지닐 수 있습니다.",
    ),
]

# Score the sample, then persist the table next to the notebook.
results_df = evaluate_model(data)
results_df.to_csv("korean_evaluation_results.csv", index=False)
print("\nResults saved to korean_evaluation_results.csv")



Evaluation Results:
                                              Query  \
0  부모님 기대하는 역할을 다하지 못하고 있는 죄책감을 느낄 때는 어떻게 대처해야 하나요?   

                                           Reference  \
0  부모님의 기대를 모두 충족하려고 하지 않아도 된다는 점을 이해하세요. 자신을 돌보는...   

                                           Candidate  BLEU Score  \
0  부모님 기대하는 역할을 다하지 못하고 있는 죄책감을 느낄 때는 먼저Self-Refl...    0.003165   

   Korean_Percentage  Language Match  
0          61.563518            True  

Results saved to korean_evaluation_results.csv


In [None]:
!pip install nltk pandas langdetect

import re
import pandas as pd
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from langdetect import detect

# Function to calculate BLEU score
def calculate_bleu(reference, candidate):
    """
    Score one generated response against a single reference with
    sentence-level BLEU.

    Both texts are tokenized by plain whitespace splitting, and NLTK's
    ``SmoothingFunction().method1`` is passed as the smoothing function.

    Args:
        reference (str): The expected (ground-truth) response.
        candidate (str): The model's generated response.
    Returns:
        float: BLEU score.
    """
    # sentence_bleu expects a list of tokenized references; only one is given.
    references = [reference.split()]
    hypothesis = candidate.split()
    return sentence_bleu(
        references,
        hypothesis,
        smoothing_function=SmoothingFunction().method1,
    )

def calculate_russian_percentage_in_response(response):
    """
    Calculate the percentage of Cyrillic characters in a response.

    Characters are matched against the Unicode Cyrillic block
    (U+0400-U+04FF), so any letter in that block counts, not only those used
    in Russian. The denominator is the full length of the response, which
    means whitespace, digits and punctuation lower the result: ordinary
    Russian prose scores below 100.

    Args:
        response (str): The model's generated response.
    Returns:
        float: Percentage (0.0-100.0) of characters that fall in the
            Cyrillic block; 0.0 for an empty response.
    """
    total_chars = len(response)
    if total_chars == 0:
        # Return a float here as well so the return type never depends on
        # the input (the non-empty path always yields a float).
        return 0.0
    russian_chars = re.findall(r'[\u0400-\u04FF]', response)  # Cyrillic Unicode range
    return (len(russian_chars) / total_chars) * 100

def calculate_language_match(response, target_language):
    """
    Check if the response matches the target language.

    The language is guessed with ``langdetect.detect`` and compared to
    ``target_language`` by exact string equality.

    Args:
        response (str): The model's generated response.
        target_language (str): Expected language code (e.g., "ru" for Russian).
    Returns:
        bool: True if the detected language equals ``target_language``.
            False otherwise, including when no language can be detected at
            all (for example an empty response).
    """
    # Local import keeps this function self-contained; langdetect is already
    # a dependency of this file.
    from langdetect.lang_detect_exception import LangDetectException

    # NOTE(review): langdetect documents its detection as non-deterministic
    # for short or ambiguous text unless DetectorFactory.seed is set before
    # the first call. Consider seeding once at the top of the notebook.
    try:
        detected_language = detect(response)
    except LangDetectException:
        # Raised when the text has no usable features (empty string,
        # digits/punctuation only). An undetectable response is not a match,
        # and one blank model output should not abort the whole evaluation.
        return False
    return detected_language == target_language

def evaluate_model(data):
    """
    Score every sample with BLEU, Cyrillic-character percentage, and a
    language-detection check against "ru", then tabulate the results.

    The resulting table is printed to stdout before being returned.

    Args:
        data (list of dict): Samples, each with 'query', 'reference', and
            'candidate' keys.
    Returns:
        pd.DataFrame: One row per sample with the original texts plus the
            "BLEU Score", "Russian_Percentage", and "Language Match" columns.
    """
    def _build_row(sample):
        # Pull the three texts out once, then compute the metrics in a fixed
        # order: BLEU, script percentage, detected-language match.
        prompt = sample['query']
        expected = sample['reference']
        generated = sample['candidate']
        return {
            "Query": prompt,
            "Reference": expected,
            "Candidate": generated,
            "BLEU Score": calculate_bleu(expected, generated),
            "Russian_Percentage": calculate_russian_percentage_in_response(generated),
            "Language Match": calculate_language_match(generated, "ru"),
        }

    table = pd.DataFrame([_build_row(sample) for sample in data])

    print("\nEvaluation Results:")
    print(table)
    return table


# Single-sample evaluation set for the Russian run. Each entry carries the
# user prompt ("query"), the ground-truth answer ("reference"), and the model
# output to be scored ("candidate").
data = [
    {
        "query": "Как улучшить качество сна у взрослого человека, испытывающего хронический стресс?",
        "reference": "Для улучшения качества сна при хроническом стрессе рекомендуется установить регулярный режим сна, избегать употребления кофеина перед сном и заняться расслабляющими упражнениями, такими как йога или медитация. Если проблемы со сном продолжаются, проконсультируйтесь с врачом.",
        "candidate": "Чтобы улучшить качество сна у взрослого человека, испытывающего хронический стресс, рекомендуется соблюдать ритуал перед сном, что помогает телу приготовиться к отдыху. Это может включать в себя такие шаги, как ограничение использования электронных устройств за несколько часов до сна, создание осветленной и прохладной расслабляющей обстановки, а также выполнение физических упражнений на свежем воздухе днём. Кроме того, рекомендуется практиковать техники расслабления, такие как медитация, прогрессивная мышечная релаксация или молитва. Не стоит принимать кофеиносодержащие напитки и алкоголь перед сном, а также не рекомендуется спать более 8-10 часов в сутки. Если хронический стресс продолжает беспокоить, рекомендуется обратиться за помощью к специалисту для получения профессиональной помощи и разработки индивидуального плана лечения."
    }
]

# Run evaluation: evaluate_model prints the table and returns it as a DataFrame.
results_df = evaluate_model(data)

# Save results to CSV next to the notebook, omitting the DataFrame index column.
results_df.to_csv("russian_evaluation_results.csv", index=False)
print("\nResults saved to russian_evaluation_results.csv")



Evaluation Results:
                                               Query  \
0  Как улучшить качество сна у взрослого человека...   

                                           Reference  \
0  Для улучшения качества сна при хроническом стр...   

                                           Candidate  BLEU Score  \
0  Чтобы улучшить качество сна у взрослого челове...    0.002862   

   Russian_Percentage  Language Match  
0           85.172005            True  

Results saved to russian_evaluation_results.csv
