In [7]:
import pandas as pd
import os
from groq import Groq
from tqdm import tqdm
import fasttext
import numpy as np


In [None]:

# Groq API key. Read from the GROQ_API_KEY environment variable so the secret
# never has to be pasted into the notebook; the original placeholder is kept as
# the fallback so the name is always defined.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "GROQKEY")
# JSON file with the prompts (loaded with pd.read_json).
INPUT_JSON_PATH = "test_data.json"
# CSV with the existing scores (loaded with pd.read_csv, merged on 'id').
INPUT_SCORES_PATH = "submission_metric_shuffle.csv"
# Destination CSV for the adjusted scores.
OUTPUT_CSV_PATH = "updated_submission_metric_shuffle.csv"
# fastText language-identification model file.
FASTTEXT_PATH = "lid.176.bin"


In [9]:
# Metric name -> list of lowercase keywords. A metric is only handled when it
# appears as a key here; its keywords are searched for in the prompt text.
KEYWORD_MAPPING = dict(
    rejection_rate=["rejection"],
    ner_performance_for_the_relevant_entities=["ner", "entities"],
    confidence_agreement=["confidence"],
    bias_assessment=["bias"],
    privacy_leakage=["privacy"],
    data_governance_policies=["governance", "data"],
    topic_drift_rate=["topic"],
)


In [10]:

def detect_language(text, model):
    """Return the language code predicted for ``text`` (e.g. 'en', 'fr').

    Args:
        text: Text to classify. Missing (NaN/None) or blank values are
            treated as English.
        model: Object exposing ``predict(str)`` whose result is indexed as
            ``result[0][0]`` to obtain a ``__label__xx`` string, or ``None``
            when no model is available.

    Returns:
        The predicted label with the ``__label__`` prefix removed, or 'en'
        when there is no model, no text, or the prediction fails.
    """
    if model is None or pd.isna(text) or str(text).strip() == "":
        return 'en'

    # Newlines are flattened to spaces before the text is handed to the model.
    clean_text = str(text).replace("\n", " ")
    try:
        predictions = model.predict(clean_text)
        return predictions[0][0].replace("__label__", "")
    except Exception:
        # Best-effort fallback. Only ``Exception`` is caught so that
        # KeyboardInterrupt / SystemExit still stop a long-running loop.
        return 'en'

def translate_to_english(client, text):
    """Translate ``text`` into English through a Groq chat completion.

    Args:
        client: Object exposing ``chat.completions.create(...)``.
        text: Text to translate.

    Returns:
        An empty string when ``text`` is missing or blank; otherwise the
        stripped content of the first returned choice. If the request raises,
        the error is printed and ``text`` is returned unchanged.
    """
    is_blank = pd.isna(text) or str(text).strip() == ""
    if is_blank:
        return ""

    prompt_messages = [
        {
            "role": "system",
            "content": "You are a translator. Translate the following text into English. Output ONLY the translation, nothing else.",
        },
        {"role": "user", "content": text},
    ]

    try:
        response = client.chat.completions.create(
            messages=prompt_messages,
            model="llama-3.3-70b-versatile",
            temperature=0,
            max_tokens=1024,
        )
        translated = response.choices[0].message.content.strip()
    except Exception as e:
        print(f"Translation Error: {e}")
        # Fall back to the untranslated input.
        return text
    return translated

def check_specific_keywords(text, metric_name):
    """Tell whether ``text`` mentions any keyword registered for ``metric_name``.

    The text is lowercased and each keyword from ``KEYWORD_MAPPING`` is tested
    with a plain substring check, so a keyword also matches inside longer
    words.

    Returns:
        True when at least one keyword occurs in the text; False when the text
        is missing, the metric has no keywords, or nothing matches.
    """
    if pd.isna(text):
        return False

    haystack = str(text).lower()
    keywords = KEYWORD_MAPPING.get(metric_name, [])
    return any(keyword in haystack for keyword in keywords)

In [None]:
# 1. Init Models
# Tunables for this run, named so the rule below reads without magic numbers.
MAX_ROWS = 500          # only the first MAX_ROWS merged rows are processed and saved
SCORE_THRESHOLD = 5.0   # scores strictly below this are eligible for the boost
BOOSTED_SCORE = 7.0     # value written when a keyword matches a low score

client = Groq(api_key=GROQ_API_KEY)
try:
    ft_model = fasttext.load_model(FASTTEXT_PATH)
    print("FastText model loaded.")
except Exception:
    # Best-effort: without a model detect_language() reports 'en' for every
    # prompt. Only Exception is caught so Ctrl-C still interrupts the cell.
    print("FastText model not found. Defaulting to English.")
    ft_model = None

# 2. Load Data
df_prompts = pd.read_json(INPUT_JSON_PATH)
if 'id' not in df_prompts.columns:
    # No explicit ids in the prompts file: number the rows 1..N.
    df_prompts['id'] = range(1, len(df_prompts) + 1)
df_scores = pd.read_csv(INPUT_SCORES_PATH)

# Merge: keep every score row, attach its prompts when the id matches.
merged_df = pd.merge(df_scores, df_prompts, on='id', how='left')
# Work on an independent copy of the first MAX_ROWS rows so the in-loop writes
# never target a view of the full merged frame.
merged_df = merged_df.iloc[:MAX_ROWS].copy()

print(f"Processing {len(merged_df)} rows...")
boost_count = 0

# 3. Process Rows
for index, row in tqdm(merged_df.iterrows(), total=merged_df.shape[0]):

    metric = row['metric_name']
    sys_p = row.get('system_prompt', '')
    usr_p = row.get('user_prompt', '')
    current_score = row['score']

    # Only process if we have a keyword rule for this metric
    if metric not in KEYWORD_MAPPING:
        continue

    # A. Detect Language on User Prompt
    lang = detect_language(usr_p, ft_model)

    # B. Prepare English Text
    if lang != 'en':
        # Translate System Prompt (skipped when it is missing or blank)
        if pd.notna(sys_p) and str(sys_p).strip() != "":
            sys_p_en = translate_to_english(client, sys_p)
        else:
            sys_p_en = ""

        # Translate User Prompt
        usr_p_en = translate_to_english(client, usr_p)

        final_text_to_check = f"{sys_p_en} {usr_p_en}"
    else:
        # Already English
        final_text_to_check = f"{sys_p} {usr_p}"

    # C. Check Keywords in the (Translated) Prompts
    has_keyword = check_specific_keywords(final_text_to_check, metric)

    # D. Apply Logic: If Present AND Score < 5 -> Set to 7
    # The target is assigned directly; the previous
    # "current + (7 - current)" form carried a discarded np.floor() call and
    # could introduce floating-point noise in the stored score.
    if has_keyword and current_score < SCORE_THRESHOLD:
        merged_df.at[index, 'score'] = BOOSTED_SCORE
        boost_count += 1

# 4. Save
final_df = merged_df[['id', 'score']]
final_df.to_csv(OUTPUT_CSV_PATH, index=False)
print("-" * 30)
print("Processing Complete.")
print(f"Total scores boosted: {boost_count}")
print(f"Saved to: {OUTPUT_CSV_PATH}")

FastText model loaded.
Processing 500 rows...


100%|██████████| 500/500 [00:45<00:00, 10.98it/s]

------------------------------
Processing Complete.
Total scores boosted: 82
Saved to: updated_submission_metric_shuffle.csv



