In [None]:
# Notebook 3: Hindi Healthcare RAG Experiments

# ============================================================================
# Cell 1 - Setup and Imports
# ============================================================================

import sys
import os
sys.path.append('..')

import json
import time
import logging
from pathlib import Path
import pandas as pd
from datetime import datetime
import openai
from dotenv import load_dotenv
from typing import Dict, List, Tuple, Any, Optional
import numpy as np
import random
from sklearn.metrics.pairwise import cosine_similarity

from src.rag_system import MultilingualRAG
from src.utils import translate_text
from langchain.schema import Document

# Read the OpenAI key from the environment instead of hard-coding it in the
# notebook: a literal key gets committed with the file, and a placeholder
# literal is always truthy, so the "not found" branch could never run.
# load_dotenv() also picks the key up from a local .env file when one exists;
# by default it does not override variables that are already set.
load_dotenv()
OPEN_API_KEY = os.environ.get("OPENAI_API_KEY", "").strip()

if OPEN_API_KEY:
    # Re-export the stripped value so every consumer sees the same key.
    os.environ["OPENAI_API_KEY"] = OPEN_API_KEY
    openai.api_key = OPEN_API_KEY
    print(f"✓ OpenAI API Key set (version: {openai.__version__})")
else:
    print("⚠️  OpenAI API Key not found")

# Timestamped INFO-level logging for everything that runs in this notebook.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Project layout, resolved from the parent of the current working directory.
ROOT_DIR = Path('..').resolve()
PROCESSED_DATA_DIR = ROOT_DIR.joinpath('data', 'processed')
EMBEDDINGS_DIR = ROOT_DIR.joinpath('data', 'embeddings')
RESULTS_DIR = ROOT_DIR.joinpath('results')

# Only the results folder is created here; the data folders are read later.
RESULTS_DIR.mkdir(exist_ok=True, parents=True)

print("Root directory: {}".format(ROOT_DIR))
print("Results directory: {}".format(RESULTS_DIR))

# ============================================================================
# Cell 2 - Load Processed Documents
# ============================================================================

# Read the serialized documents and rebuild them as Document objects.
processed_path = PROCESSED_DATA_DIR / 'processed_documents.json'

with open(processed_path, 'r', encoding='utf-8') as f:
    docs_data = json.load(f)

# Each record carries the text under 'content' and its metadata under 'metadata'.
documents = [
    Document(page_content=record['content'], metadata=record['metadata'])
    for record in docs_data
]

print(f"✓ Loaded {len(documents)} documents")

# ============================================================================
# Cell 3 - Define Hindi Test Questions
# ============================================================================

# Test set for the experiments, grouped by domain ('condition', 'medication',
# 'side_effect', 'treatment'). Every entry is a dict with:
#   'category'   - the domain name (repeats the enclosing key)
#   'complexity' - 'simple', 'moderate' or 'complex'
#   'english'    - English reference text for the question
#   'hindi'      - the Hindi question that is actually sent through the pipelines
# NOTE(review): several entries are exact repeats of an earlier entry (for
# example the last two items of 'condition', 'medication', 'side_effect' and
# 'treatment'). They are all run as separate questions — confirm whether the
# repetition is intentional or the list should be de-duplicated.
HINDI_TEST_QUESTIONS = {'condition': [{'category': 'condition',
                'complexity': 'simple',
                'english': 'What is insulin resistance?',
                'hindi': 'इंसुलिन प्रतिरोध क्या है?'},
               {'category': 'condition',
                'complexity': 'moderate',
                'english': 'What are the heart disease risk factors that can be changed?',
                'hindi': 'हृदय रोग के जोखिम कारक कौन-कौन से हैं जिन्हें बदला जा सकता है?'},
               {'category': 'condition',
                'complexity': 'moderate',
                'english': 'How does alcohol consumption affect the risk of heart disease?',
                'hindi': 'शराब का सेवन हृदय रोग के जोखिम को कैसे प्रभावित करता है?'},
               {'category': 'condition',
                'complexity': 'simple',
                'english': 'When does your risk increase if your mother or sister has heart disease?',
                'hindi': 'मदर या बहन को हृदय रोग होने पर आपकी जोखिम कब बढ़ जाती है?'},
               {'category': 'condition',
                'complexity': 'simple',
                'english': 'What is the daily alcohol consumption limit for men and women?',
                'hindi': 'दैनिक शराब सेवन की सीमा पुरुषों और महिलाओं के लिए क्या है?'},
               {'category': 'condition',
                'complexity': 'simple',
                'english': 'Which group of Asian Americans has higher rates of heart disease?',
                'hindi': 'किस समूह के एशियाई अमेरिकियों में हृदय रोग की दर अधिक होती है?'},
               {'category': 'condition',
                'complexity': 'moderate',
                'english': 'What is the relationship between stress and heart disease?',
                'hindi': 'तनाव का हृदय रोग से क्या संबंध है?'},
               {'category': 'condition',
                'complexity': 'moderate',
                'english': 'What is the significance of family history in heart disease?',
                'hindi': 'हृदय रोग के लिए पारिवारिक इतिहास का क्या महत्व है?'},
               {'category': 'condition',
                'complexity': 'simple',
                'english': 'When does your risk increase if your mother or sister has heart disease?',
                'hindi': 'मदर या बहन के हृदय रोग से पीड़ित होने पर आपकी जोखिम कब बढ़ जाती है?'},
               {'category': 'condition',
                'complexity': 'simple',
                'english': 'When does your risk increase if your mother or sister has heart disease?',
                'hindi': 'मदर या बहन के हृदय रोग से पीड़ित होने पर आपकी जोखिम कब बढ़ जाती है?'}],
 'medication': [{'category': 'medication',
                 'complexity': 'simple',
                 'english': 'What is mepivacaine hydrochloride used for?',
                 'hindi': 'मेपिवाकेन हाइड्रोक्लोराइड का उपयोग किस लिए किया जाता है?'},
                {'category': 'medication',
                 'complexity': 'simple',
                 'english': 'Which medication is used as a narcotic analgesic?',
                 'hindi': 'मादक दर्दनाशक के रूप में कौन सी दवा उपयोग की जाती है?'},
                {'category': 'medication',
                 'complexity': 'simple',
                 'english': 'What is the pH level of the narcotic analgesic injection?',
                 'hindi': 'मादक दर्दनाशक इंजेक्शन का पीएच स्तर क्या होता है?'},
                # NOTE(review): the English text below names meperidine, but the
                # Hindi text uses the same drug spelling as the mepivacaine
                # questions in this list (the meperidine question further down
                # spells it differently). Confirm which drug is intended and
                # align the two fields.
                {'category': 'medication',
                 'complexity': 'complex',
                 'english': 'What is the structural formula of meperidine hydrochloride?',
                 'hindi': 'मेपिवाकेन हाइड्रोक्लोराइड का संरचनात्मक सूत्र क्या है?'},
                {'category': 'medication',
                 'complexity': 'complex',
                 'english': 'How does the process of drug binding to plasma proteins occur?',
                 'hindi': 'दवा की प्लाज्मा प्रोटीन से बंधन की प्रक्रिया कैसे होती है?'},
                {'category': 'medication',
                 'complexity': 'moderate',
                 'english': 'What type of anesthesia is mepivacaine used for?',
                 'hindi': 'किस प्रकार की एनेस्थीसिया के लिए मेपिवाकेन का उपयोग किया जाता है?'},
                {'category': 'medication',
                 'complexity': 'simple',
                 'english': 'What is the quantity of meperidine hydrochloride in a narcotic analgesic injection?',
                 'hindi': 'मादक दर्दनाशक इंजेक्शन में मेपरिडीन हाइड्रोक्लोराइड की मात्रा क्या होती है?'},
                {'category': 'medication',
                 'complexity': 'simple',
                 'english': 'What is the taste of the narcotic analgesic injection?',
                 'hindi': 'मादक दर्दनाशक इंजेक्शन का स्वाद कैसा होता है?'},
                {'category': 'medication',
                 'complexity': 'simple',
                 'english': 'What is the taste of the narcotic analgesic injection?',
                 'hindi': 'मादक दर्दनाशक इंजेक्शन का स्वाद कैसा होता है?'}],
 'side_effect': [{'category': 'side_effect',
                  'complexity': 'complex',
                  'english': 'What adverse reactions have been observed with the use of Zoledronic Acid Injection?',
                  'hindi': 'ज़ोलेंड्रोनिक एसिड इंजेक्शन के उपयोग में कौन से प्रतिकूल प्रतिक्रियाएं देखी गई हैं?'},
                 {'category': 'side_effect',
                  'complexity': 'complex',
                  'english': 'Why can renal deterioration occur after Zoledronic Acid Injection?',
                  'hindi': 'ज़ोलेंड्रोनिक एसिड इंजेक्शन के बाद गुर्दे की खराबी क्यों हो सकती है?'},
                 {'category': 'side_effect',
                  'complexity': 'complex',
                  'english': 'Why can renal deterioration occur after Zoledronic Acid Injection?',
                  'hindi': 'ज़ोलेंड्रोनिक एसिड इंजेक्शन के बाद गुर्दे की खराबी क्यों हो सकती है?'}],
 'treatment': [{'category': 'treatment',
                'complexity': 'moderate',
                'english': 'Why is quitting smoking important to reduce the risk of heart disease?',
                'hindi': 'हृदय रोग के जोखिम को कम करने के लिए धूम्रपान छोड़ना क्यों महत्वपूर्ण है?'},
               {'category': 'treatment',
                'complexity': 'moderate',
                'english': 'Why is it necessary to quit alcohol to reduce the risk of heart disease?',
                'hindi': 'हृदय रोग के जोखिम को कम करने के लिए शराब छोड़ना क्यों आवश्यक है?'},
               {'category': 'treatment',
                'complexity': 'complex',
                'english': 'What are the types of injection techniques for narcotic analgesic injections?',
                'hindi': 'मादक दर्दनाशक इंजेक्शन के लिए किस प्रकार की इंजेक्शन तकनीकें हैं?'},
               {'category': 'treatment',
                'complexity': 'moderate',
                'english': 'What lifestyle changes are necessary for heart disease?',
                'hindi': 'हृदय रोग के लिए कौन से जीवनशैली परिवर्तन आवश्यक हैं?'},
               {'category': 'treatment',
                'complexity': 'simple',
                'english': 'What type of pain is a narcotic analgesic injection used for?',
                'hindi': 'मादक दर्दनाशक इंजेक्शन का उपयोग किस प्रकार के दर्द के लिए किया जाता है?'},
               {'category': 'treatment',
                'complexity': 'moderate',
                'english': 'What measures can be taken to reduce the risk of heart disease?',
                'hindi': 'हृदय रोग के जोखिम को कम करने के लिए कौन से उपाय किए जा सकते हैं?'},
               {'category': 'treatment',
                'complexity': 'simple',
                'english': 'What type of pain is a narcotic analgesic injection used for?',
                'hindi': 'मादक दर्दनाशक इंजेक्शन का उपयोग किस प्रकार के दर्द के लिए किया जाता है?'},
               {'category': 'treatment',
                'complexity': 'moderate',
                'english': 'What measures can be taken to reduce the risk of heart disease?',
                'hindi': 'हृदय रोग के जोखिम को कम करने के लिए कौन से उपाय किए जा सकते हैं?'}]}

# Flatten the per-domain lists into one list, keeping the dict's domain order.
all_questions = [
    entry
    for domain_entries in HINDI_TEST_QUESTIONS.values()
    for entry in domain_entries
]

# Parallel lists: the Hindi question and its English reference at the same index.
hindi_questions = [entry['hindi'] for entry in all_questions]
english_references = [entry['english'] for entry in all_questions]

print(f"\n✓ Loaded {len(hindi_questions)} Hindi test questions")
print("\nSample questions:")
# Preview the first three pairs, numbered from 1.
for number, entry in enumerate(all_questions[:3], start=1):
    print(f"\n{number}. {entry['hindi']}")
    print(f"   ({entry['english']})")

# ============================================================================
# Cell 4 - Initialize RAG System
# ============================================================================

print("\nInitializing RAG system...")

# One RAG instance serves both approaches; it persists its index under
# data/embeddings/multilingual and generates answers with the gpt-4o model.
rag_system = MultilingualRAG(
    embedding_model='sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2',
    vector_store_type='faiss',
    persist_directory=str(EMBEDDINGS_DIR / 'multilingual'),
    llm_model="gpt-4o",
)

# Reuse the on-disk FAISS index when present; otherwise build it from the
# loaded documents.
vector_store_path = EMBEDDINGS_DIR / 'multilingual' / 'faiss'
if vector_store_path.exists():
    print("Loading existing vector store...")
    rag_system.load_vector_store()
else:
    print("Creating vector store...")
    rag_system.create_vector_store(documents)

print("✓ RAG system ready")

# ============================================================================
# Cell 5 - Enhanced Experiment Runner Class (FIXED TO SAVE CHUNKS)
# ============================================================================

class HindiHealthcareRAGExperiment:
    """
    Experiment runner comparing two approaches for Hindi healthcare QA:
    1. Multilingual embeddings: Hindi→Retrieve directly→Generate in Hindi
    2. Translation-based: Hindi→English→Retrieve→Generate→Hindi

    Each approach returns a flat dict whose keys are prefixed with the
    approach name (``multilingual_`` or ``translation_``). The two dicts are
    merged into one row per question, and all rows become a DataFrame.
    """

    def __init__(self, rag_system, has_openai_key: bool = False):
        """Store the RAG system and the system prompt used for every generation.

        Args:
            rag_system: Object exposing ``vector_store`` and
                ``generate_response``; both are used by the methods below.
            has_openai_key: Stored on the instance; not read by this class.
        """
        self.rag_system = rag_system
        self.has_openai_key = has_openai_key
        self.system_prompt = (
            "You are a helpful healthcare information assistant. "
            "Based on the provided context from CDC, FDA, and medical literature, "
            "answer the question accurately and concisely in the SAME LANGUAGE as the question. "
            "If the context doesn't contain relevant information, say so clearly."
        )

    def _retrieve_with_metrics(
        self,
        query: str,
        use_multilingual: bool,
        k: int = 5
    ) -> Tuple[List, str, float, List[float]]:
        """Retrieve the top-k documents for ``query`` and time the lookup.

        Returns:
            ``(documents, query, retrieval_time_seconds, scores)``. ``query`` is
            returned unchanged so callers can record what was searched.
        """
        start = time.time()

        # Flag is set on the RAG system before searching; how it is consumed
        # is decided inside the RAG system, not here.
        self.rag_system.use_multilingual_embeddings = use_multilingual

        try:
            docs_with_scores = self.rag_system.vector_store.similarity_search_with_score(
                query, k=k
            )
        except (AttributeError, NotImplementedError):
            # Store has no scored search: fall back to the plain search and
            # attach placeholder scores derived only from rank (1.0, 0.95, ...).
            # These are NOT real similarity values.
            docs = self.rag_system.vector_store.similarity_search(query, k=k)
            docs_with_scores = [(doc, 1.0 - i*0.05) for i, doc in enumerate(docs)]

        retrieved_docs = [doc for doc, _ in docs_with_scores]
        scores = [float(score) for _, score in docs_with_scores]

        retrieval_time = time.time() - start

        return retrieved_docs, query, retrieval_time, scores

    def _generate_with_metrics(
        self,
        question: str,
        documents: List,
        target_language: str = "hi"
    ) -> Tuple[str, float, str]:
        """Generate a response in ``target_language`` and time the call.

        Returns:
            ``(response, elapsed_seconds, chunks)``. The elapsed time is the
            wall-clock time measured here; the time value reported by
            ``generate_response`` itself is ignored.
        """
        start = time.time()

        response, _, chunks = self.rag_system.generate_response(
            question, documents, target_language, self.system_prompt
        )

        total_time = time.time() - start
        return response, total_time, chunks

    def _translate_for_eval(
        self,
        text: str,
        src_lang: str = "hi",
        dest_lang: str = "en"
    ) -> Tuple[str, float]:
        """Translate ``text`` and return ``(translated_text, elapsed_seconds)``."""
        start = time.time()
        translated = translate_text(text, src=src_lang, dest=dest_lang)
        trans_time = time.time() - start

        return translated, trans_time

    def _with_error_defaults(
        self,
        approach: str,
        partial: Dict[str, Any],
        error_msg: str
    ) -> Dict[str, Any]:
        """Fill a partially built result with the error defaults.

        Fields already collected before the failure (for example retrieved
        chunks when only generation failed) take precedence over the defaults,
        so a late failure no longer erases the earlier stages.
        """
        merged = self._get_error_result(approach, error_msg)
        merged.update(partial)
        merged[f'{approach}_error'] = error_msg
        return merged

    def run_approach_multilingual(
        self,
        hindi_question: str,
        k: int = 5
    ) -> Dict[str, Any]:
        """Approach A: Direct multilingual embeddings.

        Retrieves with the Hindi question, generates a Hindi answer, then
        translates that answer to English for evaluation. On any exception the
        result carries ``multilingual_error`` plus default values for the
        fields that were not produced.
        """
        result: Dict[str, Any] = {}

        try:
            # Step 1: Retrieve
            docs, eff_query, ret_time, scores = self._retrieve_with_metrics(
                query=hindi_question,
                use_multilingual=True,
                k=k
            )

            result['multilingual_retrieval_time'] = ret_time
            # Only a 300-character preview of each chunk is stored here.
            result['multilingual_retrieved_chunks'] = [d.page_content[:300] for d in docs]
            result['multilingual_chunk_scores'] = scores
            result['multilingual_chunk_sources'] = [d.metadata.get('source', 'Unknown') for d in docs]
            result['multilingual_effective_query'] = eff_query

            # Step 2: Generate
            response_hindi, gen_time, chunks = self._generate_with_metrics(
                hindi_question, docs, target_language="hi"
            )

            result['multilingual_response_hindi'] = response_hindi
            result['multilingual_generation_time'] = gen_time
            result['multilingual_chunks'] = chunks

            # Step 3: Translate for eval
            response_en, trans_time = self._translate_for_eval(
                response_hindi, src_lang="hi", dest_lang="en"
            )

            result['multilingual_response_english_for_eval'] = response_en
            result['multilingual_response_translation_time'] = trans_time

            # The total includes the evaluation-only translation step.
            result['multilingual_total_time'] = ret_time + gen_time + trans_time

            logging.info(f"  ✓ Multilingual: {result['multilingual_total_time']:.2f}s")

        except Exception as e:
            # logging.exception keeps the traceback in the log.
            logging.exception("  ✗ Multilingual failed: %s", e)
            result = self._with_error_defaults('multilingual', result, str(e))

        return result

    def run_approach_translation(
        self,
        hindi_question: str,
        k: int = 5
    ) -> Dict[str, Any]:
        """Approach B: Translation-based pipeline.

        Translates the question to English, retrieves and generates in
        English, then translates the answer back to Hindi. On any exception
        the result carries ``translation_error`` plus default values for the
        fields that were not produced.
        """
        result: Dict[str, Any] = {}

        try:
            # Step 1: Translate question
            question_en, q_trans_time = self._translate_for_eval(
                hindi_question, src_lang="hi", dest_lang="en"
            )

            result['translation_question_english'] = question_en
            result['translation_question_translation_time'] = q_trans_time

            # Step 2: Retrieve
            docs, eff_query, ret_time, scores = self._retrieve_with_metrics(
                query=question_en,
                use_multilingual=False,
                k=k
            )

            result['translation_retrieval_time'] = ret_time
            # Only a 300-character preview of each chunk is stored here.
            result['translation_retrieved_chunks'] = [d.page_content[:300] for d in docs]
            result['translation_chunk_scores'] = scores
            result['translation_chunk_sources'] = [d.metadata.get('source', 'Unknown') for d in docs]
            result['translation_effective_query'] = eff_query

            # Step 3: Generate
            response_en, gen_time, chunks = self._generate_with_metrics(
                question_en, docs, target_language="en"
            )

            result['translation_response_english'] = response_en
            result['translation_generation_time'] = gen_time
            result['translation_chunks'] = chunks

            # Step 4: Translate response
            response_hindi, resp_trans_time = self._translate_for_eval(
                response_en, src_lang="en", dest_lang="hi"
            )

            result['translation_response_hindi'] = response_hindi
            result['translation_response_translation_time'] = resp_trans_time
            # The English generation is reused as the evaluation text.
            result['translation_response_english_for_eval'] = response_en

            result['translation_total_time'] = (
                q_trans_time + ret_time + gen_time + resp_trans_time
            )

            logging.info(f"  ✓ Translation: {result['translation_total_time']:.2f}s")

        except Exception as e:
            # logging.exception keeps the traceback in the log.
            logging.exception("  ✗ Translation failed: %s", e)
            result = self._with_error_defaults('translation', result, str(e))

        return result

    def _get_error_result(self, approach: str, error_msg: str) -> Dict[str, Any]:
        """Return the default field values recorded for a failed ``approach``."""
        return {
            f'{approach}_error': error_msg,
            f'{approach}_retrieval_time': 0.0,
            f'{approach}_generation_time': 0.0,
            f'{approach}_total_time': 0.0,
            f'{approach}_response_hindi': f"ERROR: {error_msg}",
            f'{approach}_response_english_for_eval': f"ERROR: {error_msg}",
            f'{approach}_retrieved_chunks': [],
            f'{approach}_chunk_scores': [],
            f'{approach}_chunk_sources': [],
            f'{approach}_chunks': "",
        }

    def run_single_question(
        self,
        hindi_question: str,
        question_id: str,
        english_equivalent: Optional[str] = None,
        k: int = 5
    ) -> Dict[str, Any]:
        """Run both approaches for one Hindi question and merge their results.

        ``num_chunks_retrieved`` records the requested ``k``, not the number
        of chunks that came back.
        """
        result = {
            'question_id': question_id,
            'question_hindi': hindi_question,
            'question_english_reference': english_equivalent or '',
            'system_prompt': self.system_prompt,
            'num_chunks_retrieved': k,
        }

        print("  Running Approach A (Multilingual embeddings)...")
        result.update(self.run_approach_multilingual(hindi_question, k))

        print("  Running Approach B (Translation pipeline)...")
        result.update(self.run_approach_translation(hindi_question, k))

        return result

    def run_experiments(
        self,
        hindi_questions: List[str],
        english_references: Optional[List[str]] = None,
        output_path: Optional[str] = None,
        k: int = 5,
        pause_seconds: float = 0.3
    ) -> pd.DataFrame:
        """Run both approaches for every Hindi question.

        Args:
            hindi_questions: Questions to run, in order; IDs are Q001, Q002, ...
            english_references: Optional references aligned with the questions.
                ``None`` or an empty list means no references.
            output_path: When given, the results are written there as UTF-8
                CSV (parent directories are created).
            k: Number of chunks requested per retrieval.
            pause_seconds: Delay inserted between consecutive questions.

        Returns:
            One DataFrame row per question.

        Raises:
            ValueError: If references are given but their count differs from
                the number of questions.
        """
        if english_references and len(english_references) != len(hindi_questions):
            raise ValueError("Number of English references must match Hindi questions")

        total = len(hindi_questions)
        references = english_references if english_references else [None] * total
        results = []

        print("\nRunning Hindi Healthcare RAG Experiments")
        print(f"Total questions: {total}")
        print("=" * 80)

        for idx, (hindi_q, english_ref) in enumerate(zip(hindi_questions, references), 1):
            print(f"\n[{idx}/{total}] {hindi_q}")

            result = self.run_single_question(
                hindi_question=hindi_q,
                question_id=f"Q{idx:03d}",
                english_equivalent=english_ref,
                k=k
            )
            results.append(result)

            # Pause only between questions; nothing follows the last one.
            if pause_seconds > 0 and idx < total:
                time.sleep(pause_seconds)

        df = pd.DataFrame(results)

        if output_path:
            Path(output_path).parent.mkdir(parents=True, exist_ok=True)
            df.to_csv(output_path, index=False, encoding='utf-8')
            print("\n" + "=" * 80)
            print(f"✓ Saved results to {output_path}")
            print(f"  Shape: {df.shape}")

        return df

# ============================================================================
# Cell 6 - Run Experiments
# ============================================================================

# Build the runner around the shared RAG system.
experiment_runner = HindiHealthcareRAGExperiment(
    rag_system=rag_system,
    has_openai_key=bool(OPEN_API_KEY),
)

# Each run writes to its own timestamped CSV inside the results folder.
timestamp = f"{datetime.now():%Y%m%d_%H%M%S}"
output_path = RESULTS_DIR / ('hindi_healthcare_rag_' + timestamp + '.csv')

# Run every question through both approaches, requesting 5 chunks each.
df_results = experiment_runner.run_experiments(
    hindi_questions,
    english_references=english_references,
    output_path=str(output_path),
    k=5,
)

# ============================================================================
# Cell 7 - Verify Chunks Were Saved
# ============================================================================

print("\n" + "=" * 80)
print("VERIFYING CHUNKS WERE SAVED")
print("=" * 80)

# Check columns
print(f"\n✓ Total columns: {len(df_results.columns)}")
print(f"\n✓ Column names:")
for col in df_results.columns:
    print(f"  - {col}")

# Check if chunks exist
has_multi_chunks = 'multilingual_chunks' in df_results.columns
has_trans_chunks = 'translation_chunks' in df_results.columns

if has_multi_chunks and has_trans_chunks:
    # A column being present is not enough: failed runs also fill these
    # columns, with an empty string. Count rows that hold real chunk text.
    saved_chunks = {
        label: [c for c in df_results[f'{label}_chunks'] if isinstance(c, str) and c]
        for label in ('multilingual', 'translation')
    }

    if all(saved_chunks.values()):
        print("\n✅ SUCCESS! Chunks were saved!")
        for label, chunks_found in saved_chunks.items():
            print(f"  {label}_chunks: {len(chunks_found)}/{len(df_results)} rows populated")
        for label, chunks_found in saved_chunks.items():
            # Show the first populated row, which may not be row 0.
            print(f"\nSample {label} chunk (first 300 chars):")
            print(chunks_found[0][:300] + "...")
    else:
        print("\n❌ ERROR: Chunks were NOT saved!")
        for label, chunks_found in saved_chunks.items():
            if not chunks_found:
                print(f"  Empty in every row: {label}_chunks")
else:
    print("\n❌ ERROR: Chunks were NOT saved!")
    if not has_multi_chunks:
        print("  Missing: multilingual_chunks")
    if not has_trans_chunks:
        print("  Missing: translation_chunks")

print("\n" + "=" * 80)
print("✓ Experiments complete! Ready for Notebook 4 and 5")
print("=" * 80)

✓ OpenAI API Key set (version: 1.35.0)
Root directory: C:\Users\Boris\Desktop\code\multilingual-rag
Results directory: C:\Users\Boris\Desktop\code\multilingual-rag\results
✓ Loaded 16036 documents

✓ Loaded 30 Hindi test questions

Sample questions:

1. इंसुलिन प्रतिरोध क्या है?
   (What is insulin resistance?)

2. हृदय रोग के जोखिम कारक कौन-कौन से हैं जिन्हें बदला जा सकता है?
   (What are the heart disease risk factors that can be changed?)

3. शराब का सेवन हृदय रोग के जोखिम को कैसे प्रभावित करता है?
   (How does alcohol consumption affect the risk of heart disease?)

Initializing RAG system...


2025-10-15 01:05:32,533 - INFO - Load pretrained SentenceTransformer: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2


Loading existing vector store...


2025-10-15 01:05:35,808 - INFO - Loading faiss with AVX2 support.
2025-10-15 01:05:35,809 - INFO - Could not load library with AVX2 support due to:
ModuleNotFoundError("No module named 'faiss.swigfaiss_avx2'")
2025-10-15 01:05:35,809 - INFO - Loading faiss.
2025-10-15 01:05:35,826 - INFO - Successfully loaded faiss.
2025-10-15 01:05:36,151 - INFO - Loaded faiss vector store


✓ RAG system ready

Running Hindi Healthcare RAG Experiments
Total questions: 30

[1/30] इंसुलिन प्रतिरोध क्या है?
  Running Approach A (Multilingual embeddings)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:05:38,385 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:05:41,712 - INFO -   ✓ Multilingual: 5.55s


  Running Approach B (Translation pipeline)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:05:45,924 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:05:51,724 - INFO -   ✓ Translation: 10.01s



[2/30] हृदय रोग के जोखिम कारक कौन-कौन से हैं जिन्हें बदला जा सकता है?
  Running Approach A (Multilingual embeddings)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:05:55,012 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:05:58,243 - INFO -   ✓ Multilingual: 6.22s


  Running Approach B (Translation pipeline)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:06:02,504 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:06:03,675 - INFO -   ✓ Translation: 5.43s



[3/30] शराब का सेवन हृदय रोग के जोखिम को कैसे प्रभावित करता है?
  Running Approach A (Multilingual embeddings)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:06:05,499 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:06:08,823 - INFO -   ✓ Multilingual: 4.84s


  Running Approach B (Translation pipeline)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:06:11,295 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:06:12,763 - INFO -   ✓ Translation: 3.94s



[4/30] मदर या बहन को हृदय रोग होने पर आपकी जोखिम कब बढ़ जाती है?
  Running Approach A (Multilingual embeddings)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:06:14,734 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:06:18,663 - INFO -   ✓ Multilingual: 5.59s


  Running Approach B (Translation pipeline)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:06:20,808 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:06:22,631 - INFO -   ✓ Translation: 3.96s



[5/30] दैनिक शराब सेवन की सीमा पुरुषों और महिलाओं के लिए क्या है?
  Running Approach A (Multilingual embeddings)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:06:26,593 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:06:29,440 - INFO -   ✓ Multilingual: 6.50s


  Running Approach B (Translation pipeline)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:06:32,622 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:06:34,496 - INFO -   ✓ Translation: 5.05s



[6/30] किस समूह के एशियाई अमेरिकियों में हृदय रोग की दर अधिक होती है?
  Running Approach A (Multilingual embeddings)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:06:35,980 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:06:38,884 - INFO -   ✓ Multilingual: 4.09s


  Running Approach B (Translation pipeline)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:06:41,135 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:06:42,392 - INFO -   ✓ Translation: 3.51s



[7/30] तनाव का हृदय रोग से क्या संबंध है?
  Running Approach A (Multilingual embeddings)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:06:46,605 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:06:50,094 - INFO -   ✓ Multilingual: 7.40s


  Running Approach B (Translation pipeline)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:06:54,271 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:06:56,205 - INFO -   ✓ Translation: 6.11s



[8/30] हृदय रोग के लिए पारिवारिक इतिहास का क्या महत्व है?
  Running Approach A (Multilingual embeddings)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:06:59,507 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:07:02,251 - INFO -   ✓ Multilingual: 5.74s


  Running Approach B (Translation pipeline)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:07:07,563 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:07:08,991 - INFO -   ✓ Translation: 6.74s



[9/30] मदर या बहन के हृदय रोग से पीड़ित होने पर आपकी जोखिम कब बढ़ जाती है?
  Running Approach A (Multilingual embeddings)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:07:12,943 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:07:15,830 - INFO -   ✓ Multilingual: 6.54s


  Running Approach B (Translation pipeline)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:07:18,416 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:07:18,962 - INFO -   ✓ Translation: 3.13s



[10/30] मदर या बहन के हृदय रोग से पीड़ित होने पर आपकी जोखिम कब बढ़ जाती है?
  Running Approach A (Multilingual embeddings)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:07:22,428 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:07:24,886 - INFO -   ✓ Multilingual: 5.62s


  Running Approach B (Translation pipeline)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:07:26,743 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:07:27,764 - INFO -   ✓ Translation: 2.88s



[11/30] मेपिवाकेन हाइड्रोक्लोराइड का उपयोग किस लिए किया जाता है?
  Running Approach A (Multilingual embeddings)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:07:29,406 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:07:32,261 - INFO -   ✓ Multilingual: 4.19s


  Running Approach B (Translation pipeline)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:07:34,428 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:07:35,843 - INFO -   ✓ Translation: 3.58s



[12/30] मादक दर्दनाशक के रूप में कौन सी दवा उपयोग की जाती है?
  Running Approach A (Multilingual embeddings)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:07:37,244 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:07:39,248 - INFO -   ✓ Multilingual: 3.10s


  Running Approach B (Translation pipeline)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:07:41,349 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:07:43,090 - INFO -   ✓ Translation: 3.84s



[13/30] मादक दर्दनाशक इंजेक्शन का पीएच स्तर क्या होता है?
  Running Approach A (Multilingual embeddings)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:07:45,376 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:07:48,350 - INFO -   ✓ Multilingual: 4.96s


  Running Approach B (Translation pipeline)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:07:50,697 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:07:51,531 - INFO -   ✓ Translation: 3.18s



[14/30] मेपिवाकेन हाइड्रोक्लोराइड का संरचनात्मक सूत्र क्या है?
  Running Approach A (Multilingual embeddings)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:07:53,434 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:07:55,250 - INFO -   ✓ Multilingual: 3.42s


  Running Approach B (Translation pipeline)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:07:57,257 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:07:58,459 - INFO -   ✓ Translation: 3.21s



[15/30] दवा की प्लाज्मा प्रोटीन से बंधन की प्रक्रिया कैसे होती है?
  Running Approach A (Multilingual embeddings)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:08:00,502 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:08:02,759 - INFO -   ✓ Multilingual: 4.00s


  Running Approach B (Translation pipeline)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:08:05,493 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:08:06,479 - INFO -   ✓ Translation: 3.72s



[16/30] किस प्रकार की एनेस्थीसिया के लिए मेपिवाकेन का उपयोग किया जाता है?
  Running Approach A (Multilingual embeddings)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:08:08,862 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:08:11,585 - INFO -   ✓ Multilingual: 4.80s


  Running Approach B (Translation pipeline)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:08:13,713 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:08:14,802 - INFO -   ✓ Translation: 3.22s



[17/30] मादक दर्दनाशक इंजेक्शन में मेपरिडीन हाइड्रोक्लोराइड की मात्रा क्या होती है?
  Running Approach A (Multilingual embeddings)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:08:17,701 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:08:19,907 - INFO -   ✓ Multilingual: 4.80s


  Running Approach B (Translation pipeline)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:08:22,859 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:08:24,405 - INFO -   ✓ Translation: 4.49s



[18/30] मादक दर्दनाशक इंजेक्शन का स्वाद कैसा होता है?
  Running Approach A (Multilingual embeddings)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:08:25,839 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:08:28,063 - INFO -   ✓ Multilingual: 3.36s


  Running Approach B (Translation pipeline)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:08:30,195 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:08:31,619 - INFO -   ✓ Translation: 3.56s



[19/30] मादक दर्दनाशक इंजेक्शन का स्वाद कैसा होता है?
  Running Approach A (Multilingual embeddings)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:08:33,732 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:08:35,376 - INFO -   ✓ Multilingual: 3.45s


  Running Approach B (Translation pipeline)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:08:37,528 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:08:38,880 - INFO -   ✓ Translation: 3.50s



[20/30] ज़ोलेंड्रोनिक एसिड इंजेक्शन के उपयोग में कौन से प्रतिकूल प्रतिक्रियाएं देखी गई हैं?
  Running Approach A (Multilingual embeddings)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:08:40,379 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:08:42,704 - INFO -   ✓ Multilingual: 3.52s


  Running Approach B (Translation pipeline)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:08:46,163 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:08:47,958 - INFO -   ✓ Translation: 5.25s



[21/30] ज़ोलेंड्रोनिक एसिड इंजेक्शन के बाद गुर्दे की खराबी क्यों हो सकती है?
  Running Approach A (Multilingual embeddings)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:08:50,454 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:08:52,452 - INFO -   ✓ Multilingual: 4.19s


  Running Approach B (Translation pipeline)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:08:56,002 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:08:57,653 - INFO -   ✓ Translation: 5.20s



[22/30] ज़ोलेंड्रोनिक एसिड इंजेक्शन के बाद गुर्दे की खराबी क्यों हो सकती है?
  Running Approach A (Multilingual embeddings)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:09:01,942 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:09:05,270 - INFO -   ✓ Multilingual: 7.32s


  Running Approach B (Translation pipeline)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:09:08,869 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:09:10,463 - INFO -   ✓ Translation: 5.19s



[23/30] हृदय रोग के जोखिम को कम करने के लिए धूम्रपान छोड़ना क्यों महत्वपूर्ण है?
  Running Approach A (Multilingual embeddings)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:09:12,570 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:09:15,868 - INFO -   ✓ Multilingual: 5.10s


  Running Approach B (Translation pipeline)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:09:18,682 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:09:20,200 - INFO -   ✓ Translation: 4.33s



[24/30] हृदय रोग के जोखिम को कम करने के लिए शराब छोड़ना क्यों आवश्यक है?
  Running Approach A (Multilingual embeddings)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:09:23,624 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:09:25,889 - INFO -   ✓ Multilingual: 5.38s


  Running Approach B (Translation pipeline)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:09:28,536 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:09:30,226 - INFO -   ✓ Translation: 4.34s



[25/30] मादक दर्दनाशक इंजेक्शन के लिए किस प्रकार की इंजेक्शन तकनीकें हैं?
  Running Approach A (Multilingual embeddings)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:09:32,094 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:09:34,624 - INFO -   ✓ Multilingual: 4.09s


  Running Approach B (Translation pipeline)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:09:37,635 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:09:38,552 - INFO -   ✓ Translation: 3.93s



[26/30] हृदय रोग के लिए कौन से जीवनशैली परिवर्तन आवश्यक हैं?
  Running Approach A (Multilingual embeddings)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:09:41,949 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:09:44,789 - INFO -   ✓ Multilingual: 5.93s


  Running Approach B (Translation pipeline)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:09:47,351 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:09:48,932 - INFO -   ✓ Translation: 4.14s



[27/30] मादक दर्दनाशक इंजेक्शन का उपयोग किस प्रकार के दर्द के लिए किया जाता है?
  Running Approach A (Multilingual embeddings)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:09:52,759 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:09:54,782 - INFO -   ✓ Multilingual: 5.55s


  Running Approach B (Translation pipeline)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:09:57,091 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:09:57,980 - INFO -   ✓ Translation: 3.20s



[28/30] हृदय रोग के जोखिम को कम करने के लिए कौन से उपाय किए जा सकते हैं?
  Running Approach A (Multilingual embeddings)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:10:02,626 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:10:05,962 - INFO -   ✓ Multilingual: 7.68s


  Running Approach B (Translation pipeline)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:10:11,994 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:10:13,602 - INFO -   ✓ Translation: 7.64s



[29/30] मादक दर्दनाशक इंजेक्शन का उपयोग किस प्रकार के दर्द के लिए किया जाता है?
  Running Approach A (Multilingual embeddings)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:10:15,938 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:10:19,344 - INFO -   ✓ Multilingual: 5.44s


  Running Approach B (Translation pipeline)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:10:21,650 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:10:22,323 - INFO -   ✓ Translation: 2.98s



[30/30] हृदय रोग के जोखिम को कम करने के लिए कौन से उपाय किए जा सकते हैं?
  Running Approach A (Multilingual embeddings)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:10:27,218 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:10:29,817 - INFO -   ✓ Multilingual: 7.19s


  Running Approach B (Translation pipeline)...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-10-15 01:10:35,127 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-10-15 01:10:37,211 - INFO -   ✓ Translation: 7.39s



✓ Saved results to C:\Users\Boris\Desktop\code\multilingual-rag\results\hindi_healthcare_rag_20251015_010536.csv
  Shape: (30, 30)

VERIFYING CHUNKS WERE SAVED

✓ Total columns: 30

✓ Column names:
  - question_id
  - question_hindi
  - question_english_reference
  - system_prompt
  - num_chunks_retrieved
  - multilingual_retrieval_time
  - multilingual_retrieved_chunks
  - multilingual_chunk_scores
  - multilingual_chunk_sources
  - multilingual_effective_query
  - multilingual_response_hindi
  - multilingual_generation_time
  - multilingual_chunks
  - multilingual_response_english_for_eval
  - multilingual_response_translation_time
  - multilingual_total_time
  - translation_question_english
  - translation_question_translation_time
  - translation_retrieval_time
  - translation_retrieved_chunks
  - translation_chunk_scores
  - translation_chunk_sources
  - translation_effective_query
  - translation_response_english
  - translation_generation_time
  - translation_chunks
  - transla