## Environment Setup

In [3]:
import sys

# !pip uninstall -y torch torchvision torchaudio
# !pip cache purge
# !pip install torch==2.7.0+cu118 torchvision==0.22.0+cu118 torchaudio==2.7.0 --index-url https://download.pytorch.org/whl/cu118
# !pip install pydantic==2.10.6

# packages = [
#     "langchain==0.3.25",
#     "langchain_community",
#     "langchain-huggingface",
#     "sentence_transformers",
#     "pypdf",
#     "faiss-cpu",
#     "bitsandbytes",
#     "transformers",
#     "accelerate",
#     "scikit-learn",
#     "gradio==4.44.1",
#     "typer==0.10.0"
# ]
#
# for pkg in packages:
#     print(f"Installing: {pkg}")
#     !{sys.executable} -m pip install -qU {pkg}

print("All packages installed successfully.")

All packages installed successfully.


In [4]:
import torch
import torchvision
import warnings
import logging
import gradio as gr
import json
from typing import List, Tuple, Dict, Any, Optional
from datetime import datetime
import os
import re

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
warnings.filterwarnings("ignore")

# GPU Check
available = torch.cuda.is_available()
print(f"CUDA available: {available}, GPU: {torch.cuda.get_device_name(0) if available else 'none'}")
print(torch.__version__, torchvision.__version__)

CUDA available: True, GPU: NVIDIA GeForce RTX 4070 Laptop GPU
2.7.0+cu118 0.22.0+cu118


# 1. Initial Setup (Model selection & conversation flow)

## 1.1 Configuration: Model & Retrieval Settings

In [5]:
from dataclasses import dataclass
@dataclass
class ModelConfig:
    """Enhanced configuration for the empathetic language model."""
    # Original model for comparison or fallback
    original_model_name: str = "ritvik77/Medical_Doctor_AI_LoRA-Mistral-7B-Instruct_FullModel"
    # Fine-tuned empathetic model path
    empathetic_model_path: str = "./checkpoint1"  # Path to your fine-tuned model
    embedding_model: str = "sentence-transformers/all-mpnet-base-v2"
    max_new_tokens: int = 256
    temperature: float = 0.3
    top_p: float = 0.9
    repetition_penalty: float = 1.1
    device_map: str = "auto"
    load_in_4bit: bool = True
    trust_remote_code: bool = True
    do_sample: bool = True


@dataclass
class RAGConfig:
    """Configuration for RAG retrieval."""
    chunk_size: int = 1000
    chunk_overlap: int = 20
    retriever_k: int = 5
    vector_store_path: str = "./vector_store"
    document_dir: str = "./documents"

## 1.2 Document Handling: Load and Split PDFs

In [6]:
# Import statements for document processing and vector store
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from pathlib import Path

class DocumentProcessor:
    """Loads and splits PDF documents into chunks."""
    def __init__(self, cfg: RAGConfig):
        self.splitter = RecursiveCharacterTextSplitter(
            chunk_size=cfg.chunk_size, chunk_overlap=cfg.chunk_overlap
        )
    
    def load_and_split(self, pdf_paths: List[str]) -> List[Document]:
        docs = []
        for path in pdf_paths:
            loader = PyPDFLoader(path)
            docs.extend(loader.load())
        chunks = self.splitter.split_documents(docs)
        logger.info(f"Split into {len(chunks)} chunks")
        return chunks

## 1.3 Vector Store: Build or Load FAISS Index

In [7]:
class VectorStoreManager:
    """Creates or loads a FAISS vector store."""
    def __init__(self, rag_cfg: RAGConfig, model_cfg: ModelConfig):
        device = "cuda" if torch.cuda.is_available() else "cpu"
        self.embedding = HuggingFaceEmbeddings(
            model_name=model_cfg.embedding_model, model_kwargs={"device": device}
        )
        self.path = rag_cfg.vector_store_path
    
    def create_or_load(self, chunks: List[Document]) -> FAISS:
        if Path(self.path).exists():
            vs = FAISS.load_local(self.path, self.embedding, allow_dangerous_deserialization=True)
            logger.info("Loaded existing FAISS store")
        else:
            vs = FAISS.from_documents(chunks, self.embedding)
            vs.save_local(self.path)
            logger.info("Created new FAISS store")
        return vs

## 1.4 Model Loader: Quantized LLM Pipeline

In [8]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
from langchain.llms import HuggingFacePipeline

class EmpatheticModelManager:
    """Enhanced model manager for empathetic medical responses."""

    def __init__(self, cfg: ModelConfig):
        self.cfg = cfg
        self.llm = None
        self.emotion_detector = EmotionDetector()
        self.is_using_empathetic_model = False

    def get_llm(self) -> HuggingFacePipeline:
        if self.llm is None:
            # Quantization config
            bnb = BitsAndBytesConfig(
                load_in_4bit=self.cfg.load_in_4bit,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_compute_dtype=torch.float16
            )

            # Determine which model to load
            if os.path.exists(self.cfg.empathetic_model_path):
                model_path = self.cfg.empathetic_model_path
                self.is_using_empathetic_model = False
                logger.info("Using fine-tuned model")
            else:
                model_path = self.cfg.original_model_name
                self.is_using_empathetic_model = False
                logger.info("Using base model")

            logger.info(f"Loading model from: {model_path}")
            print(model_path)

            # Load model
            model = AutoModelForCausalLM.from_pretrained(
                model_path,
                quantization_config=bnb,
                device_map=self.cfg.device_map,
                trust_remote_code=self.cfg.trust_remote_code,
                use_cache=True
            )

            tokenizer = AutoTokenizer.from_pretrained(
                model_path,
                trust_remote_code=self.cfg.trust_remote_code
            )
            tokenizer.pad_token = tokenizer.eos_token

            pipe = pipeline(
                "text-generation",
                model=model,
                tokenizer=tokenizer,
                max_new_tokens=self.cfg.max_new_tokens,
                temperature=self.cfg.temperature,
                top_p=self.cfg.top_p,
                repetition_penalty=self.cfg.repetition_penalty,
                device_map=self.cfg.device_map,
                do_sample=self.cfg.do_sample,
                return_full_text=False,
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id
            )

            self.llm = HuggingFacePipeline(pipeline=pipe)
            logger.info("Empathetic LLM pipeline is ready")
        return self.llm

    def get_model_status(self) -> dict:
        """Get current model status."""
        return {
            "is_empathetic": self.is_using_empathetic_model,
            "model_path": self.cfg.empathetic_model_path if self.is_using_empathetic_model else self.cfg.original_model_name,
            "empathy_builtin": self.is_using_empathetic_model
        }

## 1.5 EmotionDetector

In [9]:
class EmotionDetector:
    """Detects emotion/empathy labels from patient input."""
    
    def __init__(self):
        # Emotion keywords mapping based on your fine-tuning data
        self.emotion_keywords = {
            'afraid': ['afraid', 'scared', 'frightened', 'fear', 'terrified', 'anxious', 'worried', 'nervous'],
            'terrified': ['terrified', 'horrified', 'petrified', 'panic', 'terror', 'nightmare'],
            'angry': ['angry', 'mad', 'furious', 'frustrated', 'irritated', 'annoyed', 'upset', 'rage'],
            'sad': ['sad', 'depressed', 'down', 'miserable', 'unhappy', 'crying', 'tears', 'grief'],
            'happy': ['happy', 'glad', 'cheerful', 'pleased', 'delighted', 'excited', 'thrilled'],
            'joyful': ['joyful', 'ecstatic', 'elated', 'overjoyed', 'blissful', 'euphoric'],
            'proud': ['proud', 'accomplished', 'achieved', 'successful', 'triumph', 'victory'],
            'sentimental': ['nostalgic', 'memories', 'remember', 'reminisce', 'touching', 'meaningful'],
            'jealous': ['jealous', 'envious', 'envy', 'resentful', 'bitter'],
            'faithful': ['loyal', 'devoted', 'committed', 'dedicated', 'faithful', 'trust']
        }
        
        self.emotion_contexts = {
            'afraid': 'The patient is expressing fear and anxiety. Respond with reassurance and understanding.',
            'terrified': 'The patient is experiencing terror and fear. Provide calm, reassuring support.',
            'proud': 'The patient is sharing something positive. Acknowledge their achievement warmly.',
            'joyful': 'The patient is expressing joy and happiness. Share in their positive emotions appropriately.',
            'sentimental': 'The patient is sharing a meaningful memory. Show empathy and emotional support.',
            'angry': 'The patient is expressing frustration or anger. Validate their feelings and offer support.',
            'sad': 'The patient is experiencing sadness. Provide comfort and understanding.',
            'happy': 'The patient is expressing joy. Share in their positive emotions appropriately.',
            'jealous': 'The patient is feeling envious or jealous. Acknowledge their feelings with understanding.',
            'faithful': 'The patient is discussing loyalty or faithfulness. Provide supportive guidance.',
            'neutral': 'Respond with empathy and understanding.'
        }
    
    def detect_emotion(self, text: str) -> Tuple[str, str]:
        """
        Detect emotion from patient input.
        Returns: (emotion_label, emotion_context)
        """
        text_lower = text.lower()
        detected_emotions = []
        
        # Score each emotion based on keyword matches
        emotion_scores = {}
        for emotion, keywords in self.emotion_keywords.items():
            score = sum(1 for keyword in keywords if keyword in text_lower)
            if score > 0:
                emotion_scores[emotion] = score
        
        # Additional pattern-based detection
        if any(word in text_lower for word in ['pain', 'hurt', 'ache', 'suffering']):
            emotion_scores['sad'] = emotion_scores.get('sad', 0) + 1
        
        if any(word in text_lower for word in ['can\'t', 'unable', 'difficult', 'hard', 'struggle']):
            emotion_scores['afraid'] = emotion_scores.get('afraid', 0) + 1
        
        if any(word in text_lower for word in ['better', 'improved', 'recovery', 'healing']):
            emotion_scores['happy'] = emotion_scores.get('happy', 0) + 1
        
        # Determine primary emotion
        if emotion_scores:
            primary_emotion = max(emotion_scores.keys(), key=lambda k: emotion_scores[k])
        else:
            primary_emotion = 'neutral'
        
        emotion_context = self.emotion_contexts.get(primary_emotion, self.emotion_contexts['neutral'])
        
        logger.info(f"Detected emotion: {primary_emotion} for input: '{text[:50]}...'")
        return primary_emotion, emotion_context

## 1.5 Conversation History Manager

In [10]:
class ConversationManager:
    """Enhanced conversation manager with persistent history and topic generation."""

    def __init__(self, history_dir: str = "./history"):
        self.history: List[Tuple[str, str]] = []
        self.session_id = datetime.now().strftime("%Y%m%d_%H%M%S")
        self.history_dir = history_dir
        self.current_topic = ""
        os.makedirs(self.history_dir, exist_ok=True)

    def generate_topic(self, question: str) -> str:
        """Generate a topic title from the first question of the conversation."""
        medical_keywords = [
            'diabetes', 'hypertension', 'cancer', 'heart', 'blood', 'pressure',
            'symptoms', 'treatment', 'medication', 'diagnosis', 'disease',
            'infection', 'pain', 'fever', 'cough', 'headache', 'stomach'
        ]

        question_lower = question.lower()
        found_keywords = [kw for kw in medical_keywords if kw in question_lower]

        if found_keywords:
            main_keyword = found_keywords[0].title()
            if len(question) > 50:
                return f"{main_keyword} - {question[:47]}..."
            return f"{main_keyword} - {question}"
        else:
            if len(question) > 50:
                return f"Medical Query - {question[:47]}..."
            return f"Medical Query - {question}"

    def add_turn(self, question: str, answer: str):
        """Add a question-answer turn to history."""
        if not self.history and not self.current_topic:
            self.current_topic = self.generate_topic(question)

        self.history.append((question, answer))

    def get_for_chain(self) -> List[Tuple[str, str]]:
        """Get history in format suitable for the RAG chain."""
        return self.history

    def save(self, custom_filename: str = None):
        """Save conversation with metadata including topic and timestamp."""
        if not self.history:
            return None

        filename = custom_filename or f"{self.session_id}.json"
        filepath = os.path.join(self.history_dir, filename)

        # Clean the history before saving - remove emotion indicators and standard mode indicators
        clean_history = []
        for q, a in self.history:
            clean_answer = a

            # Remove emotion indicators
            if "*[Emotion detected:" in clean_answer:
                lines = clean_answer.split('\n')
                clean_lines = []
                for line in lines:
                    # Skip lines that contain emotion indicators or standard mode indicators
                    if (line.strip().startswith('*[Emotion detected:') or
                        line.strip().startswith('*[Standard mode:') or
                        line.strip() == '*[Error:*'):
                        continue
                    clean_lines.append(line)
                clean_answer = '\n'.join(clean_lines).strip()

            # Remove standard mode indicators
            if "*[Standard mode:" in clean_answer:
                lines = clean_answer.split('\n')
                clean_lines = []
                for line in lines:
                    if line.strip().startswith('*[Standard mode:'):
                        continue
                    clean_lines.append(line)
                clean_answer = '\n'.join(clean_lines).strip()

            # Remove error indicators
            if "*[Error:" in clean_answer:
                lines = clean_answer.split('\n')
                clean_lines = []
                for line in lines:
                    if line.strip().startswith('*[Error:'):
                        continue
                    clean_lines.append(line)
                clean_answer = '\n'.join(clean_lines).strip()

            clean_history.append((q, clean_answer))

        conversation_data = {
            "topic": self.current_topic,
            "session_id": self.session_id,
            "created_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "total_turns": len(clean_history),
            "conversation": clean_history
        }

        with open(filepath, "w", encoding='utf-8') as f:
            json.dump(conversation_data, f, indent=2, ensure_ascii=False)

        logger.info(f"Saved conversation '{self.current_topic}' to {filepath}")
        return filepath


    def get_available_conversations(self) -> List[Dict[str, str]]:
        """Get list of available conversation histories with metadata."""
        conversations = []

        if not os.path.exists(self.history_dir):
            return conversations

        for filename in os.listdir(self.history_dir):
            if filename.endswith('.json'):
                filepath = os.path.join(self.history_dir, filename)
                try:
                    with open(filepath, "r", encoding='utf-8') as f:
                        data = json.load(f)

                    conversations.append({
                        "filename": filename,
                        "topic": data.get("topic", "Unknown Topic"),
                        "created_at": data.get("created_at", "Unknown Date"),
                        "total_turns": data.get("total_turns", 0)
                    })
                except Exception as e:
                    logger.warning(f"Error reading {filename}: {e}")

        conversations.sort(key=lambda x: x["created_at"], reverse=True)
        return conversations

    def clear_current_session(self):
        """Clear current conversation and start a new session."""
        self.history = []
        self.session_id = datetime.now().strftime("%Y%m%d_%H%M%S")
        self.current_topic = ""

## 1.6 RAG Chain: Conversational Retrieval Setup

In [11]:
from langchain.chains import ConversationalRetrievalChain
from langchain.chains.question_answering import load_qa_chain
from langchain.chains.llm import LLMChain
from langchain.prompts import PromptTemplate

class EmpatheticRAGChain:
    """Enhanced RAG chain with empathy-aware prompts."""

    def __init__(self, llm: HuggingFacePipeline, vs: FAISS, emotion_detector: EmotionDetector):
        self.emotion_detector = emotion_detector
        self.empathy_enabled = True  # empathy mode control

        # Enhanced condense question prompt
        self.condense_question_prompt = PromptTemplate(
            input_variables=["question", "chat_history"],
            template="""Given the conversation history and a follow-up question, rephrase the question to be standalone while preserving emotional context.

Chat History:
{chat_history}

Follow-up Question: {question}

Standalone Question:"""
        )

        # without sympathy
        self.standard_qa_prompt = PromptTemplate(
            input_variables=["context", "question"],
            template="""<|system|>
You are a medical AI assistant. Provide accurate medical information based on the given context.

<|user|>
Medical Context: {context}

Patient Question: {question}

Important guidelines:
- Provide direct, concise medical information based on the context
- Be professional and informative
- If the context doesn't contain relevant information, acknowledge this clearly

<|assistant|>"""
        )

        # Empathy-aware QA prompt
        self.empathy_qa_prompt = PromptTemplate(
            input_variables=["context", "question", "emotion_context"],
            template="""<|system|>
You are an empathetic medical AI assistant trained to provide compassionate and understanding responses to patients.

<|user|>
You are responding to a patient who needs medical guidance. Use ONLY the provided medical context to answer their question.

Patient's Emotional State: {emotion_context}

Medical Context: {context}

Patient Question: {question}

Important guidelines:
- Show empathy and understanding for the patient's emotional state
- Provide direct, concise medical information based on the context
- Be supportive and reassuring while maintaining medical accuracy
- End your response naturally without additional questions
- If the context doesn't contain relevant information, acknowledge this empathetically

<|assistant|>"""
        )

        self._init_chains(llm, vs)

    def _init_chains(self, llm: HuggingFacePipeline, vs: FAISS):
        question_generator = LLMChain(
            llm=llm,
            prompt=self.condense_question_prompt
        )

        # standard chain
        standard_combine_docs_chain = load_qa_chain(
            llm=llm,
            chain_type="stuff",
            prompt=self.standard_qa_prompt
        )

        self.standard_chain = ConversationalRetrievalChain(
            retriever=vs.as_retriever(search_kwargs={"k": 5}),
            combine_docs_chain=standard_combine_docs_chain,
            question_generator=question_generator,
            return_source_documents=True
        )

        # Empathy chain
        empathy_combine_docs_chain = load_qa_chain(
            llm=llm,
            chain_type="stuff",
            prompt=self.empathy_qa_prompt
        )

        self.empathy_chain = ConversationalRetrievalChain(
            retriever=vs.as_retriever(search_kwargs={"k": 5}),
            combine_docs_chain=empathy_combine_docs_chain,
            question_generator=question_generator,
            return_source_documents=True
        )

    def set_empathy_mode(self, enabled: bool):
        """Set empathy mode enabled or disabled."""
        self.empathy_enabled = enabled
        logger.info(f"Empathy mode {'enabled' if enabled else 'disabled'}")

    def query(self, question: str, history: List[Tuple[str, str]]):
        """Enhanced query with empathy mode control."""
        if self.empathy_enabled:
            # empathy prompt
            emotion_label, emotion_context = self.emotion_detector.detect_emotion(question)

            result = self.empathy_chain({
                "question": question,
                "chat_history": history,
                "emotion_context": emotion_context
            })

            result["detected_emotion"] = emotion_label
            result["emotion_context"] = emotion_context
            result["empathy_mode"] = True
        else:
            # standard prompt
            result = self.standard_chain({
                "question": question,
                "chat_history": history
            })

            result["detected_emotion"] = "neutral"
            result["emotion_context"] = "Standard medical response mode"
            result["empathy_mode"] = False

        return result

## 1.7 MedicalChatbot Core: Integrate All

In [12]:
class EmpatheticMedicalChatbot:
    """Enhanced medical chatbot with empathetic responses and persistent conversation history."""

    def __init__(self):
        self.model_cfg = ModelConfig()
        self.rag_cfg = RAGConfig()
        self.doc_proc = DocumentProcessor(self.rag_cfg)
        self.vec_mgr = VectorStoreManager(self.rag_cfg, self.model_cfg)
        self.mod_mgr = EmpatheticModelManager(self.model_cfg)
        self.conv_mgr = ConversationManager()
        self.emotion_detector = EmotionDetector()
        self.rag_chain: EmpatheticRAGChain = None
        self.empathy_enabled = True

    def setup_documents(self, pdf_paths: List[str]):
        """Setup documents and initialize RAG chain."""
        chunks = self.doc_proc.load_and_split(pdf_paths)
        vs = self.vec_mgr.create_or_load(chunks)
        llm = self.mod_mgr.get_llm()
        self.rag_chain = EmpatheticRAGChain(llm, vs, self.emotion_detector)
        self.rag_chain.set_empathy_mode(self.empathy_enabled)

    def toggle_empathy_mode(self, enabled: bool):
        self.empathy_enabled = enabled
        if self.rag_chain:
            self.rag_chain.set_empathy_mode(enabled)
        logger.info(f"Empathy mode {'enabled' if enabled else 'disabled'}")

    def get_empathy_status(self) -> bool:
        return self.empathy_enabled

    def get_model_info(self) -> dict:
        """Get detailed model information."""
        return self.mod_mgr.get_model_status()

    def chat(self, question: str) -> Dict[str, Any]:
        """Process a question and return answer with sources."""
        try:
            if not question.strip():
                return {
                    "answer": "I'm here to help. Please feel free to share your medical concerns with me.",
                    "sources": [],
                    "detected_emotion": "neutral",
                    "empathy_mode": self.empathy_enabled
                }

            # Log the current state
            hist = self.conv_mgr.get_for_chain()
            logger.info(f"Processing question with {len(hist)} previous turns in history")

            # Ensure RAG chain is properly initialized
            if not self.rag_chain:
                logger.error("RAG chain not initialized")
                return {
                    "answer": "I'm sorry, but the system is not properly initialized. Please try again later.",
                    "sources": [],
                    "detected_emotion": "neutral",
                    "empathy_mode": self.empathy_enabled
                }

            result = self.rag_chain.query(question, hist)

            if not result or "answer" not in result:
                logger.error("RAG chain returned invalid result")
                return {
                    "answer": "I apologize, but I couldn't process your question properly. Please try again.",
                    "sources": [],
                    "detected_emotion": "neutral",
                    "empathy_mode": self.empathy_enabled
                }

            ans = result["answer"]
            ans = self._clean_answer(ans)

            detected_emotion = result.get("detected_emotion", "neutral")
            empathy_mode = result.get("empathy_mode", self.empathy_enabled)

            # Add to conversation history
            self.conv_mgr.add_turn(question, ans)

            logger.info(f"Processed question in {'empathy' if empathy_mode else 'standard'} mode with emotion '{detected_emotion}': {question[:50]}...")
            return {
                "answer": ans,
                "sources": result.get("source_documents", []),
                "detected_emotion": detected_emotion,
                "emotion_context": result.get("emotion_context", ""),
                "empathy_mode": empathy_mode
            }
        except Exception as e:
            logger.error(f"Error processing question: {e}")
            import traceback
            logger.error(f"Full traceback: {traceback.format_exc()}")
            return {
                "answer": "I apologize, but I encountered an issue while processing your question. Please try again, and I'll do my best to help you.",
                "sources": [],
                "detected_emotion": "neutral",
                "empathy_mode": self.empathy_enabled
            }

    def _clean_answer(self, answer: str) -> str:
        """Enhanced answer cleaning with better post-processing."""
        if not answer or not answer.strip():
            return "I apologize, but I couldn't generate a proper response. Please try asking your question differently."

        original_answer = answer

        # Remove system tokens and unwanted phrases
        system_tokens = ["<|system|>", "<|user|>", "<|assistant|>", "<|endoftext|>"]
        for token in system_tokens:
            answer = answer.replace(token, "")

        stop_phrases = [
            "Stop generating once you have answered the question completely.",
            "Stop generating.",
            "Important instructions:",
            "Context:",
            "Question:",
            "Answer:",
            "Patient's Emotional State:",
            "Medical Context:",
            "Patient Question:",
            "Important guidelines:"
        ]

        for phrase in stop_phrases:
            answer = answer.replace(phrase, "")

        # Handle truncated sentences
        lines = answer.split('\n')
        cleaned_lines = []

        for line in lines:
            line = line.strip()
            if not line:
                continue

            # Skip obvious metadata
            if any(indicator in line for indicator in ['Source:', 'Sources:', 'Reference:', 'References:']):
                break

            if any(indicator in line.lower() for indicator in ['http', 'www.', '.org', '.com']):
                break

            # Skip lines that look like prompts or instructions
            if line.startswith('-') and any(word in line.lower() for word in ['provide', 'be', 'if', 'show']):
                continue

            cleaned_lines.append(line)

        final_answer = '\n'.join(cleaned_lines).strip()

        # If answer is too short or empty, return a fallback
        if not final_answer or len(final_answer.strip()) < 10:
            logger.warning(f"Answer too short after cleaning. Original: {original_answer[:100]}...")
            return "I understand your question, but I'm having difficulty providing a complete response right now. Could you please rephrase your question or provide more details?"

        # Fix truncated sentences
        if final_answer and not final_answer.endswith(('.', '!', '?', ':')):
            sentences = final_answer.split('.')
            if len(sentences) > 1:
                complete_sentences = sentences[:-1]
                final_answer = '.'.join(complete_sentences) + '.'
            else:
                final_answer += '.'

        # Remove duplicate sentences
        sentences = [s.strip() for s in final_answer.split('.') if s.strip()]
        unique_sentences = []
        seen = set()

        for sentence in sentences:
            sentence_key = sentence.lower().replace(' ', '')[:50]
            if sentence_key not in seen:
                seen.add(sentence_key)
                unique_sentences.append(sentence)

        if unique_sentences:
            final_answer = '. '.join(unique_sentences) + '.'

        return final_answer

    # Preserve all original methods
    def save_current_conversation(self) -> Optional[str]:
        """Save current conversation to file."""
        return self.conv_mgr.save()

    def load_conversation(self, filename: str) -> bool:
        """Load a previous conversation from file."""
        filepath = os.path.join(self.conv_mgr.history_dir, filename)

        if not os.path.exists(filepath):
            logger.error(f"Conversation file not found: {filepath}")
            return False

        try:
            with open(filepath, "r", encoding='utf-8') as f:
                data = json.load(f)

            self.conv_mgr.clear_current_session()

            # Load conversation and clean any emotion indicators that might exist
            loaded_history = data.get("conversation", [])
            clean_history = []

            for q, a in loaded_history:
                # Clean answer from any UI formatting
                clean_answer = a

                # Remove emotion indicators
                if "*[Emotion detected:" in clean_answer or "*[Standard mode:" in clean_answer or "*[Error:" in clean_answer:
                    lines = clean_answer.split('\n')
                    clean_lines = []
                    for line in lines:
                        line_stripped = line.strip()
                        if (line_stripped.startswith('*[Emotion detected:') or
                            line_stripped.startswith('*[Standard mode:') or
                            line_stripped.startswith('*[Error:')):
                            continue
                        clean_lines.append(line)
                    clean_answer = '\n'.join(clean_lines).strip()

                clean_history.append((q, clean_answer))

            self.conv_mgr.history = clean_history
            self.conv_mgr.current_topic = data.get("topic", "")
            self.conv_mgr.session_id = data.get("session_id", self.conv_mgr.session_id)

            logger.info(f"Loaded conversation: {self.conv_mgr.current_topic} with {len(self.conv_mgr.history)} turns")

            # Log the cleaned history for debugging
            logger.info("Loaded conversation history (first 100 chars of each answer):")
            for i, (q, a) in enumerate(clean_history[:3]):  # Show first 3 for debugging
                logger.info(f"Turn {i+1}: Q='{q[:50]}...', A='{a[:100]}...'")

            return True
        except Exception as e:
            logger.error(f"Error loading conversation: {e}")
            import traceback
            logger.error(f"Full traceback: {traceback.format_exc()}")
            return False

    def get_conversation_list(self) -> List[Dict[str, str]]:
        """Get list of available conversations."""
        return self.conv_mgr.get_available_conversations()

    def start_new_conversation(self):
        """Start a new conversation session."""
        if self.conv_mgr.history:
            self.save_current_conversation()
        self.conv_mgr.clear_current_session()

    def get_current_conversation_info(self) -> Dict[str, Any]:
        """Get information about current conversation."""
        return {
            "topic": self.conv_mgr.current_topic,
            "session_id": self.conv_mgr.session_id,
            "total_turns": len(self.conv_mgr.history),
            "history": self.conv_mgr.history
        }

    def get_emotion_detection_status(self) -> bool:
        """Check if emotion detection is enabled."""
        return self.model_cfg.empathy_detection_enabled

    def toggle_emotion_detection(self, enabled: bool):
        """Toggle emotion detection on/off."""
        self.model_cfg.empathy_detection_enabled = enabled
        logger.info(f"Emotion detection {'enabled' if enabled else 'disabled'}")

## 1.8 Example


In [13]:
import gradio as gr
from typing import List, Tuple
import logging

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def initialize_chatbot():
    """Initialize the empathetic medical chatbot."""
    print("Initializing Empathetic Medical Chatbot...")
    bot = EmpatheticMedicalChatbot()
    print("Setting up documents...")
    # Update this path to your medical documents
    bot.setup_documents(["./Dataset/Medical_Book.pdf"])
    return bot

# Initialize the bot
bot = initialize_chatbot()

Initializing Empathetic Medical Chatbot...


  self.embedding = HuggingFaceEmbeddings(

INFO:datasets:PyTorch version 2.7.0+cu118 available.
INFO:datasets:TensorFlow version 2.19.0 available.
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: sentence-transformers/all-mpnet-base-v2


Setting up documents...


INFO:__main__:Split into 3006 chunks
INFO:faiss.loader:Loading faiss with AVX512 support.
INFO:faiss.loader:Could not load library with AVX512 support due to:
ModuleNotFoundError("No module named 'faiss.swigfaiss_avx512'")
INFO:faiss.loader:Loading faiss with AVX2 support.
INFO:faiss.loader:Successfully loaded faiss with AVX2 support.
INFO:faiss:Failed to load GPU Faiss: name 'GpuIndexIVFFlat' is not defined. Will not load constructor refs for GPU indexes. This is only an error if you're trying to use GPU Faiss.
INFO:__main__:Loaded existing FAISS store
INFO:__main__:Using base model
INFO:__main__:Loading model from: ritvik77/Medical_Doctor_AI_LoRA-Mistral-7B-Instruct_FullModel


ritvik77/Medical_Doctor_AI_LoRA-Mistral-7B-Instruct_FullModel


INFO:accelerate.utils.modeling:We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk).
Device set to use cuda:0
  self.llm = HuggingFacePipeline(pipeline=pipe)
INFO:__main__:Empathetic LLM pipeline is ready
  question_generator = LLMChain(
stuff: https://python.langchain.com/docs/versions/migrating_chains/stuff_docs_chain
map_reduce: https://python.langchain.com/docs/versions/migrating_chains/map_reduce_chain
refine: https://python.langchain.com/docs/versions/migrating_chains/refine_chain
map_rerank: https://python.langchain.com/docs/versions/migrating_chains/map_rerank_docs_chain

See also guides on retrieval and question-answering here: https://python.langchain.com/docs/how_to/#qa-with-rag
  standard_combine_docs_chain = load_qa_chain(
  self.standard_chain = ConversationalRetrievalChain(
INFO:__main__:Empathy mode enabled


In [14]:
# Get model info for display
model_info = bot.get_model_info()
USE_GUI = True  # Set to False for command-line mode

if not USE_GUI:
    print("Running in command-line mode...\n")
    print(f"Model Info: {model_info}")

    EMPATHY_MODE = False  # Change this to control empathy mode
    bot.toggle_empathy_mode(EMPATHY_MODE)
    empathy_status = bot.get_empathy_status()
    print(f"Empathy Mode: {'Enabled' if empathy_status else 'Disabled'}")

    questions = [
        "I'm really scared about my diabetes diagnosis",
        "What are the symptoms of diabetes?",
        "How can I prevent complications? I'm worried about my future.",
        "I feel happy that my blood sugar is improving!",
    ]

    for q in questions:
        print(f"Q: {q}")
        res = bot.chat(q)
        print(f"A: {res['answer']}")
        print(f"Detected Emotion: {res['detected_emotion']}")
        print(f"Emotion Context: {res['emotion_context']}")
        print("-" * 80)

else:
    # ------------------------------ Core helpers --------------------------- #
    def respond(user_input: str, chat_history: List[Tuple[str, str]]):
        """Respond to a user message and append emotion info if empathy mode is on."""
        if not user_input.strip():
            # Nothing typed → just refresh sidebar
            return (
                chat_history,
                "",
                update_conversation_list(),
                get_empathy_status(),
            )

        try:
            # Debug: Log current conversation state
            current_conv_info = bot.get_current_conversation_info()
            loaded_history_length = len(current_conv_info["history"])
            ui_history_length = len(chat_history)

            logger.info(f"Respond called - Loaded history: {loaded_history_length}, UI history: {ui_history_length}")
            logger.info(f"User input: '{user_input[:50]}...'")

            # Debug: Log the current history that will be passed to RAG chain
            if current_conv_info["history"]:
                logger.info("Current conversation history (last 2 turns):")
                for i, (q, a) in enumerate(current_conv_info["history"][-2:]):
                    logger.info(f"History Turn {i}: Q='{q[:30]}...', A='{a[:50]}...'")

            # If loaded history is longer than UI history, sync them
            if loaded_history_length > ui_history_length:
                logger.info("Syncing loaded conversation with UI history")
                # The conversation was just loaded, continue normally

            result = bot.chat(user_input)
            answer = result["answer"]
            detected_emotion = result.get("detected_emotion", "neutral")
            empathy_mode = result.get("empathy_mode", True)

            # Attach emotion indicator when empathy mode is enabled
            if empathy_mode:
                emotion_emoji = {
                    "afraid": "😰",
                    "terrified": "😱",
                    "angry": "😠",
                    "sad": "😢",
                    "happy": "😊",
                    "joyful": "😄",
                    "proud": "😌",
                    "sentimental": "🥺",
                    "jealous": "😒",
                    "faithful": "🤗",
                    "neutral": "😐",
                }
                emoji = emotion_emoji.get(detected_emotion, "😐")
                emotion_text = f"[Emotion detected: {detected_emotion} {emoji}]"
                formatted_answer = f"{answer}\n\n*{emotion_text}*"
            else:
                formatted_answer = f"{answer}\n\n*[Standard mode: No emotion detection]*"

            chat_history.append((user_input, formatted_answer))
            bot.save_current_conversation()

            logger.info(f"Successfully processed user input: {user_input[:50]}...")

        except Exception as e:
            logger.error(f"Error in respond function: {e}")
            import traceback
            logger.error(f"Full traceback: {traceback.format_exc()}")

            # Add error information to chat history
            error_msg = "I apologize, but I encountered a technical issue. Please try rephrasing your question."
            chat_history.append((user_input, f"{error_msg}\n\n*[Error: {str(e)}]*"))

        return (
            chat_history,
            "",
            update_conversation_list(),
            get_empathy_status(),
        )

    def reset_chat():
        """Reset chat history and start a new conversation."""
        bot.start_new_conversation()
        return [], update_conversation_list(), get_empathy_status()

    # -------- Sidebar status helpers -------- #
    def get_empathy_status():
        """Get current empathy mode status for display."""
        status = bot.get_empathy_status()
        return f"🧠 Empathy Mode: {'✅ Enabled' if status else '❌ Disabled'}"

    def toggle_empathy_mode(enabled: bool):
        """Toggle empathy mode and return updated status."""
        bot.toggle_empathy_mode(enabled)
        return get_empathy_status()

    # -------- Conversation list helpers ------ #
    def update_conversation_list():
        """Return rows for Dataframe display."""
        try:
            rows = []
            for c in bot.get_conversation_list():
                rows.append(
                    [
                        f"📋 {c['topic']}\n"
                        f"📅 {c['created_at']} | 💬 {c['total_turns']} turns"
                    ]
                )
            return rows
        except Exception as e:
            logger.error(f"Error updating conversation list: {e}")
            return []

    def get_current_topic():
        """Get current conversation topic for display."""
        info = bot.get_current_conversation_info()
        if info["topic"]:
            return (
                f"**Current Topic:** {info['topic']} | **Turns:** {info['total_turns']}"
            )
        return "**Current Topic:** New Conversation"

    # ------------ Load a previous conversation ------------ #
    def load_conversation(evt: gr.SelectData):
        """Load a previous conversation when user clicks on conversation list."""
        if evt.index is None:
            return [], get_current_topic()
        try:
            # evt.index is a list [row_index, col_index], we only need row_index
            row_index = evt.index[0] if isinstance(evt.index, list) else evt.index

            conversations = bot.get_conversation_list()
            if row_index < len(conversations):
                filename = conversations[row_index]["filename"]

                # Save current conversation if exists
                if bot.conv_mgr.history:
                    bot.save_current_conversation()

                # Load selected conversation
                if bot.load_conversation(filename):
                    conv_info = bot.get_current_conversation_info()

                    # Format history properly - clean any remaining emotion indicators
                    formatted_history = []
                    for q, a in conv_info["history"]:
                        # Double-check: clean the answer by removing any emotion indicators
                        clean_answer = a

                        # Remove emotion indicators that might still exist
                        if "*[Emotion detected:" in clean_answer or "*[Standard mode:" in clean_answer or "*[Error:" in clean_answer:
                            lines = clean_answer.split('\n')
                            clean_lines = []
                            for line in lines:
                                line_stripped = line.strip()
                                if (line_stripped.startswith('*[Emotion detected:') or
                                    line_stripped.startswith('*[Standard mode:') or
                                    line_stripped.startswith('*[Error:')):
                                    continue
                                clean_lines.append(line)
                            clean_answer = '\n'.join(clean_lines).strip()

                        formatted_history.append((q, clean_answer))

                    logger.info(f"Successfully loaded conversation: {conv_info['topic']} with {len(formatted_history)} turns")

                    # Add success notification
                    gr.Info(f"Loaded conversation: {conv_info['topic']}")

                    return formatted_history, get_current_topic()
                else:
                    gr.Warning("Failed to load the selected conversation")
            else:
                gr.Warning("Invalid conversation selection")
        except Exception as e:
            logger.error(f"Error loading conversation: {e}")
            import traceback
            logger.error(f"Full traceback: {traceback.format_exc()}")
            gr.Warning(f"Error loading conversation: {str(e)}")

        return [], get_current_topic()

    # -------------------------------------------------------------------- #
    # ----------------------------  Gradio GUI  --------------------------- #
    # -------------------------------------------------------------------- #
    with gr.Blocks(
        title="Empathetic Medical Chatbot",
        theme=gr.themes.Soft(),
        css="""
        .emotion-indicator {background:linear-gradient(45deg,#ff9a9e,#fecfef);border-radius:10px;padding:5px;margin:5px 0;}
        .chatbot {border-radius:15px;}
        .dataframe .add-row, .dataframe .add-col {display: none !important;}
        .dataframe button[aria-label="Add row"], .dataframe button[aria-label="Add column"] {display: none !important;}
        """,
    ) as demo:

        # App header - show which model is currently loaded
        is_finetuned = model_info.get("is_empathetic", False)
        model_status_text = "🤖 Fine-tuned Empathetic Model" if is_finetuned else "🤖 Base Model with Empathy Features"
        gr.Markdown(
            f"""
            # 🏥 Empathetic Medical Chatbot
            ### An AI assistant that understands your emotions and provides compassionate medical guidance
            **{model_status_text}**
            """
        )

        with gr.Row():
            # ---------------- Main chat column ---------------- #
            with gr.Column(scale=3):
                current_topic = gr.Markdown(get_current_topic())

                chat = gr.Chatbot(
                    label="💬 Empathetic Medical Consultation",
                    height=500,
                    show_copy_button=True,
                    show_share_button=False,
                    bubble_full_width=False,
                )

                with gr.Row():
                    user_msg = gr.Textbox(
                        placeholder=(
                            "Share your medical concerns... "
                            "I'm here to help with understanding and compassion."
                        ),
                        label="Your Question",
                        scale=4,
                        lines=2,
                        max_lines=5,
                    )
                    send_btn = gr.Button("Send 📤", variant="primary", scale=1, size="lg")

            # ---------------- Sidebar column ------------------ #
            with gr.Column(scale=1):
                # Empathy mode controls - always visible regardless of model type
                gr.Markdown("### 🧠 Empathy Mode")
                empathy_status = gr.Markdown(get_empathy_status())
                empathy_toggle = gr.Checkbox(
                    label="Enable Empathy Mode",
                    value=bot.get_empathy_status(),
                    info="Enables emotional understanding and empathetic responses",
                )

                # --- Conversation history list ---
                gr.Markdown("### 📚 Conversation History")
                with gr.Row():
                    refresh_btn = gr.Button(
                        "🔄 Refresh", variant="secondary", size="sm", scale=1
                    )
                    clear_btn = gr.Button(
                        "🗑️ New Chat", variant="stop", size="sm", scale=1
                    )

                conversation_list = gr.Dataframe(
                    headers=["Previous Conversations"],
                    datatype=["str"],
                    label="📋 Click to continue a conversation:",
                    interactive=False,  # Set to False to hide editing features
                    wrap=True,
                    height=300,
                    column_widths=[400],
                )

        # --------------- Callbacks & wiring ---------------- #
        def respond_and_update_all(user_input, chat_history):
            """Handle user input and update all UI components."""
            new_hist, empty_input, conv_list, emp_status = respond(
                user_input, chat_history
            )
            return (
                new_hist,
                empty_input,
                get_current_topic(),
                conv_list,
                emp_status,
            )

        def reset_and_update_all():
            """Reset chat and update all UI components."""
            empty_chat, conv_list, emp_status = reset_chat()
            return (
                empty_chat,
                get_current_topic(),
                conv_list,
                "",  # Clear textbox
                emp_status,
            )

        def load_and_update_topic(evt: gr.SelectData):
            """Load conversation and update topic display."""
            hist, topic = load_conversation(evt)
            return hist, topic

        # Send button / Enter key
        outputs_list = [chat, user_msg, current_topic, conversation_list, empathy_status]

        send_btn.click(
            respond_and_update_all,
            inputs=[user_msg, chat],
            outputs=outputs_list,
        )
        user_msg.submit(
            respond_and_update_all,
            inputs=[user_msg, chat],
            outputs=outputs_list,
        )

        # Conversation list click
        conversation_list.select(
            load_and_update_topic,
            outputs=[chat, current_topic],
        )

        # New chat button
        reset_outputs = [chat, current_topic, conversation_list, user_msg, empathy_status]

        clear_btn.click(
            reset_and_update_all,
            outputs=reset_outputs,
        )

        # Refresh list button
        refresh_btn.click(
            update_conversation_list,
            outputs=[conversation_list],
        )

        # Empathy toggle - always active
        empathy_toggle.change(
            toggle_empathy_mode,
            inputs=[empathy_toggle],
            outputs=[empathy_status],
        )

        # Initial load
        def initialize_interface():
            """Initialize the interface with current state."""
            return [
                update_conversation_list(),
                get_current_topic(),
                get_empathy_status(),
            ]

        demo.load(
            initialize_interface,
            outputs=[conversation_list, current_topic, empathy_status],
        )

    # -------------------- Launch Gradio ------------------ #
    demo.launch(
        share=True,
        server_name="0.0.0.0",
        server_port=7861,
        show_error=True,
    )

INFO:httpx:HTTP Request: GET https://checkip.amazonaws.com/ "HTTP/1.1 200 "


Running on local URL:  http://0.0.0.0:7861


INFO:httpx:HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET http://localhost:7861/startup-events "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: HEAD http://localhost:7861/ "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: GET https://api.gradio.app/v2/tunnel-request "HTTP/1.1 200 OK"


Running on public URL: https://b7ff13622959990c96.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


INFO:httpx:HTTP Request: HEAD https://b7ff13622959990c96.gradio.live "HTTP/1.1 200 OK"


# 2. Evaluation

Run automatic metrics (BLEU, BERTScore, cosine similarity) on RAG-only, LoRA-tuned and combined outputs to quantify improvements.

In [12]:
import nltk
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import pandas as pd
from typing import List, Dict, Tuple
import json

# Download necessary NLTK data
try:
    nltk.download('punkt', quiet=True)
except:
    pass

class ModelEvaluator:
    """Evaluates different model configurations using automatic metrics."""

    def __init__(self, chatbot: EmpatheticMedicalChatbot):
        self.chatbot = chatbot
        self.sentence_transformer = SentenceTransformer('all-mpnet-base-v2')
        self.smoothing_function = SmoothingFunction().method1

        # Test dataset - contains questions and expected answer types
        self.test_questions = [
            {
                "question": "I'm really scared about my diabetes diagnosis. What should I know?",
                "emotion": "afraid",
                "reference": "Diabetes is a manageable condition with proper care and monitoring. Understanding your condition and working with healthcare professionals can help you maintain good health."
            },
            {
                "question": "What are the main symptoms of diabetes?",
                "emotion": "neutral",
                "reference": "The main symptoms of diabetes include frequent urination, excessive thirst, unexplained weight loss, fatigue, and blurred vision."
            },
            {
                "question": "I feel so overwhelmed with managing my blood sugar levels.",
                "emotion": "sad",
                "reference": "Managing blood sugar can feel challenging, but with practice and support, it becomes easier. Regular monitoring and following your treatment plan are key."
            },
            {
                "question": "How can I prevent diabetes complications?",
                "emotion": "neutral",
                "reference": "Preventing diabetes complications involves maintaining good blood sugar control, regular exercise, healthy diet, and regular medical check-ups."
            },
            {
                "question": "I'm happy that my recent blood test results improved!",
                "emotion": "happy",
                "reference": "Improved blood test results indicate good diabetes management and commitment to your health plan."
            }
        ]

    def calculate_bleu_score(self, reference: str, candidate: str) -> float:
        """Calculate BLEU score between reference and candidate text."""
        try:
            reference_tokens = reference.lower().split()
            candidate_tokens = candidate.lower().split()

            if not candidate_tokens:
                return 0.0

            # Use sentence-level BLEU with smoothing
            score = sentence_bleu(
                [reference_tokens],
                candidate_tokens,
                smoothing_function=self.smoothing_function
            )
            return score
        except Exception as e:
            logger.error(f"Error calculating BLEU score: {e}")
            return 0.0

    def calculate_bertscore(self, reference: str, candidate: str) -> float:
        """Calculate semantic similarity using sentence transformers (proxy for BERTScore)."""
        try:
            embeddings = self.sentence_transformer.encode([reference, candidate])
            similarity = cosine_similarity([embeddings[0]], [embeddings[1]])[0][0]
            return float(similarity)
        except Exception as e:
            logger.error(f"Error calculating BERTScore: {e}")
            return 0.0

    def calculate_cosine_similarity(self, reference: str, candidate: str) -> float:
        """Calculate cosine similarity between reference and candidate embeddings."""
        return self.calculate_bertscore(reference, candidate)  # Same implementation

    def evaluate_model_configuration(self, config_name: str, use_empathy: bool) -> Dict[str, Any]:
        """Evaluate a specific model configuration."""
        print(f"\n{'='*60}")
        print(f"Evaluating: {config_name}")
        print(f"{'='*60}")

        # Set empathy mode
        original_empathy_status = self.chatbot.get_empathy_status()
        self.chatbot.toggle_empathy_mode(use_empathy)

        results = []
        total_bleu = 0
        total_bertscore = 0
        total_cosine = 0

        for i, test_case in enumerate(self.test_questions):
            print(f"\nTest {i+1}: {test_case['question'][:50]}...")

            try:
                # Get model response
                response = self.chatbot.chat(test_case['question'])
                answer = response['answer']
                detected_emotion = response.get('detected_emotion', 'neutral')

                # Compute metrics
                bleu = self.calculate_bleu_score(test_case['reference'], answer)
                bertscore = self.calculate_bertscore(test_case['reference'], answer)
                cosine_sim = self.calculate_cosine_similarity(test_case['reference'], answer)

                # Emotion detection accuracy (only in empathy mode)
                emotion_correct = (detected_emotion == test_case['emotion']) if use_empathy else None

                result = {
                    'question': test_case['question'],
                    'expected_emotion': test_case['emotion'],
                    'detected_emotion': detected_emotion,
                    'emotion_correct': emotion_correct,
                    'answer': answer,
                    'reference': test_case['reference'],
                    'bleu_score': bleu,
                    'bertscore': bertscore,
                    'cosine_similarity': cosine_sim
                }

                results.append(result)
                total_bleu += bleu
                total_bertscore += bertscore
                total_cosine += cosine_sim

                print(f"  BLEU: {bleu:.4f}")
                print(f"  BERTScore: {bertscore:.4f}")
                print(f"  Cosine Sim: {cosine_sim:.4f}")
                if use_empathy:
                    print(f"  Emotion: {detected_emotion} (Expected: {test_case['emotion']}) {'✓' if emotion_correct else '✗'}")

            except Exception as e:
                logger.error(f"Error evaluating test case {i+1}: {e}")
                continue

        # Compute average scores
        num_tests = len(results)
        avg_metrics = {
            'avg_bleu': total_bleu / num_tests if num_tests > 0 else 0,
            'avg_bertscore': total_bertscore / num_tests if num_tests > 0 else 0,
            'avg_cosine_similarity': total_cosine / num_tests if num_tests > 0 else 0
        }

        if use_empathy:
            emotion_accuracy = sum(1 for r in results if r['emotion_correct']) / num_tests if num_tests > 0 else 0
            avg_metrics['emotion_accuracy'] = emotion_accuracy

        # Restore original empathy status
        self.chatbot.toggle_empathy_mode(original_empathy_status)

        return {
            'config_name': config_name,
            'use_empathy': use_empathy,
            'avg_metrics': avg_metrics,
            'detailed_results': results
        }

    def run_full_evaluation(self) -> Dict[str, Any]:
        """Run evaluation on all model configurations."""
        print("\n" + "="*80)
        print("STARTING MODEL EVALUATION")
        print("="*80)


        evaluation_results = {}

        print("Evaluating base model configurations instead...")

        # Evaluate base model in standard mode
        evaluation_results['base_standard'] = self.evaluate_model_configuration(
            "Base Model (Standard Mode)", False
        )

        # Evaluate base model in empathy mode
        evaluation_results['base_empathy'] = self.evaluate_model_configuration(
            "Base Model (Empathy Mode)", True
        )

        return evaluation_results

    def print_summary(self, evaluation_results: Dict[str, Any]):
        """Print evaluation summary."""
        print("\n" + "="*80)
        print("EVALUATION SUMMARY")
        print("="*80)

        summary_data = []

        for config_name, results in evaluation_results.items():
            metrics = results['avg_metrics']
            row = {
                'Configuration': results['config_name'],
                'BLEU': f"{metrics['avg_bleu']:.4f}",
                'BERTScore': f"{metrics['avg_bertscore']:.4f}",
                'Cosine Sim': f"{metrics['avg_cosine_similarity']:.4f}"
            }

            if 'emotion_accuracy' in metrics:
                row['Emotion Accuracy'] = f"{metrics['emotion_accuracy']:.4f}"
            else:
                row['Emotion Accuracy'] = "N/A"

            summary_data.append(row)

        # Create and print table
        df = pd.DataFrame(summary_data)
        print(df.to_string(index=False, justify='center'))

        # Identify best-performing configurations
        print(f"\n{'='*50}")
        print("BEST PERFORMING CONFIGURATIONS")
        print(f"{'='*50}")

        best_bleu = max(evaluation_results.items(), key=lambda x: x[1]['avg_metrics']['avg_bleu'])
        best_bertscore = max(evaluation_results.items(), key=lambda x: x[1]['avg_metrics']['avg_bertscore'])
        best_cosine = max(evaluation_results.items(), key=lambda x: x[1]['avg_metrics']['avg_cosine_similarity'])

        print(f"🏆 Best BLEU Score: {best_bleu[1]['config_name']} ({best_bleu[1]['avg_metrics']['avg_bleu']:.4f})")
        print(f"🏆 Best BERTScore: {best_bertscore[1]['config_name']} ({best_bertscore[1]['avg_metrics']['avg_bertscore']:.4f})")
        print(f"🏆 Best Cosine Similarity: {best_cosine[1]['config_name']} ({best_cosine[1]['avg_metrics']['avg_cosine_similarity']:.4f})")

        # Emotion accuracy
        empathy_configs = {k: v for k, v in evaluation_results.items() if 'emotion_accuracy' in v['avg_metrics']}
        if empathy_configs:
            best_emotion = max(empathy_configs.items(), key=lambda x: x[1]['avg_metrics']['emotion_accuracy'])
            print(f"🏆 Best Emotion Accuracy: {best_emotion[1]['config_name']} ({best_emotion[1]['avg_metrics']['emotion_accuracy']:.4f})")


In [13]:
# The GUI part has completed, now running evaluation
print("\n" + "="*100)
print("STARTING AUTOMATIC EVALUATION")
print("="*100)

try:
    # Create the evaluator
    # bot = initialize_chatbot()
    evaluator = ModelEvaluator(bot)

    # Run the full evaluation
    evaluation_results = evaluator.run_full_evaluation()

    # Print summary
    evaluator.print_summary(evaluation_results)

    # Save detailed results to file
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    results_file = f"evaluation_results_{timestamp}.json"

    with open(results_file, 'w', encoding='utf-8') as f:
        json.dump(evaluation_results, f, indent=2, ensure_ascii=False, default=str)

    print(f"\n💾 Detailed results saved to: {results_file}")

except Exception as e:
    logger.error(f"Evaluation failed: {e}")
    print(f"\n❌ Evaluation failed with error: {e}")

print("\n" + "="*100)
print("EVALUATION COMPLETED")
print("="*100)


INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cuda:0
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: all-mpnet-base-v2



STARTING AUTOMATIC EVALUATION


INFO:__main__:Empathy mode disabled
INFO:__main__:Empathy mode disabled
INFO:__main__:Processing question with 0 previous turns in history



STARTING MODEL EVALUATION
Evaluating base model configurations instead...

Evaluating: Base Model (Standard Mode)

Test 1: I'm really scared about my diabetes diagnosis. Wha...


  result = self.standard_chain({
INFO:__main__:Processed question in standard mode with emotion 'neutral': I'm really scared about my diabetes diagnosis. Wha...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:__main__:Processing question with 1 previous turns in history


  BLEU: 0.0022
  BERTScore: 0.4242
  Cosine Sim: 0.4242

Test 2: What are the main symptoms of diabetes?...


INFO:__main__:Processed question in standard mode with emotion 'neutral': What are the main symptoms of diabetes?...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:__main__:Processing question with 2 previous turns in history


  BLEU: 0.0033
  BERTScore: 0.2679
  Cosine Sim: 0.2679

Test 3: I feel so overwhelmed with managing my blood sugar...


INFO:__main__:Processed question in standard mode with emotion 'neutral': I feel so overwhelmed with managing my blood sugar...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:__main__:Processing question with 3 previous turns in history


  BLEU: 0.0000
  BERTScore: 0.5179
  Cosine Sim: 0.5179

Test 4: How can I prevent diabetes complications?...


INFO:__main__:Processed question in standard mode with emotion 'neutral': How can I prevent diabetes complications?...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:__main__:Processing question with 4 previous turns in history


  BLEU: 0.0028
  BERTScore: 0.4297
  Cosine Sim: 0.4297

Test 5: I'm happy that my recent blood test results improv...


INFO:__main__:Processed question in standard mode with emotion 'neutral': I'm happy that my recent blood test results improv...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:__main__:Empathy mode enabled
INFO:__main__:Empathy mode enabled
INFO:__main__:Empathy mode enabled
INFO:__main__:Empathy mode enabled
INFO:__main__:Processing question with 5 previous turns in history
INFO:__main__:Detected emotion: afraid for input: 'I'm really scared about my diabetes diagnosis. Wha...'


  BLEU: 0.0037
  BERTScore: 0.2960
  Cosine Sim: 0.2960

Evaluating: Base Model (Empathy Mode)

Test 1: I'm really scared about my diabetes diagnosis. Wha...


You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
INFO:__main__:Processed question in empathy mode with emotion 'afraid': I'm really scared about my diabetes diagnosis. Wha...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:__main__:Processing question with 6 previous turns in history
INFO:__main__:Detected emotion: neutral for input: 'What are the main symptoms of diabetes?...'


  BLEU: 0.0022
  BERTScore: 0.4259
  Cosine Sim: 0.4259
  Emotion: afraid (Expected: afraid) ✓

Test 2: What are the main symptoms of diabetes?...


INFO:__main__:Processed question in empathy mode with emotion 'neutral': What are the main symptoms of diabetes?...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:__main__:Processing question with 7 previous turns in history
INFO:__main__:Detected emotion: neutral for input: 'I feel so overwhelmed with managing my blood sugar...'


  BLEU: 0.0027
  BERTScore: 0.2849
  Cosine Sim: 0.2849
  Emotion: neutral (Expected: neutral) ✓

Test 3: I feel so overwhelmed with managing my blood sugar...


INFO:__main__:Processed question in empathy mode with emotion 'neutral': I feel so overwhelmed with managing my blood sugar...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:__main__:Processing question with 8 previous turns in history
INFO:__main__:Detected emotion: neutral for input: 'How can I prevent diabetes complications?...'


  BLEU: 0.0032
  BERTScore: 0.3952
  Cosine Sim: 0.3952
  Emotion: neutral (Expected: sad) ✗

Test 4: How can I prevent diabetes complications?...


INFO:__main__:Processed question in empathy mode with emotion 'neutral': How can I prevent diabetes complications?...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:__main__:Processing question with 9 previous turns in history
INFO:__main__:Detected emotion: happy for input: 'I'm happy that my recent blood test results improv...'


  BLEU: 0.0032
  BERTScore: 0.5486
  Cosine Sim: 0.5486
  Emotion: neutral (Expected: neutral) ✓

Test 5: I'm happy that my recent blood test results improv...


INFO:__main__:Processed question in empathy mode with emotion 'happy': I'm happy that my recent blood test results improv...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:__main__:Empathy mode enabled
INFO:__main__:Empathy mode enabled


  BLEU: 0.0080
  BERTScore: 0.2756
  Cosine Sim: 0.2756
  Emotion: happy (Expected: happy) ✓

EVALUATION SUMMARY
      Configuration         BLEU  BERTScore Cosine Sim Emotion Accuracy
Base Model (Standard Mode) 0.0024   0.3871    0.3871           N/A     
 Base Model (Empathy Mode) 0.0039   0.3861    0.3861        0.8000     

BEST PERFORMING CONFIGURATIONS
🏆 Best BLEU Score: Base Model (Empathy Mode) (0.0039)
🏆 Best BERTScore: Base Model (Standard Mode) (0.3871)
🏆 Best Cosine Similarity: Base Model (Standard Mode) (0.3871)
🏆 Best Emotion Accuracy: Base Model (Empathy Mode) (0.8000)

💾 Detailed results saved to: evaluation_results_20250621_090257.json

EVALUATION COMPLETED
