In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input mount.
# os.walk yields (directory, subdirectory names, file names); the subdirectory
# names are not needed here, hence the throwaway `_`.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Cell 1: Install Dependencies
# Run this cell first to install all required packages
!pip install numpy indic-nlp-library indic-transliteration langchain langchain_community faiss-cpu tqdm pandas
!pip install sentence-transformers langchain_community tiktoken gradio
!pip install torch torchvision torchaudio transformers
!pip install accelerate bitsandbytes
!pip install datasets
!pip install langgraph langchain-core
# Optional speech components - can be enabled later
!pip install openai-whisper gTTS

# Cell 2: Import Libraries and Define Constants
# Run this cell to import all necessary dependencies

import numpy as np
import pandas as pd
import os
import json
import re
import gc
import torch
from typing import TypedDict, List, Dict, Optional, Any, Union, Callable
from pathlib import Path
import gradio as gr
import logging
import operator

# Import LangChain and LangGraph components
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, BaseMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.language_models import BaseChatModel
from langchain_core.runnables import RunnablePassthrough
from langchain_core.outputs import ChatResult, ChatGeneration
from langgraph.graph import StateGraph, END

# Import components from your existing implementation
from transformers import (
    AutoTokenizer, 
    AutoModelForCausalLM, 
    BitsAndBytesConfig, 
    pipeline,
    AutoModelForSequenceClassification
)
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from datasets import Dataset

# Configure logging
# Root logger at INFO level with timestamped records; `logger` is the logger
# used by the classes defined further down in this notebook.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Constants (updated for recommended models)
HINDI_MODEL_NAME = "Ryder99/Llama-3.2-3B-Instruct-Hindi"  # Role-Playing Agent
# NOTE(review): load_generator_model() loads "meta-llama/Llama-3.2-3B-Instruct"
# (with a TinyLlama fallback), not this GGUF repository — confirm which is intended.
GENERATOR_MODEL_NAME = "Triangle104/Unsloth_Llama-3.2-3B-Instruct-Q5_K_M-GGUF"  # Scene & Character Generator
EVALUATOR_MODEL_NAME = "EpistemeAI/ReasoningCore-3B-T1_1"  # Performance Critique Agent

# Model loading configurations
# NOTE(review): USE_8BIT is not read anywhere in the visible part of this notebook.
USE_8BIT = False 
USE_4BIT = True  # Use 4-bit for efficient loading
# NOTE(review): the model wrappers defined below default to max_new_tokens=512 and
# temperature=0.7 and do not read these two values — confirm where they are applied.
MAX_NEW_TOKENS = 150
TEMPERATURE = 0.2
# "cuda" when a CUDA device is visible to PyTorch, otherwise "cpu".
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Updated embedding model for better compatibility with Llama tokenization
EMBEDDING_MODEL = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"  # Better multilingual support

# JSON file the RAG system reads its documents from, and the directory its FAISS index is saved to.
RAG_DATA_PATH = "/kaggle/input/rag-preprocessed-data/processed_hindi_dialogues.json"
FAISS_INDEX_PATH = "/kaggle/working/hindi_dialogue_faiss_index"
ENABLE_SPEECH = False  # Keep speech disabled to save memory

def setup_memory_management():
    """Apply process-wide settings intended to reduce memory pressure.

    Releases PyTorch's cached CUDA blocks, creates the model cache directory
    under /kaggle/working, and exports the cache location and the CUDA
    allocator configuration through environment variables.

    NOTE(review): `transformers` is imported before this function runs; confirm
    the library still honours TRANSFORMERS_CACHE when it is set after import.
    """
    # Hand cached, currently unused CUDA blocks back to the driver.
    torch.cuda.empty_cache()

    # Directory that downloaded model files should be cached in.
    model_cache = "/kaggle/working/model_cache"
    os.makedirs(model_cache, exist_ok=True)

    # Export both settings in one go; insertion order matches the original assignments.
    os.environ.update({
        'TRANSFORMERS_CACHE': model_cache,
        'PYTORCH_CUDA_ALLOC_CONF': 'max_split_size_mb:512',
    })

    print(f"Memory management configured. Using device: {DEVICE}")
    
# Apply the memory settings once, as the last step of Cell 2.
setup_memory_management()

# Status output so the reader can see which device was selected and that Cell 2 finished.
print(f"Using device: {DEVICE}")
print("Libraries imported and constants defined!")

# Cell 3: Define Basic Utility Functions
# These are helper functions used throughout the system

def load_hindi_model():
    """Load the Hindi role-playing model and its tokenizer.

    The checkpoint named by HINDI_MODEL_NAME is loaded with device_map="auto"
    in float16; when USE_4BIT is set, a 4-bit NF4 BitsAndBytes configuration
    with double quantization is passed as well.

    Returns:
        (model, tokenizer); the tokenizer's pad token falls back to its EOS
        token when none is defined.
    """
    print(f"Loading Hindi model {HINDI_MODEL_NAME}...")

    # Arguments shared by the quantized and the plain FP16 path.
    load_kwargs = {"device_map": "auto", "torch_dtype": torch.float16}
    if USE_4BIT:
        load_kwargs["quantization_config"] = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4"
        )

    model = AutoModelForCausalLM.from_pretrained(HINDI_MODEL_NAME, **load_kwargs)

    tokenizer = AutoTokenizer.from_pretrained(HINDI_MODEL_NAME)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # Release whatever temporary memory loading left behind.
    clear_gpu_memory()

    return model, tokenizer

def _load_causal_lm_with_tokenizer(model_id):
    """Load `model_id` (4-bit NF4 when USE_4BIT, otherwise FP16) together with its tokenizer."""
    load_kwargs = {"device_map": "auto", "torch_dtype": torch.float16}
    if USE_4BIT:
        load_kwargs["quantization_config"] = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4"
        )

    model = AutoModelForCausalLM.from_pretrained(model_id, **load_kwargs)

    tokenizer = AutoTokenizer.from_pretrained(model_id)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    return model, tokenizer


def load_generator_model():
    """Load the scene and character generator model and its tokenizer.

    The standard (non-GGUF) checkpoint "meta-llama/Llama-3.2-3B-Instruct" is
    tried first. If anything goes wrong while loading the model or its
    tokenizer, the much smaller "TinyLlama/TinyLlama-1.1B-Chat-v1.0" is loaded
    instead.

    Returns:
        (model, tokenizer); the tokenizer's pad token falls back to its EOS
        token when none is defined.

    Raises:
        Whatever the fallback load raises, if that fails as well.
    """
    primary_id = "meta-llama/Llama-3.2-3B-Instruct"  # standard model, not GGUF
    fallback_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # much smaller fallback

    # Report the checkpoint that is actually being loaded.
    print(f"Loading generator model {primary_id}...")

    loaded = None
    try:
        loaded = _load_causal_lm_with_tokenizer(primary_id)
    except Exception as e:
        print(f"Error loading generator model: {e}")
        print("Falling back to smaller model...")

    if loaded is None:
        # The fallback is loaded only after the except block has ended: while the
        # exception is alive, its traceback can keep a partially loaded primary
        # model referenced, and both models would then occupy memory at once.
        clear_gpu_memory()
        loaded = _load_causal_lm_with_tokenizer(fallback_id)

    model, tokenizer = loaded

    # Force garbage collection
    clear_gpu_memory()

    return model, tokenizer

def load_evaluator_model():
    """Load the performance critique (evaluator) model and its tokenizer.

    Uses the checkpoint named by EVALUATOR_MODEL_NAME with device_map="auto"
    and float16; a 4-bit NF4 BitsAndBytes configuration is added when USE_4BIT
    is set.

    Returns:
        (model, tokenizer); the tokenizer's pad token falls back to its EOS
        token when none is defined.
    """
    print(f"Loading evaluator model {EVALUATOR_MODEL_NAME}...")

    # Start from the arguments both code paths share, then add quantization if requested.
    from_pretrained_args = dict(device_map="auto", torch_dtype=torch.float16)
    if USE_4BIT:
        from_pretrained_args["quantization_config"] = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4"
        )

    model = AutoModelForCausalLM.from_pretrained(EVALUATOR_MODEL_NAME, **from_pretrained_args)

    tokenizer = AutoTokenizer.from_pretrained(EVALUATOR_MODEL_NAME)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # Release whatever temporary memory loading left behind.
    clear_gpu_memory()

    return model, tokenizer

def clear_gpu_memory():
    """Free unreachable Python objects and, on CUDA, release cached GPU memory.

    Garbage collection runs first and on every device: tensors that are only
    kept alive by reference cycles must be collected before their memory can be
    returned, so emptying the CUDA cache beforehand would leave that memory
    cached. CUDA availability is queried directly from PyTorch (the same check
    the DEVICE constant is derived from), which keeps this helper usable on its own.
    """
    gc.collect()  # Run full garbage collection
    if torch.cuda.is_available():
        torch.cuda.synchronize()  # Wait for all CUDA operations to finish
        torch.cuda.empty_cache()  # Return now-unused cached blocks to the driver

# Any code point in the Devanagari Unicode block.
_DEVANAGARI_RE = re.compile(r'[\u0900-\u097F]')

# Common Hindi romanized words
_HINDI_ROMANIZED_WORDS = (
    'namaste', 'dhanyavad', 'theek', 'haan', 'nahi', 'kya', 'aap', 'mai', 'tum',
    'kitna', 'rupaye', 'paisa', 'khana', 'pani', 'chai', 'acha', 'bahut', 'thoda'
)

# Whole-word, case-insensitive match on any of the romanized words.
_HINDI_ROMANIZED_RE = re.compile(
    r'\b(?:' + '|'.join(re.escape(word) for word in _HINDI_ROMANIZED_WORDS) + r')\b',
    re.IGNORECASE,
)


def is_hindi(text):
    """Check if the text contains Hindi (either in Devanagari or romanized).

    Args:
        text: The string to inspect. ``None`` and the empty string are treated
            as "not Hindi".

    Returns:
        True when the text contains at least one Devanagari character or one of
        the known romanized Hindi words as a whole word (case-insensitive);
        False otherwise.

    Romanized words are matched on word boundaries rather than as substrings,
    so ordinary English such as "email" (contains "mai"), "chair" (contains
    "chai") or "Spanish" (contains "pani") is no longer reported as Hindi.
    """
    if not text:
        return False

    # Check for Devanagari characters
    if _DEVANAGARI_RE.search(text):
        return True

    return _HINDI_ROMANIZED_RE.search(text) is not None

def clean_response(response):
    """Strip speaker labels and reduce the reply to its first two non-blank lines.

    Every "Shopkeeper:", "Waiter:", "Customer:" or "Assistant:" label (plus the
    whitespace after it) is removed. The first non-blank line is taken as the
    Roman-script line and the second as the Devanagari line; when both exist
    they are returned joined by a newline. With fewer than two non-blank lines
    the label-stripped text is returned unchanged.
    """
    without_labels = re.sub(r'(Shopkeeper|Waiter|Customer|Assistant):\s*', '', response)

    # Whitespace-trimmed lines, skipping the blank ones.
    content_lines = [
        trimmed
        for trimmed in (raw.strip() for raw in without_labels.split('\n'))
        if trimmed
    ]

    if len(content_lines) < 2:
        return without_labels

    roman_line, devanagari_line = content_lines[0], content_lines[1]
    return f"{roman_line}\n{devanagari_line}"

print("Utility functions defined!")

# Cell 4: RAG System Implementation
# Retrieval system over the preprocessed Hindi dialogue data (embeddings + FAISS index)

class HindiLearningRAG:
    """RAG system for retrieving Hindi dialogues, idioms, and examples.

    Wraps a HuggingFace embedding model and a FAISS vector store built from the
    JSON documents at RAG_DATA_PATH. When something unrecoverable happens
    (embeddings cannot be initialised, the data file is missing or unreadable,
    the index cannot be built) the instance switches to "dummy mode": retrieval
    returns empty results and phrase lookup serves built-in phrases instead.
    """

    # Built-in phrases, served in dummy mode and used to top up sparse retrieval results.
    _MARKET_PHRASES = (
        {"phrase": "Kitne ka hai?", "meaning": "How much is it?", "devanagari": "कितने का है?"},
        {"phrase": "Thoda kam kar dijiye", "meaning": "Please reduce it a little", "devanagari": "थोड़ा कम कर दीजिए"},
        {"phrase": "Badhiya maal hai", "meaning": "It's good quality", "devanagari": "बढ़िया माल है"},
    )
    _RESTAURANT_PHRASES = (
        {"phrase": "Menu dikha dijiye", "meaning": "Please show me the menu", "devanagari": "मेनू दिखा दीजिए"},
        {"phrase": "Thoda teekha hai", "meaning": "It's a bit spicy", "devanagari": "थोड़ा तीखा है"},
        {"phrase": "Bill le aayiye", "meaning": "Please bring the bill", "devanagari": "बिल ले आइए"},
    )
    _HOTEL_PHRASES = (
        {"phrase": "Kamra saaf hai?", "meaning": "Is the room clean?", "devanagari": "कमरा साफ है?"},
        {"phrase": "Checkout ka samay kya hai?", "meaning": "What is the checkout time?", "devanagari": "चेकआउट का समय क्या है?"},
        {"phrase": "WiFi password kya hai?", "meaning": "What is the WiFi password?", "devanagari": "वाईफाई पासवर्ड क्या है?"},
    )

    def __init__(self, dummy_mode=False):
        """Initialize the RAG system with embeddings model.

        Args:
            dummy_mode: When True, no embedding model is loaded and the
                instance only serves the built-in phrases.
        """
        self.dummy_mode = dummy_mode
        # Defined unconditionally so that no method can hit a missing attribute,
        # even on an instance created in dummy mode.
        self.embeddings = None
        self.vector_store = None
        self.document_data = []

        if dummy_mode:
            logger.info("Initializing dummy RAG system (no retrieval capabilities)")
            return

        logger.info(f"Initializing Hindi Learning RAG on {DEVICE}...")
        self.initialize_embeddings()
        logger.info("RAG system initialized.")

    def initialize_embeddings(self):
        """Initialize the embeddings model; switches to dummy mode if that fails."""
        if self.dummy_mode:
            return

        try:
            self.embeddings = HuggingFaceEmbeddings(
                model_name=EMBEDDING_MODEL,
                model_kwargs={"device": DEVICE},
                encode_kwargs={"normalize_embeddings": True}
            )

            logger.info("Embeddings model initialized.")
        except Exception as e:
            logger.error(f"Error initializing embeddings: {e}")
            logger.warning("Continuing in dummy mode (no retrieval capabilities)")
            self.dummy_mode = True

    def load_documents(self, file_path=RAG_DATA_PATH):
        """Load documents from JSON file.

        Args:
            file_path: Path of the JSON file to read.

        Returns:
            True when the file was read and parsed. False in dummy mode, or
            when the file is missing or unreadable (both of which also switch
            the instance to dummy mode).
        """
        if self.dummy_mode:
            return False

        if not os.path.exists(file_path):
            logger.warning(f"Data file {file_path} not found. You need to load data first.")
            self.dummy_mode = True
            return False

        try:
            logger.info(f"Loading documents from {file_path}...")
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)

            self.document_data = data
            logger.info(f"Loaded {len(data)} documents.")
            return True
        except Exception as e:
            logger.error(f"Error loading documents: {e}")
            self.dummy_mode = True
            return False

    def create_vector_store(self):
        """Create a FAISS vector store from loaded documents and save it to FAISS_INDEX_PATH.

        Each loaded item must provide "page_content" and "metadata" keys.

        Returns:
            True on success. False when no documents are loaded or the instance
            is in dummy mode, or when building/saving fails (which switches the
            instance to dummy mode).
        """
        if self.dummy_mode or not self.document_data:
            logger.warning("No documents loaded or in dummy mode. Cannot create vector store.")
            return False

        try:
            logger.info("Creating FAISS vector store...")

            documents = [
                Document(page_content=item["page_content"], metadata=item["metadata"])
                for item in self.document_data
            ]

            # Create vector store
            self.vector_store = FAISS.from_documents(documents, self.embeddings)

            logger.info(f"Created vector store with {len(documents)} documents.")

            # Save index
            os.makedirs(FAISS_INDEX_PATH, exist_ok=True)
            self.vector_store.save_local(FAISS_INDEX_PATH)
            logger.info(f"Saved vector store to {FAISS_INDEX_PATH}")

            clear_gpu_memory()
            return True
        except Exception as e:
            logger.error(f"Error creating vector store: {e}")
            self.dummy_mode = True
            return False

    def load_vector_store(self, index_path=FAISS_INDEX_PATH):
        """Load a FAISS vector store from disk.

        Args:
            index_path: Directory previously written by ``save_local``.

        Returns:
            True when the index was loaded; False in dummy mode, when the path
            does not exist, or when loading fails.

        A failed load does NOT switch the instance to dummy mode: the caller can
        still rebuild the index from the source documents, and
        ``retrieve_dialogue_examples`` does exactly that.
        """
        if self.dummy_mode:
            return False

        if not os.path.exists(index_path):
            logger.warning(f"Index path {index_path} not found. Create index first.")
            return False

        try:
            logger.info(f"Loading vector store from {index_path}...")
            # SECURITY: the saved index contains a pickle file, and current
            # langchain_community refuses to read it without this explicit opt-in.
            # Only point index_path at an index this notebook wrote itself.
            self.vector_store = FAISS.load_local(
                index_path,
                self.embeddings,
                allow_dangerous_deserialization=True,
            )
            logger.info("Vector store loaded successfully.")
            return True
        except Exception as e:
            logger.error(f"Error loading vector store: {e}")
            return False

    def _ensure_vector_store(self):
        """Make sure a vector store is available: reuse it, load it from disk, or rebuild it."""
        if self.vector_store:
            return True

        if self.load_vector_store():
            return True

        logger.warning("Vector store not available. Loading documents and creating index...")
        if self.load_documents() and self.create_vector_store():
            logger.info("Vector store created successfully.")
            return True

        self.dummy_mode = True
        return False

    @staticmethod
    def _tags_overlap(requested_tags, document_tags):
        """Return True when no tags were requested or at least one requested tag is on the document."""
        if not requested_tags:
            return True
        return bool(set(document_tags).intersection(set(requested_tags)))

    def retrieve_dialogue_examples(self, query, top_k=3, context_tags=None, emotion_tags=None):
        """Retrieve dialogue examples based on query and optional tags.

        Args:
            query: Free-text query used for the similarity search.
            top_k: Maximum number of examples to return.
            context_tags: Optional tags; a document must share at least one of
                them in its "context_tags" metadata.
            emotion_tags: Optional tags; same rule against "emotion_tags".

        Returns:
            A list of at most ``top_k`` dicts built from the document metadata
            plus a "relevance_score" (the store's score, sorted ascending).
            Empty in dummy mode, when no vector store can be obtained, or when
            the search fails.
        """
        if self.dummy_mode:
            return []

        if not self._ensure_vector_store():
            return []

        try:
            logger.info(f"Retrieving examples for query: {query}")

            # Over-fetch so that tag filtering still has candidates to choose from.
            retrieval_results = self.vector_store.similarity_search_with_score(query, k=top_k*2)

            # Further filter by metadata if tags are provided
            if context_tags or emotion_tags:
                retrieval_results = [
                    (doc, score)
                    for doc, score in retrieval_results
                    if self._tags_overlap(context_tags, doc.metadata.get("context_tags", []))
                    and self._tags_overlap(emotion_tags, doc.metadata.get("emotion_tags", []))
                ]

            # Sort by score and truncate
            retrieval_results = sorted(retrieval_results, key=lambda x: x[1])[:top_k]

            # Extract dialogue turns for each document
            examples = []
            for doc, score in retrieval_results:
                examples.append({
                    "scene_description": doc.metadata.get("scene_description", ""),
                    "roman_dialogue": doc.metadata.get("roman_dialogue", ""),
                    "devanagari_dialogue": doc.metadata.get("devanagari_dialogue", ""),
                    "context_tags": doc.metadata.get("context_tags", []),
                    "emotion_tags": doc.metadata.get("emotion_tags", []),
                    "relevance_score": float(score),
                    "dialogue_turns": doc.metadata.get("dialogue_turns", [])
                })

            logger.info(f"Retrieved {len(examples)} examples.")
            return examples
        except Exception as e:
            logger.error(f"Error retrieving examples: {e}")
            return []

    def get_hindi_phrases_for_context(self, context, top_k=3):
        """Get relevant Hindi phrases based on the context.

        Args:
            context: Free-text description of the situation (e.g. "market");
                ``None`` is treated like an empty string.
            top_k: Maximum number of phrases to return.

        Returns:
            A list of dicts with "phrase", "devanagari" and "meaning" keys. In
            dummy mode these are built-in phrases chosen by keyword ("market",
            "restaurant", "hotel"; market is the default). Otherwise they are
            dialogue turns of 3 to 10 words taken from retrieved examples (with
            an empty "meaning"), topped up with the built-in market phrases
            when fewer than ``top_k`` are found.
        """
        if self.dummy_mode:
            # Return default phrases for common scenarios
            context_lower = (context or "").lower()
            if "market" in context_lower:
                chosen = self._MARKET_PHRASES
            elif "restaurant" in context_lower:
                chosen = self._RESTAURANT_PHRASES
            elif "hotel" in context_lower:
                chosen = self._HOTEL_PHRASES
            else:
                chosen = self._MARKET_PHRASES  # Default to market
            # Hand out copies so callers cannot modify the shared defaults.
            return [dict(phrase) for phrase in chosen[:top_k]]

        # If RAG is available, extract phrases from retrieved examples
        examples = self.retrieve_dialogue_examples(context, top_k=top_k)

        phrases = []
        for example in examples:
            for turn in example.get("dialogue_turns", []):
                text_roman = turn.get("text_roman", "")
                text_devanagari = turn.get("text_devanagari", "")

                # Keep only reasonably short turns (3 to 10 words)
                if 3 <= len(text_roman.split()) <= 10:
                    phrases.append({
                        "phrase": text_roman,
                        "devanagari": text_devanagari,
                        "meaning": ""  # We would need translation for this
                    })

        # Return unique phrases, limited to top_k
        unique_phrases = []
        seen_phrases = set()

        for phrase in phrases:
            if phrase["phrase"] not in seen_phrases:
                seen_phrases.add(phrase["phrase"])
                unique_phrases.append(phrase)

                if len(unique_phrases) >= top_k:
                    break

        # If we don't have enough phrases, add default ones
        for phrase in self._MARKET_PHRASES:
            if len(unique_phrases) >= top_k:
                break
            if phrase["phrase"] not in seen_phrases:
                seen_phrases.add(phrase["phrase"])
                unique_phrases.append(dict(phrase))

        return unique_phrases

print("RAG system defined!")

# Cell 5: LLM Model Wrappers
# Define wrappers for local models to use with LangChain/LangGraph

class LocalModelWrapper(BaseChatModel):
    """Wrapper for local models to make them compatible with LangChain.

    Chat messages are flattened into a plain "System: / Human: / Assistant:"
    transcript that ends with "Assistant:", and the model's continuation is
    returned as a single AIMessage.
    """

    def __init__(self, model, tokenizer, model_name="local-model"):
        # Important: Set these as instance variables before calling super().__init__()
        # NOTE(review): BaseChatModel's initialiser is not visible here — confirm that
        # underscore attributes assigned before super().__init__() are preserved by it.
        self._model = model
        self._tokenizer = tokenizer
        self._model_name = model_name
        # Call super().__init__() after setting instance variables
        super().__init__()

    def _generate(self, messages, stop=None, run_manager=None, **kwargs):
        """Generate text based on messages.

        Args:
            messages: Chat messages to turn into the prompt.
            stop: Optional stop string or list of stop strings; the generated
                text is cut at the first occurrence of any of them.
            run_manager: Accepted for interface compatibility; not used.
            **kwargs: May override "max_new_tokens" (512), "temperature" (0.7),
                "top_p" (0.9) and "repetition_penalty" (1.1).

        Returns:
            A ChatResult with one generation. If anything fails, the error is
            logged and the generation's content is an "Error generating
            response: ..." message rather than an exception.
        """
        try:
            # Format the prompt based on messages
            formatted_prompt = self._format_messages_to_prompt(messages)

            # Tokenize and move the inputs to the model's device
            encoded = self._tokenizer(formatted_prompt, return_tensors="pt")
            device = self._model.device
            input_ids = encoded.input_ids.to(device)

            # Set generation parameters with good defaults
            gen_kwargs = {
                "max_new_tokens": kwargs.get("max_new_tokens", 512),
                "temperature": kwargs.get("temperature", 0.7),
                "top_p": kwargs.get("top_p", 0.9),
                "repetition_penalty": kwargs.get("repetition_penalty", 1.1),
                "do_sample": True
            }

            # Pass the attention mask and pad token explicitly so generate() does not
            # have to guess them (the pad token may be the same as the EOS token).
            attention_mask = encoded.get("attention_mask")
            if attention_mask is not None:
                gen_kwargs["attention_mask"] = attention_mask.to(device)
            pad_token_id = getattr(self._tokenizer, "pad_token_id", None)
            if pad_token_id is not None:
                gen_kwargs["pad_token_id"] = pad_token_id

            # Generate text
            with torch.no_grad():
                output_ids = self._model.generate(input_ids, **gen_kwargs)

            # Decode only the newly generated tokens (everything after the prompt)
            generated_text = self._tokenizer.decode(output_ids[0][input_ids.shape[1]:], skip_special_tokens=True)
            generated_text = self._truncate_at_stop(generated_text, stop)

            # Create and return ChatResult
            generation = ChatGeneration(message=AIMessage(content=generated_text))
            return ChatResult(generations=[generation])

        except Exception as e:
            logger.error(f"Error in LocalModelWrapper._generate: {e}")
            # Return a simple error message if generation fails
            generation = ChatGeneration(message=AIMessage(content=f"Error generating response: {str(e)}"))
            return ChatResult(generations=[generation])

    def _truncate_at_stop(self, text, stop):
        """Cut `text` at the earliest occurrence of any stop string; unchanged when none occurs."""
        if not stop:
            return text
        if isinstance(stop, str):
            stop = [stop]
        cut_points = [
            position
            for position in (text.find(marker) for marker in stop if marker)
            if position != -1
        ]
        return text[:min(cut_points)] if cut_points else text

    def _format_messages_to_prompt(self, messages):
        """Format a list of messages into a prompt for the model.

        Each message becomes "<Role>: <content>"; messages are separated by a
        blank line and the prompt ends with "Assistant:" so the model writes the
        next assistant turn. Unknown message classes use their ``type`` as role.
        """
        prompt_parts = []

        for message in messages:
            if isinstance(message, SystemMessage):
                prompt_parts.append(f"System: {message.content}")
            elif isinstance(message, HumanMessage):
                prompt_parts.append(f"Human: {message.content}")
            elif isinstance(message, AIMessage):
                prompt_parts.append(f"Assistant: {message.content}")
            else:
                prompt_parts.append(f"{message.type}: {message.content}")

        return "\n\n".join(prompt_parts) + "\n\nAssistant:"

    @property
    def _llm_type(self):
        """Return the type identifier for this LLM."""
        return f"local-llm-{self._model_name}"

    @property
    def _identifying_params(self):
        """Return identifying parameters for this LLM."""
        return {"model_name": self._model_name}

class TemplateModelWrapper(BaseChatModel):
    """Wrapper for local models that require specific formatting.

    Builds a single-turn Llama 3 style prompt from the system message and the
    human messages, and returns the model's continuation as one AIMessage.
    """

    def __init__(self, model, tokenizer, model_name="template-model"):
        # Important: Set these as instance variables before calling super().__init__()
        # NOTE(review): BaseChatModel's initialiser is not visible here — confirm that
        # underscore attributes assigned before super().__init__() are preserved by it.
        self._model = model
        self._tokenizer = tokenizer
        self._model_name = model_name
        # Call super().__init__() after setting instance variables
        super().__init__()

    def _generate(self, messages, stop=None, run_manager=None, **kwargs):
        """Generate text based on messages using a template format.

        Args:
            messages: Chat messages to turn into the prompt.
            stop: Optional stop string or list of stop strings; the generated
                text is cut at the first occurrence of any of them.
            run_manager: Accepted for interface compatibility; not used.
            **kwargs: May override "max_new_tokens" (512), "temperature" (0.7),
                "top_p" (0.9) and "repetition_penalty" (1.1).

        Returns:
            A ChatResult with one generation. If anything fails, the error is
            logged and the generation's content is an "Error generating
            response: ..." message rather than an exception.
        """
        try:
            # Format the prompt based on messages
            formatted_prompt = self._format_messages_to_prompt(messages)

            # The template already starts with "<|begin_of_text|>". When that is the
            # tokenizer's own BOS token, letting the tokenizer add special tokens
            # would produce a second BOS, so they are only added when the prompt
            # does not already carry one (e.g. for a tokenizer with a different BOS).
            bos_token = getattr(self._tokenizer, "bos_token", None)
            prompt_has_bos = bool(bos_token) and formatted_prompt.startswith(bos_token)
            encoded = self._tokenizer(
                formatted_prompt,
                return_tensors="pt",
                add_special_tokens=not prompt_has_bos,
            )
            device = self._model.device
            input_ids = encoded.input_ids.to(device)

            # Set generation parameters with good defaults
            gen_kwargs = {
                "max_new_tokens": kwargs.get("max_new_tokens", 512),
                "temperature": kwargs.get("temperature", 0.7),
                "top_p": kwargs.get("top_p", 0.9),
                "repetition_penalty": kwargs.get("repetition_penalty", 1.1),
                "do_sample": True
            }

            # Pass the attention mask and pad token explicitly so generate() does not
            # have to guess them (the pad token may be the same as the EOS token).
            attention_mask = encoded.get("attention_mask")
            if attention_mask is not None:
                gen_kwargs["attention_mask"] = attention_mask.to(device)
            pad_token_id = getattr(self._tokenizer, "pad_token_id", None)
            if pad_token_id is not None:
                gen_kwargs["pad_token_id"] = pad_token_id

            # Generate text
            with torch.no_grad():
                output_ids = self._model.generate(input_ids, **gen_kwargs)

            # Decode only the newly generated tokens (everything after the prompt)
            generated_text = self._tokenizer.decode(output_ids[0][input_ids.shape[1]:], skip_special_tokens=True)
            generated_text = self._truncate_at_stop(generated_text, stop)

            # Create and return ChatResult
            generation = ChatGeneration(message=AIMessage(content=generated_text))
            return ChatResult(generations=[generation])

        except Exception as e:
            logger.error(f"Error in TemplateModelWrapper._generate: {e}")
            # Return a simple error message if generation fails
            generation = ChatGeneration(message=AIMessage(content=f"Error generating response: {str(e)}"))
            return ChatResult(generations=[generation])

    def _truncate_at_stop(self, text, stop):
        """Cut `text` at the earliest occurrence of any stop string; unchanged when none occurs."""
        if not stop:
            return text
        if isinstance(stop, str):
            stop = [stop]
        cut_points = [
            position
            for position in (text.find(marker) for marker in stop if marker)
            if position != -1
        ]
        return text[:min(cut_points)] if cut_points else text

    def _format_messages_to_prompt(self, messages):
        """Format messages using a specific template for generator model.

        Only system and human messages are used: the last system message wins,
        all human messages are joined with newlines into one user turn, and any
        other message type (including earlier AI replies) is left out.

        NOTE(review): the whitespace around the header tokens below differs from
        the reference Llama 3 layout as commonly documented (blank line after
        each header, nothing between <|eot_id|> and the next header) — confirm
        whether the template should be aligned with it.
        """
        system_message = ""
        user_messages = []

        # Extract system and user messages
        for message in messages:
            if isinstance(message, SystemMessage):
                system_message = message.content
            elif isinstance(message, HumanMessage):
                user_messages.append(message.content)

        # Combine all user messages if there are multiple
        user_content = "\n".join(user_messages) if user_messages else ""

        # Format using LLama 3 style instruction template
        if system_message:
            prompt = f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n{system_message}<|eot_id|>\n\n<|start_header_id|>user<|end_header_id|>\n{user_content}<|eot_id|>\n\n<|start_header_id|>assistant<|end_header_id|>\n"
        else:
            prompt = f"<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n{user_content}<|eot_id|>\n\n<|start_header_id|>assistant<|end_header_id|>\n"

        return prompt

    @property
    def _llm_type(self):
        """Return the type identifier for this LLM."""
        return f"template-llm-{self._model_name}"

    @property
    def _identifying_params(self):
        """Return identifying parameters for this LLM."""
        return {"model_name": self._model_name}

print("LLM Model Wrappers defined!")


# Cell 6: Define LangGraph Components
# Conversation state schema, system prompt templates, and fallback scenarios
class ConversationState(TypedDict):
    """State structure for the conversation flow.

    A TypedDict, so at runtime instances are plain dicts carrying these keys.
    """
    messages: List[BaseMessage]  # LangChain message objects for the conversation
    scenario: Optional[Dict[str, Any]]  # scenario data; None allowed (presumably until a scenario is generated — confirm)
    evaluation: Optional[Dict[str, Any]]  # evaluation data; None allowed (presumably until the evaluator has run — confirm)
    dialogue_history: List[Dict[str, str]]  # list of string-to-string dicts; exact keys are not visible here — confirm against the graph nodes
    current_stage: str  # name of the current stage; the valid values are not visible here
    user_input: Optional[str]  # user text; None allowed
    user_used_hindi: bool  # presumably set from is_hindi() on the user's input — confirm
    proficiency_level: str  # beginner, intermediate, advanced

# Custom prompt templates for different models
# System prompt for the scenario generator. Its only placeholder is
# {proficiency_level}. The doubled braces ({{ }}) in the JSON example are escapes
# that become single braces once the template is formatted (presumably via
# str.format or a ChatPromptTemplate — the call site is not visible here, confirm).
GENERATOR_SYSTEM_PROMPT = """You are a creative Hindi language learning scenario generator.
        
Create a realistic and practical scenario for Hindi conversation practice at the {proficiency_level} level.
Include the following in your response:
1. scenario_type: A category like "restaurant", "market", "transportation", "hotel", etc.
2. scenario_title: A concise title for this scenario
3. scenario_description: A brief description (2-3 sentences) of the setting
4. character_role: The role the AI assistant will play (e.g., "waiter", "shopkeeper")
5. user_role: The role the learner will play (usually "customer", "traveler", etc.)
6. goals: 3-5 simple conversation goals for the learner to accomplish in this scenario
7. key_vocabulary: 5-8 key Hindi words/phrases relevant to this scenario with both Roman and Devanagari script
8. first_line: An opening line for the conversation (what the assistant should say first) in both Roman and Devanagari script

Format your response as a JSON object. Use this exact format:
{{
  "scenario_type": "restaurant",
  "scenario_title": "Ordering Food at a Restaurant", 
  "scenario_description": "You are at a local restaurant in Delhi...",
  "character_role": "waiter",
  "user_role": "customer",
  "goals": ["Order a main dish", "Ask about spiciness", "Request the bill"],
  "key_vocabulary": [
    {{"roman": "menu", "devanagari": "मेनू", "meaning": "menu"}},
    {{"roman": "khana", "devanagari": "खाना", "meaning": "food"}},
    {{"roman": "bill", "devanagari": "बिल", "meaning": "bill"}}
  ],
  "first_line": {{
    "roman": "Namaste ji, kya khaayenge aap?",
    "devanagari": "नमस्ते जी, क्या खाएंगे आप?"
  }}
}}
"""

# System prompt for the evaluator. Placeholders: {proficiency_level},
# {scenario_type}, {character_role}, {user_role} and {goals}. The prompt asks for
# three "<name>: N/5" scores plus an overall rating, laid out in <reasoning> and
# <answer> sections.
EVALUATOR_SYSTEM_PROMPT = """You are a Hindi language learning evaluator. Your task is to analyze the conversation between a learner (practicing Hindi at {proficiency_level} level) and an AI tutor, and provide detailed, constructive feedback.

Context: The scenario was a {scenario_type} conversation where the AI played a {character_role} and the learner played a {user_role}.

<reasoning>
Analyze the following aspects and rate each on a scale of 1-5:
1. Hindi Usage: To what extent did the learner use Hindi (vs English)? Recognize any Hindi words/phrases they used.
2. Cultural Appropriateness: Did their responses make sense in an Indian {scenario_type} context?
3. Goal Achievement: The scenario had these goals: {goals}. How well did the learner accomplish them?
4. Areas for Improvement: Specific suggestions for vocabulary, phrases, or cultural elements to incorporate next time.
</reasoning>

<answer>
Format your score sections like this example:
Hindi Usage: 3/5
Cultural Appropriateness: 4/5
Goal Achievement: 3/5  
Overall Rating: 3/5

Provide encouragement and specific examples from their conversation in your feedback.
</answer>
"""

# Predefined fallback scenarios, used when the scenario generator model fails
def get_fallback_scenario(proficiency_level="beginner"):
    """Return a predefined scenario for the given proficiency level.

    Used when the generator model is unavailable or its output cannot be parsed.

    Args:
        proficiency_level: "beginner", "intermediate" or "advanced". Matching is
            case-insensitive and ignores surrounding whitespace, so values such as
            "Beginner" select the beginner scenario. Any other value (including
            non-strings) selects the advanced scenario.

    Returns:
        A freshly built dict with the keys ``scenario_type``, ``scenario_title``,
        ``scenario_description``, ``character_role``, ``user_role``, ``goals``,
        ``key_vocabulary`` and ``first_line``. A new dict is built on every call,
        so callers may mutate the result freely.
    """
    # Normalise so that "Beginner" / " intermediate " do not silently fall
    # through to the advanced scenario.
    level = str(proficiency_level).strip().lower()

    # Beginner scenario - Restaurant
    if level == "beginner":
        return {
            "scenario_type": "restaurant",
            "scenario_title": "Ordering Food at a Restaurant", 
            "scenario_description": "You are at a local Indian restaurant and want to order a meal. The waiter approaches your table.",
            "character_role": "waiter",
            "user_role": "customer",
            "goals": ["Order a main dish", "Ask about spiciness", "Request the bill"],
            "key_vocabulary": [
                {"roman": "menu", "devanagari": "मेनू", "meaning": "menu"},
                {"roman": "khana", "devanagari": "खाना", "meaning": "food"},
                {"roman": "bill", "devanagari": "बिल", "meaning": "bill"},
                {"roman": "paani", "devanagari": "पानी", "meaning": "water"},
                {"roman": "teekha", "devanagari": "तीखा", "meaning": "spicy"}
            ],
            "first_line": {
                "roman": "Namaste ji, kya khaayenge aap?",
                "devanagari": "नमस्ते जी, क्या खाएंगे आप?"
            }
        }

    # Intermediate scenario - Market
    if level == "intermediate":
        return {
            "scenario_type": "market",
            "scenario_title": "Shopping at the Vegetable Market", 
            "scenario_description": "You are at a busy local market wanting to buy fresh vegetables. A shopkeeper is ready to assist you.",
            "character_role": "shopkeeper",
            "user_role": "customer",
            "goals": ["Ask about prices", "Negotiate a discount", "Buy multiple items"],
            "key_vocabulary": [
                {"roman": "sabzi", "devanagari": "सब्ज़ी", "meaning": "vegetable"},
                {"roman": "kitne ka hai", "devanagari": "कितने का है", "meaning": "how much is it"},
                {"roman": "kilo", "devanagari": "किलो", "meaning": "kilogram"},
                {"roman": "dam", "devanagari": "दाम", "meaning": "price"},
                {"roman": "sasta", "devanagari": "सस्ता", "meaning": "cheap"}
            ],
            "first_line": {
                "roman": "Aaiye ji, kya chahiye aapko?",
                "devanagari": "आइये जी, क्या चाहिए आपको?"
            }
        }

    # Advanced scenario - Hotel (also the catch-all for unrecognised levels)
    return {
        "scenario_type": "hotel",
        "scenario_title": "Checking into a Hotel", 
        "scenario_description": "You've arrived at a hotel in Delhi and need to check in. The receptionist is waiting to help you.",
        "character_role": "receptionist",
        "user_role": "guest",
        "goals": ["Complete check-in process", "Ask about amenities", "Request a wake-up call"],
        "key_vocabulary": [
            {"roman": "kamra", "devanagari": "कमरा", "meaning": "room"},
            {"roman": "booking", "devanagari": "बुकिंग", "meaning": "booking"},
            {"roman": "checkout", "devanagari": "चेकआउट", "meaning": "checkout"},
            {"roman": "wifi", "devanagari": "वाईफाई", "meaning": "WiFi"},
            {"roman": "chaabi", "devanagari": "चाबी", "meaning": "key"}
        ],
        "first_line": {
            "roman": "Namaste ji, swagat hai aapka. Kaise madad kar sakta hoon?",
            "devanagari": "नमस्ते जी, स्वागत है आपका। कैसे मदद कर सकता हूं?"
        }
    }

class LangGraphAgents:
    """Define and coordinate LangGraph agents for Hindi learning system.

    Each ``*_agent`` method is a graph node: it receives the conversation state
    mapping, and returns an updated shallow copy of it. The input state and the
    lists it holds are never mutated.

    Attributes:
        generator_llm: Runnable used to generate scenarios (may be None).
        hindi_model / hindi_tokenizer: model and tokenizer used for tutor replies.
        evaluator_llm: Runnable used to produce the final evaluation.
        rag_system: optional retrieval helper exposing ``dummy_mode`` and
            ``retrieve_dialogue_examples``.
    """

    # Keys a generated scenario must contain before it is accepted.
    REQUIRED_SCENARIO_FIELDS = (
        "scenario_type", "scenario_title", "scenario_description",
        "character_role", "user_role", "goals", "key_vocabulary", "first_line",
    )

    # Matches either a fenced json block or the outermost {...} span of the reply.
    SCENARIO_JSON_PATTERN = r'```json\s*([\s\S]*?)\s*```|(\{[\s\S]*\})'

    # The tutor hands over to the evaluator once the dialogue history holds this
    # many entries (user and assistant turns are counted separately).
    MAX_DIALOGUE_ENTRIES = 10

    # Result key -> regex capturing the "N" of an "N/5" score line.
    SCORE_PATTERNS = {
        "hindi_usage_score": r'Hindi Usage:?\s*(\d+)/5',
        "cultural_appropriateness_score": r'Cultural Appropriateness:?\s*(\d+)/5',
        "goal_achievement_score": r'Goal Achievement:?\s*(\d+)/5',
        "overall_score": r'Overall Rating:?\s*(\d+)/5',
    }

    def __init__(self, generator_llm, hindi_model, hindi_tokenizer, evaluator_llm, rag_system):
        self.generator_llm = generator_llm  # For scenario generation
        self.hindi_model = hindi_model      # For conversation
        self.hindi_tokenizer = hindi_tokenizer
        self.evaluator_llm = evaluator_llm  # For evaluation
        self.rag_system = rag_system

    # ------------------------------------------------------------------
    # Scenario generation
    # ------------------------------------------------------------------
    @classmethod
    def _parse_scenario_json(cls, content):
        """Extract and validate the scenario dict from the generator's raw reply.

        Raises:
            ValueError / TypeError / json.JSONDecodeError when the reply holds no
            usable JSON object or the object lacks a required field or has a
            field of an unusable type.
        """
        logger.info(f"Raw scenario content: {content[:100]}...")

        match = re.search(cls.SCENARIO_JSON_PATTERN, content)
        if match:
            json_str = match.group(1) or match.group(2)
            logger.info(f"Found JSON structure: {json_str[:100]}...")
            scenario = json.loads(json_str)
        else:
            # No recognisable block: try the whole reply as JSON.
            logger.info("No JSON block found, attempting to parse entire content")
            scenario = json.loads(content)

        if not isinstance(scenario, dict):
            raise ValueError("Scenario JSON is not an object")

        for field in cls.REQUIRED_SCENARIO_FIELDS:
            if field not in scenario:
                logger.warning(f"Missing required field: {field}")
                raise ValueError(f"Missing required field: {field}")

        # Later code calls .get() on first_line and ", ".join() on goals, so
        # presence alone is not enough: the shapes must be right as well.
        if not isinstance(scenario["first_line"], dict):
            raise ValueError("Field 'first_line' must be an object with 'roman' and 'devanagari'")
        goals = scenario["goals"]
        if isinstance(goals, str):
            goals = [goals]
        if not isinstance(goals, list):
            raise ValueError("Field 'goals' must be a list")
        scenario["goals"] = [str(goal) for goal in goals]

        return scenario

    def scenario_generator_agent(self, state: ConversationState) -> ConversationState:
        """Generate a new conversation scenario for the state's proficiency level.

        Falls back to ``get_fallback_scenario`` when the generator is missing,
        the call fails, or the reply cannot be parsed and validated.

        Returns:
            A copy of ``state`` with ``scenario`` set, ``current_stage`` set to
            "language_tutor", the scenario announcement and the tutor's opening
            line appended to ``messages``, and ``dialogue_history`` reset to that
            opening line.
        """
        messages = state["messages"]
        proficiency_level = state["proficiency_level"]

        # Create a new state to return (avoid modifying the input state directly)
        new_state = state.copy()

        scenario_result = None
        try:
            if getattr(self, "generator_llm", None) is None:
                logger.error("Generator LLM not available, using fallback scenario")
                raise ValueError("Generator LLM not available")

            system_prompt = GENERATOR_SYSTEM_PROMPT.format(proficiency_level=proficiency_level)

            # The prompt has already been formatted and now contains literal JSON
            # braces. Passing it as a message object (rather than a ("system", text)
            # template tuple) keeps ChatPromptTemplate from re-parsing those braces
            # as template variables.
            scenario_prompt = ChatPromptTemplate.from_messages([
                SystemMessage(content=system_prompt),
                MessagesPlaceholder(variable_name="messages")
            ])

            scenario_chain = scenario_prompt | self.generator_llm
            scenario_response = scenario_chain.invoke({"messages": messages})
            logger.info("Generated scenario response")
            content = scenario_response.content
        except Exception as e:
            logger.error(f"Complete failure in scenario generation: {e}")
        else:
            try:
                scenario_result = self._parse_scenario_json(content)
            except Exception as e:
                logger.error(f"Error parsing scenario JSON: {e}")

        if scenario_result is None:
            logger.info(f"Using fallback scenario for {proficiency_level}")
            scenario_result = get_fallback_scenario(proficiency_level)

        # Update state with new scenario
        new_state["scenario"] = scenario_result
        new_state["current_stage"] = "language_tutor"

        # Add system message to inform about the scenario
        system_message = SystemMessage(content=f"A new scenario has been generated: {scenario_result['scenario_title']}")

        # Create the assistant's first message from the scenario; empty or
        # missing values fall back to a plain greeting.
        first_line = scenario_result["first_line"]
        first_line_roman = first_line.get("roman") or "Namaste!"
        first_line_devanagari = first_line.get("devanagari") or "नमस्ते!"
        opening_text = f"{first_line_roman}\n{first_line_devanagari}"

        new_state["messages"] = messages + [system_message, AIMessage(content=opening_text)]
        new_state["dialogue_history"] = [{
            "role": "assistant",
            "content": opening_text
        }]

        return new_state

    # ------------------------------------------------------------------
    # Tutor conversation
    # ------------------------------------------------------------------
    def _retrieve_rag_examples(self, user_input, scenario_type):
        """Return up to two reference dialogues, or [] when retrieval is off or fails."""
        if not self.rag_system or self.rag_system.dummy_mode:
            return []
        try:
            return self.rag_system.retrieve_dialogue_examples(
                query=user_input,
                top_k=2,
                context_tags=[scenario_type]
            ) or []
        except Exception as e:
            # Reference examples are optional context; a retrieval failure
            # should not cost the learner their turn.
            logger.error(f"Error retrieving RAG examples: {e}")
            return []

    @staticmethod
    def _build_tutor_system_prompt(character_role, scenario_type, user_role, user_used_hindi, rag_examples):
        """Assemble the role-locked system prompt for the Hindi conversation model."""
        system_prompt = f"""You are a Hindi language tutor demonstrating ONLY the {character_role} role in a {scenario_type} conversation.

    CRITICAL ROLE INSTRUCTIONS:
    1. You ONLY play the {character_role} - NEVER respond as the customer/user.
    2. The human user plays the {user_role} role.
    3. NEVER continue the conversation as the customer.
    4. NEVER put "Customer:" or similar labels in your responses.

    FORMAT REQUIREMENTS:
    1. First line: Response in Roman Hindi (1-2 sentences)
    2. Second line: Same response in Devanagari script
    3. NOTHING ELSE.

    CONTENT GUIDELINES:
    1. Keep responses SHORT and PRACTICAL.
    2. Use authentic, everyday Hindi appropriate for a {scenario_type} setting.
    3. Match the user's proficiency level with appropriate vocabulary and complexity.
    4. Use REALISTIC Hindi that would be spoken in a real {scenario_type}.
    """

        if user_used_hindi:
            system_prompt += "\nNOTE: The learner is responding in Hindi, which is excellent! Acknowledge their effort in your response."

        # Format RAG examples for the prompt if available
        if rag_examples:
            rag_content = "\n\nREFERENCE EXAMPLES (use these for authentic Hindi expressions):\n"
            for i, example in enumerate(rag_examples[:2]):
                turns = example.get("dialogue_turns", [])
                if turns:
                    rag_content += f"Example {i+1}:\n"
                    # Only the first three turns, to keep the prompt short.
                    for turn in turns[:3]:
                        speaker = turn.get("speaker", "")
                        text = turn.get("text_roman", "")
                        rag_content += f"{speaker}: {text}\n"
                    rag_content += "\n"
            system_prompt += rag_content

        return system_prompt

    def _generate_tutor_reply(self, chat_messages):
        """Run the Hindi model on ``chat_messages`` and return the decoded reply.

        Raises:
            ValueError: when the model or tokenizer is not available.

        A failure during tokenization or generation is logged and replaced by a
        fixed bilingual apology so the conversation can continue.
        """
        if getattr(self, "hindi_model", None) is None:
            raise ValueError("Hindi model not available")
        if getattr(self, "hindi_tokenizer", None) is None:
            raise ValueError("Hindi tokenizer not available")

        try:
            input_ids = self.hindi_tokenizer.apply_chat_template(
                chat_messages,
                add_generation_prompt=True,
                return_tensors="pt"
            ).to(self.hindi_model.device)

            outputs = self.hindi_model.generate(
                input_ids,
                max_new_tokens=100,
                do_sample=True,
                temperature=TEMPERATURE,
                repetition_penalty=1.3,
                eos_token_id=self.hindi_tokenizer.eos_token_id,
            )

            # Decode only the newly generated tokens, not the echoed prompt.
            return self.hindi_tokenizer.decode(
                outputs[0][input_ids.shape[-1]:],
                skip_special_tokens=True
            )
        except Exception as e:
            logger.error(f"Error generating Hindi response: {e}")
            # Fallback response in case of generation error
            return "Sorry, I couldn't generate a proper Hindi response.\nक्षमा करें, मैं उचित हिंदी प्रतिक्रिया नहीं दे सका।"

    def language_tutor_agent(self, state: ConversationState) -> ConversationState:
        """Process the learner's input and produce the tutor's in-character reply.

        Returns:
            A copy of ``state`` with the user turn and tutor reply appended to
            ``dialogue_history`` and ``messages``, ``user_used_hindi`` set,
            ``user_input`` cleared, and ``current_stage`` set to "evaluator" when
            the history reaches ``MAX_DIALOGUE_ENTRIES`` or the learner's text
            contains "evaluate" (otherwise "language_tutor").

            On any unexpected error, a copy of ``state`` with an apology message
            appended to ``messages`` and ``dialogue_history`` left untouched.
        """
        try:
            messages = state["messages"]
            scenario = state["scenario"]
            user_input = state["user_input"]
            if not isinstance(user_input, str):
                raise ValueError("No user input to respond to")

            # Copy the history: state.copy() is shallow, so appending to the
            # original list would silently modify the caller's state too.
            dialogue_history = list(state["dialogue_history"] or [])

            # Check if user is using Hindi
            user_used_hindi = is_hindi(user_input)
            new_state = state.copy()
            new_state["user_used_hindi"] = user_used_hindi

            dialogue_history.append({
                "role": "user",
                "content": user_input
            })

            character_role = scenario["character_role"]
            scenario_type = scenario["scenario_type"]
            user_role = scenario["user_role"]

            rag_examples = self._retrieve_rag_examples(user_input, scenario_type)
            system_prompt = self._build_tutor_system_prompt(
                character_role, scenario_type, user_role, user_used_hindi, rag_examples
            )

            # Create a prompt for the Hindi model
            messages_formatted = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"The {user_role} says: \"{user_input}\"\n\nRespond ONLY as the {character_role} in simple Hindi (both Roman and Devanagari). NEVER respond as the {user_role}. Keep your response brief and practical."}
            ]

            # Generate, then clean the response to ensure proper format
            tutor_response = clean_response(self._generate_tutor_reply(messages_formatted))

            dialogue_history.append({
                "role": "assistant",
                "content": tutor_response
            })

            # Update state
            new_state["dialogue_history"] = dialogue_history
            new_state["messages"] = messages + [HumanMessage(content=user_input), AIMessage(content=tutor_response)]

            # Determine next stage: enough turns, or the learner asked for evaluation
            wants_evaluation = "evaluate" in user_input.lower()
            if len(dialogue_history) >= self.MAX_DIALOGUE_ENTRIES or wants_evaluation:
                new_state["current_stage"] = "evaluator"
            else:
                new_state["current_stage"] = "language_tutor"

            # Clear user input
            new_state["user_input"] = None

            # Clear memory
            clear_gpu_memory()

            return new_state

        except Exception as e:
            logger.error(f"Error in language_tutor_agent: {e}")
            # Create a recovery state to avoid system crash
            new_state = state.copy()
            error_message = AIMessage(content=f"Sorry, I encountered an error processing your message. Let's try again.\nक्षमा करें, मुझे आपके संदेश को प्रोसेस करने में त्रुटि मिली। फिर से प्रयास करें।")
            recovery_messages = list(state.get("messages") or [])
            # Only echo the learner's text when there is some; a None content
            # would make the recovery path itself fail.
            failed_input = state.get("user_input")
            if isinstance(failed_input, str):
                recovery_messages.append(HumanMessage(content=failed_input))
            recovery_messages.append(error_message)
            new_state["messages"] = recovery_messages
            new_state["current_stage"] = "language_tutor"
            new_state["user_input"] = None
            return new_state

    # ------------------------------------------------------------------
    # Evaluation
    # ------------------------------------------------------------------
    @staticmethod
    def _extract_score(pattern, text, default=3):
        """Return the integer captured by ``pattern`` in ``text``, clamped to 0-5.

        ``default`` is returned when the pattern does not match or the captured
        text is not an integer.
        """
        match = re.search(pattern, text)
        if not match:
            return default
        try:
            score = int(match.group(1))
        except (TypeError, ValueError):
            return default
        # Scores are out of 5; clamp so an out-of-range reply such as "10/5"
        # cannot produce a nonsensical star rating.
        return max(0, min(5, score))

    def evaluator_agent(self, state: ConversationState) -> ConversationState:
        """Evaluate the conversation and provide feedback using the evaluator model.

        Returns:
            A copy of ``state`` with ``evaluation`` set to a dict of the four
            scores plus ``full_evaluation``, the formatted evaluation appended
            to ``messages``, and ``current_stage`` set to "done".

        Errors raised while invoking the evaluator model propagate to the caller.
        """
        messages = state["messages"]
        scenario = state["scenario"]
        dialogue_history = state["dialogue_history"]
        proficiency_level = state["proficiency_level"]

        system_prompt = EVALUATOR_SYSTEM_PROMPT.format(
            proficiency_level=proficiency_level,
            scenario_type=scenario["scenario_type"],
            character_role=scenario["character_role"],
            user_role=scenario["user_role"],
            goals=", ".join(str(goal) for goal in scenario["goals"])
        )

        # Extract the conversation history for the evaluator
        conversation_text = "Conversation transcript:\n"
        for turn in dialogue_history:
            role = "Tutor" if turn["role"] == "assistant" else "Learner"
            conversation_text += f"{role}: {turn['content']}\n\n"

        # Both texts are final (already formatted / learner-supplied). Message
        # objects are passed through untouched, whereas ("system", text) tuples
        # would be parsed as templates and fail on any "{" or "}" in the
        # transcript or goals.
        evaluation_prompt = ChatPromptTemplate.from_messages([
            SystemMessage(content=system_prompt),
            HumanMessage(content=conversation_text)
        ])

        # Generate evaluation using the evaluator model
        evaluation_chain = evaluation_prompt | self.evaluator_llm
        evaluation_response = evaluation_chain.invoke({})
        evaluation_text = evaluation_response.content

        # Try to extract content from reasoning/answer blocks
        reasoning_match = re.search(r'<reasoning>(.*?)</reasoning>', evaluation_text, re.DOTALL)
        answer_match = re.search(r'<answer>(.*?)</answer>', evaluation_text, re.DOTALL)

        if reasoning_match and answer_match:
            reasoning = reasoning_match.group(1).strip()
            answer = answer_match.group(1).strip()
            # Scores are read from the answer block, which should be more structured
            score_source = answer
            # Use answer as the main content, but include reasoning
            evaluation_text = f"{answer}\n\n**Detailed Analysis:**\n{reasoning}"
        else:
            # Standard extraction if no reasoning/answer blocks
            score_source = evaluation_text

        # Create a structured evaluation result
        evaluation_result = {
            key: self._extract_score(pattern, score_source)
            for key, pattern in self.SCORE_PATTERNS.items()
        }
        evaluation_result["full_evaluation"] = evaluation_text

        # Format a user-friendly evaluation message
        overall_score = evaluation_result["overall_score"]
        stars = "⭐" * overall_score
        evaluation_message = f"""# Hindi Conversation Evaluation

## Overall Score: {stars} ({overall_score}/5)

{evaluation_text}

Would you like to try another scenario?
"""

        # Update state with evaluation
        new_state = state.copy()
        new_state["evaluation"] = evaluation_result
        new_state["messages"] = messages + [AIMessage(content=evaluation_message)]
        new_state["current_stage"] = "done"

        return new_state

    def router_agent(self, state: ConversationState) -> str:
        """Determine the next agent to handle the conversation"""
        return state["current_stage"]

def build_conversation_graph(generator_llm, hindi_model, hindi_tokenizer, evaluator_llm, rag_system):
    """Assemble and compile the three-node conversation graph.

    Nodes: "scenario_generator" (entry point) -> "language_tutor" -> "evaluator".
    After every tutor step the state's ``current_stage`` value selects the next
    node: the tutor again, the evaluator, or the end of the run. The evaluator
    always ends the run.

    Returns:
        The compiled graph produced by ``StateGraph.compile()``.
    """
    agent_set = LangGraphAgents(generator_llm, hindi_model, hindi_tokenizer, evaluator_llm, rag_system)

    def router(state):
        """Simple router based on current_stage field"""
        return state["current_stage"]

    workflow = StateGraph(ConversationState)

    # Register one node per agent.
    node_handlers = (
        ("scenario_generator", agent_set.scenario_generator_agent),
        ("language_tutor", agent_set.language_tutor_agent),
        ("evaluator", agent_set.evaluator_agent),
    )
    for node_name, handler in node_handlers:
        workflow.add_node(node_name, handler)

    # A fresh scenario always flows straight into the tutor.
    workflow.add_edge("scenario_generator", "language_tutor")

    # NOTE(review): "language_tutor" can route back to itself within a single
    # run; confirm the caller supplies fresh user_input between tutor steps.
    tutor_routes = {
        "language_tutor": "language_tutor",
        "evaluator": "evaluator",
        "done": END
    }
    workflow.add_conditional_edges("language_tutor", router, tutor_routes)

    # After evaluation, end the conversation
    workflow.add_edge("evaluator", END)

    workflow.set_entry_point("scenario_generator")

    return workflow.compile()

# Progress marker printed once the definitions above have been executed.
print("LangGraph components defined!")

# Cell 7: Optional Speech Components
# These functions are only used if ENABLE_SPEECH is True

def setup_speech_components():
    """Load the optional speech stack when ``ENABLE_SPEECH`` is on.

    Returns:
        ``(whisper_model, True)`` on success; ``(None, None)`` when speech is
        disabled or either speech package fails to import or load.
    """
    if ENABLE_SPEECH:
        try:
            import whisper
            from gtts import gTTS  # imported here so a missing gTTS fails setup

            # Load a small model for speech recognition
            print("Loading Whisper base model for speech recognition...")
            recognizer = whisper.load_model("base")

            print("Speech components initialized successfully.")
            return recognizer, True
        except Exception as exc:
            print(f"Error initializing speech components: {exc}")
            return None, None

    print("Speech features disabled. Set ENABLE_SPEECH = True to enable.")
    return None, None

def transcribe_audio(speech_model, audio_path):
    """Transcribe the audio file at ``audio_path`` as Hindi using Whisper.

    Returns the transcribed text, a fixed notice when speech recognition is
    unavailable, or an error description when transcription fails.
    """
    speech_ready = speech_model and ENABLE_SPEECH
    if not speech_ready:
        return "Speech recognition is disabled."

    try:
        transcription = speech_model.transcribe(audio_path, language="hi")
    except Exception as exc:
        return f"Error transcribing audio: {exc}"

    try:
        return transcription["text"]
    except Exception as exc:
        return f"Error transcribing audio: {exc}"

def text_to_speech(text, output_path="/kaggle/working/tutor_speak.mp3"):
    """Convert text to speech using gTTS and save it as an MP3 file.

    Args:
        text: Text to speak. When it has at least two lines and does not start
            with "#", only the first two lines (joined by a space) are spoken;
            otherwise the whole text is used.
        output_path: Destination file for the generated audio.

    Returns:
        ``output_path`` on success; ``None`` when speech is disabled, ``text``
        is empty or not a string, or synthesis fails.
    """
    if not ENABLE_SPEECH:
        return None

    # Nothing to synthesize: avoid calling .strip() on None or sending empty text.
    if not isinstance(text, str) or not text.strip():
        return None

    # Tutor replies carry the Roman line followed by the Devanagari line;
    # both are spoken. Text starting with "#" is passed through whole.
    lines = text.strip().split("\n")
    if len(lines) >= 2 and not text.startswith("#"):
        text_for_tts = f"{lines[0]} {lines[1]}"
    else:
        text_for_tts = text

    try:
        from gtts import gTTS
        tts = gTTS(text=text_for_tts, lang="hi", slow=False)
        tts.save(output_path)
        return output_path
    except Exception as e:
        print(f"Error generating speech: {e}")
        return None

# Progress marker printed once the speech helpers above have been defined.
print("Speech components defined (will be enabled if ENABLE_SPEECH=True)")

# Cell 8: Main Application Class
# This is the core class that runs the system

class EnhancedHindiPracticeApp:
    """Main application class for the Hindi practice system with LangGraph integration"""
    
    def __init__(self):
        self.hindi_model = None
        self.hindi_tokenizer = None
        self.generator_model = None
        self.generator_tokenizer = None
        self.generator_llm = None
        self.evaluator_model = None
        self.evaluator_tokenizer = None
        self.evaluator_llm = None
        self.rag_system = None
        self.conversation_graph = None
        self.state = None
        self.model_loaded = False
        self.speech_model = None
        self.speech_enabled = False

   
    def initialize_system(self, progress=None):
        """Initialize the RAG system, the three models and the conversation graph.

        Steps, in order: RAG setup (falls back to dummy mode on error), Hindi
        conversation model (required), scenario generator and evaluator models
        (each falls back to reusing the Hindi model on error), then the graph.

        Args:
            progress: Optional callable invoked as ``progress(fraction, desc=...)``
                to report status.

        Returns:
            A status string: a success message, or a description of the error
            that stopped initialization. ``self.model_loaded`` becomes True only
            on success.
        """
        def report(fraction, message):
            # Progress reporting is optional.
            if progress:
                progress(fraction, desc=message)

        def gpu_gb():
            # Currently allocated GPU memory, in GiB, for the status prints.
            return torch.cuda.memory_allocated() / 1024**3

        generator_model_name = "meta-llama/Llama-3.2-3B-Instruct"

        try:
            report(0, "Initializing Hindi Learning System...")

            # Configure PyTorch to properly use GPU
            if DEVICE == "cuda":
                # Set higher memory limits for 16GB VRAM
                os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:4096'
                torch.cuda.empty_cache()
                print(f"GPU memory before loading: {torch.cuda.memory_allocated()/1024**3:.2f}GB / {torch.cuda.get_device_properties(0).total_memory/1024**3:.2f}GB")

            # Initialize RAG system first
            try:
                report(0.1, "Setting up RAG system...")
                self.rag_system = HindiLearningRAG(dummy_mode=False)  # Try with actual retrieval

                report(0.2, "Setting up dialogue data...")
                if not os.path.exists(RAG_DATA_PATH):
                    create_sample_dialogue_data()

                report(0.25, "Loading vector store...")
                self.rag_system.load_documents()
                self.rag_system.create_vector_store()

            except Exception as e:
                print(f"RAG initialization error: {str(e)}")
                report(0.25, "Using fallback RAG system")
                self.rag_system = HindiLearningRAG(dummy_mode=True)

            # 1. Load Hindi conversation model (required; abort on failure)
            report(0.3, "Loading Hindi conversation model...")
            try:
                # Clear memory before loading
                clear_gpu_memory()

                self.hindi_model, self.hindi_tokenizer = self._load_quantized_causal_lm(HINDI_MODEL_NAME)
                print(f"Hindi model loaded. GPU memory: {gpu_gb():.2f}GB")

            except Exception as e:
                error_msg = f"Error loading Hindi model: {str(e)}"
                print(error_msg)
                report(0.4, error_msg)
                return error_msg

            # 2. Load generator model (separate model)
            report(0.5, "Loading scenario generator model...")
            try:
                self.generator_model, self.generator_tokenizer = self._load_quantized_causal_lm(generator_model_name)
                self.generator_llm = TemplateModelWrapper(
                    self.generator_model,
                    self.generator_tokenizer,
                    model_name="scenario-generator"
                )
                print(f"Generator model loaded. GPU memory: {gpu_gb():.2f}GB")

            except Exception as e:
                print(f"Error loading generator model: {str(e)}")
                # Fallback to use the Hindi model for generation if needed
                report(0.6, "Using Hindi model for generation")
                self.generator_model = self.hindi_model
                self.generator_tokenizer = self.hindi_tokenizer
                self.generator_llm = TemplateModelWrapper(
                    self.generator_model,
                    self.generator_tokenizer,
                    model_name="hindi-as-generator"
                )

            # 3. Load evaluator model (separate model)
            report(0.7, "Loading evaluator model...")
            try:
                self.evaluator_model, self.evaluator_tokenizer = self._load_quantized_causal_lm(EVALUATOR_MODEL_NAME)
                self.evaluator_llm = TemplateModelWrapper(
                    self.evaluator_model,
                    self.evaluator_tokenizer,
                    model_name="performance-evaluator"
                )
                print(f"Evaluator model loaded. GPU memory: {gpu_gb():.2f}GB")

            except Exception as e:
                print(f"Error loading evaluator model: {str(e)}")
                # Fallback to use the Hindi model for evaluation
                report(0.8, "Using Hindi model for evaluation")
                self.evaluator_model = self.hindi_model
                self.evaluator_tokenizer = self.hindi_tokenizer
                self.evaluator_llm = TemplateModelWrapper(
                    self.evaluator_model,
                    self.evaluator_tokenizer,
                    model_name="hindi-as-evaluator"
                )

            # Build the conversation graph with all separate models
            report(0.9, "Building conversation graph...")
            try:
                self.conversation_graph = build_conversation_graph(
                    self.generator_llm,
                    self.hindi_model,
                    self.hindi_tokenizer,
                    self.evaluator_llm,
                    self.rag_system
                )
                print(f"Conversation graph built. Final GPU memory: {gpu_gb():.2f}GB")

            except Exception as e:
                error_msg = f"Error building conversation graph: {str(e)}"
                print(error_msg)
                report(0.95, error_msg)
                return error_msg

            self.model_loaded = True
            report(1.0, "System initialized successfully!")

            return f"Hindi Learning System initialized! GPU memory used: {gpu_gb():.2f}GB"

        except Exception as e:
            error_msg = f"System initialization failed: {str(e)}"
            print(error_msg)
            report(1.0, error_msg)
            return error_msg

    @staticmethod
    def _load_quantized_causal_lm(model_name):
        """Load ``model_name`` with 4-bit NF4 quantization; return ``(model, tokenizer)``.

        The pair is returned only when both loads succeed, so a failure never
        leaves the caller holding a model without its tokenizer. When the
        tokenizer has no pad token, its EOS token is used instead.
        """
        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4"
        )

        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            quantization_config=quantization_config,
            device_map="auto",
            torch_dtype=torch.float16
        )

        tokenizer = AutoTokenizer.from_pretrained(model_name)
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token

        return model, tokenizer


    
    def start_new_scenario(self, proficiency_level="beginner"):
        """Start a new conversation scenario and return what the UI needs to show it.

        The conversation graph is asked to generate a scenario. If the graph
        raises, or returns a state without a scenario, the fallback scenario
        for ``proficiency_level`` is installed together with its opening line,
        dialogue history and the ``language_tutor`` stage, so both failure
        modes leave the session in the same usable condition.

        Args:
            proficiency_level: Level label embedded in the scenario request and
                passed to ``get_fallback_scenario``.

        Returns:
            A 3-tuple ``(chat_history, scenario_markdown, vocabulary_markdown)``
            where ``chat_history`` is a list of ``(user_text, ai_text)`` tuples.
            If the models are not loaded or anything fails, the tuple is
            ``([], message, "")``.
        """
        if not self.model_loaded:
            return [], "Model not loaded. Please initialize the system first.", ""

        try:
            # Seed the graph state with a human message requesting a scenario.
            self.state = {
                "messages": [HumanMessage(content=f"Create a new Hindi conversation scenario for {proficiency_level} level")],
                "scenario": None,
                "evaluation": None,
                "dialogue_history": [],
                "current_stage": "scenario_generator",
                "user_input": None,
                "user_used_hindi": False,
                "proficiency_level": proficiency_level
            }

            # Prepared up front so it is available whichever way generation fails.
            fallback_scenario = get_fallback_scenario(proficiency_level)
            use_fallback = False

            try:
                print("Generating new scenario using conversation graph...")
                self.state = self.conversation_graph.invoke(self.state)

                if not self.state.get("scenario"):
                    print("No scenario generated by graph, using fallback...")
                    use_fallback = True
            except Exception as e:
                # self.state still holds the seed state here because the
                # assignment above never completed.
                print(f"Error generating scenario through conversation graph: {e}")
                use_fallback = True

            if use_fallback:
                # One fallback routine for both failure modes: previously the
                # "graph returned no scenario" case skipped the opening line
                # and stage change, leaving the chat without a tutor message.
                self.state["scenario"] = fallback_scenario

                system_message = SystemMessage(content=f"A default scenario has been loaded: {fallback_scenario['scenario_title']}")

                first_line = fallback_scenario["first_line"]
                first_line_roman = first_line.get("roman", "Namaste!")
                first_line_devanagari = first_line.get("devanagari", "नमस्ते!")
                opening_text = f"{first_line_roman}\n{first_line_devanagari}"
                first_message = AIMessage(content=opening_text)

                # A state returned by the graph may lack "messages"; treat that
                # as an empty conversation instead of raising KeyError.
                existing_messages = list(self.state.get("messages") or [])
                self.state["messages"] = existing_messages + [system_message, first_message]
                self.state["dialogue_history"] = [{
                    "role": "assistant",
                    "content": opening_text
                }]
                self.state["current_stage"] = "language_tutor"

            # Scenario summary (Markdown) for the side panel.
            scenario = self.state["scenario"]
            scenario_details = f"# {scenario['scenario_title']}\n\n"
            scenario_details += f"{scenario['scenario_description']}\n\n"
            scenario_details += f"**Your Role**: {scenario['user_role']}\n"
            scenario_details += f"**AI's Role**: {scenario['character_role']}\n\n"
            scenario_details += "**Your Goals**:\n"
            for goal in scenario['goals']:
                scenario_details += f"- {goal}\n"

            # Vocabulary entries may be dicts (roman/devanagari/meaning) or plain values.
            vocabulary = "### Useful Hindi Vocabulary:\n"
            for vocab in scenario['key_vocabulary']:
                if isinstance(vocab, dict):
                    roman = vocab.get('roman', '')
                    devanagari = vocab.get('devanagari', '')
                    meaning = vocab.get('meaning', '')
                    vocabulary += f"- {roman} ({devanagari}): {meaning}\n"
                else:
                    vocabulary += f"- {vocab}\n"

            # Convert the message list into (user, ai) tuples for the chatbot.
            formatted_messages = []
            for msg in self.state["messages"]:
                if isinstance(msg, SystemMessage):
                    # System messages are not shown in the UI.
                    continue
                if isinstance(msg, AIMessage):
                    content = msg.content
                    lines = content.strip().split("\n")
                    # Content starting with "#" is an evaluation report and is
                    # shown as-is; otherwise the first two lines are treated as
                    # the romanised and Devanagari forms of the tutor's line.
                    if len(lines) >= 2 and not content.startswith("#"):
                        content = f"🗣️ {lines[0]}\n📝 {lines[1]}"
                    formatted_messages.append(("", content))
                elif isinstance(msg, HumanMessage):
                    formatted_messages.append((f"👤 {msg.content}", ""))

            return formatted_messages, scenario_details, vocabulary

        except Exception as e:
            error_msg = f"Error starting new scenario: {str(e)}"
            print(error_msg)
            return [], error_msg, ""
            
    def send_message(self, user_input, history):
        """Run one user turn through the conversation graph.

        Args:
            user_input: Text typed (or transcribed) by the learner.
            history: Chat history as a list of ``(user_text, ai_text)`` pairs.
                ``None`` is accepted and treated as an empty history.

        Returns:
            The history extended by one ``(user_text, reply)`` pair. ``reply``
            is the formatted tutor answer, an error description, or
            ``"No response generated"``. Blank input returns the history
            unchanged.
        """
        # Every path below concatenates onto history, so a None value would
        # raise TypeError (including inside the error handlers).
        history = history or []

        if not self.model_loaded or not self.state:
            return history + [(f"👤 {user_input}", "System not initialized. Please start a new scenario first.")]

        # Ignore empty and whitespace-only submissions instead of sending them
        # to the model.
        if not user_input or not str(user_input).strip():
            return history

        user_turn = f"👤 {user_input}"

        try:
            self.state["user_input"] = user_input

            try:
                self.state = self.conversation_graph.invoke(self.state)
            except Exception as e:
                error_msg = f"Error processing message: {str(e)}"
                print(error_msg)
                return history + [(user_turn, f"Error: {error_msg}")]

            # The most recent AIMessage is the reply to display.
            latest_ai_message = next(
                (msg for msg in reversed(self.state["messages"]) if isinstance(msg, AIMessage)),
                None,
            )

            if not latest_ai_message:
                return history + [(user_turn, "No response generated")]

            content = latest_ai_message.content
            formatted_content = content
            # Content starting with "#" is an evaluation report and keeps its
            # own formatting; otherwise the first two lines are shown as the
            # romanised and Devanagari forms.
            if not content.startswith("#"):
                lines = content.strip().split("\n")
                if len(lines) >= 2:
                    formatted_content = f"🗣️ {lines[0]}\n📝 {lines[1]}"

            return history + [(user_turn, formatted_content)]

        except Exception as e:
            error_msg = f"Error in send_message: {str(e)}"
            print(error_msg)
            return history + [(user_turn, f"Error: {error_msg}")]
            
    def get_speech_from_text(self, text=""):
        """Convert text (or the latest tutor message) to speech.

        Args:
            text: Text to speak. When empty, the content of the most recent
                ``AIMessage`` in ``self.state["messages"]`` is used instead.

        Returns:
            Whatever ``text_to_speech`` returns for the chosen text, or
            ``None`` when speech is disabled or there is nothing to speak.
        """
        if not self.speech_enabled:
            return None

        if not text:
            # self.state can be empty/None before a scenario exists (send_message
            # guards for the same condition), so do not index it blindly.
            messages = (self.state.get("messages") or []) if self.state else []
            for msg in reversed(messages):
                if isinstance(msg, AIMessage):
                    text = msg.content
                    break

        if not text:
            # No explicit text and no tutor message yet: skip synthesis rather
            # than handing an empty string to the TTS helper.
            return None

        return text_to_speech(text)
                
    def transcribe_audio(self, audio_path):
        """Transcribe audio to text using Whisper"""
        if not self.speech_enabled or not self.speech_model:
            return "Speech recognition is disabled"
                
        return transcribe_audio(self.speech_model, audio_path)# Cell 8: Main Application Class
# This is the core class that runs the system

            






# Notebook progress marker: printed when this cell's definitions have been executed.
print("Main application class defined!")

# Cell 9: Gradio Interface
# This creates the user interface for the system

def create_enhanced_gradio_interface():
    """Build the Gradio UI for the enhanced Hindi practice system.

    Creates an ``EnhancedHindiPracticeApp`` and a ``gr.Blocks`` layout with a
    control column (initialisation, scenario selection, help, debug output)
    and a chat column (chatbot, text input, optional speech controls), then
    wires the buttons to the app's methods. The interface is built but not
    launched here.

    Returns:
        tuple: ``(interface, app)`` - the ``gr.Blocks`` object and the app
        instance that backs it.
    """
    # Function-scope import: only the error reports in check_start_scenario need it.
    import traceback

    app = EnhancedHindiPracticeApp()

    with gr.Blocks(title="Enhanced Hindi Conversation Practice", theme=gr.themes.Soft()) as interface:
        gr.Markdown("# 🇮🇳 Enhanced Hindi Conversation Practice")
        gr.Markdown("Practice Hindi in dynamic roleplay scenarios with AI tutoring and feedback")

        with gr.Row():
            # ---- Left column: controls, scenario details, help, debug ----
            with gr.Column(scale=1):
                status_msg = gr.Markdown("System status: Not initialized")
                init_progress = gr.Textbox(
                    label="Initialization Progress",
                    value="Click 'Initialize System' to begin",
                    interactive=False
                )
                init_button = gr.Button("Initialize System", variant="primary")

                with gr.Accordion("New Scenario", open=True):
                    proficiency_selector = gr.Radio(
                        choices=["beginner", "intermediate", "advanced"],
                        label="Choose your proficiency level",
                        value="beginner"
                    )
                    start_scenario_button = gr.Button("Start New Scenario", variant="secondary")

                scenario_description = gr.Markdown("Initialize the system and start a scenario to begin practicing.")
                vocabulary_section = gr.Markdown("Vocabulary will appear here.")

                with gr.Accordion("About This Enhanced App", open=False):
                    gr.Markdown("""
                    This enhanced Hindi practice app uses LangGraph to create:
                    
                    1. **Dynamic Scenarios**: Each practice session features a unique scenario tailored to your proficiency level
                    2. **Natural Conversation**: Practice with a Hindi-speaking AI tutor in realistic situations
                    3. **Performance Evaluation**: Get detailed feedback on your conversation skills
                    
                    How to use:
                    1. Click "Initialize System" to set up the AI
                    2. Select your proficiency level
                    3. Start a new scenario
                    4. Practice the conversation, trying to accomplish the goals
                    5. Get feedback on your performance
                    
                    Try to use Hindi words and phrases as much as possible!
                    """)

                with gr.Accordion("Debug Information", open=False):
                    debug_info = gr.Textbox(
                        label="Debug Output",
                        value="Debug information will appear here",
                        interactive=False
                    )
                    debug_refresh = gr.Button("Refresh Debug Info")

            # ---- Right column: chat and speech controls ----
            with gr.Column(scale=2):
                chatbot = gr.Chatbot(
                    height=500,
                    show_label=False,
                    elem_id="hindi_langraph_chatbot"
                )

                with gr.Row():
                    user_input = gr.Textbox(
                        placeholder="Type your response here...",
                        show_label=False,
                        scale=8
                    )
                    send_button = gr.Button("Send", scale=1)
                    mic_button = gr.Button("🎤", size="sm", scale=1, visible=ENABLE_SPEECH)

                with gr.Row(visible=ENABLE_SPEECH):
                    speak_button = gr.Button("🔊 Hear Tutor")
                    tutor_audio = gr.Audio(label="", autoplay=True)

            # Hidden recording panel, revealed by the microphone button.
            with gr.Column(visible=False) as audio_popup:
                gr.Markdown("### Speak Hindi")
                audio_input = gr.Audio(sources=["microphone"], type="filepath", label="Record your voice")
                submit_audio = gr.Button("Submit Recording")

        def update_progress(progress_value, desc=""):
            """Render a 0..1 progress fraction and a description as one status line."""
            percentage = int(progress_value * 100)
            return f"{desc} ({percentage}%)"

        def init_system(progress=gr.Progress()):
            """Initialise the app, yielding (progress_text, status_markdown) pairs.

            The ``gr.Progress`` default lets Gradio inject its progress tracker,
            so the steps reported by ``app.initialize_system`` reach the UI.
            Previously the callback built a ``gr.update`` that was discarded,
            so no intermediate progress was ever shown.
            """
            yield update_progress(0, "Starting initialization..."), "System status: Initializing..."

            def report_progress(value, desc=""):
                # Plain function wrapper: initialize_system tests `if progress:`,
                # and a function is always truthy.
                # NOTE(review): gr.Progress is believed to define __len__ and may
                # not be safe to truth-test directly - confirm against the Gradio docs.
                progress(value, desc=desc)

            try:
                result = app.initialize_system(progress=report_progress)

                if app.model_loaded:
                    yield update_progress(1, "Initialization complete!"), "System status: Ready"
                else:
                    yield update_progress(1, f"Initialization failed: {result}"), f"System status: Error - {result}"
            except Exception as e:
                error_msg = f"Error during initialization: {str(e)}"
                yield update_progress(1, f"Error: {error_msg}"), f"System status: Error - {error_msg}"

        def refresh_debug_info():
            """Summarise which components are loaded, the current stage and GPU memory."""
            if not hasattr(app, 'model_loaded'):
                return "App not initialized yet"

            debug_text = f"Model loaded: {app.model_loaded}\n"

            # (attribute on app, label shown in the report)
            tracked_components = (
                ("hindi_model", "Hindi model loaded"),
                ("generator_model", "Generator model loaded"),
                ("evaluator_model", "Evaluator model loaded"),
                ("conversation_graph", "Conversation graph built"),
            )
            for attr_name, label in tracked_components:
                present = "Yes" if getattr(app, attr_name, None) else "No"
                debug_text += f"{label}: {present}\n"

            if getattr(app, 'state', None):
                debug_text += f"Current state: {app.state.get('current_stage', 'Not set')}\n"

            if torch.cuda.is_available():
                debug_text += f"GPU memory usage: {torch.cuda.memory_allocated()/1024**3:.2f}GB / {torch.cuda.get_device_properties(0).total_memory/1024**3:.2f}GB\n"

            return debug_text

        def check_start_scenario(proficiency):
            """Start a scenario and return (chat, description, vocabulary, debug_text).

            Errors are reported in all four outputs instead of being raised, so
            the UI always has something to display.
            """
            try:
                if not app.model_loaded:
                    return [(None, "Please initialize the system first")], "System not initialized", "Please initialize the system first", "Error: System not initialized"

                print(f"Starting new scenario with proficiency level: {proficiency}")

                try:
                    chat_history, details, vocabulary = app.start_new_scenario(proficiency)
                    print("Scenario generation completed")
                    return chat_history, details, vocabulary, refresh_debug_info()
                except Exception as e:
                    error_details = traceback.format_exc()
                    print(f"Error in start_new_scenario: {str(e)}")
                    print(f"Error details: {error_details}")

                    error_msg = f"Error generating scenario: {str(e)}"
                    return [(None, error_msg)], error_msg, f"Error details: {str(e)}", error_details

            except Exception as e:
                error_details = traceback.format_exc()
                print(f"Exception in check_start_scenario: {str(e)}")
                print(f"Error details: {error_details}")
                return [(None, f"System error: {str(e)}")], f"Error: {str(e)}", "Please try again after restarting", error_details

        # ---- Event wiring ----
        init_button.click(
            init_system,
            outputs=[init_progress, status_msg]
        )

        debug_refresh.click(
            refresh_debug_info,
            outputs=[debug_info]
        )

        start_scenario_button.click(
            check_start_scenario,
            inputs=[proficiency_selector],
            outputs=[chatbot, scenario_description, vocabulary_section, debug_info]
        )

        # Pressing Enter in the textbox and clicking "Send" behave identically:
        # send the message, then clear the input box.
        for send_trigger in (user_input.submit, send_button.click):
            send_trigger(
                app.send_message,
                inputs=[user_input, chatbot],
                outputs=[chatbot]
            ).then(
                lambda: "",
                outputs=[user_input]
            )

        if ENABLE_SPEECH:
            mic_button.click(
                lambda: gr.update(visible=True),
                outputs=[audio_popup]
            )

            # Transcription goes into the text box so the learner can review it
            # before sending; the recording panel is hidden again afterwards.
            submit_audio.click(
                app.transcribe_audio,
                inputs=[audio_input],
                outputs=[user_input]
            ).then(
                lambda: gr.update(visible=False),
                outputs=[audio_popup]
            )

            speak_button.click(
                app.get_speech_from_text,
                outputs=[tutor_audio]
            )

    return interface, app
# Notebook progress marker: printed when the interface factory above has been defined.
print("Gradio interface defined!")

# Cell 10: Data Preparation Helper (optional)
# This cell helps prepare sample data if you don't have the processed_hindi_dialogues.json file

def create_sample_dialogue_data(output_path=None):
    """Write a small built-in dialogue dataset to disk and return it.

    Three sample conversations (restaurant, market, hotel) are produced, each
    with a ``page_content`` summary and ``metadata`` holding the scene, tags,
    the full dialogue in roman and Devanagari script, and per-turn entries.

    Args:
        output_path: Destination JSON file. Defaults to ``RAG_DATA_PATH``.
            Missing parent directories are created; a bare file name (no
            directory part) is written to the current working directory.

    Returns:
        list[dict]: The sample records that were written.
    """
    def _turn(speaker, text_roman, text_devanagari):
        # One dialogue turn in the schema used by the records below.
        return {
            "speaker": speaker,
            "text_roman": text_roman,
            "text_devanagari": text_devanagari
        }

    def _sample(page_content, scene_description, context_tags, emotion_tags,
                roman_dialogue, devanagari_dialogue, dialogue_turns):
        # One record; key order is kept so the JSON layout stays the same.
        return {
            "page_content": page_content,
            "metadata": {
                "scene_description": scene_description,
                "context_tags": context_tags,
                "emotion_tags": emotion_tags,
                "roman_dialogue": roman_dialogue,
                "devanagari_dialogue": devanagari_dialogue,
                "dialogue_turns": dialogue_turns
            }
        }

    # Sample 1: Restaurant scenario
    restaurant_sample = _sample(
        "A conversation in a restaurant between a waiter and customer.",
        "A busy restaurant in Delhi during lunchtime.",
        ["restaurant", "food", "dining"],
        ["neutral", "polite"],
        "Waiter: Namaste ji, kya khaayenge aap? Customer: Menu dikha dijiye. Waiter: Ji, yeh lijiye menu.",
        "वेटर: नमस्ते जी, क्या खाएंगे आप? कस्टमर: मेनू दिखा दीजिए। वेटर: जी, यह लीजिए मेनू।",
        [
            _turn("Waiter", "Namaste ji, kya khaayenge aap?", "नमस्ते जी, क्या खाएंगे आप?"),
            _turn("Customer", "Menu dikha dijiye.", "मेनू दिखा दीजिए।"),
            _turn("Waiter", "Ji, yeh lijiye menu.", "जी, यह लीजिए मेनू।"),
        ],
    )

    # Sample 2: Market scenario
    market_sample = _sample(
        "A conversation in a market between a shopkeeper and customer.",
        "A busy vegetable market in Mumbai.",
        ["market", "shopping", "bazaar"],
        ["neutral", "negotiating"],
        "Shopkeeper: Aaiye ji, kya chahiye? Customer: Tamatar kitne ka hai? Shopkeeper: Sau rupaye kilo, ekdum taza hai.",
        "दुकानदार: आइए जी, क्या चाहिए? ग्राहक: टमाटर कितने का है? दुकानदार: सौ रुपये किलो, एकदम ताज़ा है।",
        [
            _turn("Shopkeeper", "Aaiye ji, kya chahiye?", "आइए जी, क्या चाहिए?"),
            _turn("Customer", "Tamatar kitne ka hai?", "टमाटर कितने का है?"),
            _turn("Shopkeeper", "Sau rupaye kilo, ekdum taza hai.", "सौ रुपये किलो, एकदम ताज़ा है।"),
        ],
    )

    # Sample 3: Hotel scenario
    hotel_sample = _sample(
        "A conversation at a hotel reception.",
        "Check-in at a mid-range hotel in Jaipur.",
        ["hotel", "travel", "accommodation"],
        ["formal", "polite"],
        "Receptionist: Namaste ji, swagat hai aapka. Guest: Meri booking hai, Singh ke naam se. Receptionist: Ji, ek minute dekhta hoon.",
        "रिसेप्शनिस्ट: नमस्ते जी, स्वागत है आपका। अतिथि: मेरी बुकिंग है, सिंह के नाम से। रिसेप्शनिस्ट: जी, एक मिनट देखता हूँ।",
        [
            _turn("Receptionist", "Namaste ji, swagat hai aapka.", "नमस्ते जी, स्वागत है आपका।"),
            _turn("Guest", "Meri booking hai, Singh ke naam se.", "मेरी बुकिंग है, सिंह के नाम से।"),
            _turn("Receptionist", "Ji, ek minute dekhta hoon.", "जी, एक मिनट देखता हूँ।"),
        ],
    )

    sample_data = [restaurant_sample, market_sample, hotel_sample]

    target_path = RAG_DATA_PATH if output_path is None else output_path

    # os.path.dirname() is "" for a bare file name and os.makedirs("") raises,
    # so only create a directory when there is one to create. exist_ok avoids
    # the check-then-create race of a separate os.path.exists() test.
    parent_dir = os.path.dirname(target_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # ensure_ascii=False keeps the Devanagari text readable in the file.
    with open(target_path, 'w', encoding='utf-8') as f:
        json.dump(sample_data, f, ensure_ascii=False, indent=2)

    print(f"Sample dialogue data created at {target_path}")
    return sample_data

# Notebook progress marker: printed when the sample-data helper above has been defined.
print("Data preparation helper defined!")

# Cell 11: Launch the application
# This cell starts the app and provides a public link

def main():
    """Prepare the runtime and build the application without launching it.

    Steps: set the CUDA allocator option and clear cached GPU memory when a
    GPU is available, create the sample dialogue file if ``RAG_DATA_PATH``
    does not exist, then build the Gradio interface.

    Returns:
        tuple: ``(interface, app)`` from ``create_enhanced_gradio_interface``.
        The pair was previously discarded, so there was nothing to call
        ``launch()`` on. Example::

            interface, app = main()
            interface.launch(share=True, server_name="0.0.0.0",
                             server_port=7861, max_threads=20)
    """
    import os
    import torch

    if torch.cuda.is_available():
        # Set the allocator option before touching CUDA in this function.
        # NOTE(review): assumed to take effect only if the CUDA caching
        # allocator has not been used yet in this process - confirm.
        os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:128'
        torch.cuda.empty_cache()

    # The RAG system needs a dialogue file; create the built-in sample if absent.
    if not os.path.exists(RAG_DATA_PATH):
        print("Creating sample dialogue data...")
        create_sample_dialogue_data()

    print("Initializing Enhanced Hindi Language Learning System with LangGraph...")
    interface, app = create_enhanced_gradio_interface()

    # Launching is left to the caller (see the docstring example); a lower
    # max_threads was used there to reduce memory usage.
    return interface, app

# # Run the application
# if __name__ == "__main__":
#     main()

In [None]:
# WITH SPEECH FEATURES

In [None]:
def manual_initialize_fixed():
    """Build an ``EnhancedHindiPracticeApp`` step by step and return it.

    The function wires the app up manually instead of relying on the app's own
    initialisation:

    1. RAG system: ``HindiLearningRAG(dummy_mode=False)``; sample dialogue data
       is created when ``RAG_DATA_PATH`` is missing, documents are loaded and a
       vector store is created if none exists. Any exception switches the app
       to ``HindiLearningRAG(dummy_mode=True)``.
    2. Hindi causal LM: loaded 4-bit (NF4, double quantisation) when
       ``USE_4BIT`` is truthy, otherwise with ``load_in_8bit=True``.
    3. The same model/tokenizer pair is reused for the generator and the
       evaluator, each wrapped in a ``TemplateModelWrapper``.
    4. Whisper speech recognition is loaded on a best-effort basis;
       ``app.speech_enabled`` records whether that succeeded.
    5. A ``FixedLangGraphAgents`` helper and speech helper callables are
       attached to the app, and ``model_loaded`` / ``using_fixed_approach``
       are set to ``True``.

    Returns:
        The configured ``EnhancedHindiPracticeApp`` instance.
    """
    app = EnhancedHindiPracticeApp()
    
    print("Starting fixed manual initialization with optimized models...")
    clear_gpu_memory()
    
    # Whisper checkpoint name passed to whisper.load_model() further down.
    WHISPER_MODEL_SIZE = "base"  # Options: tiny, base, small, medium, large
    
    # --- RAG system -------------------------------------------------------
    # Best effort: any failure below downgrades the app to dummy RAG mode
    # instead of aborting the whole initialisation.
    try:
        print("Initializing RAG system with preprocessed data...")
        app.rag_system = HindiLearningRAG(dummy_mode=False)
        
        # Make sure the dialogue file exists before documents are loaded.
        if not os.path.exists(RAG_DATA_PATH):
            print(f"Creating sample dialogue data at {RAG_DATA_PATH}...")
            create_sample_dialogue_data()
        
        app.rag_system.load_documents()
        # Only build a vector store when loading did not already provide one.
        if not app.rag_system.vector_store:
            app.rag_system.create_vector_store()
            
        print("RAG system initialized with vector store")
    except Exception as e:
        print(f"RAG initialization error: {str(e)}")
        print("Falling back to dummy RAG mode")
        app.rag_system = HindiLearningRAG(dummy_mode=True)
    
    # --- Hindi language model ----------------------------------------------
    print("Loading Hindi model with enhanced parameters...")
    if USE_4BIT:
        # 4-bit NF4 quantisation with double quantisation; compute runs in float16.
        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,  # dtype used for computation
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4"
        )
        
        app.hindi_model = AutoModelForCausalLM.from_pretrained(
            HINDI_MODEL_NAME,
            quantization_config=quantization_config,
            device_map="auto",
            torch_dtype=torch.float16,
            # NOTE(review): presumably reduces peak host RAM while loading - confirm
            low_cpu_mem_usage=True
        )
    else:
        # Alternative path when USE_4BIT is falsy: load the weights in 8-bit.
        app.hindi_model = AutoModelForCausalLM.from_pretrained(
            HINDI_MODEL_NAME,
            load_in_8bit=True,
            device_map="auto",
            torch_dtype=torch.float16
        )
    
    app.hindi_tokenizer = AutoTokenizer.from_pretrained(HINDI_MODEL_NAME)
    # Reuse the EOS token for padding when the tokenizer defines no pad token.
    if app.hindi_tokenizer.pad_token is None:
        app.hindi_tokenizer.pad_token = app.hindi_tokenizer.eos_token
    
    print(f"Hindi model loaded. GPU memory: {torch.cuda.memory_allocated()/1024**3:.2f}GB")
    
    # --- Generator / evaluator ---------------------------------------------
    # Both roles share the single Hindi model and tokenizer loaded above, so
    # no second model is loaded.
    print("Configuring generator and evaluator...")
    app.generator_model = app.hindi_model
    app.generator_tokenizer = app.hindi_tokenizer
    app.evaluator_model = app.hindi_model
    app.evaluator_tokenizer = app.hindi_tokenizer
    
    # Wrap the shared model once per role; only model_name differs.
    app.generator_llm = TemplateModelWrapper(
        app.generator_model,
        app.generator_tokenizer,
        model_name="generator"
    )
    
    app.evaluator_llm = TemplateModelWrapper(
        app.evaluator_model,
        app.evaluator_tokenizer,
        model_name="evaluator"
    )
    
    # --- Speech recognition (optional) ---------------------------------------
    print("Setting up speech recognition capabilities...")
    try:
        # Imported lazily so a missing whisper package only disables speech.
        import whisper
        app.speech_model = whisper.load_model(WHISPER_MODEL_SIZE)
        app.speech_enabled = True
        print(f"Speech recognition enabled with Whisper {WHISPER_MODEL_SIZE} model")
    except Exception as e:
        print(f"Error initializing speech model: {e}")
        # Note: app.speech_model is not assigned on this path.
        app.speech_enabled = False
    
    # Build the conversation graph (fix for the message validation)
    print("Building fixed conversation graph with enhanced generation...")
    
    # Create a custom version of the conversation flow that doesn't rely on complex state
    class FixedLangGraphAgents:
        def __init__(self, hindi_model, hindi_tokenizer, rag_system):
            self.hindi_model = hindi_model
            self.hindi_tokenizer = hindi_tokenizer
            self.rag_system = rag_system
            
        # Replace the simple scenario generation with this more dynamic version
        def generate_scenario(self, proficiency_level):
            """Generate a more dynamic scenario with higher temperature for creativity"""
            print(f"Generating dynamic scenario for {proficiency_level}...")
            
            # Try to generate a creative scenario using the Hindi model
            system_prompt = f"""Create a realistic Hindi conversation scenario for a {proficiency_level} language learner.
            
        Include:
        1. scenario_type: Choose from restaurant, market, transportation, hotel, zoo, temple, office, school, etc.
        2. scenario_title: A descriptive title
        3. scenario_description: Brief setting description (2-3 sentences)
        4. character_role: Role the AI plays (waiter, shopkeeper, etc.)
        5. user_role: Role the learner plays (customer, traveler, etc.)
        6. goals: 3-5 conversation goals for the learner
        7. key_vocabulary: 5-8 Hindi words/phrases with Roman and Devanagari scripts
        8. first_line: An opening dialogue line in both Roman and Devanagari

    Format as JSON exactly like this:
    {{
    "scenario_type": "market",
    "scenario_title": "Bargaining at a Street Market", 
    "scenario_description": "You are shopping at a busy street market in Delhi...",
    "character_role": "shopkeeper",
    "user_role": "customer",
    "goals": ["Ask about an item", "Negotiate the price", "Make a purchase"],
    "key_vocabulary": [
        {{"roman": "kitna", "devanagari": "कितना", "meaning": "how much"}},
        {{"roman": "sasta", "devanagari": "सस्ता", "meaning": "cheap"}}
    ],
    "first_line": {{
        "roman": "Kya chahiye aapko?",
        "devanagari": "क्या चाहिए आपको?"
    }}
    }}"""
        
            messages_formatted = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"Create a unique {proficiency_level} level Hindi conversation scenario that's different from typical restaurant scenarios. Make it creative, educational and engaging. Include interesting and authentic cultural elements."}
            ]
            
            try:
                # Generate with higher temperature for more creativity
                input_ids = self.hindi_tokenizer.apply_chat_template(
                    messages_formatted,
                    add_generation_prompt=True,
                    return_tensors="pt"
                ).to(self.hindi_model.device)
                
                outputs = self.hindi_model.generate(
                    input_ids,
                    max_new_tokens=1000,  # Longer for better JSON generation
                    do_sample=True,
                    temperature=0.8,  # Higher for more creativity
                    top_p=0.92,  # Slightly higher top_p
                    repetition_penalty=1.15,
                    eos_token_id=self.hindi_tokenizer.eos_token_id,
                )
                
                generated_text = self.hindi_tokenizer.decode(
                    outputs[0][input_ids.shape[-1]:], 
                    skip_special_tokens=True
                )
            
                # Try to extract JSON
                import re
                import json
                
                # Find JSON-like structure
                json_pattern = r'```json\s*([\s\S]*?)\s*```|(\{[\s\S]*\})'
                match = re.search(json_pattern, generated_text)
                
                if match:
                    json_str = match.group(1) or match.group(2)
                    scenario_result = json.loads(json_str)
                    
                    # Add proficiency level for reference
                    scenario_result["proficiency_level"] = proficiency_level
                    
                    # Validate required fields
                    required_fields = [
                        "scenario_type", "scenario_title", "scenario_description", 
                        "character_role", "user_role", "goals", "key_vocabulary", "first_line"
                    ]
                    
                    for field in required_fields:
                        if field not in scenario_result:
                            print(f"Missing field {field} in generated scenario")
                            raise ValueError(f"Missing field: {field}")
                            
                    return scenario_result
                else:
                    print("Failed to extract JSON from generated scenario")
                    raise ValueError("Could not extract JSON from response")
                    
            except Exception as e:
                print(f"Error generating dynamic scenario: {e}")
                print("Falling back to predefined scenario")
                # Use fallback with some variations
                fallback = get_fallback_scenario(proficiency_level)
                fallback["proficiency_level"] = proficiency_level
                return fallback
            
        def process_message(self, user_input, scenario, history=None):
            """Process user input with enhanced response generation"""
            if history is None:
                history = []
                
            print(f"Processing message: {user_input}")
            
            # Create system prompt for Hindi model
            character_role = scenario["character_role"]
            scenario_type = scenario["scenario_type"]
            
            # Check if user is using Hindi
            user_used_hindi = is_hindi(user_input)
            
            system_prompt = f"""
You are a Hindi language tutor roleplaying as a {character_role} in a {scenario_type} scenario. 
    
IMPORTANT:
1. You MUST respond in this exact format:
   - First line: Clear, natural Hindi in Roman script (1-2 sentences)
   - Second line: The EXACT SAME text in Devanagari script
2. Keep responses SHORT, NATURAL and AUTHENTIC to how a real {character_role} would speak.
3. Stay IN CHARACTER as the {character_role}. The human is playing the {scenario["user_role"]}.
4. NEVER correct the user directly - stay in the roleplay.
5. Use vocabulary appropriate for a {scenario_type} setting.
6. Your responses should be grammatically correct and culturally authentic.

Example of correct format:
Aap kya khaana pasand karenge?
आप क्या खाना पसंद करेंगे?

DO NOT add "Roman Hindi:" or "Devanagari:" labels. Just write the two lines directly.


CONTENT GUIDELINES:
1. Keep responses SHORT and PRACTICAL.
2. Use authentic, everyday Hindi appropriate for a {scenario_type} setting.
3. Match the user's proficiency level with appropriate vocabulary and complexity.
4. Use REALISTIC Hindi that would be spoken in a real {scenario_type}.
5. If the user's message is unclear, respond naturally as a native speaker would.
"""
            
            if user_used_hindi:
                system_prompt += "\nNOTE: The learner is responding in Hindi, which is excellent! Acknowledge their effort in your response while staying in character."
            
            # Get relevant RAG examples if available
            if self.rag_system and not self.rag_system.dummy_mode:
                rag_examples = self.rag_system.retrieve_dialogue_examples(
                    query=user_input,
                    top_k=2,
                    context_tags=[scenario_type]
                )
                
                if rag_examples:
                    rag_content = "\n\nREFERENCE EXAMPLES (use these for authentic Hindi expressions):\n"
                    for i, example in enumerate(rag_examples[:2]):
                        turns = example.get("dialogue_turns", [])
                        if turns:
                            rag_content += f"Example {i+1}:\n"
                            for j, turn in enumerate(turns[:3]):
                                speaker = turn.get("speaker", "")
                                text = turn.get("text_roman", "")
                                rag_content += f"{speaker}: {text}\n"
                            rag_content += "\n"
                    system_prompt += rag_content
            
            # Create a direct prompt for the Hindi model with context from history
            messages_formatted = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"The {scenario['user_role']} says: \"{user_input}\"\n\nRespond ONLY as the {character_role} in simple Hindi (both Roman and Devanagari). NEVER respond as the {scenario['user_role']}. Keep your response brief, practical and authentic."}
            ]
            
            try:
                # Generate directly with improved parameters
                input_ids = self.hindi_tokenizer.apply_chat_template(
                    messages_formatted,
                    add_generation_prompt=True,
                    return_tensors="pt"
                ).to(self.hindi_model.device)
                
                outputs = self.hindi_model.generate(
                    input_ids,
                    max_new_tokens=100,
                    do_sample=True,
                    temperature=0.3,  # Slightly higher for better quality
                    top_p=0.9,        # Add top_p parameter
                    repetition_penalty=1.2,
                    eos_token_id=self.hindi_tokenizer.eos_token_id,
                )
                
                response = self.hindi_tokenizer.decode(
                    outputs[0][input_ids.shape[-1]:], 
                    skip_special_tokens=True
                )
                
                # Clean the response
                response = clean_response(response)
                return response
                
            except Exception as e:
                print(f"Error generating response: {e}")
                return "Sorry, I couldn't generate a proper Hindi response.\nक्षमा करें, मैं उचित हिंदी प्रतिक्रिया नहीं दे सका।"
        
        def generate_evaluation(self, dialogue_history, scenario):
            """Generate an evaluation of the conversation with enhanced quality"""
            # Create a simple evaluation
            proficiency_level = scenario.get("proficiency_level", "beginner")
            scenario_type = scenario.get("scenario_type", "restaurant")
            goals = scenario.get("goals", [])
            
            # Format the conversation history for evaluation
            conversation_text = "Conversation transcript:\n"
            for turn in dialogue_history:
                role = "Tutor" if turn["role"] == "assistant" else "Learner"
                conversation_text += f"{role}: {turn['content']}\n\n"
            
            # Create system prompt for evaluation
            system_prompt = f"""You are a Hindi language learning evaluator. Analyze this conversation between a learner at {proficiency_level} level and an AI tutor in a {scenario_type} scenario.

Rate on a scale of 1-5:
1. Hindi Usage: How much Hindi did the learner use?
2. Cultural Appropriateness: Did responses fit an Indian context?
3. Goal Achievement: The goals were: {', '.join(goals)}. How well were they accomplished?
4. Overall Rating: Overall performance

Provide detailed and constructive feedback. Include specific examples from the conversation to illustrate your points.
Format your response with clear headings and bullet points where appropriate.
Always include encouraging comments to motivate the learner."""

            # Create direct prompt
            messages_formatted = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": conversation_text}
            ]
            
            try:
                # Generate with improved parameters 
                input_ids = self.hindi_tokenizer.apply_chat_template(
                    messages_formatted,
                    add_generation_prompt=True,
                    return_tensors="pt"
                ).to(self.hindi_model.device)
                
                outputs = self.hindi_model.generate(
                    input_ids,
                    max_new_tokens=400,  # Longer for better evaluation
                    do_sample=True,
                    temperature=0.3,
                    top_p=0.92,
                    repetition_penalty=1.1,
                    eos_token_id=self.hindi_tokenizer.eos_token_id,
                )
                
                evaluation_text = self.hindi_tokenizer.decode(
                    outputs[0][input_ids.shape[-1]:], 
                    skip_special_tokens=True
                )
                
                # Format as markdown
                formatted_evaluation = f"""# Hindi Conversation Evaluation

{evaluation_text}

Would you like to try another scenario?
"""
                return formatted_evaluation
                
            except Exception as e:
                print(f"Error generating evaluation: {e}")
                return """# Hindi Conversation Evaluation

Hindi Usage: 3/5
Cultural Appropriateness: 3/5
Goal Achievement: 3/5
Overall Rating: 3/5

You made a good effort using some Hindi phrases. Keep practicing!

Would you like to try another scenario?"""
                
        def generate_pronunciation_feedback(self, spoken_text, expected_text, proficiency_level="beginner"):
            """Generate pronunciation feedback comparing spoken text to expected text"""
            system_prompt = f"""You are a Hindi language pronunciation coach. Compare the user's spoken Hindi with the expected Hindi and provide helpful, encouraging feedback.

User Level: {proficiency_level}

Your task is to:
1. Identify correctly pronounced words/phrases
2. Point out pronunciation issues in a constructive way
3. Provide specific tips to improve
4. Always be encouraging and positive
5. Rate pronunciation accuracy on a scale of 1-5

Format your response like this:
- Pronunciation Score: X/5
- What You Did Well: [specific words/phrases pronounced correctly]
- Areas for Improvement: [specific words/phrases to work on]
- Helpful Tips: [1-2 specific pronunciation tips]
- Encouragement: [positive, motivational message]
"""

            user_prompt = f"""Expected Hindi: {expected_text}

User's Spoken Hindi: {spoken_text}

Please analyze the pronunciation differences and provide helpful feedback."""

            messages_formatted = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ]

            try:
                input_ids = self.hindi_tokenizer.apply_chat_template(
                    messages_formatted,
                    add_generation_prompt=True,
                    return_tensors="pt"
                ).to(self.hindi_model.device)

                outputs = self.hindi_model.generate(
                    input_ids,
                    max_new_tokens=300,
                    do_sample=True,
                    temperature=0.7,
                    repetition_penalty=1.1,
                    eos_token_id=self.hindi_tokenizer.eos_token_id,
                )

                feedback = self.hindi_tokenizer.decode(
                    outputs[0][input_ids.shape[-1]:], 
                    skip_special_tokens=True
                )
                return feedback
            except Exception as e:
                print(f"Error generating pronunciation feedback: {e}")
                return f"""Pronunciation Score: 3/5

What You Did Well:
- You attempted to speak in Hindi, which is great progress!

Areas for Improvement:
- Some sounds might need more practice

Helpful Tips:
- Listen to native speakers and try to imitate the sounds
- Practice daily, even for just a few minutes

Encouragement:
- Learning pronunciation takes time. Keep practicing and you'll improve!

Error note: {str(e)}"""
    
    # Bundle the shared model, tokenizer and RAG system into the agent helper.
    app.fixed_agents = FixedLangGraphAgents(
        hindi_model=app.hindi_model,
        hindi_tokenizer=app.hindi_tokenizer,
        rag_system=app.rag_system
    )

    # Speech helpers need the tutor's most recent line; expose it as a
    # zero-argument callable on the app. It yields "" unless the last entry of
    # app.dialogue_history exists and has role "assistant".
    app.get_latest_tutor_message = lambda: (
        app.dialogue_history[-1]["content"]
        if getattr(app, 'dialogue_history', None) and app.dialogue_history[-1]["role"] == "assistant"
        else ""
    )
    
    # Speech helpers below are plain closures over `app` (they take no `self`
    # argument); they are attached to the app as attributes further down.
    def app_transcribe_audio(audio_path):
        """Transcribe an audio file with the app's Whisper model.

        Returns the transcribed text. When speech is disabled or the model is
        missing, returns "Speech recognition is disabled"; when transcription
        raises, the error is printed and returned as a string instead.
        """
        speech_ready = (
            app.speech_enabled
            and hasattr(app, 'speech_model')
            and app.speech_model is not None
        )
        if not speech_ready:
            return "Speech recognition is disabled"

        try:
            transcription = app.speech_model.transcribe(audio_path)
            return transcription["text"]
        except Exception as e:
            print(f"Error transcribing audio: {e}")
            return f"Error transcribing audio: {str(e)}"
    
    def app_text_to_speech():
        """Synthesize the tutor's latest message to an MP3 file with gTTS.

        Returns the path of the written file, or None when speech is disabled,
        there is no tutor message, or synthesis fails.
        """
        if not app.speech_enabled:
            return None

        latest_message = app.get_latest_tutor_message()
        if not latest_message:
            return None

        # A tutor reply normally has a Roman line followed by a Devanagari
        # line; when at least two lines exist, the first two are both spoken.
        message_lines = latest_message.strip().split("\n")
        text_for_tts = (
            f"{message_lines[0]} {message_lines[1]}"
            if len(message_lines) >= 2
            else latest_message
        )

        try:
            # Imported lazily so a missing gTTS package only disables playback.
            from gtts import gTTS
            output_path = "/kaggle/working/tutor_speak.mp3"
            gTTS(text=text_for_tts, lang="hi", slow=False).save(output_path)
            return output_path
        except Exception as e:
            print(f"Error generating speech: {e}")
            return None
    
    def app_provide_pronunciation_feedback(audio_path, expected_text=None):
        """Transcribe a recording and generate pronunciation feedback for it.

        Args:
            audio_path: path of the recorded audio file.
            expected_text: phrase the learner was meant to say. When None, the
                first line of the latest tutor message is used ("" if none).

        Returns:
            tuple: ``(feedback, transcribed_text, expected_text)``. When speech
            is disabled the tuple is ``("Speech recognition is disabled", "", "")``;
            when transcription or feedback generation fails it is
            ``("Error analyzing pronunciation: ...", "", expected_text)``.
        """
        if not app.speech_enabled or not hasattr(app, 'speech_model') or app.speech_model is None:
            return "Speech recognition is disabled", "", ""

        # If no expected text provided, use the last tutor message
        if expected_text is None:
            expected_text = app.get_latest_tutor_message()
            # Just take the first line (Roman transliteration)
            expected_text = expected_text.split("\n")[0] if expected_text else ""

        try:
            # Call the speech model directly rather than app_transcribe_audio():
            # that helper swallows errors and returns an error *string*, which
            # would then be scored as if it were the learner's speech. Here a
            # transcription failure raises and is reported by the except below.
            transcribed_text = app.speech_model.transcribe(audio_path)["text"]

            # current_scenario may be missing or None before a scenario starts.
            current_scenario = getattr(app, "current_scenario", None) or {}

            feedback = app.fixed_agents.generate_pronunciation_feedback(
                transcribed_text,
                expected_text,
                current_scenario.get("proficiency_level", "beginner")
            )

            return feedback, transcribed_text, expected_text
        except Exception as e:
            print(f"Error providing pronunciation feedback: {e}")
            return f"Error analyzing pronunciation: {str(e)}", "", expected_text
    
    # Attach the fixed methods to the app
    app.transcribe_audio = app_transcribe_audio
    app.text_to_speech = app_text_to_speech
    app.provide_pronunciation_feedback = app_provide_pronunciation_feedback
    
    # Mark as loaded but with fixed approach
    app.model_loaded = True
    app.using_fixed_approach = True
    
    print(f"Fixed manual initialization with speech features complete! GPU memory: {torch.cuda.memory_allocated()/1024**3:.2f}GB")
    
    return app

In [None]:
# Run the fixed manual initialization
# Build the app once for this cell; every callback defined below closes over it.
app = manual_initialize_fixed()

# Create interface with the manually initialized app
interface = gr.Blocks(title="Enhanced Hindi Conversation Practice", theme=gr.themes.Soft())

# Define the interface with the fixed functions and speech features.
# Components are created in the order they should appear on the page.
with interface:
    gr.Markdown("# 🇮🇳 Enhanced Hindi Conversation Practice (Fixed Mode)")
    gr.Markdown("Practice Hindi in dynamic roleplay scenarios with AI tutoring and speech recognition")
    
    with gr.Row():
        # Left column: status, scenario controls, scenario details and debug panel.
        with gr.Column(scale=1):
            status_msg = gr.Markdown("System status: Fixed initialization complete")
            
            with gr.Accordion("New Scenario", open=True):
                proficiency_selector = gr.Radio(
                    choices=["beginner", "intermediate", "advanced"],
                    label="Choose your proficiency level",
                    value="beginner"
                )
                start_scenario_button = gr.Button("Start New Scenario", variant="primary")
            
            # Filled in by the start-scenario callback.
            scenario_description = gr.Markdown("System initialized. Start a scenario to begin.")
            vocabulary_section = gr.Markdown("Vocabulary will appear here.")
            
            with gr.Accordion("Debug Information", open=False):
                debug_info = gr.Textbox(
                    label="Debug Output",
                    value="Debug information will appear here",
                    interactive=False,
                    lines=10
                )
                debug_refresh = gr.Button("Refresh Debug Info")
        
        # Right column: chat window, message entry, tutor audio and pronunciation practice.
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(
                height=500,
                show_label=False,
                elem_id="hindi_langraph_chatbot"
            )
            
            # Message entry row: textbox, send button and microphone button.
            with gr.Row():
                user_input = gr.Textbox(
                    placeholder="Type your response here...",
                    show_label=False,
                    scale=7
                )
                send_button = gr.Button("Send", scale=1)
                mic_button = gr.Button("🎤", size="sm", scale=1)
            
            # Text-to-speech playback of the tutor's latest message.
            with gr.Row():
                speak_button = gr.Button("🔊 Hear Tutor")
                tutor_audio = gr.Audio(label="", autoplay=True)
            
            # Add pronunciation practice section
            with gr.Accordion("Pronunciation Practice", open=True):
                gr.Markdown("### Record yourself saying the tutor's phrase to get feedback")
                pronunciation_audio = gr.Audio(
                    sources=["microphone"], 
                    type="filepath", 
                    label="Record your voice"
                )
                feedback_button = gr.Button("Get Pronunciation Feedback", variant="primary")
                
                # Side-by-side read-only boxes: transcription vs. expected phrase.
                with gr.Row():
                    with gr.Column(scale=1):
                        transcribed_text = gr.Textbox(
                            label="What you said (transcribed)",
                            placeholder="Your speech will appear here after recording",
                            interactive=False
                        )
                    with gr.Column(scale=1):
                        expected_text = gr.Textbox(
                            label="What you should say",
                            placeholder="The expected Hindi phrase",
                            interactive=False
                        )
                
                pronunciation_feedback = gr.Markdown("Record your voice and click 'Get Pronunciation Feedback'")
    
    # Audio popup for recording via mic button; hidden until the mic button is clicked.
    with gr.Column(visible=False) as audio_popup:
        gr.Markdown("### Speak Hindi")
        audio_input = gr.Audio(sources=["microphone"], type="filepath", label="Record your voice")
        submit_audio = gr.Button("Submit Recording")
    
    # Connect event handlers
    def check_start_scenario(proficiency):
        """Start a new scenario and return values for the four bound outputs.

        Returns a 4-tuple: chat history, scenario description, vocabulary text
        and a debug message. On any error the chat shows the error text and the
        debug slot carries the full traceback.
        """
        try:
            print(f"Starting new scenario with proficiency level: {proficiency}")
            scenario_outputs = fixed_start_scenario(app, proficiency)
            chat_history = scenario_outputs[0]
            description = scenario_outputs[1]
            vocabulary = scenario_outputs[2]
            debug_message = f"Generated scenario for {proficiency} level"
            return chat_history, description, vocabulary, debug_message
        except Exception as e:
            import traceback
            error_details = traceback.format_exc()
            print(f"Error in start_new_scenario: {str(e)}")
            print(f"Error details: {error_details}")
            error_chat = [(None, f"Error: {str(e)}")]
            return error_chat, "Error generating scenario", "Try again", error_details
    
    def refresh_debug():
        """Return a plain-text snapshot of the app state for the debug panel."""
        report_lines = [
            "System Status:\n",
            f"- Using fixed approach: {getattr(app, 'using_fixed_approach', False)}\n",
            f"- Current scenario: {getattr(app, 'current_scenario', {}).get('scenario_title', 'None')}\n",
            f"- RAG in dummy mode: {getattr(app.rag_system, 'dummy_mode', True)}\n",
            f"- Dialog history length: {len(getattr(app, 'dialogue_history', []))}\n",
            f"- Speech enabled: {getattr(app, 'speech_enabled', False)}\n",
        ]

        # GPU usage is only reported when CUDA is available.
        if torch.cuda.is_available():
            report_lines.append(
                f"- GPU memory: {torch.cuda.memory_allocated()/1024**3:.2f}GB / {torch.cuda.get_device_properties(0).total_memory/1024**3:.2f}GB\n"
            )

        return "".join(report_lines)
    
    def get_pronunciation_feedback(audio_path):
        """UI callback: return (feedback, transcription, expected phrase).

        With no recording, a prompt to record first is returned. Any error is
        printed and reported in the feedback slot with the other two slots empty.
        """
        if audio_path:
            try:
                # Expected phrase = first line of the tutor's latest message.
                latest_message = app.get_latest_tutor_message()
                expected = latest_message.split('\n')[0] if latest_message else ""

                # Delegate transcription and scoring to the app-level helper.
                feedback, transcribed, expected = app.provide_pronunciation_feedback(audio_path, expected)
                return feedback, transcribed, expected
            except Exception as e:
                print(f"Error getting pronunciation feedback: {e}")
                return f"Error analyzing pronunciation: {str(e)}", "", ""

        return "Please record audio first", "", ""
    
    def text_to_speech():
        """UI callback: return the audio file for the tutor's last message, or None on error."""
        audio_file = None
        try:
            audio_file = app.text_to_speech()
        except Exception as e:
            print(f"Error generating speech: {e}")
        return audio_file
    
    def transcribe_audio_input(audio_path):
        """UI callback: turn a microphone recording into text for the message box.

        Returns "" when nothing was recorded and "Error: ..." when the app's
        transcription helper raises.
        """
        if audio_path:
            try:
                return app.transcribe_audio(audio_path)
            except Exception as e:
                print(f"Error transcribing audio: {e}")
                return f"Error: {str(e)}"

        return ""
    
    # --- Scenario and debug controls ---
    start_scenario_button.click(
        check_start_scenario,
        inputs=[proficiency_selector],
        outputs=[chatbot, scenario_description, vocabulary_section, debug_info]
    )

    debug_refresh.click(refresh_debug, outputs=[debug_info])

    # --- Sending a message ---
    # Pressing Enter in the textbox and clicking "Send" do the same thing:
    # forward the message to the app, then clear the textbox. The callbacks
    # stay lambdas and are registered in the same order as before.
    _fixed_mode_send = lambda user_input, history: fixed_send_message(app, user_input, history)
    _clear_user_input = lambda: ""

    for _send_trigger in (user_input.submit, send_button.click):
        _send_trigger(
            _fixed_mode_send,
            inputs=[user_input, chatbot],
            outputs=[chatbot]
        ).then(
            _clear_user_input,
            outputs=[user_input]
        )

    # --- Speech and pronunciation features ---
    speak_button.click(text_to_speech, outputs=[tutor_audio])

    feedback_button.click(
        get_pronunciation_feedback,
        inputs=[pronunciation_audio],
        outputs=[pronunciation_feedback, transcribed_text, expected_text]
    )

    # --- Microphone popup for message input ---
    # The mic button reveals the hidden recorder; submitting a recording puts
    # the transcription into the textbox and hides the recorder again.
    mic_button.click(lambda: gr.update(visible=True), outputs=[audio_popup])

    submit_audio.click(
        transcribe_audio_input,
        inputs=[audio_input],
        outputs=[user_input]
    ).then(
        lambda: gr.update(visible=False),
        outputs=[audio_popup]
    )

# Launch the interface on port 7862.
# NOTE(review): share=True requests a public share link and server_name="0.0.0.0"
# presumably binds all network interfaces - confirm this exposure is intended,
# since the app is served without any authentication argument here.
interface.launch(share=True, server_name="0.0.0.0", server_port=7862)

In [None]:
# interface.launch(share=True, server_name="0.0.0.0", server_port=7862)