In [3]:
!pip install speechrecognition gtts pyttsx3 transformers torch pandas openpyxl

Collecting openpyxl
  Downloading openpyxl-3.1.5-py2.py3-none-any.whl.metadata (2.5 kB)
Collecting et-xmlfile (from openpyxl)
  Downloading et_xmlfile-2.0.0-py3-none-any.whl.metadata (2.7 kB)
Downloading openpyxl-3.1.5-py2.py3-none-any.whl (250 kB)
Downloading et_xmlfile-2.0.0-py3-none-any.whl (18 kB)
Installing collected packages: et-xmlfile, openpyxl

   ---------------------------------------- 0/2 [et-xmlfile]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [ope

In [9]:
import speech_recognition as sr
from gtts import gTTS
import os
import time
import pandas as pd
from transformers import pipeline
# Optional: import for RAG if you start building it later
# from sentence_transformers import SentenceTransformer 
# import faiss

In [10]:
# Initialize Model and Global Variables
try:
    QA_MODEL = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")
except Exception as e:
    print(f"ERROR: Could not load QA model. {e}")
    QA_MODEL = None

KNOWLEDGE_BASE_DF = None
LAST_RESPONSE_TEXT = ""

def load_knowledge_base(file_path="tutor_knowledge.xlsx"):
    """Loads the Excel sheet into a pandas DataFrame."""
    global KNOWLEDGE_BASE_DF
    try:
        KNOWLEDGE_BASE_DF = pd.read_excel(file_path, header=0)
        print(f"‚úÖ Loaded {len(KNOWLEDGE_BASE_DF)} entries from {file_path}.")
        return True
    except FileNotFoundError:
        print(f"‚ùå ERROR: Excel file not found at {file_path}")
        return False
    except Exception as e:
        print(f"‚ùå ERROR loading Excel: {e}")
        return False

Device set to use cpu


In [15]:
# --- 2. VOICE INPUT MODULE ---

def listen():
    """Captures audio from the microphone and converts it to text."""
    r = sr.Recognizer()
    with sr.Microphone() as source:
        print("\n\nüéôÔ∏è Listening...")
        r.adjust_for_ambient_noise(source, duration=0.5)
        try:
            audio = r.listen(source, timeout=5, phrase_time_limit=10)
        except sr.WaitTimeoutError:
            print("‚ùå No speech detected within time limit.")
            return ""

    try:
        # Use Google Speech Recognition for transcription (requires internet)
        text = r.recognize_google(audio)
        print(f"üó£Ô∏è You said: {text}")
        return text
    except sr.UnknownValueError:
        print("‚ùå Sorry, could not recognize speech.")
        return ""
    except sr.RequestError:
        print("‚ùå Could not request results from Google Speech Recognition service; check internet.")
        return ""

In [11]:
def braille_convert_and_print(last_text_response):
    """
    Finds the associated Braille output from the DF and simulates printing.
    """
    if KNOWLEDGE_BASE_DF is None:
        return speak("Knowledge base not loaded to find Braille content.")
        
    # 1. Look up the key term in the DataFrame
    # Find rows where the generated response matches the last spoken text
    match = KNOWLEDGE_BASE_DF[
        KNOWLEDGE_BASE_DF['Generated Audio Response (Text for TTS)'] == last_text_response
    ]
    
    if not match.empty and match.iloc[0]['Braille Conversion Required (Yes/No)'].lower() == 'yes':
        key_term = match.iloc[0]['Braille Output (Key Term/Formula/Code)']
        
        # --- Actual Braille Simulation ---
        print("-" * 50)
        print("üñ®Ô∏è TACTILE OUTPUT INITIATED üñ®Ô∏è")
        print(f"   Key Term for Braille: {key_term}")
        # NOTE: For real implementation, replace with pybrl/pyserial logic
        # For now, we simulate success:
        print(f"   Simulated Braille Embossing for: {key_term}")
        print("-" * 50)
        speak(f"The key information: {key_term} has been sent to the Braille embosser.")
        
    else:
        speak("I found no matching Braille content for the last answer.")

In [16]:
# --- 3. TEXT-TO-SPEECH (TTS) MODULE ---

def speak(text):
    """Converts text to speech using gTTS, saves it, and plays it."""
    global LAST_RESPONSE_TEXT
    
    if not text:
        return
        
    LAST_RESPONSE_TEXT = text # Store the response for command handling
    
    try:
        # 1. Convert
        tts = gTTS(text=text, lang='en')
        
        # 2. Save (use a timestamp for a unique file name)
        filename = f"response_{int(time.time())}.mp3"
        tts.save(filename)
        
        # 3. Play (using standard Windows 'start' command)
        # NOTE: If on Mac/Linux, replace 'start' with 'afplay' or 'mpg123'
        print(f"üí° AI Tutor: {text}")
        os.system(f"start {filename}") 

        # 4. Optional: Clean up the audio file after a short pause
        # time.sleep(len(text) / 10 + 2) # Wait proportional to length
        # os.remove(filename)
        
    except Exception as e:
        print(f"‚ùå Error during text-to-speech: {e}")

In [18]:
def answer_question(question):
    """
    1. Simulates retrieval by finding the closest entry in the DataFrame based on keyword/intent.
    2. Uses the retrieved context for the QA model to extract the final answer.
    """
    # Ensure the global QA_MODEL and KNOWLEDGE_BASE_DF are available
    global QA_MODEL 
    global KNOWLEDGE_BASE_DF

    if KNOWLEDGE_BASE_DF is None:
        return "Knowledge base not ready."
        
    question_lower = question.lower()
    
    # 1. RETRIEVAL LOGIC (Searching the DataFrame)
    
    # Check the 'NLP Intent / Target Topic' column first for broader matching
    matching_rows = KNOWLEDGE_BASE_DF[
        KNOWLEDGE_BASE_DF['NLP Intent / Target Topic'].str.lower().str.contains(question_lower, na=False)
    ]
    
    # Check the 'User Voice Input' column as a secondary check if no intent match is found
    if matching_rows.empty:
        matching_rows = KNOWLEDGE_BASE_DF[
            KNOWLEDGE_BASE_DF['User Voice Input (Engineering Topic)'].str.lower().str.contains(question_lower, na=False)
        ]
        
    if not matching_rows.empty:
        # Use the context associated with the best match (first one found)
        context = matching_rows.iloc[0]['Context/Source (Pre-stored or AI-Gen)']
        print(f"   [Retrieved Context from Excel for: {matching_rows.iloc[0]['Query ID']}]")
    else:
        # FALLBACK: Combine only the first 5 contexts to avoid truncation issues
        print("   [Using combined context fallback for general query]")
        # .head(5) is critical here to limit the context length
        context = " ".join(KNOWLEDGE_BASE_DF['Context/Source (Pre-stored or AI-Gen)'].astype(str).head(5).tolist())

    # 2. QUESTION ANSWERING (Calling the NLP Model)
    
    # Pass the retrieved context and the user's question to the QA model
    try:
        if QA_MODEL is None:
            return "QA model is not initialized."
            
        result = QA_MODEL({
            "context": context,
            "question": question
        })
        
        # Return the extracted answer
        return result['answer']
    
    except Exception as e:
        return f"An error occurred during QA processing: {e}"

In [20]:
if __name__ == "__main__":
    # 1. Load Data
    if not load_knowledge_base(): 
        print("\nFATAL ERROR: Cannot proceed without the knowledge base. Check file name/path.")
    else:
        # 2. Start Session
        print("ü§ñ AI Tutor is ready. Speak your engineering question or say 'quit'.")
        speak("Hello! I am your AI engineering tutor. How can I help you learn today?")
        
        while True:
            query = listen()
            
            if not query:
                continue
                
            query_lower = query.lower()
            
            # 3. Command Handling
            if query_lower in ["exit", "quit", "stop", "i am done"]:
                speak("Goodbye! Happy studying.")
                print("üëã Session ended.")
                break
            
            elif "read again" in query_lower or "say that again" in query_lower:
                if LAST_RESPONSE_TEXT:
                    speak(LAST_RESPONSE_TEXT)
                else:
                    speak("I don't have a previous response to repeat.")
                continue
                
            elif "braille" in query_lower or "tactile" in query_lower or "print" in query_lower:
                if LAST_RESPONSE_TEXT:
                    braille_convert_and_print(LAST_RESPONSE_TEXT)
                else:
                    speak("I need to answer a question before I can convert a response to Braille.")
                continue

            # 4. Question Answering
            response = answer_question(query)
            speak(response)

‚úÖ Loaded 100 entries from tutor_knowledge.xlsx.
ü§ñ AI Tutor is ready. Speak your engineering question or say 'quit'.
üí° AI Tutor: Hello! I am your AI engineering tutor. How can I help you learn today?


üéôÔ∏è Listening...
‚ùå Sorry, could not recognize speech.


üéôÔ∏è Listening...
üó£Ô∏è You said: what is Doppler
   [Using combined context fallback for general query]




üí° AI Tutor: System Help


üéôÔ∏è Listening...
‚ùå Sorry, could not recognize speech.


üéôÔ∏è Listening...
üó£Ô∏è You said: what is ideal gas
   [Using combined context fallback for general query]




üí° AI Tutor: Electronics Module


üéôÔ∏è Listening...
‚ùå Sorry, could not recognize speech.


üéôÔ∏è Listening...
‚ùå Sorry, could not recognize speech.


üéôÔ∏è Listening...
üó£Ô∏è You said: word for and
   [Using combined context fallback for general query]




üí° AI Tutor: System Help


üéôÔ∏è Listening...
üó£Ô∏è You said: what are the steps of water cycle
   [Using combined context fallback for general query]




üí° AI Tutor: Math Module AI-Generated Summary Pre-stored: System Help


üéôÔ∏è Listening...
‚ùå Sorry, could not recognize speech.


üéôÔ∏è Listening...
üó£Ô∏è You said: quit
üí° AI Tutor: Goodbye! Happy studying.
üëã Session ended.


In [2]:
import speech_recognition as sr
from gtts import gTTS
import os
import time
import pandas as pd
from transformers import pipeline
import whisper # Added for local STT
import numpy as np # Added for robust handling

# --- GLOBAL CONFIGURATION AND KNOWLEDGE ---
# List of technical keywords to guide the RAG system to the correct module.
TECHNICAL_KEYWORDS = {
    'ohm': 'NLP Intent / Target Topic',
    'voltage': 'NLP Intent / Target Topic',
    'pn junction': 'NLP Intent / Target Topic',
    'semiconductor': 'NLP Intent / Target Topic',
    'linear equation': 'NLP Intent / Target Topic',
    'matrix': 'NLP Intent / Target Topic',
    'transformer': 'NLP Intent / Target Topic',
    'logic gate': 'NLP Intent / Target Topic',
    'and or': 'NLP Intent / Target Topic',
    'braille': 'NLP Intent / Target Topic',
    'replay': 'NLP Intent / Target Topic',
    'last explanation': 'NLP Intent / Target Topic',
    'if statement': 'NLP Intent / Target Topic',
}

# --- INITIALIZATION ---
# 1. QA Model (RAG Generation)
try:
    # DistilBERT is already small and CPU-friendly, keeping it as the RAG generator.
    QA_MODEL = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")
    print("‚úÖ QA Model (DistilBERT) initialized.")
except Exception as e:
    print(f"‚ùå ERROR: Could not load QA model. {e}")
    QA_MODEL = None

# 2. STT Model (Local, CPU Optimized)
try:
    # Using the 'tiny' model for maximum speed and minimum RAM/CPU requirements.
    WHISPER_MODEL = whisper.load_model("tiny") 
    
    # Context prompt to guide transcription towards engineering vocabulary
    WHISPER_PROMPT = "The speaker is discussing engineering topics, P-N junction, Ohm's Law, linear equations, C++ syntax, logic gates, transformers, voltage, and current."
    print("‚úÖ Whisper Model (Tiny, CPU Optimized) loaded.")
except Exception as e:
    # This error often occurs if Pytorch or FFmpeg is missing.
    print(f"‚ùå ERROR: Could not load Whisper model. Switching to Google STT fallback. {e}")
    WHISPER_MODEL = None

KNOWLEDGE_BASE_DF = None
LAST_RESPONSE_TEXT = ""

def load_knowledge_base(file_path="tutor_knowledge.xlsx"):
    """Loads the Excel sheet into a pandas DataFrame."""
    global KNOWLEDGE_BASE_DF
    try:
        KNOWLEDGE_BASE_DF = pd.read_excel(file_path, header=0)
        print(f"‚úÖ Loaded {len(KNOWLEDGE_BASE_DF)} entries from {file_path}.")
        return True
    except FileNotFoundError:
        print(f"‚ùå ERROR: Excel file not found at {file_path}")
        return False
    except Exception as e:
        print(f"‚ùå ERROR loading Excel: {e}")
        return False

# --- 2. VOICE INPUT MODULE (MODIFIED FOR LOCAL WHISPER) ---

def listen():
    """Captures audio and converts it to text using local Whisper (CPU preferred) or Google STT."""
    r = sr.Recognizer()
    with sr.Microphone() as source:
        print("\n\nüéôÔ∏è Listening...")
        r.adjust_for_ambient_noise(source, duration=0.5)
        try:
            audio = r.listen(source, timeout=8, phrase_time_limit=15) 
        except sr.WaitTimeoutError:
            print("‚ùå No speech detected within time limit.")
            return ""

    try:
        # --- LOCAL WHISPER STT (CPU PRIORITY) ---
        if WHISPER_MODEL:
            # Save audio temporarily for Whisper to read
            temp_wav_file = "temp_audio_stt.wav"
            with open(temp_wav_file, "wb") as f:
                f.write(audio.get_wav_data())

            # Use Whisper for local, context-aware transcription
            print("  [STT] Processing locally with Whisper (Tiny)...")
            result = WHISPER_MODEL.transcribe(
                temp_wav_file, 
                initial_prompt=WHISPER_PROMPT, 
                language='en',
                # Beam size 3 is faster than default 5 on CPU
                beam_size=3
            )
            text = result["text"].strip()
            os.remove(temp_wav_file) # Clean up
        
        else:
            # --- GOOGLE STT FALLBACK (Internet Required) ---
            print("  [STT] Falling back to Google STT (Internet Required)...")
            text = r.recognize_google(audio)
            
        print(f"üó£Ô∏è You said: {text}")
        return text
        
    except sr.UnknownValueError:
        print("‚ùå Sorry, could not recognize speech. (Tip: Try speaking slower, or rephrase technical terms.)") 
        return ""
    except sr.RequestError:
        print("‚ùå Could not request results from Google Speech Recognition service; check internet.")
        return ""
    except Exception as e:
        print(f"‚ùå STT Error: {e}")
        return ""

# --- 3. TEXT-TO-SPEECH (TTS) MODULE ---

def speak(text):
    """Converts text to speech using gTTS, saves it, and plays it."""
    global LAST_RESPONSE_TEXT
    
    if not text:
        return
        
    LAST_RESPONSE_TEXT = text # Store the response for command handling
    
    try:
        tts = gTTS(text=text, lang='en')
        filename = f"response_{int(time.time())}.mp3"
        tts.save(filename)
        
        # NOTE: The print statement now includes the source/context ID for observability!
        print(f"üí° AI Tutor: {text}")
        os.system(f"start {filename}")  

    except Exception as e:
        print(f"‚ùå Error during text-to-speech: {e}")

# --- 4. BRAILLE MODULE ---

def braille_convert_and_print(last_text_response):
    """
    Finds the associated Braille output from the DF and simulates printing.
    """
    if KNOWLEDGE_BASE_DF is None:
        return speak("Knowledge base not loaded to find Braille content.")
        
    # Search the DF for the row that generated the last audio response
    # We strip the source ID "(E00X)" from the start of the response text before lookup
    clean_response = last_text_response.split(') ', 1)[-1]
    
    match = KNOWLEDGE_BASE_DF[
        KNOWLEDGE_BASE_DF['Generated Audio Response (Text for TTS)'] == clean_response
    ]
    
    # Use .iloc[0] for safety and .any() for the boolean Series result
    if not match.empty and match['Braille Conversion Required (Yes/No)'].str.lower().str.contains('yes', na=False).any():
        key_term = match.iloc[0]['Braille Output (Key Term/Formula/Code)']
        
        # --- Actual Braille Simulation ---
        print("-" * 50)
        print("üñ®Ô∏è TACTILE OUTPUT INITIATED üñ®Ô∏è")
        print(f"    Key Term for Braille: {key_term}")
        # NOTE: Using the standard LaTeX notation for clean output (e.g., V=IR)
        print("-" * 50)
        speak(f"The key information: {key_term} has been sent to the Braille embosser.")
        
    else:
        speak("I found no matching Braille content for the last answer.")

# --- 5. RAG CORE MODULE ---

def find_best_context(question):
    """
    NEW TARGETED RETRIEVAL: Uses keyword mapping for high-confidence RAG retrieval.
    """
    question_lower = question.lower()
    
    # 1. Check for direct keyword matches using the predefined TECHNICAL_KEYWORDS dictionary
    for term, target_col in TECHNICAL_KEYWORDS.items():
        # Check if the technical term is present in the user's question
        if term in question_lower:
            # Look for the row where the keyword appears in the designated target column
            # Use regex=True for robust matching
            matching_rows = KNOWLEDGE_BASE_DF[
                KNOWLEDGE_BASE_DF[target_col].astype(str).str.lower().str.contains(term, regex=True, na=False)
            ]
            
            if not matching_rows.empty:
                # SUCCESS: Return the context of the highest matching row (first one found)
                row = matching_rows.iloc[0]
                context = row['Context/Source (Pre-stored or AI-Gen)']
                source_id = row['Query ID']
                
                print(f"  [RAG STEP 1] Targeted Retrieval Success using '{term}'. Document ID: {source_id}")
                return context, source_id
                
    # 2. FALLBACK: Search the primary content column for the full question
    print("  [RAG STEP 1] Fallback: No targeted keyword found. Searching full content.")
    
    # Search the 'User Voice Input' column for the complete, transcribed user query
    matching_rows_fallback = KNOWLEDGE_BASE_DF[
        KNOWLEDGE_BASE_DF['User Voice Input (Engineering Topic)'].astype(str).str.lower().str.contains(question_lower, regex=True, na=False)
    ]
    
    if not matching_rows_fallback.empty:
        row = matching_rows_fallback.iloc[0]
        context = row['Context/Source (Pre-stored or AI-Gen)']
        source_id = row['Query ID']
        
        print(f"  [RAG STEP 1] Fallback Success. Context found by matching full query: {source_id}")
        return context, source_id
        
    # 3. FAILURE: If RAG fails entirely.
    return None, None 

def answer_question(question):
    """
    1. Retrieves the most relevant context using the new keyword logic.
    2. Uses the retrieved context for the QA model to extract the final answer.
    """
    global QA_MODEL
    global KNOWLEDGE_BASE_DF

    if KNOWLEDGE_BASE_DF is None:
        return "Knowledge base not ready."

    # --- 1. RETRIEVAL (RAG STEP) ---
    context, source_id = find_best_context(question)
    
    if context is None:
        # If retrieval fails, return a knowledge base not found message
        return "I apologize, that specific engineering topic is not yet in my knowledge base. Try asking about Ohm's Law or P-N junctions."
        
    # --- 2. GENERATION (QA MODEL) ---
    try:
        if QA_MODEL is None:
            return "QA model is not initialized."
            
        result = QA_MODEL({
            "context": context,
            "question": question
        })
        
        # RAG OBSERVABILITY: Print the source used for the answer
        print(f"  [RAG STEP 2] Answer generated from Source ID: {source_id}")
        
        # Append the source to the final answer text for visual proof in the demo
        final_answer = f"({source_id}) {result['answer']}"
        return final_answer
        
    except Exception as e:
        return f"An error occurred during QA processing: {e}"

if __name__ == "__main__":
    # 1. Load Data
    if not load_knowledge_base():  
        print("\nFATAL ERROR: Cannot proceed without the knowledge base. Check file name/path.")
    else:
        # 2. Start Session
        print("\n" + "="*50)
        print("ü§ñ AI Tutor is ready. Speak your engineering question.")
        print("="*50)
        speak("Hello! I am your AI engineering tutor. How can I help you learn today?")
        
        while True:
            query = listen()
            
            if not query:
                continue
                
            query_lower = query.lower()
            
            # 3. Command Handling
            if query_lower in ["exit", "quit", "stop", "i am done"]:
                speak("Goodbye! Happy studying.")
                print("üëã Session ended.")
                break
                
            elif "read again" in query_lower or "say that again" in query_lower or "replay" in query_lower:
                if LAST_RESPONSE_TEXT:
                    # NOTE: Replaying last response text without the source ID prefix.
                    clean_response = LAST_RESPONSE_TEXT.split(') ', 1)[-1]
                    speak(f"Replaying last response: {clean_response}")
                else:
                    speak("I don't have a previous response to repeat.")
                continue
                
            elif "braille" in query_lower or "tactile" in query_lower or "print" in query_lower:
                if LAST_RESPONSE_TEXT:
                    braille_convert_and_print(LAST_RESPONSE_TEXT)
                else:
                    speak("I need to answer a question before I can convert a response to Braille.")
                continue

            # 4. Question Answering (RAG Pipeline)
            response = answer_question(query)
            speak(response)


Device set to use cpu


‚úÖ QA Model (DistilBERT) initialized.


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 72.1M/72.1M [00:03<00:00, 20.6MiB/s]


‚úÖ Whisper Model (Tiny, CPU Optimized) loaded.
‚úÖ Loaded 100 entries from tutor_knowledge.xlsx.

ü§ñ AI Tutor is ready. Speak your engineering question.
üí° AI Tutor: Hello! I am your AI engineering tutor. How can I help you learn today?


üéôÔ∏è Listening...
  [STT] Processing locally with Whisper (Tiny)...




üó£Ô∏è You said: Let's go.
  [RAG STEP 1] Fallback: No targeted keyword found. Searching full content.
üí° AI Tutor: I apologize, that specific engineering topic is not yet in my knowledge base. Try asking about Ohm's Law or P-N junctions.


üéôÔ∏è Listening...
  [STT] Processing locally with Whisper (Tiny)...




üó£Ô∏è You said: Explain Ohm's Law.
  [RAG STEP 1] Fallback: No targeted keyword found. Searching full content.
üí° AI Tutor: I apologize, that specific engineering topic is not yet in my knowledge base. Try asking about Ohm's Law or P-N junctions.


üéôÔ∏è Listening...
  [STT] Processing locally with Whisper (Tiny)...




üó£Ô∏è You said: The speaker is discussing engineering topics, C++ syntax, C++ syntax, C++ syntax, C++ syntax,
  [RAG STEP 1] Fallback: No targeted keyword found. Searching full content.
üí° AI Tutor: I apologize, that specific engineering topic is not yet in my knowledge base. Try asking about Ohm's Law or P-N junctions.


üéôÔ∏è Listening...
  [STT] Processing locally with Whisper (Tiny)...




üó£Ô∏è You said: Quick...
  [RAG STEP 1] Fallback: No targeted keyword found. Searching full content.
üí° AI Tutor: I apologize, that specific engineering topic is not yet in my knowledge base. Try asking about Ohm's Law or P-N junctions.


üéôÔ∏è Listening...
  [STT] Processing locally with Whisper (Tiny)...




üó£Ô∏è You said: Exit.
  [RAG STEP 1] Fallback: No targeted keyword found. Searching full content.
üí° AI Tutor: I apologize, that specific engineering topic is not yet in my knowledge base. Try asking about Ohm's Law or P-N junctions.


üéôÔ∏è Listening...


KeyboardInterrupt: 

In [3]:
!pip install pyttsx3



In [4]:
!pip install speechrecognition pyttsx3 transformers torch pandas openpyxl whisper

Collecting whisper
  Downloading whisper-1.1.10.tar.gz (42 kB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Building wheels for collected packages: whisper
  Building wheel for whisper (pyproject.toml): started
  Building wheel for whisper (pyproject.toml): finished with status 'done'
  Created wheel for whisper: filename=whisper-1.1.10-py3-none-any.whl size=41476 sha256=9e081f1db4b3ceaeac1e0400de7e2288f83f7c698fd690bbc1b04007e6fe2d50
  Stored in directory: c:\users\s bramhanathkar\appdata\local\pip\cache\wheels\7e\1e\f0\d36b92489c74925c5aa1aeb01d30f39ba018d2a1914e79ac36
Successfully built whisper
Installing collected packages: whisper
Successfully installed whisper-1.1.10


In [5]:
import speech_recognition as sr
# We replace gtts with pyttsx3 for offline TTS
import pyttsx3 
import os
import time
import pandas as pd
from transformers import pipeline
import whisper
import numpy as np 
import re # Import the regular expression module

# --- GLOBAL CONFIGURATION AND KNOWLEDGE ---
# List of technical keywords to guide the RAG system to the correct module.
TECHNICAL_KEYWORDS = {
    # Engineering concepts prioritize searching the 'User Voice Input' column 
    'ohm': 'User Voice Input (Engineering Topic)',
    'voltage': 'User Voice Input (Engineering Topic)',
    'formula': 'User Voice Input (Engineering Topic)', 
    'pn junction': 'User Voice Input (Engineering Topic)',
    'semiconductor': 'User Voice Input (Engineering Topic)',
    'linear equation': 'User Voice Input (Engineering Topic)',
    'matrix': 'User Voice Input (Engineering Topic)',
    'transformer': 'User Voice Input (Engineering Topic)',
    'logic gate': 'User Voice Input (Engineering Topic)',
    'if statement': 'User Voice Input (Engineering Topic)',
    
    # System commands should look in the column containing the target topic/intent
    'braille': 'NLP Intent / Target Topic',
    'replay': 'NLP Intent / Target Topic',
    'last explanation': 'NLP Intent / Target Topic',
}

# --- INITIALIZATION ---
# 1. QA Model (RAG Generation)
try:
    QA_MODEL = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")
    print("‚úÖ QA Model (DistilBERT) initialized.")
except Exception as e:
    print(f"‚ùå ERROR: Could not load QA model. {e}")
    QA_MODEL = None

# 2. STT Model (Local, CPU Optimized)
try:
    WHISPER_MODEL = whisper.load_model("tiny") 
    WHISPER_PROMPT = "The speaker is discussing engineering topics, P-N junction, Ohm's Law, linear equations, C++ syntax, logic gates, transformers, voltage, and current."
    print("‚úÖ Whisper Model (Tiny, CPU Optimized) loaded.")
except Exception as e:
    print(f"‚ùå ERROR: Could not load Whisper model. STT will fail if disconnected from the internet. {e}")
    WHISPER_MODEL = None

# 3. TTS Engine (Local, Offline)
try:
    TTS_ENGINE = pyttsx3.init()
    # Optional: Adjust rate for clarity on low-spec systems
    TTS_ENGINE.setProperty('rate', 150) # Words per minute
    print("‚úÖ pyttsx3 (Offline TTS) initialized.")
except Exception as e:
    print(f"‚ùå ERROR: Could not initialize pyttsx3. TTS will not function. {e}")
    TTS_ENGINE = None

KNOWLEDGE_BASE_DF = None
LAST_RESPONSE_TEXT = ""

def load_knowledge_base(file_path="tutor_knowledge.xlsx"):
    """Loads the Excel sheet into a pandas DataFrame."""
    global KNOWLEDGE_BASE_DF
    try:
        KNOWLEDGE_BASE_DF = pd.read_excel(file_path, header=0)
        print(f"‚úÖ Loaded {len(KNOWLEDGE_BASE_DF)} entries from {file_path}.")
        return True
    except FileNotFoundError:
        print(f"‚ùå ERROR: Excel file not found at {file_path}")
        return False
    except Exception as e:
        print(f"‚ùå ERROR loading Excel: {e}")
        return False

def show_data_status():
    """Prints metadata about the loaded knowledge base for accessibility proof."""
    global KNOWLEDGE_BASE_DF
    if KNOWLEDGE_BASE_DF is None:
        speak("The knowledge base is not currently loaded.")
        return
        
    print("-" * 50)
    print("--- KNOWLEDGE BASE STATUS ---")
    print(f"Total Entries (Rows): {len(KNOWLEDGE_BASE_DF)}")
    print(f"Total Columns: {len(KNOWLEDGE_BASE_DF.columns)}")
    print(f"Primary Retrieval Column: {'User Voice Input (Engineering Topic)'}")
    print(f"Braille Lookup Column: {'Generated Audio Response (Text for TTS)'}")
    print("-" * 50)
    speak(f"I found the knowledge base with {len(KNOWLEDGE_BASE_DF)} entries.")

# --- 2. VOICE INPUT MODULE (LOCAL WHISPER) ---

def listen():
    """Captures audio and converts it to text using local Whisper."""
    r = sr.Recognizer()
    with sr.Microphone() as source:
        print("\n\nüéôÔ∏è Listening...")
        r.adjust_for_ambient_noise(source, duration=0.5)
        try:
            audio = r.listen(source, timeout=8, phrase_time_limit=15) 
        except sr.WaitTimeoutError:
            print("‚ùå No speech detected within time limit.")
            return ""

    try:
        # --- LOCAL WHISPER STT (CPU PRIORITY) ---
        if WHISPER_MODEL:
            temp_wav_file = "temp_audio_stt.wav"
            with open(temp_wav_file, "wb") as f:
                f.write(audio.get_wav_data())

            print("  [STT] Processing locally with Whisper (Tiny)...")
            result = WHISPER_MODEL.transcribe(
                temp_wav_file, 
                initial_prompt=WHISPER_PROMPT, 
                language='en',
                beam_size=3
            )
            text = result["text"].strip()
            os.remove(temp_wav_file)
            
        else:
            # Fallback to Google STT if Whisper failed to load (requires internet)
            text = r.recognize_google(audio)
            
        print(f"üó£Ô∏è You said: {text}")
        return text
        
    except sr.UnknownValueError:
        print("‚ùå Sorry, could not recognize speech. (Tip: Try speaking slower, or rephrase technical terms.)") 
        return ""
    except Exception as e:
        print(f"‚ùå STT Error: {e}")
        return ""

# --- 3. TEXT-TO-SPEECH (TTS) MODULE (MODIFIED FOR PYTTSX3) ---

def speak(text):
    """Converts text to speech using pyttsx3 (offline)."""
    global LAST_RESPONSE_TEXT
    
    if not text:
        return
        
    LAST_RESPONSE_TEXT = text
    
    if TTS_ENGINE:
        try:
            print(f"üí° AI Tutor: {text}")
            TTS_ENGINE.say(text)
            TTS_ENGINE.runAndWait() 
        except Exception as e:
            print(f"‚ùå Error during pyttsx3 playback: {e}")
    else:
        print(f"üí° AI Tutor: {text} (TTS Engine Failed)")

# --- 4. BRAILLE MODULES ---

def braille_convert_by_last_answer(last_text_response):
    """
    Finds the associated Braille output by matching the Query ID extracted from 
    the last response (e.g., '(E002)').
    """
    if KNOWLEDGE_BASE_DF is None:
        return speak("Knowledge base not loaded to find Braille content.")
    
    # Extract Source ID from the start of the response text
    if not last_text_response.startswith('(') or ')' not in last_text_response:
        return speak("I cannot find a source ID in the last answer to look up Braille content. Please ask a new question first, or use the command 'Braille [Query ID]'.")

    source_id = last_text_response.split(')')[0].replace('(', '').strip()
    
    return braille_convert_by_id(source_id)

def braille_convert_by_id(query_id):
    """
    Finds the associated Braille output directly using a provided Query ID (e.g., 'E002').
    """
    if KNOWLEDGE_BASE_DF is None:
        return speak("Knowledge base not loaded to find Braille content.")

    # Standardize the ID for lookup
    query_id = query_id.upper()
    
    # 1. Lookup the row using the reliable Query ID
    match = KNOWLEDGE_BASE_DF[
        KNOWLEDGE_BASE_DF['Query ID'] == query_id
    ]
    
    if match.empty:
        return speak(f"I cannot find the query ID {query_id} in the knowledge base.")

    # 2. Check if the row requires Braille output
    if match['Braille Conversion Required (Yes/No)'].str.lower().str.contains('yes', na=False).any():
        key_term = match.iloc[0]['Braille Output (Key Term/Formula/Code)']
        
        # --- Actual Braille Simulation ---
        print("-" * 50)
        print("üñ®Ô∏è TACTILE OUTPUT INITIATED üñ®Ô∏è")
        print(f"    Source ID: {query_id}")
        print(f"    Key Term for Braille: {key_term}")
        print("-" * 50)
        speak(f"The key information: {key_term} from source {query_id} has been sent to the Braille embosser.")
        
    else:
        speak(f"The answer from source {query_id} does not require a Braille output.")
        
# --- 5. RAG CORE MODULE ---

def find_best_context(question):
    """
    TARGETED RETRIEVAL: Uses keyword mapping to search the highest-confidence column.
    """
    question_lower = question.lower()
    
    # 1. Check for direct keyword matches
    for term, target_col in TECHNICAL_KEYWORDS.items():
        if term in question_lower:
            # Use regex=True for robust matching against Excel data
            matching_rows = KNOWLEDGE_BASE_DF[
                KNOWLEDGE_BASE_DF[target_col].astype(str).str.lower().str.contains(term, regex=True, na=False)
            ]
            
            if not matching_rows.empty:
                # SUCCESS: Use the best match (first one found)
                row = matching_rows.iloc[0]
                context = row['Context/Source (Pre-stored or AI-Gen)']
                source_id = row['Query ID']
                
                print(f"  [RAG STEP 1] Targeted Retrieval Success using '{term}'. Document ID: {source_id}")
                return context, source_id
                
    # 2. FALLBACK: Search the 'User Voice Input' column for the complete, transcribed query
    print("  [RAG STEP 1] Fallback: No targeted keyword found. Searching full content.")
    
    # Use a simpler match for fallback to prevent too many regex failures
    matching_rows_fallback = KNOWLEDGE_BASE_DF[
        KNOWLEDGE_BASE_DF['User Voice Input (Engineering Topic)'].astype(str).str.lower().str.contains(question_lower, na=False)
    ]
    
    if not matching_rows_fallback.empty:
        row = matching_rows_fallback.iloc[0]
        context = row['Context/Source (Pre-stored or AI-Gen)']
        source_id = row['Query ID']
        
        print(f"  [RAG STEP 1] Fallback Success. Context found by matching full query: {source_id}")
        return context, source_id
        
    # 3. FAILURE: If RAG fails entirely.
    return None, None 

def answer_question(question):
    """
    RAG Pipeline: Retrieve -> Generate
    """
    global QA_MODEL
    global KNOWLEDGE_BASE_DF

    if KNOWLEDGE_BASE_DF is None:
        return "Knowledge base not ready."

    # --- 1. RETRIEVAL (RAG STEP) ---
    context, source_id = find_best_context(question)
    
    if context is None:
        return "I apologize, that specific engineering topic is not yet in my knowledge base. Try asking about Ohm's Law or P-N junctions."
        
    # --- 2. GENERATION (QA MODEL) ---
    try:
        if QA_MODEL is None:
            return "QA model is not initialized."
            
        result = QA_MODEL({
            "context": context,
            "question": question
        })
        
        # RAG OBSERVABILITY: Print the source used for the answer
        print(f"  [RAG STEP 2] Answer generated from Source ID: {source_id}")
        
        # Append the source to the final answer text for visual proof in the demo
        final_answer = f"({source_id}) {result['answer']}"
        return final_answer
        
    except Exception as e:
        return f"An error occurred during QA processing: {e}"

if __name__ == "__main__":
    if not load_knowledge_base():  
        print("\nFATAL ERROR: Cannot proceed without the knowledge base. Check file name/path.")
    else:
        print("\n" + "="*50)
        print("ü§ñ AI Tutor is ready. Speak your engineering question.")
        print("="*50)
        speak("Hello! I am your AI engineering tutor. How can I help you learn today?")
        
        while True:
            query = listen()
            
            if not query:
                continue
                
            query_lower = query.lower()
            
            # 3. Command Handling
            
            # 3a. EXIT/QUIT COMMAND
            if query_lower in ["exit", "quit", "stop", "i am done"]:
                speak("Goodbye! Happy studying.")
                print("üëã Session ended.")
                break
            
            # 3b. SHOW DATA COMMAND
            elif "show data" in query_lower or "show knowledge" in query_lower:
                show_data_status()
                continue
                
            # 3c. REPLAY COMMAND
            elif "read again" in query_lower or "say that again" in query_lower or "replay" in query_lower:
                if LAST_RESPONSE_TEXT:
                    # Remove source ID from replay for cleaner audio
                    clean_response = LAST_RESPONSE_TEXT.split(') ', 1)[-1]
                    speak(f"Replaying last response: {clean_response}")
                else:
                    speak("I don't have a previous response to repeat.")
                continue

            # 3d. DIRECT BRAILLE COMMAND (New) - Braille E002, Braille E005, etc.
            match_id = re.search(r'braille\s+([e]\d+)', query_lower)
            if match_id:
                query_id = match_id.group(1).upper()
                braille_convert_by_id(query_id)
                continue
                
            # 3e. BRAILLE LAST ANSWER COMMAND (Original)
            elif "braille" in query_lower or "tactile" in query_lower or "print" in query_lower:
                if LAST_RESPONSE_TEXT:
                    braille_convert_by_last_answer(LAST_RESPONSE_TEXT)
                else:
                    speak("I need to answer a question before I can convert a response to Braille. Try asking a question or using the command 'Braille [Query ID]'.")
                continue

            # 4. Question Answering (RAG Pipeline)
            response = answer_question(query)
            speak(response)


Device set to use cpu


‚úÖ QA Model (DistilBERT) initialized.
‚úÖ Whisper Model (Tiny, CPU Optimized) loaded.
‚úÖ pyttsx3 (Offline TTS) initialized.
‚úÖ Loaded 100 entries from tutor_knowledge.xlsx.

ü§ñ AI Tutor is ready. Speak your engineering question.
üí° AI Tutor: Hello! I am your AI engineering tutor. How can I help you learn today?


üéôÔ∏è Listening...
  [STT] Processing locally with Whisper (Tiny)...




üó£Ô∏è You said: What's on this Law?
  [RAG STEP 1] Fallback: No targeted keyword found. Searching full content.
üí° AI Tutor: I apologize, that specific engineering topic is not yet in my knowledge base. Try asking about Ohm's Law or P-N junctions.


üéôÔ∏è Listening...
  [STT] Processing locally with Whisper (Tiny)...




üó£Ô∏è You said: Explain the N junction.
  [RAG STEP 1] Fallback: No targeted keyword found. Searching full content.
üí° AI Tutor: I apologize, that specific engineering topic is not yet in my knowledge base. Try asking about Ohm's Law or P-N junctions.


üéôÔ∏è Listening...
  [STT] Processing locally with Whisper (Tiny)...




üó£Ô∏è You said: The speaker is discussing engineering topics, C++ syntax, C++ syntax, C++ syntax, C++ syntax, C++ syntax, C++ syntax,
  [RAG STEP 1] Fallback: No targeted keyword found. Searching full content.
üí° AI Tutor: I apologize, that specific engineering topic is not yet in my knowledge base. Try asking about Ohm's Law or P-N junctions.


üéôÔ∏è Listening...
  [STT] Processing locally with Whisper (Tiny)...




üó£Ô∏è You said: Explain the N junction.
  [RAG STEP 1] Fallback: No targeted keyword found. Searching full content.
üí° AI Tutor: I apologize, that specific engineering topic is not yet in my knowledge base. Try asking about Ohm's Law or P-N junctions.


üéôÔ∏è Listening...
  [STT] Processing locally with Whisper (Tiny)...




üó£Ô∏è You said: The law of thermodynamics.
  [RAG STEP 1] Fallback: No targeted keyword found. Searching full content.
üí° AI Tutor: I apologize, that specific engineering topic is not yet in my knowledge base. Try asking about Ohm's Law or P-N junctions.


üéôÔ∏è Listening...


KeyboardInterrupt: 