In [1]:
import os
import json
import sqlite3
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document
from groq import Groq
from fuzzywuzzy import fuzz
from nltk.tokenize import sent_tokenize
import nltk

In [2]:
nltk.download('punkt')


[nltk_data] Downloading package punkt to
[nltk_data]     /Users/devayushrout/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [3]:
# Symptom synonyms (from your codebase)
symptom_synonyms = {
    "fever": ["bukhar", "tapman", "high temperature", "tez bukhar"],
    "headache": ["sar dard", "sar mein dard", "migraine"],
    "cough": ["khaansi", "khansi", "dry cough"],
    "cold": ["zukaam", "runny nose", "nasal congestion"],
    "vomiting": ["ulti", "throwing up", "nausea"],
    "diarrhea": ["patla mal", "loose motions", "dast"],
    "body pain": ["jodo ka dard", "sareer mein dard", "body ache"],
    "sore throat": ["gale mein dard", "gala kharab"],
}

In [4]:
# Initialize ChatGroq
client = Groq(api_key=os.getenv("GROQ_API_KEY"))
llm = lambda prompt: client.chat.completions.create(
    model="llama3-70b-8192",
    messages=[{"role": "user", "content": prompt}],
    temperature=0.7
).choices[0].message.content

In [5]:
# Load FAISS vectorstore
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectorstore = FAISS.load_local("baymax_vectorstore", embedding_model, allow_dangerous_deserialization=True)

  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
  from .autonotebook import tqdm as notebook_tqdm


In [6]:
# SQLite for conversation state
def init_db():
    conn = sqlite3.connect("conversations.db")
    c = conn.cursor()
    c.execute("""CREATE TABLE IF NOT EXISTS conversations
                (user_id TEXT, session TEXT, symptoms TEXT, duration TEXT, severity INTEGER, history TEXT)""")
    conn.commit()
    return conn

In [7]:
# Normalize symptoms (from your codebase)
def normalize_symptoms(user_input):
    normalized = set()
    for standard, synonyms in symptom_synonyms.items():
        for term in synonyms:
            if fuzz.partial_ratio(term.lower(), user_input.lower()) >= 85:
                normalized.add(standard)
    return list(normalized)

In [8]:
# Retrieve chunks (adapted from get_top_chunk_by_source)
def retrieve_chunks(symptoms, vectorstore, top_k=5):
    query = " ".join(symptoms)
    results = vectorstore.similarity_search_with_score(query, k=top_k)
    selected = {}
    for doc, score in results:
        source = doc.metadata.get("source")
        if source not in selected and score > 0.85:
            selected[source] = {"content": doc.page_content, "metadata": doc.metadata}
    return selected

In [9]:
# Dynamic prompt generation
def generate_prompt(symptoms, chunks, user_input, language="English"):
    template = """You are Baymax, a caring assistant for rural India. Use these excerpts to give simple, empathetic advice for {symptoms}. If unclear, prioritize rural_care and iphs. Avoid technical terms. Ask follow-ups based on symptom_to_diagnosis logic. Use {language} if requested. Use nfi.txt only for medicine recognition. Cite sources. If no relevant info, say: 'I lack information. Please see a doctor.'
    Excerpts: {excerpts}
    User: {user_input}"""
    
    excerpts = "\n".join([f"[{c['metadata']['source']}]: {c['content'][:300]}" for c in chunks.values()])
    return template.format(symptoms=", ".join(symptoms), excerpts=excerpts, user_input=user_input, language=language)

In [None]:
# Main consultation loop
def consult(user_input, user_id="user123", language="English"):
    conn = init_db()
    c = conn.cursor()
    
    # Normalize symptoms
    symptoms = normalize_symptoms(user_input)
    if not symptoms:
        return "Please describe your symptoms clearly, like 'fever' or 'cough'."
    
    # Retrieve chunks
    chunks = retrieve_chunks(symptoms, vectorstore)
    if not chunks:
        return "I lack information in my knowledge base. Please consult a doctor."
    
    # Check for red flags (stored in SQLite)
    c.execute("SELECT symptoms FROM red_flags WHERE symptoms IN (?)", (",".join(symptoms),))
    if c.fetchone():
        return "This is serious. Please visit a doctor immediately."
    
    # Store conversation state
    c.execute("INSERT INTO conversations (user_id, session, symptoms, duration, severity, history) VALUES (?, ?, ?, ?, ?, ?)",
            (user_id, user_input, ",".join(symptoms), "", 0, ""))
    conn.commit()
    
    # Generate response
    prompt = generate_prompt(symptoms, chunks, user_input, language)
    response = llm(prompt)
    
    # Follow-up questions (dynamic, based on consultation_flow)
    follow_ups = ["How many days have you had these symptoms?",
                "What is your current temperature?",
                "On a scale of 1-10, how severe is it?",
                "Any other symptoms?",
                "Any medical conditions or allergies?"]
    
    if "consultation_flow" in chunks:
        # Extract follow-up logic from symptom_to_diagnosis
        follow_up_symptoms = [s for s in chunks["consultation_flow"]["metadata"].get("symptoms", []) if s not in symptoms]
        if follow_up_symptoms:
            follow_ups.append(f"Do you have {', '.join(follow_up_symptoms)}?")
    
    response += f"\nTo provide better guidance, {follow_ups[0]}. Please consult a doctor for proper diagnosis."
    return response


In [11]:
# Example usage
if __name__ == "__main__":
    user_input = "Mujhe bukhar aur khansi hai"
    response = consult(user_input, language="Hindi")
    print(response)

I lack information in my knowledge base. Please consult a doctor.
