### 1) Environment setup and local dependencies
We install from local `requirements.txt`, load `.env` if present, and ensure the project path is importable.


In [1]:
# Environment and dependencies
import os
import sys
from pathlib import Path

project_root = Path(r"C:\Users\carry\OneDrive\Документы\Cursor\LangChain\hna")
requirements_path = project_root / "requirements.txt"

if requirements_path.exists():
    import subprocess
    subprocess.check_call([sys.executable, "-m", "pip", "install", "-r", str(requirements_path)])

from dotenv import load_dotenv
for env_candidate in [project_root / ".env", Path.cwd() / ".env"]:
    if env_candidate.exists():
        load_dotenv(str(env_candidate))
        break

if str(project_root) not in sys.path:
    sys.path.append(str(project_root))



### 2) Centralized prompts (system and human)
We keep prompts versioned and easily extendable. Adjust these variables as the scope grows.


In [2]:
# Prompts (modifiable)
from datetime import datetime

DISCLAIMER_TEXT = (
    "This conversation is for education only and is not medical advice. "
    "If you have urgent symptoms, seek professional care or emergency services."
)

SYSTEM_PROMPT = f"""
You are a compassionate multi-turn health navigator.
- Greet empathetically and acknowledge feelings.
- Ask concise follow-ups to gather symptoms, onset, severity, and risk factors.
- Use retrieved WHO/CDC guidance for education only.
- Never provide diagnosis or treatment instructions.
- Always respect the user's autonomy and privacy.
- When flagged, prepend the disclaimer:
  "{DISCLAIMER_TEXT}"
"""

HUMAN_PROMPT_TEMPLATE = (
    "User message: {{user_input}}\n"
    "Known symptoms so far: {{symptom_state}}\n"
    "Retrieved guidance (if any): {{retrieved_context}}\n"
    "Respond empathetically and continue the dialogue."
)




Prompts loaded. You can edit SYSTEM_PROMPT and HUMAN_PROMPT_TEMPLATE above.

### 3) Load WHO/CDC guidance (web scraping)
We fetch content from `https://www.who.int/health-topics` and `https://www.cdc.gov/health-topics.html` and parse text for RAG.


In [3]:
# Simple web scraper for WHO/CDC health topics
import requests
from bs4 import BeautifulSoup

WHO_URL = "https://www.who.int/health-topics"
CDC_URL = "https://www.cdc.gov/health-topics.html"


def fetch_text_from_url(url: str) -> str:
    try:
        resp = requests.get(url, timeout=20)
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, "lxml")
        # Keep visible text within main content areas; fallback to body text
        main = soup.find("main") or soup
        texts = [t.get_text(" ", strip=True) for t in main.find_all(["p", "li", "h1", "h2", "h3"])[:2000]]
        return "\n".join(t for t in texts if t)
    except Exception as e:
        return f"ERROR fetching {url}: {e}"

who_text = fetch_text_from_url(WHO_URL)
cdc_text = fetch_text_from_url(CDC_URL)

print("WHO text length:", len(who_text))
print("CDC text length:", len(cdc_text))


WHO text length: 8199
CDC text length: 506


## Search JSON URL for providers by list of target zips within X mile radius of current zipcode and returns top 10 providers
### fetch JSON url from list of providers by state: https://www22.anthem.com/cms-data-index.json/index.html

In [4]:
import requests
import json
from geopy.distance import geodesic
from typing import List
import zipcodes

def get_zip_codes_within_distance(target_zip: str, radius_miles: float, debug=False) -> List[str]:
    """
    Returns a list of zip codes within X miles from a target zip code.
    Uses bounding box optimization and CA-only filtering for speed.
    
    Args:
        target_zip: The target zip code (string)
        radius_miles: Search radius in miles
    
    Returns:
        List of zip codes within the specified radius
    """
    import math
    
    # Get target zip code info
    target_info = zipcodes.matching(target_zip)
    if not target_info:
        raise ValueError(f"Could not find zip code '{target_zip}'")
    
    target_lat = float(target_info[0]['lat'])
    target_lon = float(target_info[0]['long'])
    target_coords = (target_lat, target_lon)
    
    # Calculate bounding box
    lat_offset = radius_miles / 69
    lon_offset = radius_miles / (69 * math.cos(math.radians(target_lat)))
    
    min_lat = target_lat - lat_offset
    max_lat = target_lat + lat_offset
    min_lon = target_lon - lon_offset
    max_lon = target_lon + lon_offset
    
    nearby_zips_with_distance = []
    
    # Add the target zip first
    nearby_zips_with_distance.append((target_zip, 0))
    
    # Only check California zip codes within bounding box
    for zip_obj in zipcodes.list_all():
        zip_code = zip_obj['zip_code']
        
        # Skip the target zip since we already added it
        if zip_code == target_zip:
            continue
            
        # Only include California zip codes
        if zip_obj['state'] != 'CA':
            continue
        
        lat = float(zip_obj['lat'])
        lon = float(zip_obj['long'])
        
        # Quick bounding box check first
        if lat < min_lat or lat > max_lat or lon < min_lon or lon > max_lon:
            continue
        
        # Only calculate exact distance for candidates in bounding box
        coords = (lat, lon)
        distance = geodesic(target_coords, coords).miles
        
        if distance <= radius_miles:
            nearby_zips_with_distance.append((zip_code, distance))
    
    # Sort by distance (nearest to farthest)
    nearby_zips_with_distance.sort(key=lambda x: x[1])
    
    # Debug output if requested
    if debug:
        print(f"\nZip codes within {radius_miles} miles of {target_zip}:")
        for zip_code, distance in nearby_zips_with_distance[:20]:
            print(f"  {zip_code}: {distance:.2f} miles")
    
    # Return just the zip codes
    return [zip_code for zip_code, distance in nearby_zips_with_distance]

def scrape_json_url(url):
    """Fetch JSON from a given URL."""
    try:
        response = requests.get(url)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data from {url}: {e}")
        return None


def filter_providers_by_zip(url, target_zips):
    """
    Fetch providers from Anthem JSON URL and return filtered provider data
    for a list of ZIP codes ("addresses" -> "zip").
    
    Args:
        url: The URL to fetch provider data from
        target_zips: A string (single ZIP) or list of strings (multiple ZIPs)
    
    Returns:
        List of provider dictionaries matching the target ZIPs
    """
    data = scrape_json_url(url)
    if not data:
        return []

    if not isinstance(data, list):
        print("Unexpected JSON structure.")
        return []

    # Convert target_zips to list if it's a string
    if isinstance(target_zips, str):
        target_zips = [target_zips]
    
    # Normalize all target zips
    target_zips = [str(z).strip() for z in target_zips]

    filtered = []

    for p in data:
        addresses = p.get("addresses", [])
        if not addresses:
            continue

        # Some providers may have multiple addresses
        for addr in addresses:
            zip_code = str(addr.get("zip", "")).strip()
            if zip_code in target_zips:
                name_info = p.get("name", {})
                # Flatten name dict if present
                full_name = " ".join(
                    filter(None, [name_info.get("first"), name_info.get("middle"), name_info.get("last")])
                ).strip()

                # Specialty is a list — join into string
                specialty = ", ".join(p.get("specialty", [])) if isinstance(p.get("specialty"), list) else p.get("specialty")

                result = {
                    "npi": p.get("npi"),
                    "type": p.get("type"),
                    "name": full_name or p.get("name"),
                    "address": addr.get("address"),
                    "city": addr.get("city"),
                    "state": addr.get("state"),
                    "zip": zip_code,
                    "phone": addr.get("phone"),
                    "specialty": specialty
                }
                filtered.append(result)
                break  # Stop after first matching address for this provider

    return filtered


# Example usage:
if __name__ == "__main__":
    target_zips = get_zip_codes_within_distance('95008', 10, debug=True)
    print(f"\nTotal: {len(target_zips)} zip codes found\n")
    
    json_url = "https://www22.anthem.com/CMS/PROVIDERS_CAM.json"
    results = filter_providers_by_zip(json_url, target_zips)

    print(f"Found {len(results)} providers in the target ZIP codes.\n")
    for r in results[:10]:
        print(json.dumps(r, indent=4))


Zip codes within 10 miles of 95008:
  95008: 0.00 miles
  95009: 0.73 miles
  95011: 0.73 miles
  95130: 1.24 miles
  95124: 2.37 miles
  95117: 2.38 miles
  95128: 2.80 miles
  95129: 3.19 miles
  95125: 3.62 miles
  95071: 3.74 miles
  95031: 3.75 miles
  95126: 3.87 miles
  95118: 4.04 miles
  95030: 4.06 miles
  95032: 4.42 miles
  95053: 4.97 miles
  95050: 5.01 miles
  95052: 5.06 miles
  95055: 5.06 miles
  95056: 5.06 miles

Total: 75 zip codes found

Found 1358 providers in the target ZIP codes.

{
    "npi": "1003008772",
    "type": "INDIVIDUAL",
    "name": "LOUISE M BRUCE",
    "address": "750 S BASCOM AVE STE 330",
    "city": "SAN JOSE",
    "state": "CA",
    "zip": "95128",
    "phone": "8883341000",
    "specialty": "Nurse Practitioner"
}
{
    "npi": "1003013715",
    "type": "INDIVIDUAL",
    "name": "ROBERTA Y WANG",
    "address": "751 S BASCOM AVE BLDG Q FL 1",
    "city": "SAN JOSE",
    "state": "CA",
    "zip": "95128",
    "phone": "4088852100",
    "special

### 4) Vector store with Chroma and embeddings
We chunk scraped text and store embeddings locally. This will power retrieval.


In [None]:
# Build vector store
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma

raw_docs = [
    {"source": WHO_URL, "text": who_text},
    {"source": CDC_URL, "text": cdc_text},
]

# Basic cleaning and chunking
splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=150)

documents = []
for d in raw_docs:
    if not d["text"].startswith("ERROR"):
        for chunk in splitter.split_text(d["text"]):
            documents.append({"page_content": chunk, "metadata": {"source": d["source"]}})

# Embeddings model
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Create Chroma DB in a local folder
chroma_dir = str(project_root / "chroma_db")
vectorstore = Chroma.from_documents(
    documents=[
        type("Doc", (), doc)() for doc in documents  # lightweight doc-like objects
    ],
    embedding=embeddings,
    persist_directory=chroma_dir,
)

retriever = vectorstore.as_retriever(search_kwargs={"k": 4})
print("Vector DB ready. Docs:", len(documents))

### 5) LangChain retrieval stage
We use the retriever to ground the response. This is still LangChain.


In [None]:
# LangChain retrieval
from langchain.prompts import ChatPromptTemplate
from langchain.schema import StrOutputParser
from langchain.chat_models import init_chat_model

from langchain.llms import HuggingFaceHub
import os

# Set your Hugging Face token
os.environ["HUGGINGFACEHUB_API_TOKEN"] = "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"

# Use a model from Hugging Face Hub
llm = HuggingFaceHub(
    repo_id="mistralai/Mistral-7B-Instruct-v0.1", # Mistral 7B - Excellent for Reasoning, balance of performance and efficiency
    model_kwargs={
        "temperature": 0.7, 
        "max_length": 512,  # Increased for better responses
        "top_p": 0.95,
        "repetition_penalty": 1.1
    }
)

RAG_TEMPLATE = ChatPromptTemplate.from_template(
    """
    {system}
    
    Context:
    {context}
    
    Conversation state:
    - Symptoms: {symptom_state}
    
    User: {user_input}
    Assistant: Provide an empathetic, educational reply. Do not diagnose or prescribe.
    """
)

chain = (
    RAG_TEMPLATE
    | llm
    | StrOutputParser()
)

# Helper to run a retrieved response
def retrieved_response(user_input: str, symptom_state: str = ""):
    docs = retriever.get_relevant_documents(user_input)
    context = "\n\n".join(d.page_content for d in docs)
    return chain.invoke({
        "system": SYSTEM_PROMPT,
        "context": context,
        "symptom_state": symptom_state,
        "user_input": user_input,
    })



  llm = HuggingFaceHub(


LangChain retrieval chain ready

### 6) SWITCH: LangChain → LangGraph
We now orchestrate the multi-turn dialogue using LangGraph nodes and edges. Vector DB is used inside the retrieval node.


In [None]:
# LangGraph workflow
from typing import TypedDict, Annotated
from langgraph.graph import StateGraph, END
from langgraph.checkpoint.memory import MemorySaver

# Define proper state schema for LangGraph
class State(TypedDict):
    user_input: str
    symptom_state: str
    retrieved_context: str
    risk_level: str
    response_text: str
    empathy: bool

# Nodes

def node_empathy(state: State) -> State:
    return {**state, "empathy": True}

def node_collect_symptoms(state: State) -> State:
    user_input = state.get("user_input", "")
    # naive extraction heuristic; plug in a structured symptom extractor later
    known = state.get("symptom_state", "")
    new_symptom_state = (known + " " + user_input).strip()
    return {**state, "symptom_state": new_symptom_state}

def node_retrieve(state: State) -> State:
    query = state.get("user_input", "")
    docs = retriever.get_relevant_documents(query)
    retrieved_context = "\n\n".join(d.page_content for d in docs)
    return {**state, "retrieved_context": retrieved_context}

def node_risk_assess(state: State) -> State:
    # placeholder; future: rules or model-based risk heuristics
    text = state.get("symptom_state", "")
    risk = "low"
    if any(k in text.lower() for k in ["chest pain", "shortness of breath", "fainting"]):
        risk = "high"
    return {**state, "risk_level": risk}

def node_respond(state: State) -> State:
    response = retrieved_response(
        user_input=state.get("user_input", ""),
        symptom_state=state.get("symptom_state", ""),
    )
    return {**state, "response_text": response}

# Build graph
builder = StateGraph(State)

builder.add_node("empathy", node_empathy)
builder.add_node("collect_symptoms", node_collect_symptoms)
builder.add_node("retrieve", node_retrieve)
builder.add_node("risk", node_risk_assess)
builder.add_node("respond", node_respond)

builder.set_entry_point("empathy")

# Linear flow for now; can branch by conditions later
builder.add_edge("empathy", "collect_symptoms")
builder.add_edge("collect_symptoms", "retrieve")
builder.add_edge("retrieve", "risk")
builder.add_edge("risk", "respond")
builder.add_edge("respond", END)

memory = MemorySaver()
app = builder.compile(checkpointer=memory)



### 7) Disclaimer filter and sentiment analysis
We add a toggle to include the disclaimer and run a lightweight VADER sentiment to adapt tone.


In [None]:
# Disclaimer + sentiment
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

analyzer = SentimentIntensityAnalyzer()

def apply_disclaimer(text: str, enabled: bool) -> str:
    if not enabled:
        return text
    if DISCLAIMER_TEXT in text:
        return text
    return f"{DISCLAIMER_TEXT}\n\n{text}"


def adapt_tone_with_sentiment(user_input: str, reply: str) -> str:
    scores = analyzer.polarity_scores(user_input)
    if scores.get("neg", 0) > 0.4:
        # append a brief empathetic line
        return reply + "\n\nI’m here with you. That sounds tough."
    return reply




Disclaimer and sentiment ready.

### 8) Gradio UI for Health Navigator
Interactive chat interface with disclaimer toggle and conversation history.


In [None]:
# Gradio UI
import gradio as gr
from typing import List, Tuple

# Global conversation state
conversation_history = []

def chat_with_agent(message: str, history: List[Tuple[str, str]], disclaimer_enabled: bool) -> Tuple[str, List[Tuple[str, str]]]:
    """Main chat function that processes user input through the LangGraph workflow"""
    if not message.strip():
        return "", history
    
    try:
        # Run through LangGraph workflow
        config = {"configurable": {"thread_id": "main"}}
        result = app.invoke(
            {"user_input": message, "symptom_state": ""}, 
            config=config
        )
        
        # Get the response
        response = result.get("response_text", "I'm sorry, I couldn't process that.")
        
        # Apply sentiment adaptation
        response = adapt_tone_with_sentiment(message, response)
        
        # Apply disclaimer if enabled
        response = apply_disclaimer(response, disclaimer_enabled)
        
        # Update history
        history.append((message, response))
        
        return "", history
        
    except Exception as e:
        error_msg = f"I encountered an error: {str(e)}. Please try again."
        history.append((message, error_msg))
        return "", history

def clear_history():
    """Clear conversation history"""
    global conversation_history
    conversation_history = []
    return []

# Create Gradio interface
with gr.Blocks(title="Health Navigator Agent", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 🏥 Health Navigator Agent
    
    A compassionate AI assistant that helps you understand health topics using WHO/CDC guidance.
    
    **⚠️ Important:** This is for educational purposes only and does not provide medical advice.
    """)
    
    with gr.Row():
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(
                value=[],
                height=500,
                label="Conversation",
                show_label=True
            )
            
            with gr.Row():
                msg_input = gr.Textbox(
                    placeholder="Describe your symptoms or ask a health question...",
                    label="Your message",
                    lines=2,
                    scale=4
                )
                send_btn = gr.Button("Send", variant="primary", scale=1)
            
            with gr.Row():
                disclaimer_toggle = gr.Checkbox(
                    label="Include medical disclaimer",
                    value=True,
                    info="Adds safety disclaimer to responses"
                )
                clear_btn = gr.Button("Clear History", variant="secondary")
        
        with gr.Column(scale=1):
            gr.Markdown("""
            ### Features
            - 🤗 Empathetic responses
            - 📚 WHO/CDC guidance retrieval
            - 🧠 Multi-turn conversation
            - ⚠️ Safety disclaimers
            - 💭 Sentiment analysis
            - 🔄 LangGraph workflow
            """)
            
            gr.Markdown("""
            ### How it works
            1. **Empathy**: Acknowledges your feelings
            2. **Symptom Collection**: Gathers information
            3. **Retrieval**: Finds relevant guidance
            4. **Risk Assessment**: Evaluates urgency
            5. **Response**: Provides educational support
            """)
    
    # Event handlers
    send_btn.click(
        chat_with_agent,
        inputs=[msg_input, chatbot, disclaimer_toggle],
        outputs=[msg_input, chatbot]
    )
    
    msg_input.submit(
        chat_with_agent,
        inputs=[msg_input, chatbot, disclaimer_toggle],
        outputs=[msg_input, chatbot]
    )
    
    clear_btn.click(
        clear_history,
        outputs=[chatbot]
    )



"Gradio UI ready! Run demo.launch() to start the interface.

### 9) Launch and Test the Health Navigator
Run the Gradio interface to test the complete experience.


### 10) Test Hugging Face Model Performance
Let's test the model with a sample health question to verify performance.


In [None]:
# Test the model performance
print("🧪 Testing Hugging Face model performance...")

# Test 1: Simple health question
test_input = "I have a headache and feel tired. What should I know?"
print(f"\n📝 Test Input: {test_input}")

try:
    # Test the LangGraph workflow
    config = {"configurable": {"thread_id": "test"}}
    result = app.invoke({"user_input": test_input}, config=config)
    
    print("✅ LangGraph workflow completed successfully!")
    print(f"📊 Risk Level: {result.get('risk_level', 'unknown')}")
    print(f"💬 Response Length: {len(result.get('response_text', ''))} characters")
    print(f"🔍 Retrieved Context Length: {len(result.get('retrieved_context', ''))} characters")
    
    # Show a snippet of the response
    response = result.get('response_text', '')
    if response:
        print(f"\n🤖 Model Response Preview:")
        print(response[:200] + "..." if len(response) > 200 else response)
    
except Exception as e:
    print(f"❌ Error testing model: {e}")
    print("This might be due to Hugging Face API limits or model availability.")

print("\n" + "="*50)


In [None]:
# Launch the Gradio interface
if __name__ == "__main__":
    print("🚀 Starting Health Navigator Agent...")
    print("📊 Vector DB contains", len(documents), "document chunks")
    print("🤖 Using Hugging Face model: Mistral-7B-Instruct")
    print("🔗 LangGraph workflow with", len(app.get_graph().nodes), "nodes")
    print("\n" + "="*50)
    print("Launching Gradio interface...")
    print("="*50)
    
    # Launch with public sharing disabled for security
    demo.launch(
        share=False,  # Set to True if you want to share publicly
        server_name="127.0.0.1",
        server_port=7860,
        show_error=True,
        quiet=False
    )


🚀 Starting Health Navigator Agent...
📊 Vector DB contains 14 document chunks
🤖 Using Hugging Face model: Mistral-7B-Instruct


TypeError: Type Dict cannot be instantiated; use dict() instead

### 11) Ready to Launch! 🚀

The Health Navigator Agent is now complete with:

✅ **Fixed Dependencies**: Using local `requirements.txt`  
✅ **Centralized Prompts**: Scalable system and human prompts  
✅ **Web Scraping**: WHO/CDC health guidance loaded  
✅ **Vector Database**: Chroma with sentence-transformers embeddings  
✅ **LangChain Retrieval**: RAG pipeline for context  
✅ **LangGraph Workflow**: Multi-turn dialogue orchestration  
✅ **Disclaimer Filter**: Toggle for safety disclaimers  
✅ **Sentiment Analysis**: VADER for emotional adaptation  
✅ **Gradio UI**: Interactive chat interface  

**To run the agent:**
1. Execute all cells above
2. Run the test cell to verify model performance
3. Launch the Gradio interface

**Note**: The Hugging Face model (Mistral-7B-Instruct) provides good performance for health conversations. If you encounter API limits, consider using OpenAI or other providers by updating the model configuration in the LangChain retrieval cell.
