In [None]:
# Install required packages
# The %pip magic installs into the environment of the running kernel.
# NOTE(review): versions are not pinned here — consider pinning them so a
# Restart & Run All reproduces the same environment.
%pip install requests numpy


In [None]:
import json
import os
import time
from typing import List, Dict, Any, Optional

import numpy as np
import requests

# Configuration
# The token is read from the HUGGING_FACE_API_TOKEN environment variable so the
# secret never has to be stored in the notebook itself. When the variable is
# unset the token is an empty string and the helpers below use mock mode.
HUGGING_FACE_API_TOKEN = os.environ.get("HUGGING_FACE_API_TOKEN", "").strip()
USE_MOCK_MODE = True  # Set to False to use real API calls

# API endpoints
BIOGPT_URL = "https://api-inference.huggingface.co/models/microsoft/BioGPT-Large"
BIOBERT_URL = "https://api-inference.huggingface.co/models/dmis-lab/biobert-base-cased-v1.1"

# Headers for API requests (shared by the embedding and text-generation calls)
headers = {
    "Authorization": f"Bearer {HUGGING_FACE_API_TOKEN}",
    "Content-Type": "application/json"
}

print(f"Configuration loaded. Mock mode: {USE_MOCK_MODE}")
if HUGGING_FACE_API_TOKEN:
    print("✅ Hugging Face API token provided")
else:
    print("⚠️ No Hugging Face API token provided - will use mock mode")


In [None]:
# Mock health documents database
# In-memory corpus used as retrieval context. Every entry is a dict with:
#   "id"      - identifier, stored as a string
#   "title"   - human-readable title, shown in the source list
#   "content" - body text; this is the field that gets embedded for similarity
#   "type"    - document category label (e.g. "article", "guideline")
#   "tags"    - list of topic strings, compared against the user's health conditions
HEALTH_DOCUMENTS = [
    {
        "id": "1",
        "title": "Managing Diabetes Through Diet",
        "content": "A balanced diet is crucial for managing diabetes. Focus on complex carbohydrates, lean proteins, and healthy fats. Monitor blood sugar levels regularly and work with a healthcare provider to adjust your meal plan as needed.",
        "type": "article",
        "tags": ["diabetes", "diet", "nutrition"]
    },
    {
        "id": "2",
        "title": "Exercise Guidelines for Heart Health",
        "content": "Regular cardiovascular exercise strengthens the heart muscle and improves circulation. Aim for at least 150 minutes of moderate-intensity exercise per week. Include both aerobic activities and strength training for optimal heart health.",
        "type": "guideline",
        "tags": ["heart disease", "exercise", "cardiovascular"]
    },
    {
        "id": "3",
        "title": "Understanding High Blood Pressure",
        "content": "Hypertension affects millions worldwide. Key management strategies include reducing sodium intake, maintaining a healthy weight, regular exercise, stress management, and medication adherence when prescribed.",
        "type": "educational",
        "tags": ["hypertension", "blood pressure", "lifestyle"]
    },
    {
        "id": "4",
        "title": "Sleep and Mental Health Connection",
        "content": "Quality sleep is essential for mental health. Poor sleep can worsen anxiety and depression. Establish a consistent sleep schedule, create a relaxing bedtime routine, and limit screen time before bed.",
        "type": "research",
        "tags": ["anxiety", "depression", "sleep", "mental health"]
    },
    {
        "id": "5",
        "title": "Arthritis Pain Management",
        "content": "Arthritis pain can be managed through a combination of medication, physical therapy, gentle exercise, heat/cold therapy, and lifestyle modifications. Work with your healthcare team to develop a comprehensive pain management plan.",
        "type": "treatment",
        "tags": ["arthritis", "pain management", "mobility"]
    }
]

# Report how many documents are available as context.
print(f"Loaded {len(HEALTH_DOCUMENTS)} health documents for context")


In [None]:
def generate_embedding_real(text: str, timeout: float = 60.0) -> List[float]:
    """Generate an embedding for ``text`` using BioBERT via the Hugging Face API.

    Args:
        text: The text to embed.
        timeout: Timeout in seconds passed to ``requests.post`` so a stalled
            connection cannot hang the notebook indefinitely.

    Returns:
        A flat list of numbers taken from the API response. If the request
        fails, times out, returns a non-200 status, or yields a payload that
        is not a (possibly nested) non-empty list of numbers, the result of
        ``generate_embedding_mock(text)`` is returned instead.
    """
    try:
        response = requests.post(
            BIOBERT_URL,
            headers=headers,
            json={"inputs": text, "options": {"wait_for_model": True}},
            timeout=timeout,
        )

        if response.status_code != 200:
            print(f"Embedding API error: {response.status_code} - {response.text}")
            return generate_embedding_mock(text)

        # NOTE(review): the exact nesting of the response is not verified here.
        # Descend through the first element of each nesting level until a flat
        # list is reached. A flat response is returned as-is (indexing it would
        # yield a single float), and a list of vectors yields its first vector.
        vector = response.json()
        while isinstance(vector, list) and vector and isinstance(vector[0], list):
            vector = vector[0]

        if isinstance(vector, list) and vector and isinstance(vector[0], (int, float)):
            return vector

        # Anything else (dict, empty list, non-numeric content) is unusable as
        # an embedding, so degrade to the mock like every other failure path.
        print("Embedding error: unexpected response payload, using mock embedding")
        return generate_embedding_mock(text)
    except Exception as e:
        # Best-effort by design: any failure degrades to the mock embedding.
        print(f"Embedding error: {e}")
        return generate_embedding_mock(text)

def generate_embedding_mock(text: str) -> List[float]:
    """Generate a deterministic pseudo-random mock embedding for testing.

    The first 8 hex digits of the MD5 digest of ``text`` seed a private random
    generator, so equal texts always map to the same vector.

    Args:
        text: The text to embed.

    Returns:
        A list of 768 floats drawn from a standard normal distribution.
    """
    import hashlib

    digest = hashlib.md5(text.encode()).hexdigest()
    seed = int(digest[:8], 16)

    # A private RandomState produces the same stream as np.random.seed(seed)
    # followed by np.random.normal, but leaves the global NumPy generator
    # untouched, so calling this helper no longer reseeds unrelated code.
    rng = np.random.RandomState(seed)
    return rng.normal(0, 1, 768).tolist()

def generate_embedding(text: str) -> List[float]:
    """Return an embedding for ``text``, choosing the backend from the configuration.

    The real BioBERT call is used only when mock mode is switched off and an
    API token is set; in every other case the mock embedding is returned.
    """
    live_api_enabled = bool(HUGGING_FACE_API_TOKEN) and not USE_MOCK_MODE
    backend = generate_embedding_real if live_api_enabled else generate_embedding_mock
    return backend(text)

print("Embedding functions loaded")


In [None]:
def cosine_similarity(a: List[float], b: List[float]) -> float:
    """Calculate the cosine similarity between two vectors.

    Args:
        a: First vector.
        b: Second vector, of the same length as ``a``.

    Returns:
        The cosine of the angle between ``a`` and ``b`` as a builtin ``float``.
        Returns ``0.0`` when either vector has zero norm (including empty
        vectors), where the similarity is undefined.

    Raises:
        ValueError: Propagated from ``np.dot`` when the vectors have
            mismatched lengths.
    """
    vec_a = np.asarray(a, dtype=float)
    vec_b = np.asarray(b, dtype=float)

    norm_a = np.linalg.norm(vec_a)
    norm_b = np.linalg.norm(vec_b)

    # Guard against division by zero for all-zero or empty vectors.
    if norm_a == 0 or norm_b == 0:
        return 0.0

    # Convert the NumPy scalar so callers always get the annotated builtin float.
    return float(np.dot(vec_a, vec_b) / (norm_a * norm_b))

def find_relevant_documents(
    query: str,
    health_conditions: Optional[List[str]] = None,
    limit: int = 3,
    condition_boost: float = 0.2,
) -> List[Dict]:
    """Find the documents most relevant to the query and health conditions.

    Args:
        query: The user's question.
        health_conditions: Optional condition names. When given they are
            appended to the query text before embedding and used to boost
            documents whose tags match (case-insensitively).
        limit: Maximum number of documents to return. ``None`` returns every
            document; negative values are treated as 0.
        condition_boost: Amount added to a document's similarity when any of
            its tags equals one of the health conditions.

    Returns:
        Up to ``limit`` entries of ``HEALTH_DOCUMENTS``, ordered by descending
        score. Ties keep their original corpus order.
    """
    # Embed exactly once: with conditions present, only the enhanced query is
    # needed, so embedding the bare query first would be a wasted call.
    if health_conditions:
        search_text = f"{query} {' '.join(health_conditions)}"
    else:
        search_text = query
    query_embedding = generate_embedding(search_text)

    # Lower-case the conditions once instead of once per document.
    wanted_conditions = {condition.lower() for condition in health_conditions or []}

    # Score every document against the query.
    scored = []
    for doc in HEALTH_DOCUMENTS:
        doc_embedding = generate_embedding(doc['content'])
        similarity = cosine_similarity(query_embedding, doc_embedding)

        # Boost similarity if health conditions match document tags.
        if wanted_conditions and not wanted_conditions.isdisjoint(
            tag.lower() for tag in doc['tags']
        ):
            similarity += condition_boost

        scored.append((doc, similarity))

    # list.sort is stable, so equally scored documents keep corpus order.
    scored.sort(key=lambda pair: pair[1], reverse=True)

    # A negative slice bound would silently drop documents from the end.
    top = scored if limit is None else scored[:max(limit, 0)]
    return [doc for doc, _ in top]

print("Similarity functions loaded")


In [None]:
def generate_answer_real(prompt: str, timeout: float = 60.0) -> str:
    """Generate an answer for ``prompt`` using BioGPT via the Hugging Face API.

    Args:
        prompt: The full prompt to send to the model.
        timeout: Timeout in seconds passed to ``requests.post`` so a stalled
            connection cannot hang the notebook indefinitely.

    Returns:
        The stripped ``generated_text`` of the first item when the response is
        a non-empty list, otherwise the stripped string form of the payload.
        If the request fails, times out, returns a non-200 status, or the
        first item is not a dict, the result of ``generate_answer_mock(prompt)``
        is returned instead.
    """
    try:
        response = requests.post(
            BIOGPT_URL,
            headers=headers,
            json={
                "inputs": prompt,
                "parameters": {
                    "max_new_tokens": 200,
                    "temperature": 0.7,
                    "do_sample": True,
                    "return_full_text": False
                },
                "options": {"wait_for_model": True}
            },
            timeout=timeout,
        )

        if response.status_code != 200:
            print(f"Text generation API error: {response.status_code} - {response.text}")
            return generate_answer_mock(prompt)

        result = response.json()
        if isinstance(result, list) and len(result) > 0:
            first = result[0]
            # Handle a malformed item explicitly instead of relying on an
            # AttributeError from .get() being caught below.
            if not isinstance(first, dict):
                print(f"Text generation error: unexpected response item of type {type(first).__name__}")
                return generate_answer_mock(prompt)
            return first.get('generated_text', '').strip()
        return str(result).strip()
    except Exception as e:
        # Best-effort by design: any failure degrades to the mock answer.
        print(f"Text generation error: {e}")
        return generate_answer_mock(prompt)

def generate_answer_mock(prompt: str) -> str:
    """Return a canned answer selected deterministically from the prompt text."""
    import hashlib

    canned_answers = (
        "Based on current medical guidelines, it's important to consult with your healthcare provider for personalized advice. General recommendations include maintaining a balanced diet, regular exercise, and following prescribed treatments.",
        "This is a complex health topic that requires individualized care. Consider discussing these symptoms or concerns with a medical professional who can provide guidance based on your specific health history and current condition.",
        "Health management often involves a combination of lifestyle modifications, medical treatments, and regular monitoring. Your healthcare team can help develop a comprehensive plan tailored to your needs.",
        "While general health information can be helpful, it's crucial to work with qualified healthcare providers for diagnosis and treatment decisions. They can consider your complete medical history and current health status.",
    )

    # The first 8 hex digits of the prompt's MD5 digest select the answer, so
    # an identical prompt always maps to the same canned text.
    digest = hashlib.md5(prompt.encode()).hexdigest()
    choice = int(digest[:8], 16) % len(canned_answers)
    return canned_answers[choice]

def generate_answer(prompt: str) -> str:
    """Return an answer for ``prompt``, choosing the backend from the configuration.

    The real BioGPT call is used only when mock mode is switched off and an
    API token is set; in every other case the mock answer is returned.
    """
    live_api_enabled = bool(HUGGING_FACE_API_TOKEN) and not USE_MOCK_MODE
    backend = generate_answer_real if live_api_enabled else generate_answer_mock
    return backend(prompt)

print("Text generation functions loaded")


In [None]:
def ask_wellness_llm(question: str, health_conditions: Optional[List[str]] = None) -> Dict[str, Any]:
    """Answer a wellness question using retrieved documents as context.

    Args:
        question: The user's question.
        health_conditions: Optional list of condition names. A single string
            is accepted as well and treated as one condition.

    Returns:
        A dict with the keys:
            "answer": the generated (or apology) text,
            "sources": list of {"title", "type", "id"} dicts for the documents used,
            "health_conditions": the conditions that were applied (``[]`` if none),
            "mode": "mock" or "real" according to the configuration, or "error",
        plus an "error" message when processing failed. "mode" reflects the
        configured backend; it does not record whether a real API call fell
        back to the mock.
    """
    # A bare string would otherwise be joined character by character
    # ("d, i, a, ...") when the prompt is built, so wrap it in a list.
    if isinstance(health_conditions, str):
        health_conditions = [health_conditions] if health_conditions.strip() else []

    try:
        # Find relevant documents
        relevant_docs = find_relevant_documents(question, health_conditions)

        # Build context from relevant documents
        context = "\n\n".join(
            f"Source: {doc['title']}\nContent: {doc['content']}"
            for doc in relevant_docs
        )

        # Build prompt: conditions and context are included only when present
        prompt_parts = []

        if health_conditions:
            prompt_parts.append(f"Patient health conditions: {', '.join(health_conditions)}")

        if context:
            prompt_parts.append(f"Relevant medical information:\n{context}")

        prompt_parts.append(f"Question: {question}")
        prompt_parts.append("Please provide a helpful, evidence-based response:")

        full_prompt = "\n\n".join(prompt_parts)

        # Generate answer
        answer = generate_answer(full_prompt)

        # Format sources (only the fields needed for display)
        sources = [{
            "title": doc['title'],
            "type": doc['type'],
            "id": doc['id']
        } for doc in relevant_docs]

        return {
            "answer": answer,
            "sources": sources,
            "health_conditions": health_conditions or [],
            "mode": "mock" if (USE_MOCK_MODE or not HUGGING_FACE_API_TOKEN) else "real"
        }

    except Exception as e:
        # Deliberate catch-all: the caller always receives a well-formed dict.
        return {
            "error": f"An error occurred while processing your question: {str(e)}",
            "answer": "I apologize, but I'm unable to process your question right now. Please try again later or consult with a healthcare professional.",
            "sources": [],
            "health_conditions": health_conditions or [],
            "mode": "error"
        }

print("Main LLM function loaded")


In [None]:
# Example 1: Basic health question
question1 = "What are some good exercises for heart health?"
result1 = ask_wellness_llm(question1)

# Header, question, answer, mode and source count, one item per line
print(
    "=== Example 1: Basic Health Question ===",
    f"Question: {question1}",
    f"\nAnswer: {result1['answer']}",
    f"\nMode: {result1['mode']}",
    f"\nSources ({len(result1['sources'])}):",
    sep="\n",
)

# Numbered list of the documents used as context
for i, source in enumerate(result1['sources'], start=1):
    print(f"  {i}. {source['title']} ({source['type']})")
print("\n" + "=" * 50)


In [None]:
# Example 2: Question with health conditions
question2 = "How should I manage my diet?"
conditions2 = ["diabetes", "hypertension"]
result2 = ask_wellness_llm(question2, conditions2)

# Header, question, conditions, answer, mode and source count, one item per line
print(
    "=== Example 2: Question with Health Conditions ===",
    f"Question: {question2}",
    f"Health Conditions: {', '.join(conditions2)}",
    f"\nAnswer: {result2['answer']}",
    f"\nMode: {result2['mode']}",
    f"\nSources ({len(result2['sources'])}):",
    sep="\n",
)

# Numbered list of the documents used as context
for i, source in enumerate(result2['sources'], start=1):
    print(f"  {i}. {source['title']} ({source['type']})")
print("\n" + "=" * 50)


In [None]:
# Example 3: Mental health question
question3 = "I'm having trouble sleeping and feeling anxious. What can help?"
conditions3 = ["anxiety"]
result3 = ask_wellness_llm(question3, conditions3)

# Header, question, conditions, answer, mode and source count, one item per line
print(
    "=== Example 3: Mental Health Question ===",
    f"Question: {question3}",
    f"Health Conditions: {', '.join(conditions3)}",
    f"\nAnswer: {result3['answer']}",
    f"\nMode: {result3['mode']}",
    f"\nSources ({len(result3['sources'])}):",
    sep="\n",
)

# Numbered list of the documents used as context
for i, source in enumerate(result3['sources'], start=1):
    print(f"  {i}. {source['title']} ({source['type']})")
print("\n" + "=" * 50)


In [None]:
# Uncomment and run this cell to test with real API
# Make sure to set your HUGGING_FACE_API_TOKEN first!

# USE_MOCK_MODE = False
# print(f"Switched to real API mode. Token available: {bool(HUGGING_FACE_API_TOKEN)}")

# # Test with a simple question
# test_question = "What is diabetes?"
# test_result = ask_wellness_llm(test_question)
# print(f"\nTest Question: {test_question}")
# print(f"Answer: {test_result['answer']}")
# print(f"Mode: {test_result['mode']}")


In [None]:
# Interactive testing - modify these variables and run the cell
your_question = "Tell me about managing high blood pressure"
your_conditions = ["hypertension", "diabetes"]  # Add your conditions here

# Run your custom query
custom_result = ask_wellness_llm(your_question, your_conditions)

print("=== Your Custom Question ===", f"Question: {your_question}", sep="\n")
# The conditions line is shown only when at least one condition was supplied
if your_conditions:
    print(f"Health Conditions: {', '.join(your_conditions)}")
print(
    f"\nAnswer: {custom_result['answer']}",
    f"\nMode: {custom_result['mode']}",
    f"\nSources ({len(custom_result['sources'])}):",
    sep="\n",
)
for i, source in enumerate(custom_result['sources'], start=1):
    print(f"  {i}. {source['title']} ({source['type']})")

# Debug summary: corpus size and the configured API mode
# (the prompt sent to the model is not printed here)
print("\n=== Debug Info ===")
print(f"Total health documents: {len(HEALTH_DOCUMENTS)}")
print(f"API Mode: {'Mock' if USE_MOCK_MODE or not HUGGING_FACE_API_TOKEN else 'Real'}")
