In [None]:
# Install required packages for Supabase integration
# NOTE(review): versions are unpinned, so a fresh environment may resolve to
# different releases between runs - consider pinning once a working set is known.
# NOTE(review): psycopg2-binary is installed here but is not imported by any
# of the visible cells - confirm it is still needed.
%pip install sentence-transformers pandas numpy requests --quiet
%pip install supabase psycopg2-binary python-dotenv --quiet


In [None]:
import os
import json
import pandas as pd
from datetime import datetime
from typing import List, Dict, Any, Optional
import numpy as np
import requests

# Vector embeddings
from sentence_transformers import SentenceTransformer

# Supabase integration
from supabase import create_client, Client
from dotenv import load_dotenv

# Pull settings from a local .env file into the process environment
load_dotenv()

# Emit the three-line session banner with a single call
print(
    "üì¶ All packages imported successfully!",
    f"üïê Query session started at: {datetime.now():%Y-%m-%d %H:%M:%S}",
    "üîÑ Using Supabase pgvector instead of Chroma!",
    sep="\n",
)


In [None]:
# Retrieval settings shared by every later cell
CONFIG = dict(
    embedding_model="NeuML/pubmedbert-base-embeddings",
    table_name="medical_documents",
    embeddings_table="document_embeddings",
    top_k=5,                   # how many similar documents to retrieve
    similarity_threshold=0.5,  # minimum similarity score to consider
    max_context_length=2000,   # maximum characters for context
    use_flask_backend=True,    # use the existing Flask backend
)

# Supabase credentials are read from the environment (never hard-coded)
SUPABASE_CONFIG = dict(
    url=os.environ.get("NEXT_PUBLIC_SUPABASE_URL"),
    key=os.environ.get("NEXT_PUBLIC_SUPABASE_ANON_KEY"),
)

# Flask backend location and the endpoints used by this notebook
FLASK_CONFIG = dict(
    api_url=os.environ.get("FLASK_API_URL", "http://localhost:5000"),
    embed_endpoint="/embed",
    generate_endpoint="/generate",
)

# Report whether both Supabase credentials were found
if SUPABASE_CONFIG["url"] and SUPABASE_CONFIG["key"]:
    print("‚úÖ Supabase credentials loaded")
else:
    print(
        "‚ö†Ô∏è WARNING: Supabase credentials not found!",
        "Please set NEXT_PUBLIC_SUPABASE_URL and NEXT_PUBLIC_SUPABASE_ANON_KEY",
        sep="\n",
    )

# An empty API URL switches the Flask backend off
if FLASK_CONFIG["api_url"]:
    print(f"‚úÖ Flask backend configured: {FLASK_CONFIG['api_url']}")
else:
    print(
        "‚ö†Ô∏è WARNING: Flask API URL not found!",
        "Please set FLASK_API_URL in your environment",
        sep="\n",
    )
    CONFIG["use_flask_backend"] = False

# Summary of the effective query configuration
print(
    "\n‚öôÔ∏è Query Configuration:",
    f"   üéØ Retrieve top {CONFIG['top_k']} similar documents",
    f"   üìä Similarity threshold: {CONFIG['similarity_threshold']}",
    f"   üìè Max context length: {CONFIG['max_context_length']} chars",
    f"   ü§ñ Use Flask backend: {CONFIG['use_flask_backend']}",
    f"   üóÑÔ∏è Supabase tables: {CONFIG['table_name']}, {CONFIG['embeddings_table']}",
    sep="\n",
)


In [None]:
# Load PubMedBERT for query embeddings
print("üß† Loading PubMedBERT model...")
embedding_model = SentenceTransformer(CONFIG['embedding_model'])
print(f"‚úÖ Embedding model loaded: {CONFIG['embedding_model']}")

# Connect to Supabase database
print("üóÑÔ∏è Connecting to Supabase database...")
# Fail fast here: the configuration cell only warns about missing credentials
if not SUPABASE_CONFIG["url"] or not SUPABASE_CONFIG["key"]:
    raise ValueError("Supabase credentials not found. Please set NEXT_PUBLIC_SUPABASE_URL and NEXT_PUBLIC_SUPABASE_ANON_KEY")

supabase: Client = create_client(SUPABASE_CONFIG["url"], SUPABASE_CONFIG["key"])

# Test connection and get database stats
try:
    # count='exact' asks the server for the total row count; limit(1) keeps the
    # response body to a single row so the tables (including the embedding
    # vectors) are not downloaded just to read `.count`.
    docs_result = (
        supabase.table(CONFIG['table_name'])
        .select('*', count='exact')
        .limit(1)
        .execute()
    )
    embeddings_result = (
        supabase.table(CONFIG['embeddings_table'])
        .select('*', count='exact')
        .limit(1)
        .execute()
    )

    print(f"‚úÖ Connected to Supabase successfully!")
    print(f"üìä Database contains:")
    print(f"   üìÑ {docs_result.count} documents")
    print(f"   üîç {embeddings_result.count} embedded chunks")

    if docs_result.count == 0:
        print("‚ö†Ô∏è No documents found! Make sure to run embed_documents.ipynb first.")

except Exception as e:
    # Report, then re-raise: nothing below can work without the database
    print(f"‚ùå Failed to connect to Supabase: {e}")
    raise

# Test Flask backend connection (optional)
flask_available = False
if CONFIG['use_flask_backend']:
    try:
        print(f"üß™ Testing Flask backend connection...")
        health_url = f"{FLASK_CONFIG['api_url']}/health"
        response = requests.get(health_url, timeout=5)
        if response.status_code == 200:
            print(f"‚úÖ Flask backend is available at {FLASK_CONFIG['api_url']}")
            flask_available = True
        else:
            print(f"‚ö†Ô∏è Flask backend responded with status {response.status_code}")
    except Exception as e:
        # Best effort: an unreachable backend only disables answer generation
        print(f"‚ö†Ô∏è Flask backend not available: {e}")
        print("   Will proceed with context retrieval only")

# Later cells read this flag to decide whether to call the Flask backend
CONFIG['flask_available'] = flask_available
print("‚úÖ Setup complete!")


In [None]:
class WellnessRAGSystem:
    """RAG system for medical/wellness queries using Supabase and Flask backend.

    Retrieval embeds the question with the supplied embedding model and asks
    Supabase for similar chunks through the ``search_embeddings`` RPC, falling
    back to a client-side cosine-similarity scan. Answer generation is
    delegated to the Flask backend when ``config['flask_available']`` is truthy.

    Args:
        embedding_model: Object exposing ``encode(list_of_str)``; the first row
            of its result must support ``.tolist()``.
        supabase_client: Client exposing ``rpc(name, params).execute()`` and
            ``table(name).select(...).limit(n).execute()``.
        config: Dict read for ``similarity_threshold``, ``top_k`` and
            ``max_context_length``, plus the optional ``embeddings_table`` and
            ``flask_available`` keys.
        flask_config: Dict read for ``api_url`` and ``generate_endpoint``.
    """

    def __init__(self, embedding_model, supabase_client, config, flask_config):
        self.embedding_model = embedding_model
        self.supabase = supabase_client
        self.config = config
        self.flask_config = flask_config

    def retrieve_context(self, query: str) -> Dict[str, Any]:
        """Retrieve relevant document chunks from Supabase using pgvector similarity search.

        Returns:
            Dict with ``query``, ``context`` (the joined text of the chunks that
            fit in ``max_context_length``), ``retrieved_documents`` (every hit
            at or above the similarity threshold), ``total_documents_found``
            (hits before thresholding), ``documents_used`` (hits after
            thresholding) and ``context_length``.
        """
        # Generate query embedding using the local embedding model
        query_embedding = self.embedding_model.encode([query])[0].tolist()

        try:
            # Server-side vector similarity search
            rpc_result = self.supabase.rpc(
                'search_embeddings',
                {
                    'query_embedding': query_embedding,
                    'match_threshold': self.config['similarity_threshold'],
                    'match_count': self.config['top_k']
                }
            ).execute()

            if rpc_result.data:
                search_results = rpc_result.data
            else:
                # Fallback: manual similarity search if RPC returned nothing
                print("‚ö†Ô∏è RPC function not available, using fallback search...")
                search_results = self._fallback_similarity_search(query_embedding)

        except Exception as e:
            print(f"‚ö†Ô∏è Vector search error: {e}")
            search_results = self._fallback_similarity_search(query_embedding)

        # Keep only hits at or above the threshold; rank is the position in the
        # raw result list, so it can have gaps after filtering.
        retrieved_docs = []
        for i, result in enumerate(search_results):
            # `or` guards against explicit nulls coming back from the database
            similarity_score = result.get('similarity') or 0

            if similarity_score >= self.config['similarity_threshold']:
                retrieved_docs.append({
                    'content': result.get('chunk_content') or '',
                    'metadata': {
                        'source': result.get('source', 'Unknown'),
                        'topic': result.get('topic', 'Unknown'),
                        'title': result.get('title', 'Unknown'),
                        'document_type': result.get('document_type', 'Unknown')
                    },
                    'similarity_score': similarity_score,
                    'rank': i + 1
                })

        # Add chunks in order until the next one would exceed the budget.
        # Only chunk text counts toward the budget, not the "Source:" headers.
        context_parts = []
        total_chars = 0
        for doc in retrieved_docs:
            if total_chars + len(doc['content']) > self.config['max_context_length']:
                break
            context_parts.append(f"Source: {doc['metadata']['source']}\n{doc['content']}")
            total_chars += len(doc['content'])

        context = "\n\n".join(context_parts)

        return {
            'query': query,
            'context': context,
            'retrieved_documents': retrieved_docs,
            'total_documents_found': len(search_results),
            'documents_used': len(retrieved_docs),
            'context_length': len(context)
        }

    def _fallback_similarity_search(self, query_embedding: List[float]) -> List[Dict]:
        """Client-side cosine-similarity search used when the RPC path yields nothing.

        Reads at most 1000 rows from the embeddings table, so it is only
        suitable for small datasets. Returns the ``top_k`` rows sorted by
        descending similarity, or ``[]`` on any failure.
        """
        try:
            table_name = self.config.get('embeddings_table', 'document_embeddings')
            embeddings_result = self.supabase.table(table_name).select(
                'chunk_content, embedding, document_id'
            ).limit(1000).execute()  # Limit to prevent memory issues

            if not embeddings_result.data:
                return []

            # Loop-invariant query vector and norm
            query_emb = np.asarray(query_embedding, dtype=float)
            query_norm = np.linalg.norm(query_emb)
            if query_norm == 0:
                return []

            similarities = []
            skipped_rows = 0
            for row in embeddings_result.data:
                try:
                    raw_embedding = row['embedding']
                    # Vector columns may be serialized as a JSON-style string
                    if isinstance(raw_embedding, str):
                        raw_embedding = json.loads(raw_embedding)
                    embedding = np.asarray(raw_embedding, dtype=float)

                    denominator = query_norm * np.linalg.norm(embedding)
                    if embedding.shape != query_emb.shape or denominator == 0:
                        skipped_rows += 1
                        continue

                    similarities.append({
                        'chunk_content': row['chunk_content'],
                        'similarity': float(np.dot(query_emb, embedding) / denominator),
                        'document_id': row['document_id']
                    })
                except (KeyError, TypeError, ValueError):
                    skipped_rows += 1

            if skipped_rows:
                print(f"‚ö†Ô∏è Skipped {skipped_rows} embedding row(s) that could not be compared")

            similarities.sort(key=lambda item: item['similarity'], reverse=True)
            return similarities[:self.config['top_k']]

        except Exception as e:
            print(f"‚ùå Fallback search failed: {e}")
            return []

    def generate_response_flask(self, query: str, context: str) -> Optional[str]:
        """Generate a response through the Flask backend's generate endpoint.

        Returns:
            The ``answer`` field of the JSON reply (``''`` if absent), or
            ``None`` when the backend is flagged unavailable, replies with a
            non-200 status, or the request raises.
        """
        if not self.config.get('flask_available', False):
            return None

        try:
            generate_url = f"{self.flask_config['api_url']}{self.flask_config['generate_endpoint']}"

            payload = {
                'query': query,
                'context': context,
                'max_tokens': 200,
                'temperature': 0.7
            }

            response = requests.post(
                generate_url,
                json=payload,
                timeout=30,
                headers={'Content-Type': 'application/json'}
            )

            if response.status_code == 200:
                result = response.json()
                return result.get('answer', '')
            else:
                print(f"‚ö†Ô∏è Flask backend error: {response.status_code}")
                return None

        except Exception as e:
            print(f"‚ö†Ô∏è Error calling Flask backend: {e}")
            return None

    def generate_response(self, query: str, context: str) -> str:
        """Return a generated answer as a plain string.

        Kept for callers of the older method name; generation is delegated to
        :meth:`generate_response_flask`, and a fixed notice is returned when no
        answer is produced.
        """
        generated = self.generate_response_flask(query, context)
        if generated:
            return generated
        return "Response generation not available. Context retrieval completed."

    def query(self, question: str) -> Dict[str, Any]:
        """Complete RAG query: retrieve context and optionally generate a response.

        Returns:
            Dict carrying the current keys (``query``, ``answer``, camelCase
            ``metadata`` entries) together with the older aliases
            (``question``, ``response``, ``context``, ``num_sources``,
            ``context_length``, ``timestamp``) that the display and API cells
            read. Each entry in ``sources`` has ``title``, ``source``,
            ``topic``, ``similarity`` (float rounded to 3 places), ``rank``
            and ``content_preview``.
        """
        print(f"üîç Processing query: {question}")

        # Step 1: Retrieve relevant context
        context_result = self.retrieve_context(question)

        print(f"üìä Found {context_result['total_documents_found']} similar documents")
        print(f"üìÑ Using {context_result['documents_used']} documents for context")

        # Step 2: Generate response using Flask backend (optional)
        generated_response = None
        if self.config.get('flask_available', False):
            print("ü§ñ Generating response using Flask backend...")
            generated_response = self.generate_response_flask(
                question,
                context_result['context']
            )

            if generated_response:
                print("‚úÖ Response generated successfully")
            else:
                print("‚ö†Ô∏è Response generation failed")

        answer_text = generated_response or "Context retrieved successfully. Enable Flask backend for AI responses."
        timestamp = datetime.now().isoformat()
        retrieved = context_result['retrieved_documents']

        return {
            'query': question,
            'answer': answer_text,
            # Aliases read by display_rag_result / wellness_rag_api
            'question': question,
            'response': answer_text,
            'context': context_result['context'],
            'sources': [
                {
                    'title': doc['metadata']['title'],
                    'source': doc['metadata']['source'],
                    'topic': doc['metadata']['topic'],
                    # Numeric so consumers can apply their own formatting
                    'similarity': round(float(doc['similarity_score']), 3),
                    'rank': doc['rank'],
                    'content_preview': doc['content'][:150] + "..."
                }
                for doc in retrieved
            ],
            'metadata': {
                'documentsUsed': context_result['documents_used'],
                'totalFound': context_result['total_documents_found'],
                'contextLength': context_result['context_length'],
                'flaskBackendUsed': bool(generated_response),
                'processingTime': timestamp,
                'num_sources': len(retrieved),
                'context_length': context_result['context_length'],
                'timestamp': timestamp
            }
        }

# Initialize the RAG system
# The constructor takes the Supabase client and the Flask settings
rag_system = WellnessRAGSystem(
    embedding_model=embedding_model,
    supabase_client=supabase,
    config=CONFIG,
    flask_config=FLASK_CONFIG,
)
print("‚úÖ RAG system initialized and ready for queries!")


In [None]:
# Build the RAG pipeline from the model, database client and settings
# prepared in the earlier cells (arguments follow the constructor's order).
rag_system = WellnessRAGSystem(embedding_model, supabase, CONFIG, FLASK_CONFIG)

print(
    "üéØ WellnessGrid RAG system initialized!",
    "Ready to answer medical questions using Supabase + Flask backend integration",
    sep="\n",
)


In [None]:
# Sample medical questions used to smoke-test the RAG system
test_questions = [
    'What are the symptoms of diabetes?',
    'How can I manage high blood pressure?',
    'What are the risk factors for heart disease?',
    'How do I know if I have depression?',
    'What foods should I eat for better nutrition?',
    'What are the side effects of metformin?',
]

def display_rag_result(result: Dict[str, Any]):
    """Display RAG query results in a nice format.

    Args:
        result: Dict with ``question``, ``response``, ``sources`` (each with
            ``source``, ``topic``, ``similarity`` and ``content_preview``)
            and ``metadata`` (``num_sources``, ``context_length``,
            ``timestamp``).
    """
    print("=" * 80)
    print(f"‚ùì QUESTION: {result['question']}")
    print("=" * 80)

    print(f"\nü§ñ AI RESPONSE:")
    print(f"{result['response']}")

    print(f"\nüìö SOURCES ({result['metadata']['num_sources']} documents):")
    for i, source in enumerate(result['sources'], 1):
        print(f"   {i}. {source['source']} - {source['topic']}")
        # The percent format needs a number; coerce in case the similarity
        # arrives as a pre-formatted string.
        print(f"      üìä Similarity: {float(source['similarity']):.1%}")
        print(f"      üìÑ Preview: {source['content_preview']}")
        print()

    print(f"üìã METADATA:")
    print(f"   üîç Context length: {result['metadata']['context_length']} chars")
    print(f"   ‚è∞ Generated at: {result['metadata']['timestamp']}")
    print("\n")

# Test with a few questions
print("üß™ Testing RAG system with sample questions...\n")

# Only the first three questions are exercised to keep the cell quick
sample_questions = test_questions[:3]
for i, question in enumerate(sample_questions, 1):
    print(f"\nüìã Test {i}/{len(sample_questions)}")
    result = rag_system.query(question)
    display_rag_result(result)


In [None]:
# Interactive query interface
def interactive_query():
    """Interactive interface for asking medical questions.

    Loops on ``input()`` until the user types ``quit``/``exit``/``q``,
    declines to continue, interrupts the kernel, or the input stream ends.
    Each question is passed to ``rag_system.query`` and rendered with
    ``display_rag_result``; errors from a single question are reported and
    the loop continues.
    """
    print("ü©∫ WellnessGrid RAG System - Interactive Query Interface")
    print("=" * 60)
    print("Ask any medical or wellness question. Type 'quit' to exit.")
    print("Example questions:")
    for q in test_questions[:3]:
        print(f"  ‚Ä¢ {q}")
    print("=" * 60)

    while True:
        try:
            # Get user input
            question = input("\n‚ùì Your question: ").strip()

            if question.lower() in ['quit', 'exit', 'q']:
                print("üëã Thank you for using WellnessGrid RAG system!")
                break

            if not question:
                print("‚ö†Ô∏è Please enter a question.")
                continue

            # Process query
            print("\nüîÑ Processing your question...")
            result = rag_system.query(question)

            # Display results
            display_rag_result(result)

            # Ask if user wants to continue
            continue_query = input("‚ùì Ask another question? (y/n): ").strip().lower()
            if continue_query in ['n', 'no']:
                print("üëã Thank you for using WellnessGrid RAG system!")
                break

        except (KeyboardInterrupt, EOFError):
            # EOFError must end the session too: once stdin is exhausted every
            # further input() call fails, which would otherwise loop forever.
            print("\n\nüëã Session ended by user. Goodbye!")
            break
        except Exception as e:
            print(f"‚ùå Error: {e}")
            print("Please try again with a different question.")

# Announce the interactive interface; it is not started automatically
print(
    "üöÄ Starting interactive query interface...",
    "üí° Tip: Run this cell and then ask your medical questions!",
    sep="\n",
)
# Uncomment the next line to start interactive mode
# interactive_query()


In [None]:
# API-compatible function for WellnessGrid integration
def wellness_rag_api(question: str, user_context: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """
    API-compatible function for WellnessGrid app integration
    Matches the expected format from the current Flask backend

    Args:
        question: The user's question, passed to ``rag_system.query``.
        user_context: Accepted for interface compatibility; not used.

    Returns:
        Dict with ``response``, ``sources``, ``mockMode`` and ``metadata``.
        When processing raises, the dict carries an apology in ``response``,
        an empty ``sources`` list, ``mockMode`` set to True, the error text
        under ``error`` and a zeroed ``metadata`` so both outcomes share the
        same keys.
    """
    try:
        # Process the question with RAG
        result = rag_system.query(question)

        # Format response to match Flask API
        api_response = {
            "response": result['response'],
            "sources": [
                {
                    "title": f"{source['source']} - {source['topic']}",
                    "content": source['content_preview'],
                    "similarity": source['similarity']
                }
                for source in result['sources']
            ],
            "mockMode": False,
            "metadata": {
                "num_sources": result['metadata']['num_sources'],
                "context_length": result['metadata']['context_length'],
                "model": CONFIG['embedding_model'],
                "timestamp": result['metadata']['timestamp']
            }
        }

        return api_response

    except Exception as e:
        # Return error response in Flask API format
        return {
            "response": f"I apologize, but I encountered an error processing your question: {str(e)}. Please consult with a healthcare professional.",
            "sources": [],
            "mockMode": True,
            "error": str(e),
            # Same keys as the success path so callers can read metadata
            # without checking which branch produced the response.
            "metadata": {
                "num_sources": 0,
                "context_length": 0,
                "model": CONFIG['embedding_model'],
                "timestamp": datetime.now().isoformat()
            }
        }

# Test the API function
print("üîó Testing WellnessGrid API integration...")
test_question = "What are the symptoms of diabetes?"
api_result = wellness_rag_api(test_question)

print(f"‚úÖ API Response Format:")
print(f"   üìù Response: {api_result['response'][:100]}...")
print(f"   üìö Sources: {len(api_result['sources'])} documents")
print(f"   üîß Mock mode: {api_result['mockMode']}")
# .get(): the error response of wellness_rag_api may not carry metadata
print(f"   üìä Metadata: {list(api_result.get('metadata', {}).keys())}")

# Save API function for external use
print("\nüíæ Saving API function for integration...")

# Template written to disk. It mirrors this notebook's Supabase setup and the
# four-argument WellnessRAGSystem constructor; the class itself must be copied
# into the target module alongside it.
api_code = '''
def wellness_rag_query(question: str, user_context: dict = None):
    """
    Standalone function for WellnessGrid RAG integration
    Copy this function to your Flask backend or API endpoint
    (the WellnessRAGSystem class must be available in the same module)
    """
    # Load models and database (do this once at startup)
    import os
    from sentence_transformers import SentenceTransformer
    from supabase import create_client
    
    # Configuration
    CONFIG = {
        "embedding_model": "NeuML/pubmedbert-base-embeddings",
        "table_name": "medical_documents",
        "embeddings_table": "document_embeddings",
        "top_k": 5,
        "similarity_threshold": 0.5,
        "max_context_length": 2000,
        # Set to True once the generate endpoint below is reachable
        "flask_available": False
    }
    FLASK_CONFIG = {
        "api_url": os.getenv("FLASK_API_URL", "http://localhost:5000"),
        "embed_endpoint": "/embed",
        "generate_endpoint": "/generate"
    }
    
    # Load models
    embedding_model = SentenceTransformer(CONFIG['embedding_model'])
    supabase = create_client(
        os.getenv("NEXT_PUBLIC_SUPABASE_URL"),
        os.getenv("NEXT_PUBLIC_SUPABASE_ANON_KEY")
    )
    
    # Query the RAG system
    rag_system = WellnessRAGSystem(embedding_model, supabase, CONFIG, FLASK_CONFIG)
    result = rag_system.query(question)
    
    return {
        "response": result.get('response', result.get('answer')),
        "sources": result['sources'],
        "mockMode": False
    }
'''

# Explicit encoding keeps the written file identical across platforms
with open('wellness_rag_api.py', 'w', encoding='utf-8') as f:
    f.write(api_code)

print("‚úÖ API integration code saved to 'wellness_rag_api.py'")
print("\nüìã Integration Steps:")
print("1. Copy the RAG system classes to your Flask backend")
print("2. Initialize the models once at startup")
print("3. Replace the current '/ask' endpoint logic with wellness_rag_api()")
print("4. Update the Flask API URL in your frontend if needed")


In [None]:
print("üéâ RAG Query System Setup Complete!")
print("=" * 60)
print("‚úÖ What's Working:")
print("   ‚Ä¢ PubMedBERT embeddings for medical domain")
print("   ‚Ä¢ Chroma vector database with persistent storage")
print("   ‚Ä¢ Semantic search across medical documents")
print("   ‚Ä¢ Source attribution and similarity scores")
print("   ‚Ä¢ API-ready format for WellnessGrid integration")
if CONFIG['generate_response']:
    print("   ‚Ä¢ BioGPT response generation")
else:
    print("   ‚Ä¢ Context retrieval (response generation disabled)")

print("\\nüîÑ Next Steps:")
print("1. üìù Test more queries using the interactive interface above")
print("2. üîó Integrate with your WellnessGrid Flask backend:")
print("   - Copy the WellnessRAGSystem class")
print("   - Update your /ask endpoint to use wellness_rag_api()")
print("   - Update FLASK_API_URL in your Next.js app")
print("3. üìä Monitor query performance and similarity scores")
print("4. üîÑ Re-run embed_documents.ipynb to add more medical sources")
print("5. üéØ Fine-tune similarity thresholds and context length")

print("\\nüí° Advanced Features to Add:")
print("   ‚Ä¢ User-specific document upload")
print("   ‚Ä¢ Query history and analytics")
print("   ‚Ä¢ Multi-language support")
print("   ‚Ä¢ Custom medical domain fine-tuning")
print("   ‚Ä¢ Integration with health records")

print("\\n‚ö†Ô∏è  Important Reminders:")
print("   ‚Ä¢ Always include medical disclaimers in responses")
print("   ‚Ä¢ Direct users to healthcare professionals for serious concerns")
print("   ‚Ä¢ Keep the vector database updated with latest medical information")
print("   ‚Ä¢ Monitor for hallucinations and incorrect medical advice")

print("\\nüìä Current System Stats:")
if embedding_summary:
    print(f"   üìÑ Documents in knowledge base: {embedding_summary['statistics']['total_documents']}")
    print(f"   üß© Total chunks: {embedding_summary['statistics']['total_chunks']}")
    print(f"   üìê Embedding dimensions: {embedding_summary['statistics']['embedding_dimension']}")
print(f"   üîç Retrieving top {CONFIG['top_k']} results per query")
print(f"   üìè Max context length: {CONFIG['max_context_length']} characters")

print(f"\\nüïê Session completed at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print("=" * 60)
