In [None]:
# Install required packages for Google Colab
# NOTE(review): none of these installs pin a version, so a fresh runtime may
# resolve different releases than the ones this notebook was written against.
# Consider pinning (`pkg==x.y.z`) once known-good versions are confirmed.

# Model loading / text generation, plus the sentence-transformers package that
# the retrieval function imports to embed queries.
%pip install transformers torch sentence-transformers --quiet
# HTTP API (Flask + CORS) and the ngrok tunnel used to expose it.
%pip install flask flask-cors pyngrok --quiet
# Supabase client. python-dotenv is not imported in the visible cells —
# TODO confirm it is still needed.
%pip install supabase python-dotenv --quiet
# sacremoses is not imported directly in the visible cells; presumably it is a
# runtime requirement of the BioGPT tokenizer — TODO confirm.
%pip install sacremoses --quiet


In [None]:
import os
import json
from datetime import datetime
from typing import List, Dict, Any, Optional
import torch
from getpass import getpass

# AI Models
from transformers import AutoTokenizer, AutoModelForCausalLM

# Supabase
from supabase import create_client

# Flask API
from flask import Flask, request, jsonify
from flask_cors import CORS

# Startup banner: confirm the imports, timestamp the session, report the device.
print("üì¶ All packages imported successfully!")
print("üïê RAG session started at: " + datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
print("üîß Using device: {}".format("cuda" if torch.cuda.is_available() else "cpu"))
print("ü§ñ Loading BioGPT model for medical text generation...")


In [None]:
# Load BioGPT for medical text generation
print("üß† Loading BioGPT for medical text generation...")

# Resolve the device before loading so the weight precision can match it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Half precision is only selected on CUDA. On CPU the weights stay in float32,
# because float16 inference on CPU is slow and, depending on the PyTorch
# version, some ops are not implemented for Half tensors at all.
model_dtype = torch.float16 if device.type == "cuda" else torch.float32

tokenizer = AutoTokenizer.from_pretrained("microsoft/BioGPT")
biogpt_model = AutoModelForCausalLM.from_pretrained(
    "microsoft/BioGPT",
    torch_dtype=model_dtype,
    low_cpu_mem_usage=True
)

# Move to GPU if available
biogpt_model = biogpt_model.to(device)

# Set pad token (generation below passes pad_token_id explicitly)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

print(f"‚úÖ BioGPT loaded and moved to {device}")

# Setup Supabase connection
print("üóÑÔ∏è Setting up Supabase connection...")
print("Please enter your Supabase credentials:")
# getpass keeps the values out of the notebook output; strip() drops the
# stray whitespace/newline that often comes along with a pasted secret.
supabase_url = getpass("Supabase URL: ").strip()
supabase_key = getpass("Supabase Service Role Key: ").strip()

supabase = create_client(supabase_url, supabase_key)
print("‚úÖ Supabase client initialized")

# Configuration shared by retrieval and generation
CONFIG = {
    "top_k": 5,                   # documents requested per query
    "similarity_threshold": 0.5,  # passed to the RPC as match_threshold
    "max_context_length": 2000,   # character budget for the assembled context
    "max_response_length": 150,   # passed to generate_biogpt_response as max_length
}

print(f"\n‚öôÔ∏è RAG Configuration:")
print(f"   üéØ Retrieve top {CONFIG['top_k']} similar documents")
print(f"   üìä Similarity threshold: {CONFIG['similarity_threshold']}")
print(f"   üìè Max context length: {CONFIG['max_context_length']} chars")


In [None]:
# Supabase document retrieval functions

# Embedding model name; the original comment states it is the same model used
# for indexing, so queries must be embedded with it too.
_EMBEDDING_MODEL_NAME = 'NeuML/pubmedbert-base-embeddings'

# Loaded embedding models keyed by name, so the model is instantiated once per
# kernel instead of once per query.
_EMBEDDING_MODELS: Dict[str, Any] = {}


def _get_embedding_model(model_name: str = _EMBEDDING_MODEL_NAME):
    """Return the SentenceTransformer for `model_name`, loading it on first use."""
    if model_name not in _EMBEDDING_MODELS:
        # Imported lazily so an import failure is handled by the caller's
        # fallback path instead of breaking the whole cell.
        from sentence_transformers import SentenceTransformer
        _EMBEDDING_MODELS[model_name] = SentenceTransformer(model_name)
    return _EMBEDDING_MODELS[model_name]


def _build_document(row: Dict[str, Any], rank: int, content_key: str,
                    id_key: str, similarity: float) -> Dict[str, Any]:
    """Convert one Supabase row into the document dict used by the RAG system."""
    doc_id = row.get(id_key, '')
    return {
        'content': row.get(content_key, ''),
        'similarity_score': similarity,
        'metadata': {
            'title': row.get('title', 'Medical Document'),
            'source': row.get('source', 'unknown'),
            'topic': row.get('topic', 'general'),
            'document_type': row.get('document_type', 'unknown'),
            'document_id': doc_id
        },
        'rank': rank,
        'doc_id': doc_id
    }


def query_supabase_documents(query: str, top_k: Optional[int] = None) -> List[Dict[str, Any]]:
    """Query Supabase for similar documents using vector search.

    The query is embedded and sent to the `search_embeddings` RPC. If any step
    of that path raises, the function falls back to reading up to `top_k` rows
    straight from the `medical_documents` table.

    Args:
        query: Free-text query to search for.
        top_k: Maximum number of documents to return. `None` or 0 means
            `CONFIG['top_k']`.

    Returns:
        A list (possibly empty, never `None`) of dicts with the keys
        `content`, `similarity_score`, `metadata`, `rank` and `doc_id`.
        Fallback rows are NOT ranked by similarity; they carry a placeholder
        `similarity_score` of 0.8.
    """
    # Resolved before the try block so the fallback path always sees a real
    # limit, even when the failure happens before any other statement runs.
    top_k = top_k or CONFIG['top_k']

    try:
        # Load the same embedding model used for indexing (cached after first use)
        print(f"üîç Loading embedding model for query: {query[:50]}...")
        embedding_model = _get_embedding_model()

        # Generate embedding for the query
        print(f"üß† Generating embedding vector...")
        query_embedding = embedding_model.encode([query])[0].tolist()

        # Use the correct RPC function from schema.sql: search_embeddings
        print(f"üîç Searching embeddings with threshold {CONFIG['similarity_threshold']}...")
        result = supabase.rpc('search_embeddings', {
            'query_embedding': query_embedding,
            'match_threshold': CONFIG['similarity_threshold'],
            'match_count': top_k
        }).execute()

        if not result.data:
            print("‚ö†Ô∏è No similar documents found in Supabase")
            return []

        documents = [
            # 'chunk_content' / 'document_id' are the field names returned by the RPC
            _build_document(row, rank, 'chunk_content', 'document_id', row.get('similarity', 0.0))
            for rank, row in enumerate(result.data, start=1)
        ]
        print(f"üìä Found {len(documents)} similar documents from Supabase")
        return documents

    except Exception as e:
        print(f"‚ùå Error querying Supabase: {str(e)}")

    # Fallback: try direct table query if RPC function doesn't exist
    try:
        print("üîÑ Trying fallback query method...")
        result = supabase.table('medical_documents').select('*').limit(top_k).execute()

        rows = (result.data or [])[:top_k]
        documents = [
            # 0.8 is a placeholder: a plain table read has no similarity score
            _build_document(row, rank, 'content', 'id', 0.8)
            for rank, row in enumerate(rows, start=1)
        ]
        print(f"üìä Fallback: Retrieved {len(documents)} documents from Supabase")
        return documents

    except Exception as fallback_error:
        print(f"‚ùå Fallback query also failed: {str(fallback_error)}")
        return []

# Test Supabase connection and RPC functions
print("üß™ Testing Supabase connection...")
try:
    # Test basic connection.
    # count='exact' asks the API for the table's total row count, exposed on
    # the response as `.count`; limit(1) keeps the payload small because only
    # the count is used. (len(data) would only count the rows in this response,
    # not the rows in the table.)
    test_result = (
        supabase.table('medical_documents')
        .select('*', count='exact')
        .limit(1)
        .execute()
    )
    doc_count = test_result.count or 0
    print(f"‚úÖ Supabase connected - Found {doc_count} documents in database")

    # Test RPC function availability
    print("üß™ Testing RPC functions...")
    try:
        stats_result = supabase.rpc('get_document_stats').execute()
        if stats_result.data:
            print("‚úÖ RPC functions working")
            for stat in stats_result.data[:3]:  # Show first 3 document sources
                print(f"   üìÑ {stat['source']}: {stat['count']} documents")
        else:
            print("‚ö†Ô∏è RPC function exists but returned no data")
    except Exception as rpc_error:
        # Deliberately non-fatal: retrieval has a direct-table fallback.
        print(f"‚ö†Ô∏è RPC function test failed: {str(rpc_error)}")
        print("   Vector search will use fallback method")

except Exception as e:
    # Deliberately non-fatal so the remaining cells can still run.
    print(f"‚ö†Ô∏è Supabase connection test failed: {str(e)}")
    print("   The system will still work but may have limited document retrieval")


In [None]:
def generate_biogpt_response(prompt: str, max_length: int = 150) -> str:
    """Generate medical response using BioGPT.

    Args:
        prompt: Full prompt text (question plus retrieved context).
        max_length: Maximum number of NEW tokens to generate after the prompt.

    Returns:
        The generated continuation with the prompt removed. If the continuation
        is empty or shorter than 10 characters, a generic "consult a
        professional" sentence is returned instead. If generation raises, the
        error is printed and a generic apology is returned; this function does
        not propagate exceptions.
    """
    try:
        input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)
        prompt_length = input_ids.shape[-1]

        with torch.no_grad():
            outputs = biogpt_model.generate(
                input_ids,
                # Single un-padded sequence: every position is a real token.
                attention_mask=torch.ones_like(input_ids),
                # Same budget as "prompt length + max_length" total tokens.
                max_new_tokens=max_length,
                num_return_sequences=1,
                temperature=0.7,
                do_sample=True,
                pad_token_id=tokenizer.pad_token_id,
                eos_token_id=tokenizer.eos_token_id,
                no_repeat_ngram_size=2
            )

        # Decode only the tokens produced after the prompt. Removing the prompt
        # with a string replace is unreliable because decoding does not always
        # reproduce the prompt text verbatim (spacing/normalisation changes).
        generated_ids = outputs[0][prompt_length:]
        response = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

        if not response or len(response) < 10:
            response = "I understand your question about health. Please consult with a healthcare professional for personalized medical advice."

        return response
    except Exception as e:
        # Keep the user-facing message generic, but leave a trace in the
        # notebook output so failures are diagnosable.
        print(f"BioGPT generation failed: {str(e)}")
        return "I apologize, but I encountered an error processing your question. Please try rephrasing your question or consult with a healthcare professional."

class WellnessRAGSystem:
    """RAG system for medical/wellness queries using Supabase and BioGPT.

    Retrieval is delegated to `query_supabase_documents` and generation to
    `generate_biogpt_response`; this class assembles the context and shapes
    the result.
    """

    def __init__(self, config):
        """Store the configuration dict.

        The dict is kept by reference (not copied). Keys read by this class:
        'top_k', 'max_context_length', 'max_response_length'.
        """
        self.config = config

    def retrieve_context(self, query: str) -> Dict[str, Any]:
        """Retrieve relevant document chunks from Supabase.

        Documents are appended in the order returned until adding the next one
        would exceed `max_context_length` characters (counting document content
        only, not the "Source:" headers or separators); the first document that
        does not fit ends the selection.

        Returns:
            Dict with 'query', 'context', 'retrieved_documents',
            'total_documents_found' (all retrieved), 'documents_used' (those
            actually placed in the context) and 'context_length'.
        """
        # `or []` guards against a retrieval layer that returns None.
        retrieved_docs = query_supabase_documents(query, self.config['top_k']) or []

        context_parts = []
        total_chars = 0

        for doc in retrieved_docs:
            if total_chars + len(doc['content']) > self.config['max_context_length']:
                break
            context_parts.append(f"Source: {doc['metadata']['source']}\n{doc['content']}")
            total_chars += len(doc['content'])

        context = "\n\n".join(context_parts)

        return {
            'query': query,
            'context': context,
            'retrieved_documents': retrieved_docs,
            'total_documents_found': len(retrieved_docs),
            # Count only what fit in the context budget, not everything retrieved.
            'documents_used': len(context_parts),
            'context_length': len(context)
        }

    def query(self, question: str) -> Dict[str, Any]:
        """Complete RAG query: retrieve context and generate response.

        Returns:
            Dict with 'query', 'response', 'sources' (one entry per retrieved
            document, including any that did not fit in the context) and
            'metadata'.
        """
        print(f"üîç Processing query: {question}")

        context_result = self.retrieve_context(question)

        print(f"üìä Found {context_result['total_documents_found']} similar documents")
        print(f"üìÑ Using {context_result['documents_used']} documents for context")

        print("ü§ñ Generating response using BioGPT...")
        medical_prompt = f"Medical Question: {question}\n{context_result['context']}\nAnswer:"
        generated_response = generate_biogpt_response(medical_prompt, self.config['max_response_length'])
        print("‚úÖ Response generated successfully")

        result = {
            'query': question,
            'response': generated_response,
            'sources': [
                {
                    'title': doc['metadata'].get('title', 'Medical Document'),
                    'source': doc['metadata']['source'],
                    'topic': doc['metadata']['topic'],
                    # Formatted as a string here; the /ask endpoint converts it back to float.
                    'similarity': f"{doc['similarity_score']:.3f}",
                    'rank': doc['rank'],
                    'content_preview': doc['content'][:150] + "..."
                }
                for doc in context_result['retrieved_documents']
            ],
            'metadata': {
                'documentsUsed': context_result['documents_used'],
                'totalFound': context_result['total_documents_found'],
                'contextLength': context_result['context_length'],
                'model': 'BioGPT',
                'embeddings': 'Supabase pgvector',
                # NOTE: despite the key name this is a completion timestamp, not a duration.
                'processingTime': datetime.now().isoformat()
            }
        }

        return result

# Initialize the RAG system
# Module-level instance used by the /ask endpoint and by the test cell further
# down. WellnessRAGSystem stores the dict it is given without copying, so this
# instance shares CONFIG by reference.
rag_system = WellnessRAGSystem(config=CONFIG)
print("‚úÖ WellnessGrid RAG system initialized!")


In [None]:
# Flask API Setup
# `app` must exist before the @app.route decorators in this cell are evaluated.
app = Flask(__name__)
# NOTE(review): CORS is enabled with no options; presumably this allows
# cross-origin requests from any origin on every route — confirm that is
# intended for an API exposed through a public tunnel.
CORS(app)

@app.route('/query', methods=['POST'])
def query_docs():
    """Query similar documents from Supabase.

    Expects a JSON object body with:
        query (str, required): text to search for.
        top_k (int, optional): maximum number of documents; missing, null or 0
            means CONFIG['top_k'].

    Returns:
        200 with a JSON list of documents (possibly empty), or 400 with an
        `error` message when the body is invalid.
    """
    # silent=True returns None for a missing/malformed JSON body instead of
    # raising, so bad requests get a clean 400 rather than an unhandled error.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    query = data.get("query", "")
    if not isinstance(query, str) or not query.strip():
        return jsonify({"error": "Missing 'query' field."}), 400

    # The value comes from the request body, so validate it before it is
    # forwarded to the database call. bool is excluded explicitly because it
    # is a subclass of int.
    top_k = data.get("top_k") or CONFIG['top_k']
    if isinstance(top_k, bool) or not isinstance(top_k, int) or top_k < 1:
        return jsonify({"error": "'top_k' must be a positive integer."}), 400

    results = query_supabase_documents(query, top_k=top_k)
    # Always answer with a JSON list, even if retrieval yields nothing.
    return jsonify(results or [])

@app.route('/ask', methods=['POST'])
def ask_rag():
    """Main RAG endpoint for WellnessGrid app.

    Expects a JSON object body with a non-empty string `question`.

    Returns:
        200 with `response`, `sources`, `mockMode` (False) and `metadata`;
        400 with `error` when the question is missing or the body is not a
        JSON object; 500 with `mockMode` True and `error` when the RAG
        pipeline raises.
    """
    # Validate the request outside the try block so client mistakes are
    # reported as 400 rather than as a 500 "mock" response. silent=True makes
    # a missing/malformed JSON body come back as None instead of raising.
    data = request.get_json(silent=True)
    question = data.get("question", "") if isinstance(data, dict) else ""

    if not isinstance(question, str) or not question.strip():
        return jsonify({"error": "Missing 'question' field."}), 400

    try:
        result = rag_system.query(question)

        return jsonify({
            "response": result['response'],
            "sources": [
                {
                    "title": source['title'],
                    "content": source['content_preview'],
                    # The RAG result carries similarity as a formatted string.
                    "similarity": float(source['similarity'])
                }
                for source in result['sources']
            ],
            "mockMode": False,
            "metadata": result['metadata']
        })

    except Exception as e:
        # NOTE(review): the exception text is returned to the caller; confirm
        # that exposing internal error details on a public endpoint is acceptable.
        return jsonify({
            "response": f"I apologize, but I encountered an error processing your question: {str(e)}",
            "sources": [],
            "mockMode": True,
            "error": str(e)
        }), 500

@app.route('/health', methods=['GET'])
def health_check():
    """Health check endpoint.

    Returns:
        JSON with `status` "healthy" and the number of rows in
        `medical_documents` when Supabase answers, or `status` "partial" with a
        `warning` when the Supabase call raises. Both cases respond with HTTP 200.
    """
    try:
        # Test Supabase connection.
        # count='exact' makes the API report the table's total row count on
        # `.count`; limit(1) avoids downloading the table just to count it.
        # (len(data) would only count the rows in this response.)
        test_result = (
            supabase.table('medical_documents')
            .select('*', count='exact')
            .limit(1)
            .execute()
        )
        doc_count = test_result.count or 0

        return jsonify({
            "status": "healthy",
            "model": "BioGPT",
            "database": "Supabase + pgvector",
            "documents_in_db": doc_count,
            "rag_system": "active"
        })
    except Exception as e:
        return jsonify({
            "status": "partial",
            "model": "BioGPT",
            "database": "Supabase (connection issues)",
            "documents_in_db": "unknown",
            "rag_system": "active",
            "warning": str(e)
        })

# Summarise the routes registered above in a single write.
print("\n".join([
    "üåê Flask API endpoints configured:",
    "  - POST /ask - Main RAG endpoint for WellnessGrid",
    "  - GET /health - Health check",
    "  - POST /query - Query documents from Supabase",
    "‚úÖ Ready to start ngrok tunnel and Flask server",
]))


In [None]:
# Smoke-test the RAG pipeline end to end with one sample question.
# Any failure is reported and swallowed so the server cell can still be run.
print("üß™ Testing RAG system with sample question...")

test_question = "What are the symptoms of diabetes?"
try:
    print("üîç Testing query: " + test_question)
    result = rag_system.query(test_question)

    # Question header
    print("\n" + "=" * 80)
    print("‚ùì QUESTION: {}".format(result['query']))
    print("=" * 80)

    # Generated answer
    print("\nü§ñ AI RESPONSE:")
    print("{}".format(result['response']))

    # Retrieved sources, or hints when nothing came back
    print("\nüìö SOURCES ({} documents):".format(result['metadata']['documentsUsed']))
    if not result['sources']:
        print("\n".join([
            "   ‚ö†Ô∏è No sources found - this could indicate:",
            "   ‚Ä¢ No documents in database yet",
            "   ‚Ä¢ Similarity threshold too high",
            "   ‚Ä¢ RPC function needs adjustment",
        ]))
    else:
        for i, source in enumerate(result['sources'], 1):
            print("   {}. {} - {}".format(i, source['title'], source['source']))
            print("      üìä Similarity: {}".format(source['similarity']))
            print("      üìÑ Preview: {}".format(source['content_preview']))
            print()

    # Run metadata
    print("\nüìä Metadata:")
    print("   üîß Model: {}".format(result['metadata']['model']))
    print("   üíæ Embeddings: {}".format(result['metadata']['embeddings']))
    print("   üìÑ Documents Used: {}".format(result['metadata']['documentsUsed']))
    print("   üéØ Total Found: {}".format(result['metadata']['totalFound']))

    print("‚úÖ RAG system test completed!")

except Exception as e:
    print("‚ö†Ô∏è RAG test failed: " + str(e))
    print("\n".join([
        "   This might be normal if:",
        "   ‚Ä¢ Supabase connection needs adjustment",
        "   ‚Ä¢ No documents have been embedded yet",
        "   ‚Ä¢ RPC function is not deployed",
        "   The Flask server will still start and you can test via the API",
    ]))


In [None]:
# ngrok setup and Flask server startup
from pyngrok import ngrok

print("üîë Please enter your ngrok auth token (get it from https://ngrok.com)")
print("   Your token will be hidden for security")
# strip() removes whitespace/newlines that tend to come along with a pasted token.
token = getpass("Enter ngrok auth token: ").strip()

ngrok.set_auth_token(token)
print("‚úÖ Ngrok token set successfully!")

# Validate setup
print("üîç Validating setup...")

# Stays None until a tunnel is actually opened, so cleanup knows whether
# there is anything to close.
public_url = None

try:
    # Check everything is loaded (a missing name raises NameError, which is
    # reported by the handler below just like a failed assertion).
    assert biogpt_model is not None, "BioGPT model not loaded"
    assert tokenizer is not None, "Tokenizer not loaded"
    assert supabase is not None, "Supabase client not initialized"
    assert rag_system is not None, "RAG system not initialized"

    print(f"‚úÖ BioGPT model: Loaded")
    print(f"‚úÖ Supabase client: Initialized")
    print(f"‚úÖ RAG system: Initialized")
    print(f"‚úÖ Device: {device}")

    # Test Supabase connection one more time
    try:
        # count='exact' reports the table's total row count on `.count`;
        # limit(1) keeps the payload small since only the count is used.
        test_result = (
            supabase.table('medical_documents')
            .select('*', count='exact')
            .limit(1)
            .execute()
        )
        doc_count = test_result.count or 0
        print(f"‚úÖ Supabase: Connected ({doc_count} documents)")
    except Exception as db_error:
        print(f"‚ö†Ô∏è Supabase: Connection issues ({str(db_error)})")
        print("   RAG system will still work but may have limited retrieval")

    # Start ngrok tunnel
    print("üåê Starting ngrok tunnel...")
    public_url = ngrok.connect(5000)
    # connect() returns a tunnel object in current pyngrok releases; print the
    # bare URL so it can be copied as-is. getattr keeps this working if a
    # plain string is returned instead.
    tunnel_url = getattr(public_url, "public_url", public_url)
    print(f"üåç Public URL: {tunnel_url}")
    print("üìã Copy this URL to your WellnessGrid app configuration!")

    # Start Flask app
    print("üöÄ Starting Flask app...")
    print("üì° Available endpoints:")
    print("  - POST /ask - Main RAG endpoint for WellnessGrid")
    print("  - GET /health - Health check")
    print("  - POST /query - Query documents from Supabase")
    print("\n‚ö†Ô∏è  Keep this cell running to maintain the server!")
    print("\nüéØ Your WellnessGrid RAG system is now live!")

    # Blocks until the server is stopped (e.g. the cell is interrupted).
    app.run(host='0.0.0.0', port=5000, debug=False)

except Exception as e:
    print(f"‚ùå Setup validation failed: {str(e)}")
    print("Please run all previous cells in order before starting the Flask app.")

finally:
    # Close the tunnel once the server stops, so re-running this cell does not
    # pile up open tunnels.
    if public_url is not None:
        try:
            ngrok.disconnect(getattr(public_url, "public_url", public_url))
        except Exception as disconnect_error:
            print(f"Could not close ngrok tunnel: {str(disconnect_error)}")
