In [47]:
import sys

sys.path.append('..')

from src.config import SnowflakeConfig, validate_config
from src.utils.snowflake_helper import SnowflakeHelper
from snowflake.cortex import complete, extract_answer, summarize
from snowflake.core import Root

# Validate the configuration before attempting any connection.
validate_config()

# Build the helper from the configured connection parameters, then open the
# session object that the later cells use for SQL and Cortex calls.
config = SnowflakeConfig()
connection_params = config.get_connection_params()
sf_helper = SnowflakeHelper(connection_params)
session = sf_helper.connect()

print("✓ Cortex Search Service Test initialized")

✓ Configuration validated successfully
✓ Connected to Snowflake as harismad
  Role: "DEV_ROLE"
  Warehouse: "TEST_WAREHOUSE"
  Database: "TEST_DATABASE"
  Schema: "TEST_SCHEMA"
✓ Cortex Search Service Test initialized


### Prepare Data for Cortex Search

In [2]:
def prepare_search_content():
    """Load SOP and facility rows, each with a combined SEARCH_CONTENT text column.

    Both queries run through the notebook-level ``sf_helper``; after each one
    the number of returned rows is printed.

    Returns:
        tuple: ``(sop_df, facility_df)`` — the result of the SOP query followed
        by the result of the facility query.
    """

    def _load(sql, label):
        # Run one query and report how many records came back.
        frame = sf_helper.execute_query(sql)
        print(f"✓ Prepared {len(frame)} {label} records for search")
        return frame

    # SOP text: title and category are prepended to the SOP body.
    sop_sql = """
            SELECT SOP_ID,
                   SOP_TITLE || ' - ' || SOP_CATEGORY || '. ' || SOP_CONTENT as SEARCH_CONTENT,
                   SOP_TITLE,
                   SOP_CATEGORY,
                   DEPARTMENT,
                   SOP_CONTENT,
                   LAST_UPDATED
            FROM hospital_sop \
            """

    # Facility text: one sentence built from name, type, location, hours and contact.
    facility_sql = """
                     SELECT FACILITY_ID,
                            FACILITY_NAME || ' is a ' || FACILITY_TYPE ||
                            ' located at ' || LOCATION ||
                            '. Operating hours: ' || OPERATING_HOURS ||
                            '. Contact: ' || CONTACT_INFO as SEARCH_CONTENT,
                            FACILITY_NAME,
                            FACILITY_TYPE,
                            LOCATION,
                            CAPACITY,
                            CURRENT_USAGE,
                            OPERATING_HOURS,
                            STATUS
                     FROM hospital_facilities \
                     """

    # Order matters for the printed output: SOPs first, then facilities.
    sop_rows = _load(sop_sql, "SOP")
    facility_rows = _load(facility_sql, "facility")
    return sop_rows, facility_rows


# Load both frames once; the sample-display cell reads sop_df and facility_df.
sop_df, facility_df = prepare_search_content()

✓ Prepared 20 SOP records for search
✓ Prepared 25 facility records for search


In [3]:
# Show the first three rows (ID and name columns only) of each prepared frame.
for heading, frame, columns in (
    ("\nSOP Search Content Sample:", sop_df, ['SOP_ID', 'SOP_TITLE']),
    ("\nFacility Search Content Sample:", facility_df, ['FACILITY_ID', 'FACILITY_NAME']),
):
    print(heading)
    print(frame[columns].head(3))


SOP Search Content Sample:
     SOP_ID                            SOP_TITLE
0  SOP-0001     Appointment Scheduling Procedure
1  SOP-0002  Personal Protective Equipment Usage
2  SOP-0003   Medication Administration Protocol

Facility Search Content Sample:
  FACILITY_ID     FACILITY_NAME
0    FAC-0001     MRI Scanner 5
1    FAC-0002         ICU Bed 4
2    FAC-0003  Emergency Room 2


### Create Cortex Search Service for SOPs

In [4]:
# View that adds SEARCH_DOCUMENT: title, category, department and content
# concatenated into a single text column for the search service to read.
create_sop_search_view = """
                         CREATE
                         OR REPLACE VIEW sop_search_view AS
                         SELECT SOP_ID,
                                SOP_TITLE,
                                SOP_CATEGORY,
                                DEPARTMENT,
                                SOP_CONTENT,
                                SOP_TITLE || ' - ' || SOP_CATEGORY || '. ' ||
                                'Department: ' || DEPARTMENT || '. ' ||
                                SOP_CONTENT as SEARCH_DOCUMENT,
                                LAST_UPDATED,
                                VERSION
                         FROM hospital_sop \
                         """

# Failures are reported but not re-raised, so later cells still run.
try:
    session.sql(create_sop_search_view).collect()
except Exception as view_error:
    print(f"Error creating view: {view_error}")
else:
    print("✓ Created SOP search view")

✓ Created SOP search view


In [6]:
# Cortex Search service over sop_search_view, searching the SEARCH_DOCUMENT
# column. The warehouse name is hard-coded to TEST_WAREHOUSE.
create_sop_search_service = """
CREATE OR REPLACE CORTEX SEARCH SERVICE sop_search_service
ON SEARCH_DOCUMENT
ATTRIBUTES SOP_ID, SOP_TITLE, SOP_CATEGORY, DEPARTMENT, SOP_CONTENT
WAREHOUSE = TEST_WAREHOUSE
TARGET_LAG = '1 minute'
AS (
    SELECT
        SOP_ID,
        SOP_TITLE,
        SOP_CATEGORY,
        DEPARTMENT,
        SOP_CONTENT,
        SEARCH_DOCUMENT
    FROM sop_search_view
)
"""

# Any failure is printed as a note rather than raised.
try:
    session.sql(create_sop_search_service).collect()
except Exception as service_error:
    print(f"Note: {service_error}")
    print("  Search service might already exist or needs permissions")
else:
    print("✓ Created Cortex Search Service: sop_search_service")
    print("  Note: Service may take a few minutes to build index")

✓ Created Cortex Search Service: sop_search_service
  Note: Service may take a few minutes to build index


### Create Cortex Search Service for Facilities

In [7]:
# View over operational facilities only (STATUS = 'OPERATIONAL'), adding a
# SEARCH_DOCUMENT sentence assembled from the descriptive columns.
create_facility_search_view = """
                              CREATE
                              OR REPLACE VIEW facility_search_view AS
                              SELECT FACILITY_ID,
                                     FACILITY_NAME,
                                     FACILITY_TYPE,
                                     LOCATION,
                                     FACILITY_NAME || ' is a ' || FACILITY_TYPE ||
                                     ' located at ' || LOCATION ||
                                     '. Capacity: ' || CAPACITY ||
                                     '. Operating hours: ' || OPERATING_HOURS ||
                                     '. Equipment: ' || EQUIPMENT_LIST ||
                                     '. Contact: ' || CONTACT_INFO as SEARCH_DOCUMENT,
                                     CAPACITY,
                                     CURRENT_USAGE,
                                     OPERATING_HOURS,
                                     CONTACT_INFO,
                                     STATUS
                              FROM hospital_facilities
                              WHERE STATUS = 'OPERATIONAL' \
                              """

# Failures are reported but not re-raised, so later cells still run.
try:
    session.sql(create_facility_search_view).collect()
except Exception as view_error:
    print(f"Error creating view: {view_error}")
else:
    print("✓ Created facility search view")

✓ Created facility search view


In [8]:
# Cortex Search service over facility_search_view, searching the
# SEARCH_DOCUMENT column. The warehouse name is hard-coded to TEST_WAREHOUSE.
create_facility_search_service = """
CREATE OR REPLACE CORTEX SEARCH SERVICE facility_search_service
ON SEARCH_DOCUMENT
ATTRIBUTES FACILITY_ID, FACILITY_NAME, FACILITY_TYPE, LOCATION, CAPACITY, OPERATING_HOURS
WAREHOUSE = TEST_WAREHOUSE
TARGET_LAG = '1 minute'
AS (
    SELECT
        FACILITY_ID,
        FACILITY_NAME,
        FACILITY_TYPE,
        LOCATION,
        CAPACITY,
        CURRENT_USAGE,
        OPERATING_HOURS,
        CONTACT_INFO,
        STATUS,
        SEARCH_DOCUMENT
    FROM facility_search_view
)
"""

# Any failure is printed as a note rather than raised.
try:
    session.sql(create_facility_search_service).collect()
except Exception as service_error:
    print(f"Note: {service_error}")
else:
    print("✓ Created Cortex Search Service: facility_search_service")

✓ Created Cortex Search Service: facility_search_service


### Test Cortex Search - Basic Queries

In [30]:
def cortex_search_sop(query: str, limit: int = 5):
    """Query the SOP Cortex Search service and return its raw response.

    Args:
        query: Free-text search string.
        limit: Maximum number of hits to request (default 5).

    Returns:
        The response object from the service's ``search`` call. Callers in this
        notebook read the hits through ``.to_dict().get('results', [])``.
    """
    sop_service = (
        Root(session)
        .databases["TEST_DATABASE"]
        .schemas["TEST_SCHEMA"]
        .cortex_search_services["SOP_SEARCH_SERVICE"]
    )
    return sop_service.search(
        query=query,
        # SOP_CONTENT is requested too: the RAG agent builds its prompt and its
        # extract_answer input from sop.get('SOP_CONTENT', ''), which is always
        # empty when this column is not part of the returned columns.
        columns=["SEARCH_DOCUMENT", "SOP_ID", "SOP_TITLE", "SOP_CATEGORY", "SOP_CONTENT"],
        limit=limit,
    )


def cortex_search_facility(query: str, limit: int = 5):
    """Query the facility Cortex Search service and return its raw response.

    Args:
        query: Free-text search string.
        limit: Maximum number of hits to request (default 5).

    Returns:
        The response object from the service's ``search`` call.
    """
    # Columns returned for every hit.
    wanted_columns = [
        "SEARCH_DOCUMENT",
        "FACILITY_ID",
        "FACILITY_NAME",
        "FACILITY_TYPE",
        "LOCATION",
        "CAPACITY",
    ]
    schema = Root(session).databases["TEST_DATABASE"].schemas["TEST_SCHEMA"]
    facility_service = schema.cortex_search_services["FACILITY_SEARCH_SERVICE"]
    return facility_service.search(query=query, columns=wanted_columns, limit=limit)

In [31]:
def print_clean_sop_results(result):
    """Print a numbered, human-readable listing of SOP search hits.

    Args:
        result: Object whose ``to_dict()`` returns a mapping; the hits are read
            from its ``'results'`` entry (nothing is printed if it is absent).
    """
    hits = result.to_dict().get('results', [])
    position = 0
    for hit in hits:
        position += 1
        print(f"{position}. {hit['SOP_TITLE']} [{hit['SOP_CATEGORY']}] (ID: {hit['SOP_ID']})")

        # First 100 characters of the document plus an ellipsis, or blank when
        # the hit carries no SEARCH_DOCUMENT.
        if 'SEARCH_DOCUMENT' in hit:
            preview = hit['SEARCH_DOCUMENT'][:100] + "..."
        else:
            preview = ""

        # The similarity line is only printed when a cosine score is present.
        similarity = hit.get('@scores', {}).get('cosine_similarity', None)
        if similarity is not None:
            print(f"   Similarity: {similarity:.2f}")
        print(f"   Snippet: {preview}\n")


def print_clean_facility_results(result):
    """Print a numbered, human-readable listing of facility search hits.

    Args:
        result: Object whose ``to_dict()`` returns a mapping; the hits are read
            from its ``'results'`` entry (nothing is printed if it is absent).
    """
    payload = result.to_dict()
    for rank, entry in enumerate(payload.get('results', []), start=1):
        header = f"{rank}. {entry['FACILITY_NAME']} [{entry['FACILITY_TYPE']}] (ID: {entry['FACILITY_ID']})"
        print(header)

        # Truncate the document to 100 characters; blank when it was not returned.
        has_document = 'SEARCH_DOCUMENT' in entry
        excerpt = (entry['SEARCH_DOCUMENT'][:100] + "...") if has_document else ""

        # Only hits that carry a cosine score get a similarity line.
        scores = entry.get('@scores', {})
        cosine = scores.get('cosine_similarity', None)
        if cosine is not None:
            print(f"   Similarity: {cosine:.2f}")
        print(f"   Snippet: {excerpt}\n")

In [32]:
# Smoke-test the SOP search service with two natural-language queries.
def _run_sop_search_test(banner, search_text):
    """Print the banner, run one SOP search (limit 3), print the hits, return the response."""
    print(banner)
    response = cortex_search_sop(search_text, limit=3)
    # NOTE(review): cortex_search_sop as written returns the search response
    # directly, so the None branch looks unreachable; kept for parity.
    if response is None:
        print("  Note: Search service might still be indexing. Try again in a few minutes.")
    else:
        print_clean_sop_results(response)
    return response


print("=== Testing Cortex Search ===\n")

result1 = _run_sop_search_test(
    "Test 1: 'How to handle patient emergencies?'",
    "How to handle patient emergencies?",
)
result2 = _run_sop_search_test(
    "\nTest 2: 'infection control and hygiene'",
    "infection control and hygiene",
)

=== Testing Cortex Search ===

Test 1: 'How to handle patient emergencies?'
1. Medical Emergency Response [Emergency Procedures] (ID: SOP-0004)
   Similarity: 0.49
   Snippet: Medical Emergency Response - Emergency Procedures. Department: Radiology. This is the detailed proce...

2. Patient Admission Procedure [Patient Care] (ID: SOP-0005)
   Similarity: 0.44
   Snippet: Patient Admission Procedure - Patient Care. Department: ICU. This is the detailed procedure for Pati...

3. Personal Protective Equipment Usage [Safety Protocol] (ID: SOP-0002)
   Similarity: 0.44
   Snippet: Personal Protective Equipment Usage - Safety Protocol. Department: Emergency. This is the detailed p...


Test 2: 'infection control and hygiene'
1. Infection Control Measures [Safety Protocol] (ID: SOP-0007)
   Similarity: 0.56
   Snippet: Infection Control Measures - Safety Protocol. Department: Administration. This is the detailed proce...

2. Infection Control Measures [Safety Protocol] (ID: SOP-0017)
   Simil

### Compare Traditional SQL vs Cortex Search

In [33]:
def compare_search_methods(query: str):
    """Compare traditional ILIKE search vs Cortex Search for one query.

    Prints the titles matched by a substring (ILIKE) search over the SOP
    title, content and category, followed by the Cortex Search hits for the
    same text.

    Args:
        query: Free-text search string. It is escaped before being embedded in
            the SQL string literals, so apostrophes and backslashes are safe.

    Returns:
        tuple: ``(sql_results, cortex_results)`` — the frame returned by
        ``sf_helper.execute_query`` and the raw Cortex Search response.
    """

    print(f"\n{'=' * 60}")
    print(f"Query: '{query}'")
    print(f"{'=' * 60}")

    # Escape for a Snowflake single-quoted literal: backslashes first, then
    # quotes, so a query such as "patient's rights" neither breaks the
    # statement nor injects SQL.
    sql_literal = query.replace("\\", "\\\\").replace("'", "''")

    # Traditional SQL search
    sql_query = f"""
    SELECT
        SOP_ID,
        SOP_TITLE,
        SOP_CATEGORY,
        DEPARTMENT
    FROM hospital_sop
    WHERE SOP_TITLE ILIKE '%{sql_literal}%'
       OR SOP_CONTENT ILIKE '%{sql_literal}%'
       OR SOP_CATEGORY ILIKE '%{sql_literal}%'
    LIMIT 5
    """

    sql_results = sf_helper.execute_query(sql_query)

    print(f"\n📊 Traditional SQL Search ({len(sql_results)} results):")
    if not sql_results.empty:
        # Number by position, not by index label, so the list always starts at 1.
        for position, (_, row) in enumerate(sql_results.iterrows(), start=1):
            print(f"  {position}. {row['SOP_TITLE']}")
    else:
        print("  No results found")

    # Cortex Search
    print("\n🤖 Cortex Semantic Search:")
    cortex_results = cortex_search_sop(query, limit=5)
    if cortex_results is not None:
        print_clean_sop_results(cortex_results)
    else:
        print("  Service still indexing or no results")

    return sql_results, cortex_results

In [34]:
# Run the ILIKE-vs-Cortex comparison once per sample query.
test_queries = ["patient safety", "emergency response", "admission process"]

# sql_res / cortex_res hold the results of the last query once the loop ends.
for query in test_queries:
    sql_res, cortex_res = compare_search_methods(query)


Query: 'patient safety'

📊 Traditional SQL Search (0 results):
  No results found

🤖 Cortex Semantic Search:
1. Personal Protective Equipment Usage [Safety Protocol] (ID: SOP-0008)
   Similarity: 0.46
   Snippet: Personal Protective Equipment Usage - Safety Protocol. Department: Outpatient. This is the detailed ...

2. Infection Control Measures [Safety Protocol] (ID: SOP-0017)
   Similarity: 0.47
   Snippet: Infection Control Measures - Safety Protocol. Department: Radiology. This is the detailed procedure ...

3. Patient Feedback Management [Quality Assurance] (ID: SOP-0020)
   Similarity: 0.44
   Snippet: Patient Feedback Management - Quality Assurance. Department: Radiology. This is the detailed procedu...

4. Patient Feedback Management [Quality Assurance] (ID: SOP-0012)
   Similarity: 0.43
   Snippet: Patient Feedback Management - Quality Assurance. Department: Laboratory. This is the detailed proced...

5. Infection Control Measures [Safety Protocol] (ID: SOP-0007)
   Similarit

### Advanced RAG with Cortex Search + Complete

In [48]:
class AdvancedRAGAgent:
    """Advanced RAG agent using Cortex Search and Complete.

    Retrieval goes through the notebook-level ``cortex_search_sop`` and
    ``cortex_search_facility`` functions. Generation uses the imported
    ``complete`` / ``extract_answer`` / ``summarize`` functions with the
    notebook-level ``session``.

    Every method returns a dict. The "nothing found" variants carry the same
    count/list keys as the success variants (with zero/empty values), so
    callers can index them without checking which path was taken.
    """

    # Answer text used when retrieval returns no hits.
    _NO_RESULTS_ANSWER = (
        "I don't have enough information to answer this question. "
        "The search service might still be indexing."
    )

    def __init__(self, sf_helper, model: str = "mistral-7b"):
        """Store the query helper and the model name passed to ``complete``."""
        self.sf_helper = sf_helper
        self.model = model

    @staticmethod
    def _hits(search_results) -> list:
        """Return the list of hit dicts from a search response (empty if falsy)."""
        return search_results.to_dict().get('results', []) if search_results else []

    @staticmethod
    def _sop_text(sop: dict) -> str:
        """Return the body text of an SOP hit.

        Prefers SOP_CONTENT and falls back to SEARCH_DOCUMENT, because the
        search call may not return SOP_CONTENT among its columns; without the
        fallback the prompt context would be empty.
        """
        return sop.get('SOP_CONTENT') or sop.get('SEARCH_DOCUMENT') or ''

    @staticmethod
    def _sql_literal(value) -> str:
        """Escape a value for use inside a single-quoted SQL string literal."""
        return str(value).replace("\\", "\\\\").replace("'", "''")

    def search_and_answer(self, question: str, search_limit: int = 3) -> dict:
        """Answer a question from the top SOP search hits.

        Returns a dict with ``question``, ``answer``, ``sources`` (list of
        ``sop_id`` / ``title`` / ``category`` dicts) and ``num_sources``.
        """
        results = self._hits(cortex_search_sop(question, limit=search_limit))
        if not results:
            return {
                "question": question,
                "answer": self._NO_RESULTS_ANSWER,
                "sources": [],
                "num_sources": 0
            }
        context_parts = []
        sources = []
        for sop in results:
            context_parts.append(
                f"[SOP {sop['SOP_ID']}] {sop['SOP_TITLE']}\n"
                f"Category: {sop['SOP_CATEGORY']}\n"
                f"Content: {self._sop_text(sop)}\n"
            )
            sources.append({
                "sop_id": sop['SOP_ID'],
                "title": sop['SOP_TITLE'],
                "category": sop['SOP_CATEGORY']
            })
        context = "\n---\n".join(context_parts)
        prompt = f"""You are a hospital staff assistant. Answer the question based ONLY on the provided SOPs.

Available SOPs:
{context}

Question: {question}

Instructions:
- Provide a clear, concise answer
- Cite specific SOP IDs when referencing information
- If the question cannot be answered from the SOPs, say so
- Be professional and helpful

Answer:"""
        answer = complete(self.model, prompt, session=session)
        return {
            "question": question,
            "answer": answer,
            "sources": sources,
            "num_sources": len(sources)
        }

    def search_facility_and_answer(self, question: str, search_limit: int = 3) -> dict:
        """Answer a question from the top facility search hits.

        Returns a dict with ``question``, ``answer``, ``sources`` (list of
        ``facility_id`` / ``name`` / ``type`` / ``location`` dicts) and
        ``num_sources``.
        """
        results = self._hits(cortex_search_facility(question, limit=search_limit))
        if not results:
            return {
                "question": question,
                "answer": self._NO_RESULTS_ANSWER,
                "sources": [],
                "num_sources": 0
            }
        context_parts = []
        sources = []
        for fac in results:
            context_parts.append(
                f"[Facility {fac['FACILITY_ID']}] {fac['FACILITY_NAME']}\n"
                f"Type: {fac['FACILITY_TYPE']}\n"
                f"Location: {fac['LOCATION']}\n"
                f"Capacity: {fac.get('CAPACITY', '')}\n"
                f"Description: {fac.get('SEARCH_DOCUMENT', '')}\n"
            )
            sources.append({
                "facility_id": fac['FACILITY_ID'],
                "name": fac['FACILITY_NAME'],
                "type": fac['FACILITY_TYPE'],
                "location": fac['LOCATION']
            })
        context = "\n---\n".join(context_parts)
        prompt = f"""You are a hospital staff assistant. Answer the question based ONLY on the provided facilities.

Available Facilities:
{context}

Question: {question}

Instructions:
- Provide a clear, concise answer
- Cite specific Facility IDs when referencing information
- If the question cannot be answered from the facilities, say so
- Be professional and helpful

Answer:"""
        answer = complete(self.model, prompt, session=session)
        return {
            "question": question,
            "answer": answer,
            "sources": sources,
            "num_sources": len(sources)
        }

    def multi_document_qa(self, question: str) -> dict:
        """Extract an answer from each of the top five SOP hits and combine them.

        With more than one extracted answer the answers are synthesized through
        ``complete``; with exactly one it is returned as-is. Returns a dict with
        ``question``, ``answer``, ``sources`` and ``detailed_answers``.
        """
        results = self._hits(cortex_search_sop(question, limit=5))
        if not results:
            return {
                "question": question,
                "answer": "No relevant documents found",
                "sources": [],
                "detailed_answers": []
            }
        answers = []
        for sop in results:
            document = self._sop_text(sop)
            if not document:
                # Nothing to extract from; skip instead of sending an empty document.
                continue
            try:
                extracted = extract_answer(
                    document,
                    question,
                    session=session
                )
                if extracted and extracted.strip():
                    answers.append({
                        "source": sop['SOP_ID'],
                        "title": sop['SOP_TITLE'],
                        "answer": extracted
                    })
            except Exception as e:
                # Best-effort: one failing document must not abort the others.
                print(f"Extract error for {sop['SOP_ID']}: {e}")
        if len(answers) > 1:
            combined = "\n\n".join(f"{a['title']}: {a['answer']}" for a in answers)
            synthesis_prompt = f"""Synthesize these answers into one coherent response:

{combined}

Original question: {question}

Provide a unified, clear answer:"""
            final_answer = complete(self.model, synthesis_prompt, session=session)
        elif len(answers) == 1:
            final_answer = answers[0]['answer']
        else:
            final_answer = "Could not extract a clear answer from the available documents."
        return {
            "question": question,
            "answer": final_answer,
            "sources": [{"sop_id": a['source'], "title": a['title']} for a in answers],
            "detailed_answers": answers
        }

    def summarize_sop_category(self, category: str) -> dict:
        """Summarize up to ten SOPs of one category with ``summarize``.

        Returns a dict with ``category``, ``num_sops``, ``summary`` and
        ``sop_titles``; when the category has no rows, ``num_sops`` is 0 and
        ``sop_titles`` is empty.
        """
        # The category is escaped so an apostrophe cannot break or alter the SQL.
        query = f"""
        SELECT SOP_ID, SOP_TITLE, SOP_CONTENT
        FROM hospital_sop
        WHERE SOP_CATEGORY = '{self._sql_literal(category)}'
        LIMIT 10
        """
        results = self.sf_helper.execute_query(query)
        if results.empty:
            return {
                "category": category,
                "num_sops": 0,
                "summary": f"No SOPs found in category: {category}",
                "sop_titles": []
            }
        all_content = "\n\n".join(
            f"{row['SOP_TITLE']}: {row['SOP_CONTENT']}"
            for _, row in results.iterrows()
        )
        summary = summarize(all_content, session=session)
        return {
            "category": category,
            "num_sops": len(results),
            "summary": summary,
            "sop_titles": results['SOP_TITLE'].tolist()
        }


# Initialize advanced agent with the shared Snowflake helper; the model is left
# at the constructor default.
rag_agent = AdvancedRAGAgent(sf_helper)
print("✓ Advanced RAG Agent initialized")

✓ Advanced RAG Agent initialized


In [49]:
print("\n=== Testing Advanced RAG Agent ===\n")

# Test 1: Search and Answer
print("Test 1: Search and Answer")
print("-" * 60)
result1 = rag_agent.search_and_answer(
    "What are the steps for patient admission?"
)
print(f"Question: {result1['question']}")
print(f"Answer: {result1['answer']}")
# The agent's "no results" response may omit 'num_sources'; fall back to the
# length of the source list so this cell does not raise KeyError.
print(f"\nSources ({result1.get('num_sources', len(result1['sources']))}):")
for src in result1['sources']:
    print(f"  - [{src['sop_id']}] {src['title']}")

# Test 2: Multi-document QA
print("\n\nTest 2: Multi-Document QA")
print("-" * 60)
result2 = rag_agent.multi_document_qa(
    "How should staff handle emergency situations?"
)
print(f"Question: {result2['question']}")
print(f"Answer: {result2['answer']}")
print("\nSources:")
for src in result2['sources']:
    print(f"  - [{src['sop_id']}] {src['title']}")

# Test 3: Category Summary
print("\n\nTest 3: Category Summary")
print("-" * 60)
result3 = rag_agent.summarize_sop_category("Patient Care")
print(f"Category: {result3['category']}")
# An empty category may come back without 'num_sops'; report it as 0.
print(f"Number of SOPs: {result3.get('num_sops', 0)}")
print(f"Summary:\n{result3['summary']}")


=== Testing Advanced RAG Agent ===

Test 1: Search and Answer
------------------------------------------------------------
Question: What are the steps for patient admission?
Answer:  Based on the provided SOPs, the steps for patient admission are as follows:

1. The patient is required to report to the Admitting Desk or Registration Area.
2. The patient's demographic information, such as name, date of birth, and insurance details, is collected and recorded.
3. The patient is assessed by a nurse or other healthcare professional to determine their medical condition and the level of care required.
4. The patient's vital signs, such as temperature, blood pressure, pulse, and respiratory rate, are measured and recorded.
5. The patient is given a hospital identification bracelet and a hospital gown.
6. The patient's room and bed are assigned, and they are shown to their room.
7. The patient's belongings are labeled and stored in the designated area.
8. The patient is introduced to their he