#### 1. Setup and Configuration

In [None]:
%pip install boto3

import os
import json
from concurrent.futures import ThreadPoolExecutor
from JEDI_Search_Task.Neo4jRetrieval import Neo4jRetrieval
from JEDI_Search_Task.BedrockQuery import BedrockQuery

# Define the search question
question = "Identify all British Columbia regulations that contain clauses that specify how the regulation applies to goods or services originating outside the province"

# Neo4j Configuration
# Each setting is taken from the environment variable of the same name when it
# is set, so real credentials do not have to live in the notebook. The
# fallbacks are the values this notebook previously hard-coded.
NEO4J_URI = os.getenv("NEO4J_URI", "bolt://neo4j:7687")
NEO4J_USERNAME = os.getenv("NEO4J_USERNAME", "admin")
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "admin")

# AWS Bedrock Configuration
# Both credentials are looked up in the process environment; a value is None
# when its variable is not set.
AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY")
bedrock = BedrockQuery(
    AWS_ACCESS_KEY_ID,
    AWS_SECRET_ACCESS_KEY,
)

#### 2. Generate Initial Terms from User Query

In [None]:
def generate_initial_terms(query):
    """Ask the LLM for search terms derived from ``query``.

    The model is prompted to answer with a Python list literal. The reply is
    parsed with ``ast.literal_eval`` (never ``eval``) and validated before use.

    Args:
        query: The natural-language search question embedded in the prompt.

    Returns:
        A list of non-empty, whitespace-trimmed term strings. A fixed fallback
        list is returned when the reply cannot be parsed into at least one
        string term.
    """
    import ast

    fallback_terms = ["interprovincial", "extraprovincial", "out-of-province", "territorial", "jurisdiction"]

    prompt = f"""
    Given the following legal search query, generate a list of 8-12 relevant search terms that would help identify regulations with clauses about goods/services from outside the province.
    
    Query: "{query}"
    
    Focus on terms related to:
    - Jurisdictional scope and territorial application
    - Cross-border commerce and trade
    - Provincial regulatory authority
    - Goods and services classification
    
    Return the terms as a Python list format only, no explanations.
    Example format: ["term1", "term2", "term3"]
    """

    response = bedrock.get_response(prompt)
    text = response.strip() if isinstance(response, str) else ""

    # Replies are sometimes wrapped in code fences or a sentence; keep only the
    # outermost bracketed span when there is one.
    start, end = text.find("["), text.rfind("]")
    if start != -1 and end > start:
        text = text[start:end + 1]

    try:
        parsed = ast.literal_eval(text)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        # These are the errors literal_eval raises for malformed input.
        return fallback_terms

    # A syntactically valid reply can still be the wrong shape (dict, number,
    # list of non-strings); only string terms are usable for searching.
    if not isinstance(parsed, (list, tuple)):
        return fallback_terms
    terms = [item.strip() for item in parsed if isinstance(item, str) and item.strip()]
    return terms or fallback_terms

initial_terms = generate_initial_terms(question)
print("Initial terms:", initial_terms)

#### 3. Refine and Expand Terms

In [None]:
def _parse_term_list(response):
    """Extract term strings from an LLM reply expected to be a Python list."""
    import ast

    if not isinstance(response, str):
        return []

    text = response.strip()
    start, end = text.find("["), text.rfind("]")
    if start != -1 and end > start:
        try:
            parsed = ast.literal_eval(text[start:end + 1])
        except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
            parsed = None
        if isinstance(parsed, (list, tuple)):
            return [item.strip() for item in parsed if isinstance(item, str) and item.strip()]
        # Not a valid literal: fall through and split what is inside the brackets.
        text = text[start + 1:end]

    # Best-effort fallback for replies such as: term1, "term2", 'term3'
    pieces = (piece.strip().strip("[]").strip().strip("\"'").strip() for piece in text.split(","))
    return [piece for piece in pieces if piece]


def refine_terms(initial_terms):
    """Expand each initial term with related terms suggested by the LLM.

    Args:
        initial_terms: Iterable of term strings to expand.

    Returns:
        A list holding the initial terms followed by the newly suggested ones,
        with duplicates removed and first-seen order preserved.
    """
    refined_terms = []

    for term in initial_terms:
        expand_prompt = f"""
        Given the search term "{term}" in the context of British Columbia regulations about goods/services from outside the province, generate 2-3 closely related legal terms or synonyms.
        
        Focus on:
        - Legal terminology variations
        - Regulatory language alternatives
        - Jurisdictional concepts
        
        Return the terms as a Python list format only, no explanations.
        Example format: ["term1", "term2", "term3"]
        """

        response = bedrock.get_response(expand_prompt)
        # The prompt asks for a list literal, so parse it as one; a plain comma
        # split would leave "[" and "]" attached to the first and last terms.
        new_terms = _parse_term_list(response)
        # Keep the original term when the reply yields nothing usable.
        refined_terms.extend(new_terms or [term])

    # Remove duplicates and combine with original terms; dict.fromkeys keeps
    # the order stable between runs, unlike a set.
    all_terms = list(dict.fromkeys([*initial_terms, *refined_terms]))
    return all_terms

refined_terms = refine_terms(initial_terms)
print("Refined terms:", refined_terms)
print(f"Total terms: {len(refined_terms)}")

#### 4. Search Neo4j Database for Matching Nodes

In [None]:
# Use the generated terms for searching
terms = refined_terms

neo4j_worker = Neo4jRetrieval(NEO4J_URI, NEO4J_USERNAME, NEO4J_PASSWORD)
try:
    nodes = neo4j_worker.search_many(terms)
finally:
    # Always call close(), even when the search raises, so the worker is not
    # left open after a failed cell run.
    neo4j_worker.close()
print(f"{len(nodes)} nodes found using generated terms")

#### 5. Define Relevance Assessment Function

In [None]:
def create_prompt(question, node):
    """Build the yes/no relevance prompt for a single node.

    Args:
        question: The search question to judge relevance against.
        node: Mapping for one node; its ``'text'`` entry is embedded in the
            prompt, with an empty string used when the key is missing.

    Returns:
        The prompt string, which asks the model to answer with only 1 or 0.
    """
    node_text = node.get('text', '')
    return f"""
    Question: {question}
    
    Text: {node_text}
    
    Does this text contain information relevant to the question? 
    Specifically, does it describe how regulations apply to goods or services from outside British Columbia?
    
    Respond with only 1 for yes or 0 for no.
    """

#### 6. Filter Nodes for Relevance Using LLM

In [None]:
# Nodes the model judged relevant; filled in by add_or_dispose.
related_nodes = []

def add_or_dispose(node):
    """Ask the LLM whether ``node`` is relevant and keep it if so.

    Args:
        node: Mapping for one node; its ``"text"`` entry is shown in the
            progress output.

    Side effects:
        Appends ``node`` to the module-level ``related_nodes`` list when the
        model's reply parses to a non-zero integer, and prints one progress
        line per node.
    """
    # Use LLM to determine if the node is relevant
    prompt = create_prompt(question, node)
    bedrock_response = bedrock.get_response(prompt)

    try:
        is_relevant = bool(int(bedrock_response.strip()))
    except (ValueError, TypeError, AttributeError):
        # The model did not answer with a bare integer. Skip the node, but say
        # so instead of dropping it silently.
        print("Skipped (unparseable relevance response):", repr(bedrock_response))
        return

    # "text" may be missing or None; fall back to an empty preview so building
    # the progress line cannot raise after the node has been recorded.
    preview = str(node.get("text") or "")
    if is_relevant:
        related_nodes.append(node)
        print("Relevant:", preview[:100] + "...")
    else:
        print("Not relevant:", preview[:50] + "...")

# Run the relevance checks on a pool of four worker threads. Leaving the
# ``with`` block waits for every submitted check to finish.
with ThreadPoolExecutor(max_workers=4) as pool:
    for candidate in nodes:
        pool.submit(add_or_dispose, candidate)

print(f"{len(related_nodes)} relevant nodes found")

#### 7. Save Relevant Nodes to File

In [None]:
# Persist the relevant nodes as JSON Lines (one JSON object per line) so a
# later cell or session can reload them.
with open("related_nodes.jsonl", "w") as out_file:
    out_file.writelines(json.dumps(node) + "\n" for node in related_nodes)

print("Relevant nodes saved to related_nodes.jsonl")

#### 8. Generate Final Report

In [None]:
# Load nodes from file (if needed)
# Only the read happens inside the ``with`` block, so the file is closed before
# the model call below rather than being held open across it.
with open("related_nodes.jsonl", "r") as f:
    # Blank lines (e.g. a trailing newline added by hand) are skipped because
    # json.loads rejects an empty string.
    related_nodes = [json.loads(line) for line in f if line.strip()]
related_nodes = related_nodes[:100]  # Limit for demonstration

# Reduce each node to the two report columns and drop repeated
# (document, section) pairs here, so the table does not depend on the model
# deduplicating them. First-seen order is preserved.
formatted_nodes = []
seen_entries = set()
for node in related_nodes:
    entry = {
        "document": node.get("regId") or node.get("actId"),
        "section_number": node.get("sectionNumber"),
    }
    # Serialise the entry to get a hashable key regardless of value types.
    entry_key = json.dumps(entry, sort_keys=True)
    if entry_key in seen_entries:
        continue
    seen_entries.add(entry_key)
    formatted_nodes.append(entry)

# Generate report table
prompt = f"""
    You are a report generator. Generate a table outlining BC regulations with clauses about out-of-province goods/services.
    
    Create a Markdown table with two columns: Document and Section Number.
    Do not repeat the same combination of document and section number.
    
    Here is the data:
    {json.dumps(formatted_nodes, indent=2)}
    """

bedrock_response = bedrock.get_response(prompt)
print("Final Report:")
print("=" * 50)
print(bedrock_response.strip())