#### 1. Setup and Configuration

In [None]:
%pip install boto3

import os
import json
from concurrent.futures import ThreadPoolExecutor
from Neo4jRetrieval import Neo4jRetrieval
from BedrockQuery import BedrockQuery

# Define the search question
question = "Identify all British Columbia regulations that contain clauses that specify how the regulation applies to goods or services originating outside the province"

# Neo4j Configuration
# Each setting can be overridden through an environment variable of the same
# name; the defaults are the original local-development values.
NEO4J_URI = os.getenv("NEO4J_URI", "bolt://localhost:7687")
NEO4J_USERNAME = os.getenv("NEO4J_USERNAME", "admin")
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "admin")

# AWS Bedrock Configuration
# Credentials are read from the environment and default to empty strings.
AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID", "")
AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY", "")

# Shared Bedrock client used by every LLM call in this notebook
bedrock = BedrockQuery(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)

#### 2. Generate Initial Terms from User Query

In [None]:
def generate_initial_terms(question):
    """Ask the LLM for an initial mix of broad and specific search terms.

    Args:
        question: Natural-language question the search terms should cover.

    Returns:
        A list of search-term strings. When the model reply cannot be parsed
        as a Python list (or tuple) literal, a fixed fallback list is
        returned instead.
    """
    import ast  # kept local, as in the original cell

    # Fallback with mix of broad and specific
    fallback_terms = [
        "extraprovincial", "place of supply rules", "interprovincial", "cross-border commerce",
        "out-of-province", "territorial application", "jurisdiction", "goods and services classification",
    ]

    prompt = f"""
    Based on this question about British Columbia regulations:
    "{question}"
    
    Generate 8-10 search terms that include BOTH broad and specific terms:
    
    BROAD terms (3-4 terms) - General concepts:
    - Examples: "extraprovincial", "interprovincial", "out-of-province", "jurisdiction"
    
    SPECIFIC terms (4-6 terms) - Precise legal/regulatory phrases:  
    - Examples: "place of supply rules", "cross-border commerce", "territorial application"
    
    Focus on terms related to:
    - Cross-border commerce and trade
    - Provincial regulatory authority
    - Goods and services classification
    
    Return the terms as a Python list format only, no explanations.
    Example format: ["broad_term1", "specific_phrase_here", "broad_term2", "detailed_regulatory_concept"]
    """

    response = bedrock.get_response(prompt)
    try:
        parsed = ast.literal_eval(response.strip())
    except (ValueError, SyntaxError, TypeError, AttributeError,
            MemoryError, RecursionError):
        # Covers malformed literals as well as a reply that is not a string.
        print("Could not parse initial terms from the model reply; using fallback terms")
        return fallback_terms

    # A valid literal that is not a sequence (e.g. a dict or a bare string)
    # is just as unusable as a parse failure.
    if not isinstance(parsed, (list, tuple)):
        print("Model reply was not a list of terms; using fallback terms")
        return fallback_terms

    # Keep only real, non-blank strings so later `.lower()` / `.split()` calls are safe.
    return [term for term in parsed if isinstance(term, str) and term.strip()]

# Generate new terms based on the original question and existing terms
def generate_additional_terms(question, existing_terms):
    """Ask the LLM for search terms that are not already in ``existing_terms``.

    Args:
        question: Natural-language question the search terms should cover.
        existing_terms: Terms collected so far; they are listed in the prompt
            and used to filter the reply.

    Returns:
        A list of term strings that are new compared with ``existing_terms``
        (case-insensitive, ignoring surrounding whitespace). Returns an empty
        list when the reply cannot be parsed or contains nothing new.
    """
    import ast  # kept local, as in the original cell

    # str() guards the join against non-string entries in the running list.
    existing_terms_str = ", ".join(str(term) for term in existing_terms)

    prompt = f"""
    Based on this original question about British Columbia regulations:
    "{question}"
    
    We already have these search terms: {existing_terms_str}
    
    Generate 4-6 NEW search terms that maintain a balance of BROAD and SPECIFIC terms:
    
    BROAD terms (2-3 new terms) - Single words or short general concepts:
    - Examples: "territorial", "jurisdictional", "provincial"
    
    SPECIFIC terms (2-3 new terms) - Multi-word precise legal phrases:
    - Examples: "supply chain regulations", "import compliance requirements", "inter-jurisdictional trade rules"
    
    Ensure these are different from existing terms and focus on:
    - Cross-border commerce and trade
    - Provincial regulatory authority  
    - Goods and services classification
    
    If no relevant new terms can be generated, return an empty list: []
    
    Return the terms as a Python list format only, no explanations.
    Example format: ["broad_term", "specific multi-word phrase", "another_broad_term"] or []
    """

    response = bedrock.get_response(prompt)
    try:
        parsed = ast.literal_eval(response.strip())
    except (ValueError, SyntaxError, TypeError, AttributeError,
            MemoryError, RecursionError):
        # Covers malformed literals as well as a reply that is not a string.
        print("Could not parse additional terms from the model reply; treating as no new terms")
        return []

    if not isinstance(parsed, (list, tuple)):
        return []

    # The prompt asks for NEW terms, but nothing forces the model to comply;
    # drop repeats here so the caller's "no new terms" stop condition can fire.
    seen = {term.strip().lower() for term in existing_terms if isinstance(term, str)}
    fresh_terms = []
    for term in parsed:
        if not isinstance(term, str):
            continue
        key = term.strip().lower()
        if key and key not in seen:
            seen.add(key)
            fresh_terms.append(term)
    return fresh_terms

# Seed the running term list with the model's first batch
all_terms = []
initial_terms = generate_initial_terms(question)
all_terms += initial_terms
print("Initial terms:", initial_terms)

# Keep requesting further batches until one comes back empty
max_iterations = 6  # Hard cap so the loop always terminates
iteration = 0

while iteration < max_iterations:
    new_terms = generate_additional_terms(question, all_terms)
    if new_terms:
        all_terms += new_terms
        print(f"Iteration {iteration + 1} - New terms added:", new_terms)
        iteration += 1
        continue

    # An empty batch means there is nothing further to add
    print(f"No more new terms generated after {iteration + 1} iterations")
    break

print(f"Total terms generated: {len(all_terms)}")
print("All terms:", all_terms)

#### 3. Refine and Expand Terms

In [None]:
def refine_terms(initial_terms):
    """Expand each search term into related terms of the same specificity.

    Every term is classified as broad or specific, then the LLM is asked for
    2-3 related terms at that level.

    Args:
        initial_terms: Iterable of search terms. Entries that are not strings
            are skipped.

    Returns:
        A list with one inner list per processed term. Each inner list holds
        the expanded terms, or just the original term when the reply could
        not be parsed or contained no usable strings.
    """
    import ast  # kept local, as in the original cell

    # Words that mark a short term as specific rather than broad.
    specific_keywords = ('rule', 'regulation', 'requirement', 'compliance', 'application')
    refined_terms = []

    for term in initial_terms:
        if not isinstance(term, str):
            # `.split()` / `.lower()` below need a string; nothing to expand otherwise.
            continue

        # Determine if term is broad or specific based on word count and content
        lowered = term.lower()
        is_broad = len(term.split()) <= 2 and not any(
            keyword in lowered for keyword in specific_keywords
        )

        # Pick the prompt fragments for the detected level up front.
        if is_broad:
            level_note = "This appears to be a BROAD term."
            guidance = "For this BROAD term, provide other broad synonyms or general concepts."
            length_hint = "- Keep terms general and short (1-2 words)"
            example = ["broad1", "broad2", "broad3"]
        else:
            level_note = "This appears to be a SPECIFIC term."
            guidance = "For this SPECIFIC term, provide other specific legal phrases or detailed regulatory concepts."
            length_hint = "- Keep terms specific and detailed (multi-word phrases)"
            example = ["specific detailed phrase 1", "specific regulatory concept 2", "specific legal term 3"]

        expand_prompt = f"""
        Given the search term "{term}" in the context of British Columbia regulations about goods/services from outside the province:
        
        {level_note}
        
        Generate 2-3 related terms that maintain the same specificity level:
        
        {guidance}
        
        Focus on:
        - Legal terminology variations
        - Regulatory language alternatives  
        - Jurisdictional concepts
        {length_hint}
        
        Return the terms as a Python list format only, no explanations.
        Example format: {example}
        """

        response = bedrock.get_response(expand_prompt)
        try:
            parsed = ast.literal_eval(response.strip())
        except (ValueError, SyntaxError, TypeError, AttributeError,
                MemoryError, RecursionError):
            refined_terms.append([term])  # Keep original if expansion fails
            continue

        # A bare string reply is treated as a single expanded term.
        if isinstance(parsed, str):
            parsed = [parsed]

        if isinstance(parsed, (list, tuple)):
            expanded_terms = [item for item in parsed if isinstance(item, str) and item.strip()]
        else:
            expanded_terms = []

        # An empty or unusable expansion must not silently drop the term.
        refined_terms.append(expanded_terms or [term])

    return refined_terms

# Expand every collected term (the initial batch plus all iteratively added ones)
refined_terms_nested = refine_terms(all_terms)  # Use all_terms instead of initial_terms
print("Refined terms (nested):", refined_terms_nested)

# Flatten a nested list of terms into a single list
def flatten_terms(nested_terms):
    """Return the entries of ``nested_terms`` with one level of lists unpacked.

    Entries that are lists contribute their items in order; any other entry
    is kept as-is. Only the first level is unpacked, so lists nested deeper
    stay lists.
    """
    return [
        term
        for entry in nested_terms
        for term in (entry if isinstance(entry, list) else [entry])
    ]

# Flatten the nested terms into one list and report how many there are
flattened_terms = flatten_terms(refined_terms_nested)
print(f"Flattened terms ({len(flattened_terms)}): {flattened_terms}")

# Convert all terms to lowercase and remove duplicates
def lowercase_terms(terms):
    """Return the lowercased, de-duplicated terms in first-seen order.

    Args:
        terms: Iterable of search terms. Entries that are not strings are
            ignored, since they cannot be lowercased.

    Returns:
        A list of unique lowercase strings, ordered by first appearance so
        the result is the same on every run.
    """
    lowered = (term.lower() for term in terms if isinstance(term, str))
    # dict keys are unique and keep insertion order, unlike a set.
    return list(dict.fromkeys(lowered))

# Normalize the flattened terms with lowercase_terms (lowercase, duplicates removed)
# and report how many remain
refined_terms = lowercase_terms(flattened_terms)
print(f"Final refined terms ({len(refined_terms)}): {refined_terms}")

#### 4. Search Neo4j Database for Matching Nodes

In [None]:
# Use the generated terms for searching
terms = refined_terms

neo4j_worker = Neo4jRetrieval(NEO4J_URI, NEO4J_USERNAME, NEO4J_PASSWORD)
try:
    nodes = neo4j_worker.search_many(terms)
    print(f"{len(nodes)} nodes found using generated terms")
finally:
    # Close the worker even when the search raises, so it is never left open.
    neo4j_worker.close()

#### 5. Define Relevance Assessment Function

In [5]:
def create_prompt(question, node):
    """Build the relevance-check prompt for one node.

    The prompt asks the model to answer "1" when ``node`` is related to
    ``question`` and "0" otherwise, and includes one worked example.
    Both arguments are interpolated using their default string form.
    """
    template = """
    Your task is to determine if the following object is related to the question: "{question}".

    Object: {node}

    If the object is related, "1". If it is not related, "0".
    Do not return any explanation, just the number.

    For example:
    Question: "What is the capital of France?"
    Object: {{ "text": "Paris is the capital of France." }}
    Response: 1
    """
    return template.format(question=question, node=node)

#Question: {question}
        
#Text: {node.get('text', '')}
        
#Does this text contain information relevant to the question?
        
#    Specifically, does it describe how regulations apply to goods or 
#    services from outside British Columbia?
        
#    Respond with only 1 for yes or 0 for no.

#### 6. Filter Nodes for Relevance Using LLM

In [None]:
# Nodes the LLM judged relevant; filled in by add_or_dispose
related_nodes = []

def add_or_dispose(node):
    """Ask the LLM whether ``node`` is relevant and keep it if so.

    The model is expected to reply with "1" (relevant) or "0" (not relevant).
    Relevant nodes are appended to the module-level ``related_nodes`` list.
    A reply that is not an integer is reported and the node is skipped.

    Args:
        node: Search result with a dict-style ``get`` method; its "text"
            entry, when present, is used for the progress preview.
    """
    # Use LLM to determine if the node is relevant
    prompt = create_prompt(question, node)
    bedrock_response = bedrock.get_response(prompt)

    verdict = str(bedrock_response).strip()
    print(verdict)
    try:
        is_relevant = bool(int(verdict))
    except ValueError:
        # Report the odd reply instead of dropping the node without a trace.
        print("Skipping node, unexpected relevance response:", repr(verdict[:50]))
        return

    if is_relevant:
        # Append before building the preview so a preview problem cannot lose the node.
        related_nodes.append(node)

    # A missing or None "text" becomes an empty preview rather than an error.
    preview = str(node.get("text") or "")
    if is_relevant:
        print("Relevant:", preview[:100] + "...")
    else:
        print("Not relevant:", preview[:50] + "...")

# Process nodes in parallel
with ThreadPoolExecutor(10) as executor:
    futures = [executor.submit(add_or_dispose, node) for node in nodes]

# Leaving the `with` block waits for every worker, so all futures are done here.
# Report worker errors instead of letting them disappear with unread results.
failed = [future for future in futures if future.exception() is not None]
if failed:
    print(f"{len(failed)} nodes could not be assessed; first error: {failed[0].exception()!r}")

print(f"{len(related_nodes)} relevant nodes found")

#### 7. Save Relevant Nodes to File

In [None]:
# Persist the relevant nodes as JSON Lines: one JSON object per line
with open("related_nodes.jsonl", "w") as f:
    for node in related_nodes:
        # print appends the newline that terminates each record
        print(json.dumps(node), file=f)

print("Relevant nodes saved to related_nodes.jsonl")

#### 8. Generate Final Report

In [None]:
# Load nodes from file (if needed)
with open("related_nodes.jsonl", "r") as f:
    # Skip blank lines so a stray empty line cannot break JSON parsing.
    related_nodes = [json.loads(line) for line in f if line.strip()]

# Everything below works on the in-memory list, so the file is already closed
# and is not held open while waiting for the LLM.
related_nodes = related_nodes[:100]  # Limit for demonstration

# Reduce each node to its document id and section number, dropping repeated
# pairs up front instead of relying on the model to de-duplicate them.
formatted_nodes = []
for node in related_nodes:
    entry = {
        "document": node.get("regId") or node.get("actId"),
        "section_number": node.get("sectionNumber")
    }
    if entry not in formatted_nodes:
        formatted_nodes.append(entry)

# Generate report table
prompt = f"""
    You are a report generator. Generate a table outlining BC regulations with clauses about out-of-province goods/services.
    
    Create a Markdown table with two columns: Document and Section Number.
    Do not repeat the same combination of document and section number.
    
    Here is the data:
    {json.dumps(formatted_nodes, indent=2)}
    """

bedrock_response = bedrock.get_response(prompt)
print("Final Report:")
print("=" * 50)
print(bedrock_response.strip())