#### 1. Setup and Configuration

In [10]:
%pip install boto3

import os
import json
from concurrent.futures import ThreadPoolExecutor
from Neo4jRetrieval import Neo4jRetrieval
from BedrockQuery import BedrockQuery

# Define the search question
question = "Identify all British Columbia regulations that contain clauses that specify how the regulation applies to goods or services originating outside the province"

# Neo4j Configuration
# Connection settings are read from the environment so that real credentials
# never have to be committed with the notebook. The fallbacks are the values
# that were previously hard-coded here and are only suitable for a local
# development database.
NEO4J_URI = os.getenv("NEO4J_URI", "bolt://localhost:7687")
NEO4J_USERNAME = os.getenv("NEO4J_USERNAME", "admin")
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "admin")

# AWS Bedrock Configuration
# Both values default to an empty string when the variable is not set.
AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID", "")
AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY", "")

# Shared LLM client used by every later cell that calls `bedrock.get_response`.
bedrock = BedrockQuery(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


#### 2. Generate Initial Terms from User Query

In [11]:
def generate_initial_terms(question):
    """Ask the LLM for a first batch of search terms for ``question``.

    The prompt requests a Python list literal, which is parsed with
    ``ast.literal_eval``. Uses the module-level ``bedrock`` client.

    Args:
        question: The natural-language research question embedded in the prompt.

    Returns:
        list[str]: The terms parsed from the LLM response, or a fixed fallback
        list when the response cannot be parsed into a list of strings.
    """
    import ast  # local import: `ast` is not among the notebook's top-level imports

    # Fallback with mix of broad and specific
    fallback_terms = ["extraprovincial", "place of supply rules", "interprovincial", "cross-border commerce",
                      "out-of-province", "territorial application", "jurisdiction", "goods and services classification"]

    prompt = f"""
    Based on this question about British Columbia regulations:
    "{question}"
    
    Generate 8-10 search terms focused on INTERPROVINCIAL and INTERTERRITORIAL trade barriers within Canada only (NOT international trade):
    
    BROAD terms (3-4 terms) - General concepts:
    - Examples: "interprovincial", "interterritorial", "out-of-province", "jurisdiction"
    
    SPECIFIC terms (4-6 terms) - Precise legal/regulatory phrases:  
    - Examples: "interprovincial trade barriers", "territorial application within Canada", "cross-provincial commerce"
    
    Focus on terms related to:
    - Trade barriers between Canadian provinces and territories
    - Provincial regulatory authority over out-of-province goods/services
    - Interprovincial goods and services classification
    - Territorial boundaries within Canada
    
    Return the terms as a Python list format only, no explanations.
    Example format: ["broad_term1", "specific_phrase_here", "broad_term2", "detailed_regulatory_concept"]
    """

    response = bedrock.get_response(prompt)
    try:
        terms = ast.literal_eval(response.strip())
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError, AttributeError) as exc:
        # literal_eval failures, or a response object without .strip():
        # keep the run going with the fallback, but say so instead of hiding it.
        print(f"Could not parse initial terms ({type(exc).__name__}); using fallback terms")
        return fallback_terms

    # A bare string or a list with non-string items would corrupt later steps
    # (list.extend on a string adds single characters), so reject it here.
    if not isinstance(terms, (list, tuple)) or not all(isinstance(term, str) for term in terms):
        print("LLM response was not a list of strings; using fallback terms")
        return fallback_terms

    return list(terms)

# Generate new terms based on the original question and existing terms
def generate_additional_terms(question, existing_terms):
    """Ask the LLM for search terms that are not yet in ``existing_terms``.

    Uses the module-level ``bedrock`` client. The response is expected to be a
    Python list literal and is parsed with ``ast.literal_eval``.

    Args:
        question: The natural-language research question embedded in the prompt.
        existing_terms: Strings already collected; they are listed in the
            prompt and used to drop repeated suggestions.

    Returns:
        list[str]: Terms that are genuinely new (compared case-insensitively
        against ``existing_terms`` and against each other). An empty list is
        returned when nothing new was produced or the response is unusable.
    """
    import ast  # local import: `ast` is not among the notebook's top-level imports

    existing_terms_str = ", ".join(existing_terms)

    prompt = f"""
    Based on this original question about British Columbia regulations:
    "{question}"
    
    We already have these search terms: {existing_terms_str}
    
    Generate 4-6 NEW search terms that maintain a balance of BROAD and SPECIFIC terms, focused on INTERPROVINCIAL/INTERTERRITORIAL trade within Canada only:
    
    BROAD terms (2-3 new terms) - Single words or short general concepts:
    - Examples: "territorial", "jurisdictional", "provincial"
    
    SPECIFIC terms (2-3 new terms) - Multi-word precise legal phrases:
    - Examples: "interprovincial supply chain regulations", "territorial compliance requirements", "inter-jurisdictional trade rules within Canada"
    
    Ensure these are different from existing terms and focus on:
    - Trade barriers between Canadian provinces and territories
    - Provincial regulatory authority over other provinces/territories
    - Interprovincial goods and services classification
    
    Stay grounded to the original question: "{question}"
    
    If no relevant new terms can be generated, return an empty list: []
    
    Return the terms as a Python list format only, no explanations.
    Example format: ["broad_term", "specific multi-word phrase", "another_broad_term"] or []
    """

    response = bedrock.get_response(prompt)
    try:
        terms = ast.literal_eval(response.strip())
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError, AttributeError):
        # Unparseable response: report "nothing new", as before.
        return []

    # Only a list of strings is usable; anything else counts as "nothing new".
    if not isinstance(terms, (list, tuple)) or not all(isinstance(term, str) for term in terms):
        return []

    # The prompt asks for new terms, but that is not guaranteed; drop repeats
    # so the caller's running list does not accumulate duplicates.
    seen = {term.strip().lower() for term in existing_terms}
    new_terms = []
    for term in terms:
        key = term.strip().lower()
        if key and key not in seen:
            seen.add(key)
            new_terms.append(term)
    return new_terms

# Seed the running list with the first batch of terms
all_terms = []
initial_terms = generate_initial_terms(question)
all_terms += initial_terms
print("Initial terms:", initial_terms)

# Keep asking for additional terms, bounded so the loop always terminates
max_iterations = 6  # Prevent infinite loops
iteration = 0

for iteration in range(max_iterations):
    new_terms = generate_additional_terms(question, all_terms)
    if not new_terms:  # No more terms to generate
        print(f"No more new terms generated after {iteration + 1} iterations")
        break

    all_terms.extend(new_terms)
    print(f"Iteration {iteration + 1} - New terms added:", new_terms)
else:
    # Every round produced terms: leave the counter at the number of rounds run
    iteration = max_iterations

print(f"Total terms generated: {len(all_terms)}")
print("All terms:", all_terms)

Initial terms: ['interprovincial', 'interterritorial', 'out-of-province', 'cross-provincial', 'interprovincial trade barriers', 'territorial application within Canada', 'provincial regulatory jurisdiction', 'interprovincial goods classification', 'cross-border services within Canada', 'internal trade agreement compliance']
Iteration 1 - New terms added: ['extraprovincial', 'intranational', 'interprovincial regulatory harmonization', 'cross-boundary trade compliance', 'provincial market access rules']
Iteration 2 - New terms added: ['extraterritorial', 'interjurisdictional', 'interprovincial regulatory reciprocity agreements', 'cross-border provincial business licensing', 'Canadian internal trade regulatory compliance']
Iteration 3 - New terms added: ['transboundary', 'interregional', 'provincial regulatory equivalency agreements', 'cross-jurisdiction goods classification', 'interprovincial regulatory alignment']
Iteration 4 - New terms added: ['intracanadian', 'subnational', 'interprov

#### 3. Refine and Expand Terms

In [None]:
def refine_terms(all_terms):
    """Build a blacklist and expand every term in ``all_terms`` via the LLM.

    Relies on the module-level ``question`` string and ``bedrock`` client.
    One LLM call produces the blacklist; one further call is made per term.

    Args:
        all_terms: Iterable of search-term strings to expand.

    Returns:
        tuple[list, list[str]]: ``(refined_terms, blacklisted_terms)``.
        ``refined_terms`` holds the expansions returned for each term; the
        original term itself is kept only when its expansion could not be
        parsed. ``blacklisted_terms`` is lower-cased, or a fixed fallback list
        when the blacklist response could not be parsed.
    """
    import ast  # local import: `ast` is not among the notebook's top-level imports

    # literal_eval failures, a non-list result, or a response/item lacking the
    # expected string methods all count as "unusable response".
    parse_errors = (ValueError, TypeError, SyntaxError, MemoryError, RecursionError, AttributeError)
    # Words that mark a short term as "specific" rather than "broad".
    specific_keywords = ('rule', 'regulation', 'requirement', 'compliance', 'application')

    refined_terms = []
    blacklisted_terms = []

    # First, generate blacklisted terms
    blacklist_prompt = f"""
    Based on this original question about British Columbia regulations:
    "{question}"
    
    Generate a list of blacklisted terms that would give FALSE POSITIVES for interprovincial/interterritorial trade within Canada.
    
    Focus on terms that relate to:
    - International trade (outside Canada)
    - Foreign countries or jurisdictions
    - Global commerce
    - Import/export with non-Canadian entities
    - Terms that might confuse interprovincial with international
    
    Stay grounded to the original question: "{question}"
    Keep in mind we want out-of-province but NOT out-of-country content.
    
    Return 5-8 blacklisted terms as a Python list format only, no explanations.
    Example format: ["international", "foreign trade", "import duties", "export regulations"]
    """

    blacklist_response = bedrock.get_response(blacklist_prompt)
    try:
        parsed_blacklist = ast.literal_eval(blacklist_response.strip())
        if not isinstance(parsed_blacklist, (list, tuple)):
            # A bare string would otherwise be split into single characters.
            raise TypeError("blacklist response is not a list")
        blacklisted_terms = [term.lower() for term in parsed_blacklist]
    except parse_errors:
        blacklisted_terms = ["international", "foreign", "import", "export", "overseas", "global trade"]

    print(f"Blacklisted terms: {blacklisted_terms}")

    # Then refine existing terms
    for term in all_terms:
        lowered = term.lower()
        is_broad = len(term.split()) <= 2 and not any(keyword in lowered for keyword in specific_keywords)

        # The prompt fragments are computed outside the f-string: backslashes
        # inside f-string expressions are a SyntaxError before Python 3.12.
        # The example is also written as plain list text; interpolating a list
        # repr produced items with embedded quotes, which the LLM then copied.
        if is_broad:
            term_kind = "broad"
            length_hint = "- Keep terms general and short (1-2 words)"
            example_format = '["broad1", "broad2", "broad3"]'
        else:
            term_kind = "specific"
            length_hint = "- Keep terms specific and detailed (multi-word phrases)"
            example_format = '["specific detailed phrase 1", "specific regulatory concept 2", "specific legal term 3"]'

        expand_prompt = f"""
        Based on this original question about British Columbia regulations:
        "{question}"
        
        Expand this {term_kind} term: "{term}"
        
        Focus on interprovincial/interterritorial trade within Canada only (NOT international):
        - Trade barriers between Canadian provinces and territories
        - Out-of-province but within Canada regulations
        - Provincial regulatory authority over other provinces/territories
        
        Stay grounded to the original question: "{question}"
        
        Generate 2-4 related {term_kind} terms that enhance search coverage for:
        - Regulatory language alternatives  
        - Jurisdictional concepts within Canada
        {length_hint}
        
        Return the terms as a Python list format only, no explanations.
        Example format: {example_format}
        """

        response = bedrock.get_response(expand_prompt)
        try:
            expanded_terms = ast.literal_eval(response.strip())
            if not isinstance(expanded_terms, (list, tuple)):
                raise TypeError("expansion response is not a list")
            refined_terms.extend(expanded_terms)
        except parse_errors:
            refined_terms.append(term)  # Keep original if expansion fails

    return refined_terms, blacklisted_terms

# Expand the collected terms and obtain the blacklist in one call
(refined_terms_nested, blacklisted_terms) = refine_terms(all_terms)
print(f"Refined terms (nested): {refined_terms_nested}")
print(f"Blacklisted terms: {blacklisted_terms}")

# Flatten nested list of terms to a single list
def flatten_terms(nested_terms):
    """Flatten one level of nesting.

    Items that are ``list`` instances are spliced into the result; every other
    item (including tuples and deeper nested lists) is kept as a single element.
    """
    flattened = []
    for entry in nested_terms:
        flattened += entry if isinstance(entry, list) else [entry]
    return flattened

# Collapse any nested lists so every entry is a single term
flattened_terms = flatten_terms(refined_terms_nested)
print(f"Flattened terms ({len(flattened_terms)}):", flattened_terms)

# Remove surrounding quotes from terms
def remove_quotes(terms):
    """Return ``terms`` with leading/trailing single and double quotes stripped.

    Every run of quote characters at either end of a term is removed; quotes
    in the middle of a term are left alone.
    """
    quote_chars = "\"'"
    return [term.strip(quote_chars) for term in terms]

# Strip the quote characters that surround some of the returned terms
cleaned_terms = remove_quotes(flattened_terms)
print(f"Cleaned terms ({len(cleaned_terms)}):", cleaned_terms)

# Convert all terms to lowercase and remove duplicates
def lowercase_terms(terms):
    """Lower-case ``terms`` and drop duplicates, keeping first-seen order.

    Args:
        terms: Iterable of strings.

    Returns:
        list[str]: Unique lower-cased terms in the order they first appear.
        A stable order keeps later steps (search, printed output) reproducible
        between runs, which ``list(set(...))`` does not.
    """
    # dict preserves insertion order, so fromkeys de-duplicates in order.
    return list(dict.fromkeys(term.lower() for term in terms))

# Normalise case and collapse duplicates
lowercased_terms = lowercase_terms(cleaned_terms)
print(f"Lowercased unique terms ({len(lowercased_terms)}):", lowercased_terms)

# Filter out blacklisted terms
def filter_blacklisted_terms(terms, blacklisted_terms):
    """Return the terms that do not appear verbatim in ``blacklisted_terms``.

    Matching is by exact equality: a term that merely contains a blacklisted
    word is kept. Input order is preserved.
    """
    kept_terms = []
    for candidate in terms:
        if candidate in blacklisted_terms:
            continue
        kept_terms.append(candidate)
    return kept_terms

# Drop any term that is itself on the blacklist
final_terms = filter_blacklisted_terms(lowercased_terms, blacklisted_terms)
print(f"Final filtered terms ({len(final_terms)}):", final_terms)

Blacklisted terms: ['international', 'foreign', 'global', 'worldwide', 'customs', 'tariffs', 'non-canadian', 'overseas']
Refined terms (nested): ['"interprovincial"', '"cross-border"', '"extraprovincial"', '"interjurisdictional"', '"interprovincial"', '"extraprovincial"', '"cross-border"', '"interjurisdictional"', '"interprovincial"', '"cross-border"', '"extraterritorial"', '"jurisdiction"', '"interprovincial"', '"extraterritorial"', '"domestic trade"', '"provincial jurisdiction"', 'interprovincial trade restrictions', 'extraterritorial application of provincial regulations', 'mutual recognition of standards between provinces', 'harmonization of provincial regulatory frameworks', '"interprovincial trade regulations"', '"extraterritorial application of provincial laws"', '"cross-border goods and services within Canada"', '"mutual recognition of provincial standards"', '"interprovincial trade regulations"', '"extraterritorial application of provincial laws"', '"cross-border goods and ser

#### 4. Search Neo4j Database for Matching Nodes

In [15]:
# Use the generated terms for searching
terms = final_terms

neo4j_worker = Neo4jRetrieval(NEO4J_URI, NEO4J_USERNAME, NEO4J_PASSWORD)
try:
    nodes = neo4j_worker.search_many(terms)
    print(f"{len(nodes)} nodes found using generated terms")
finally:
    # Always release the connection, even when the search raises.
    neo4j_worker.close()

2809 nodes found using generated terms


#### 5. Define Relevance Assessment Function

In [16]:
def create_prompt(question, node):
    """Build the relevance-classification prompt for a single node.

    ``question`` and ``node`` are interpolated into a fixed template that asks
    the model to answer with "1" (related) or "0" (not related) only.
    """
    return f"""
    Your task is to determine if the following object is related to the question: "{question}".

    Object: {node}

    Focus on content that addresses:
    - Trade barriers between Canadian provinces and territories
    - Out-of-province (but within Canada) goods and services
    - Interprovincial regulatory compliance
    - NOT international or out-of-country trade

    Stay grounded to the original question about interprovincial/interterritorial regulations within Canada.

    If the object is related to interprovincial/interterritorial trade within Canada, return "1". 
    If it is not related or deals with international trade, return "0".
    Do not return any explanation, just the number.

    For example:
    Question: "What BC regulations have clauses about extraprovincial services?"
    Object: {{ "text": "This regulation applies to services provided from Alberta to British Columbia customers." }}
    Response: 1
    """

#### 6. Filter Nodes for Relevance Using LLM

In [17]:
related_nodes = []


def _is_relevant(node):
    """Ask the LLM whether ``node`` is relevant to ``question``.

    Prints the raw response and a short preview of the node text. Returns
    True when the response parses to a non-zero integer. Returns False when
    the response is zero, is not an integer, or the LLM call itself fails;
    failures are reported rather than silently ignored.
    """
    prompt = create_prompt(question, node)
    try:
        verdict = bedrock.get_response(prompt).strip()
    except Exception as exc:
        # Best effort: one failed request (throttling, network, bad response
        # object) must not abort the whole batch, but it should be visible.
        print(f"Relevance check failed ({type(exc).__name__}: {exc}); node skipped")
        return False

    print(verdict)
    try:
        relevant = int(verdict) != 0
    except ValueError:
        print(f"Unexpected relevance response {verdict!r}; node skipped")
        return False

    # The text may be missing; fall back to an empty preview instead of failing.
    preview = str(node.get("text") or "")
    if relevant:
        print("Relevant:", preview[:100] + "...")
    else:
        print("Not relevant:", preview[:50] + "...")
    return relevant


# Use LLM to determine if the node is relevant
def add_or_dispose(node):
    """Append ``node`` to the module-level ``related_nodes`` if it is relevant."""
    if _is_relevant(node):
        related_nodes.append(node)


# Process nodes in parallel
candidate_nodes = list(nodes)
with ThreadPoolExecutor(10) as executor:
    # map() yields results in input order; consuming it here means the
    # selection below follows the order of `nodes` rather than the order in
    # which worker threads happened to finish.
    relevance_flags = list(executor.map(_is_relevant, candidate_nodes))

related_nodes.extend(
    node for node, is_relevant in zip(candidate_nodes, relevance_flags) if is_relevant
)

print(f"{len(related_nodes)} relevant nodes found")

0
Not relevant: accident claim means a claim over which the tribun...
0
Not relevant: request for tribunal resolution means a request un...
0
Not relevant: initiating party means the person who made the ini...
0
Not relevant: tribunal means the civil resolution tribunal refer...
0
Not relevant: order giving effect to a final decision means a tr...
0
Not relevant: general authority of tribunal — claim categories 2...
0
Not relevant: child means a person who is under 19 years of age ...
0
Not relevant: 1 a person who has a claim that is within the juri...
0
Not relevant: cooperative association claim means a claim over w...
0
Not relevant: dispute means the claims that are to be resolved i...
0
Not relevant: 1 the government may not be a party to a tribunal ...
0
Not relevant: 1 the tribunal may refuse to resolve a claim or a ...
0
Not relevant: time limit — court determination about matters wit...
0
Not relevant: 4 if one or more of the persons served, or deemed ...
0
Not relevant: 2 su

#### 7. Save Relevant Nodes to File

In [18]:
# Save the related nodes for later use
with open("related_nodes.jsonl", "w") as f:
    for node in related_nodes:
        f.write(json.dumps(node) + "\n")

print("Relevant nodes saved to related_nodes.jsonl")

Relevant nodes saved to related_nodes.jsonl


#### 8. Generate Final Report

In [19]:
# Load nodes from file (if needed)
# The file is only held open while reading, not during the LLM call below.
with open("related_nodes.jsonl", "r") as f:
    # Skip blank lines so a stray empty line does not break json.loads.
    related_nodes = [json.loads(line) for line in f if line.strip()]

related_nodes = related_nodes[:100]  # Limit for demonstration

# Reduce each node to the two reported fields and drop repeated combinations
# here, so de-duplication does not depend on the LLM following instructions.
formatted_nodes = []
seen_entries = set()
for node in related_nodes:
    entry = {
        "document": node.get("regId") or node.get("actId"),
        "section_number": node.get("sectionNumber")
    }
    # Serialised form is used as the key because field values may be unhashable.
    entry_key = json.dumps(entry, sort_keys=True)
    if entry_key in seen_entries:
        continue
    seen_entries.add(entry_key)
    formatted_nodes.append(entry)

# Generate report table
prompt = f"""
    You are a report generator. Generate a table outlining BC regulations with clauses about out-of-province goods/services.
    
    Create a Markdown table with two columns: Document and Section Number.
    Do not repeat the same combination of document and section number.
    
    Here is the data:
    {json.dumps(formatted_nodes, indent=2)}
    """

bedrock_response = bedrock.get_response(prompt)
print("Final Report:")
print("=" * 50)
print(bedrock_response.strip())

Final Report:
Here's the Markdown table with two columns: Document and Section Number, without repeating the same combination of document and section number:

| Document | Section Number |
|----------|----------------|
| Architects (Landscape) Act | 6 |
| Carbon Tax Act | 14 |
| Civil Forfeiture Act | 1 |
| Cooperative Association Act | 1 |
| Credit Union Incorporation Act | 1 |
| Emergency Health Services Act | 10 |
| Emergency Health Services Act | 1 |
| Emergency Health Services Act | 5.6 |
| Employment Standards Act | 119 |
| Enforcement of Canadian Judgments and Decrees Act | 1 |
| Environmental Assessment Act | 42 |
| Family Law Act | 191 |
| Financial Institutions Act | 1 |
| Financial Institutions Act | 157 |
| Financial Institutions Act | 164 |
| Financial Institutions Act | 160 |
| Financial Institutions Act | 165 |
| Financial Institutions Act | 211 |
| Fish and Seafood Act | 45 |
| Food and Agricultural Products Classification Act | 31 |
| Health Professions Act | 12.2 |
| 