In [None]:
# Sanity check: unpickle the content tree and print one nested node's
# header embedding (second child of the root's second child).
# NOTE: pickle.load can execute arbitrary code embedded in the file, so only
# load pickles that come from a trusted source.
with open('content_tree2.pkl', 'rb') as f:
    tree = pickle.load(f)

print(tree.root.child_nodes[1].child_nodes[1].header_embedding)

In [None]:
def test_rag_functionality():
    """Run a fixed list of questions through ``rag_query`` and print each answer.

    The content tree is unpickled from ``content_tree2.pkl`` (relative path).
    Every question is answered with ``top_k=1``; the first five questions are
    answered a second time with ``top_k=3``. An exception raised while
    handling a question is reported on stdout and the run continues with the
    next question. Nothing is returned.
    """
    wide_rule = "=" * 80
    query_rule = "=" * 60
    # Only this many leading questions get the extra top-3 pass (to save time).
    top3_cutoff = 5

    print(wide_rule)
    print("TESTING RAG FUNCTIONALITY")
    print(wide_rule)

    print("Load content tree .....")
    with open('content_tree2.pkl', 'rb') as pickle_file:
        tree = pickle.load(pickle_file)

    print("\n" + wide_rule)
    print("RAG QUERY TESTING")
    print(wide_rule)

    # Ordered from basic to complex.
    test_queries = (
        "What is chemistry?",
        "What are the phases of matter?",
        "How do you measure density?",
        "What is the scientific method?",
        "How do you calculate significant figures?",
        "What are atoms and molecules?",
        "What is temperature measurement?",
        "What is the difference between accuracy and precision?",
        "How do you perform dimensional analysis?",
        "What are the domains of chemistry?",
        "What is quantum mechanics?",  # Should not be found
        "How do you build a rocket?",  # Should not be found
    )

    for position, query in enumerate(test_queries, start=1):
        print("\n" + query_rule)
        print(f"Query {position}: {query}")
        print(query_rule)

        try:
            # Baseline answer built from the single best node.
            top1_answer = tree.rag_query(query, top_k=1)
            print("\nAnswer (top-1 node):")
            print(f"{top1_answer}")

            if position > top3_cutoff:
                continue

            # Second pass with a wider context of three nodes.
            print("\n" + "-" * 40)
            print("Testing with top-3 nodes:")
            top3_answer = tree.rag_query(query, top_k=3)
            print("\nAnswer (top-3 nodes):")
            print(f"{top3_answer}")
        except Exception as exc:
            # Best effort: report the failure and move on to the next question.
            print(f"Error processing query '{query}': {exc}")

    print("\n" + wide_rule)
    print("RAG FUNCTIONALITY TEST COMPLETE!")
    print(wide_rule)

# Run the RAG smoke test only when this code executes as the main module.
# NOTE(review): inside a notebook kernel __name__ is normally "__main__", so
# this presumably fires every time the cell is run — confirm that is intended.
if __name__ == "__main__":
    test_rag_functionality()

In [None]:
# Load the pickled content tree from disk.
print("Load content tree .....")
with open('content_tree2.pkl', 'rb') as f:
    tree = pickle.load(f)

# Query under analysis; it packs two questions into one string.
# Alternative query: "What are the phases of matter?"
query = "What is the major elements of earth crust and air? What is the weight percentage of oxygen  on earth?"
print(f"\nAnalyzing query: {query}")

# Step 1: run the search on its own and report the single best hit.
search_results = tree.enhanced_search(query, max_results=1)
print("\nSearch results (top 1):")

# Index every node by its id so (node_id, score) hits can be resolved to nodes.
all_nodes = tree.tree_node_iterator()
node_map = {candidate.node_id: candidate for candidate in all_nodes}

for i, (node_id, score) in enumerate(search_results, 1):
    if node_id not in node_map:
        # Hits whose id is not present in the tree are skipped without output.
        continue
    node = node_map[node_id]
    print(f"  {i}. [Node {node_id}] {node.header}")
    print(f"     Score: {score:.4f}")
    # Only the first 100 characters of the node text are shown.
    print(f"     Content preview: {node.content_text[:100]}...")
    print()

# Step 2: run the full RAG pipeline on the same query and show the answer.
print("=" * 60)
print("RAG Answer:")
print("=" * 60)
answer = tree.rag_query(query, top_k=1)
print(answer)

print(f"\n{'=' * 80}")
print("SEARCH ANALYSIS COMPLETE!")
print("=" * 80)

In [None]:
# Test customized weights for scoring
from parameters import (
    DEFAULT_PARAMETERS, 
    SEMANTIC_FOCUSED_PARAMETERS,
    LEXICAL_FOCUSED_PARAMETERS,
    create_custom_parameters
)

# Custom scoring configuration handed to rag_query through `custom_params`.
custom_config = create_custom_parameters(
    # Semantic similarity weights (five equal shares of 0.2, summing to 1.0)
    semantic_header=0.2,
    semantic_summary=0.2,
    semantic_content=0.2,
    semantic_chunks=0.2,
    semantic_sentences=0.2,
    # N-gram weights (each longer n-gram weighs twice the previous one)
    ngram_monogram=1.0,
    ngram_bigram=2.0,
    ngram_trigram=4.0,
    # Combined search weights (semantic vs. lexical share of the final score)
    combined_semantic=0.6,
    combined_lexical=0.4
)

# Alternative queries; exactly one assignment is left active. The previously
# active "periodic table" question was a dead store (overwritten on the next
# line before any use), so it is now parked with the other alternatives.
#query = "Give me an example of heterogeneous mixture in daily life"
#query = "What is the major elements of earth crust and air? What is the weight percentage of oxygen  on earth?"
#query = "What is periodic table? Who discovered it?"
query = "Show me the figure of the periodic table"

# perf_counter is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() is wall-clock time and can jump if the system clock
# is adjusted mid-run.
time1 = time.perf_counter()
answer = tree.rag_query(query, top_k=2, custom_params=custom_config, debug=True)
print(answer)
print("Time = ", time.perf_counter() - time1)
    

In [None]:
# Query used by the timed rag_query call on `target_tree` in a later cell.
query = "Show me the figure of the periodic table"

def content_tree_copy(source_tree, target_tree):
    """Attach the state of ``source_tree`` to ``target_tree`` in place.

    Four attributes are transferred, in this order: ``root``,
    ``_node_counter``, ``inverse_index`` and ``inverse_index_builder``.
    Each one is assigned by reference rather than duplicated, so afterwards
    both trees point at the same underlying objects. Other attributes of
    either tree are left untouched.

    Args:
        source_tree: Object providing the four attributes.
        target_tree: Object that receives them.

    Returns:
        None.

    Raises:
        AttributeError: If ``source_tree`` lacks one of the attributes; any
            attributes earlier in the order have already been assigned.
    """
    transferred_attributes = (
        "root",
        "_node_counter",
        "inverse_index",
        "inverse_index_builder",
    )
    for attribute in transferred_attributes:
        setattr(target_tree, attribute, getattr(source_tree, attribute))

# Build a fresh ContentTree and attach the loaded tree's state to it; the
# attributes are assigned by reference, so both objects share the same data.
# NOTE(review): presumably this lets the currently defined ContentTree class
# run against the data unpickled into `tree` — confirm.
target_tree = ContentTree()
content_tree_copy(tree, target_tree)

In [None]:
# Time one rag_query call on the rebuilt tree, using the same query and
# custom weights as before. perf_counter is a monotonic clock meant for
# measuring intervals; time.time() is wall-clock time and can jump if the
# system clock is adjusted mid-run.
time1 = time.perf_counter()
answer = target_tree.rag_query(query, top_k=2, custom_params=custom_config, debug=True)
print("\n\nFinal answer")
print(answer)
print("Time = ", time.perf_counter() - time1)

The figure of the periodic table is shown in **Figure A1** in the provided content. Here is the relevant information:

- **Figure A1**: The Periodic Table
- **Image Link**:
![Image](Appendix_A_images/img-0.jpeg)

This figure is described as "The Periodic Table," a fundamental chart in chemistry organizing elements by atomic structure and properties. The image link provided in the content is the visual representation of the periodic table. 

**Answer**: The figure of the periodic table is **Figure A1**, and its image link is ![Image](Appendix_A_images/img-0.jpeg).

In [None]:
from openai import OpenAI
# Client built with default settings; no credentials are passed explicitly.
# NOTE(review): presumably the API key is read from the environment — confirm.
client = OpenAI()

# One request to the Responses API, asking for minimal reasoning effort.
response = client.responses.create(
    model="gpt-5",
    input="How much gold would it take to coat the Statue of Liberty in a 1mm layer?",
    reasoning={"effort": "minimal"},
)

# Prints the whole response object, not only the generated text.
print(response)