In [1]:
from rag_document_generator import RAGDocumentGenerator
import os

In [2]:
# Products to generate documents for, as (12-digit UPC code, human label) pairs.
# The labels exist only for readers; only the codes end up in UPC_LIST.
_PRODUCT_CATALOG = (
    ("044000069919", "Oreo Cakester"),
    ("044000006792", "Chips Ahoy"),
    ("016000189102", "Fruit Roll-Ups"),
    ("016000167100", "Fruit by the Foot"),
    ("028400596008", "Hot Fries"),
    ("028400433303", "Hot Cheetos"),
    ("889392010190", "Celsius Peach Vibe"),
    ("611269174526", "Red Bull Cranberry"),
    ("041548750927", "Outshine Pomegranate"),
    ("085002805155", "Johnny Pops Cotton Candy"),
)

# UPC codes in catalog order; kept as strings so leading zeros are preserved.
UPC_LIST = [upc_code for upc_code, _label in _PRODUCT_CATALOG]

In [4]:
# Run configuration: everything a reader might want to tune lives here.
OUTPUT_DIRECTORY = "./../data"  # Directory where documents will be saved
RATE_LIMIT_DELAY = 1.0  # Delay between API calls (seconds)
REQUEST_TIMEOUT = 10  # Forwarded as `timeout`; presumably seconds per request (TODO confirm)

# Initialize the RAG document generator with the settings above.
generator = RAGDocumentGenerator(
    rate_limit_delay=RATE_LIMIT_DELAY,
    timeout=REQUEST_TIMEOUT,
)

In [5]:
# Announce the run configuration before any documents are generated.
print("=== RAG Document Generator ===")
print(f"Generating documents for {len(UPC_LIST)} UPC codes")
print(f"Output directory: {OUTPUT_DIRECTORY}")
print(f"Rate limit delay: {RATE_LIMIT_DELAY} seconds")
print("-" * 50)

# Generate the documents; the call returns the UPCs that succeeded and the
# UPCs that failed as two separate collections.
successful_upcs, failed_upcs = generator.generate_rag_documents(
    upc_list=UPC_LIST,
    output_dir=OUTPUT_DIRECTORY
)

# Generate collection metadata describing this run; the returned path is
# reported in the summary below.
metadata_path = generator.generate_collection_metadata(
    upc_list=UPC_LIST,
    successful_upcs=successful_upcs,
    failed_upcs=failed_upcs,
    output_dir=OUTPUT_DIRECTORY
)

# Guard the percentage: an empty UPC_LIST would otherwise raise
# ZeroDivisionError after all the work above has already completed.
total_upcs = len(UPC_LIST)
success_rate = (len(successful_upcs) / total_upcs * 100) if total_upcs else 0.0

# Print summary
print("\n" + "=" * 50)
print("GENERATION COMPLETE")
print("=" * 50)
print(f"📁 Documents saved to: {os.path.abspath(OUTPUT_DIRECTORY)}")
print(f"📊 Metadata saved to: {os.path.abspath(metadata_path)}")
print(f"✅ Successful documents: {len(successful_upcs)}")
print(f"❌ Failed/Not found: {len(failed_upcs)}")
print(f"📈 Success rate: {success_rate:.1f}%")

# Per-UPC breakdown; each section is skipped when it would be empty.
if successful_upcs:
    print("\n✅ Successfully processed UPCs:")
    for upc in successful_upcs:
        print(f"   • {upc}")

if failed_upcs:
    print("\n❌ Failed or not found UPCs:")
    for upc in failed_upcs:
        print(f"   • {upc}")

print("\n🎯 Your RAG document collection is ready!")
print("   Each document contains comprehensive product information")
print("   formatted for optimal retrieval in RAG applications.")

=== RAG Document Generator ===
Generating documents for 10 UPC codes
Output directory: ./../data
Rate limit delay: 1.0 seconds
--------------------------------------------------
Generating RAG documents for 10 UPC codes...
Processing UPC 1/10: 044000069919
✅ Successfully generated document for UPC 044000069919
Processing UPC 2/10: 044000006792
✅ Successfully generated document for UPC 044000006792
Processing UPC 3/10: 016000189102
✅ Successfully generated document for UPC 016000189102
Processing UPC 4/10: 016000167100
✅ Successfully generated document for UPC 016000167100
Processing UPC 5/10: 028400596008
✅ Successfully generated document for UPC 028400596008
Processing UPC 6/10: 028400433303
✅ Successfully generated document for UPC 028400433303
Processing UPC 7/10: 889392010190
✅ Successfully generated document for UPC 889392010190
Processing UPC 8/10: 611269174526
✅ Successfully generated document for UPC 611269174526
Processing UPC 9/10: 041548750927
✅ Successfully generated docume