In [5]:
from rag_document_generator import RAGDocumentGenerator
import os

In [None]:
# UPC codes (12-digit strings) of the products to generate RAG documents for.
# Each code must appear only once: the list is handed to the generator as-is,
# and its length is used as the denominator of the reported success rate.
UPC_LIST = [
    "044000069919",  # Oreo Cakesters
    "044000006792",  # Chips Ahoy
    "016000189102",  # Fruit roll ups
    "016000167100",  # Fruit by the foot
    "028400596008",  # Hot fries
    "028400433303",  # Hot Cheetos
    "889392010190",  # Celsius peach vibe
    "611269174526",  # Red Bull cranberry
    "041548750927",  # Outshine pomegranate
    "077567021984",  # Scribblers popsicles
    "028400704045",  # Doritos Minis Nacho Cheese
    "028400160148",  # Lay's Classic Potato Chips 10oz
    "060410050910",  # Lay's Classic 28g
    "028400090094",  # Lay's Potato Chips
    "028400091565",  # Lay's Classic 1.5oz
    "028400199988",  # Lay's Cheddar & Sour Cream
    "038000138607",  # Pringles Salt & Vinegar
    "028400070560",  # Doritos Nacho Cheese
    "028400443753",  # Doritos Nacho Cheese (case pack)
    "028400443593",  # Lay's Classic (case pack)
    "044000026912",  # Oreo Double Stuff Creamsicle
    "044000025397",  # Golden Oreos
    "044000002572",  # Oreo Double Stuff Cookie Barz
    "044000003289",  # Oreo Double Stuff Cookie Barz (variant)
    "044000057152",  # Nabisco Variety Pack (Oreo, Nutter Butter, Chips Ahoy)
    "019320001987",  # Oreo Original Cookies sleeve pack
    "076840000661",  # Ben & Jerry's Cookie-style product
    "076840000654",  # Ben & Jerry's Cinnamon Buns related
    "076840101320",  # Ben & Jerry's Half Baked
    "076840101184",  # Ben & Jerry's Pistachio
    "076840100125",  # Ben & Jerry's Chubby Hubby
    "076840100040",  # Ben & Jerry's Ice Cream
    "076840101344",  # Ben & Jerry's Everything But The...
    "077567254238",  # Breyers Natural Vanilla
    "076840200153",  # Ben & Jerry's Cherry Garcia 4oz
    "076840200160",  # Ben & Jerry's Chocolate Fudge Brownie 3.6oz
    "076840100118",  # Ben & Jerry's Coffee Toffee Bar Crunch
    "076840101542",  # Ben & Jerry's Karamel Sutra Core
    "611269818994",  # Red Bull Energy Drink 12oz
    "180854000507",  # Red Bull Energy Drink Original
    "611269115482",  # Red Bull Yellow Edition Tropical
    "070847811169",  # Monster Energy Drink 16 fl oz
    "070847028291",  # Monster Energy Original 6-pack
    "070847028406",  # Monster Energy 473ml
    "611269836486",  # Red Bull Sugar Free 24-pack
    "611269841237",  # Red Bull Winter Edition Plum Twist
    "611269841244",  # Red Bull Plum Twist Winter Edition 24-pack
]

# Fail fast if a code is pasted in twice; a repeated code would be processed
# twice and would inflate the totals printed in the summary.
assert len(UPC_LIST) == len(set(UPC_LIST)), "UPC_LIST contains duplicate codes"

In [7]:
# Run configuration: everything a reader might want to tune lives here.
OUTPUT_DIRECTORY = "./../data"  # Directory where documents will be saved
RATE_LIMIT_DELAY = 1.0  # Delay between API calls (seconds)
# Forwarded as the generator's `timeout` argument.
# NOTE(review): presumably a per-request timeout in seconds; confirm against
# RAGDocumentGenerator.
REQUEST_TIMEOUT = 10

# Initialize the RAG document generator
generator = RAGDocumentGenerator(
    rate_limit_delay=RATE_LIMIT_DELAY,
    timeout=REQUEST_TIMEOUT,
)

In [8]:
# Generate one RAG document per UPC, write the collection metadata, and print
# a summary of what succeeded and what failed.
total_upcs = len(UPC_LIST)

print("=== RAG Document Generator ===")
print(f"Generating documents for {total_upcs} UPC codes")
print(f"Output directory: {OUTPUT_DIRECTORY}")
print(f"Rate limit delay: {RATE_LIMIT_DELAY} seconds")
print("-" * 50)

# Generate the documents
successful_upcs, failed_upcs = generator.generate_rag_documents(
    upc_list=UPC_LIST,
    output_dir=OUTPUT_DIRECTORY
)

# Generate collection metadata
metadata_path = generator.generate_collection_metadata(
    upc_list=UPC_LIST,
    successful_upcs=successful_upcs,
    failed_upcs=failed_upcs,
    output_dir=OUTPUT_DIRECTORY
)

# An empty UPC_LIST would otherwise raise ZeroDivisionError after all the
# work above has already been done; report 0.0% instead.
success_rate = (len(successful_upcs) / total_upcs * 100) if total_upcs else 0.0

# Print summary
print("\n" + "=" * 50)
print("GENERATION COMPLETE")
print("=" * 50)
print(f"📁 Documents saved to: {os.path.abspath(OUTPUT_DIRECTORY)}")
print(f"📊 Metadata saved to: {os.path.abspath(metadata_path)}")
print(f"✅ Successful documents: {len(successful_upcs)}")
print(f"❌ Failed/Not found: {len(failed_upcs)}")
print(f"📈 Success rate: {success_rate:.1f}%")

# List the UPCs in each outcome group; a group with no entries is skipped.
for heading, upcs in (
    ("\n✅ Successfully processed UPCs:", successful_upcs),
    ("\n❌ Failed or not found UPCs:", failed_upcs),
):
    if upcs:
        print(heading)
        for upc in upcs:
            print(f"   • {upc}")

print("\n🎯 Your RAG document collection is ready!")
print("   Each document contains comprehensive product information")
print("   formatted for optimal retrieval in RAG applications.")

=== RAG Document Generator ===
Generating documents for 39 UPC codes
Output directory: ./../data
Rate limit delay: 1.0 seconds
--------------------------------------------------
Generating RAG documents for 39 UPC codes...
Processing UPC 1/39: 028400704045
✅ Successfully generated document for UPC 028400704045
Processing UPC 2/39: 028400160148
✅ Successfully generated document for UPC 028400160148
Processing UPC 3/39: 060410050910
✅ Successfully generated document for UPC 060410050910
Processing UPC 4/39: 028400090094
✅ Successfully generated document for UPC 028400090094
Processing UPC 5/39: 028400091565
✅ Successfully generated document for UPC 028400091565
Processing UPC 6/39: 028400199988
✅ Successfully generated document for UPC 028400199988
Processing UPC 7/39: 038000138607
✅ Successfully generated document for UPC 038000138607
Processing UPC 8/39: 028400070560
✅ Successfully generated document for UPC 028400070560
Processing UPC 9/39: 028400443753
Error fetching data for UPC 028