# Catalog Maintenance Manual Testing

This notebook provides interactive testing for the catalog maintenance system.

## Features
- Test Pinecone index connections
- Run queries against dense/sparse/hybrid indices
- Test LLM services and prompt generation
- Debug product ingestion workflows
- Monitor system statistics

**Initial Brand**: specialized.com


In [1]:
# Setup and imports
import sys
import os
import asyncio
import json
from datetime import datetime

# Add src to path.
# The directory is resolved to an absolute path once, so imports performed by
# later cells do not depend on the working directory staying the same, and the
# membership check keeps re-runs of this cell from stacking duplicate entries.
# NOTE(review): '../src' is still resolved against the directory the kernel was
# started in -- confirm that is the notebook's own folder.
_src_path = os.path.abspath('../src')
if _src_path not in sys.path:
    sys.path.append(_src_path)

# Import our modules (all resolved through the src/ entry added above)
from configs.settings import get_settings
from pinecone_client import get_pinecone_client
from llm import get_llm_router
from llm.prompt_manager import PromptManager
from descriptor import get_descriptor_generator
from models.product import Product
from models.product_manager import ProductManager

print("✅ Imports successful!")
print(f"Timestamp: {datetime.now()}")


ModuleNotFoundError: No module named 'configs'

In [None]:
# Initialize configuration
# Load the settings object and echo the values the rest of the notebook
# relies on.
settings = get_settings()
print(f"Environment: {settings.env}")
gcp_settings = settings.gcp
print(f"GCP Project: {gcp_settings.project_id}")
print(f"Bucket: {gcp_settings.get_bucket_name()}")

# Test brand index naming
# Both index names are derived from the brand domain; later cells read
# `dense_index` and `sparse_index` directly.
brand = "specialized.com"
index_name_for = settings.pinecone.get_index_name
dense_index = index_name_for(brand, "dense")
sparse_index = index_name_for(brand, "sparse")

print(
    f"\nIndex names for {brand}:",
    f"Dense: {dense_index}",
    f"Sparse: {sparse_index}",
    sep="\n",
)


In [None]:
# Test Pinecone client
def _print_index_stats(kind, index_name, stats):
    """Print a labelled header followed by the stats as indented JSON."""
    print(f"\n📊 {kind} index ({index_name}) stats:")
    print(json.dumps(stats, indent=2))


try:
    pinecone_client = get_pinecone_client()
    print("✅ Pinecone client initialized")

    # Fetch and display stats for each index in turn; a failure on either one
    # drops into the handler below.
    dense_stats = pinecone_client.get_index_stats(dense_index)
    _print_index_stats("Dense", dense_index, dense_stats)

    sparse_stats = pinecone_client.get_index_stats(sparse_index)
    _print_index_stats("Sparse", sparse_index, sparse_stats)

except Exception as pinecone_error:
    # Best-effort diagnostics: any error raised above is reported here.
    print(f"❌ Pinecone connection failed: {pinecone_error}")
    print("Make sure PINECONE_API_KEY is set and indices exist")


In [None]:
# Test LLM router
try:
    llm_router = get_llm_router()
    print("✅ LLM router initialized")
    
    # Test a simple completion
    response = await llm_router.chat_completion(
        system="You are a helpful assistant.",
        messages=[{"role": "user", "content": "Say hello and confirm you're working!"}],
        task="test",
        max_tokens=50
    )
    
    print(f"\n🤖 LLM Response: {response}")
    
except Exception as e:
    print(f"❌ LLM test failed: {e}")
    print("Make sure OPENAI_API_KEY is set")

# Test Prompt Manager
try:
    prompt_manager = PromptManager()
    print("\n✅ Prompt manager initialized")
    
    # Get descriptor prompt
    descriptor_prompt = prompt_manager.get_descriptor_prompt()
    print(f"\n📝 Descriptor prompt preview:")
    print(descriptor_prompt[:200] + "...")
    
    # Get sizing prompt  
    sizing_prompt = prompt_manager.get_sizing_prompt()
    print(f"\n📏 Sizing prompt preview:")
    print(sizing_prompt[:200] + "...")
    
except Exception as e:
    print(f"❌ Prompt manager test failed: {e}")


In [None]:
# Test vertical auto-detection
#
# Builds a few hand-made product sets, asks the ingestor to infer the vertical
# both from the product categories and from the brand domain, and prints how
# each guess compares with the expected label.
try:
    # Imported inside the try so an import failure is reported by the handler
    # below like any other error in this cell.
    from product_ingestor import ProductIngestor
    ingestor = ProductIngestor()
    
    # Test different product types for vertical detection.
    # Each case: a brand domain, sample products carrying only categories,
    # and the vertical expected for them.
    test_cases = [
        {
            "brand": "specialized.com",
            "products": [
                Product(categories=["Road Bikes", "Performance"]),
                Product(categories=["Mountain Bikes", "Trail"]),
                Product(categories=["Helmets", "Safety"])
            ],
            "expected": "cycling"
        },
        {
            "brand": "fashion-store.com", 
            "products": [
                Product(categories=["Clothing", "Dresses"]),
                Product(categories=["Shoes", "Sneakers"]),
                Product(categories=["Accessories", "Bags"])
            ],
            "expected": "fashion"
        },
        {
            "brand": "beauty-brand.com",
            "products": [
                Product(categories=["Skincare", "Serums"]),
                Product(categories=["Cosmetics", "Foundation"]),
                Product(categories=["Beauty", "Moisturizer"])
            ],
            "expected": "beauty"
        }
    ]
    
    print("🔍 Testing Vertical Auto-Detection:")
    for case in test_cases:
        # NOTE(review): both helpers are underscore-prefixed methods of
        # ProductIngestor; confirm their signatures if that class changes.
        detected = ingestor._detect_vertical_from_products(case["products"])
        brand_detected = ingestor._detect_vertical_from_brand(case["brand"])
        
        # The leading newline puts a blank line between consecutive cases.
        print(f"\n📊 Brand: {case['brand']}")
        print(f"   Product categories: {[p.categories for p in case['products']]}")
        print(f"   Detected from products: {detected}")
        print(f"   Detected from brand: {brand_detected}")
        print(f"   Expected: {case['expected']}")
        # A case counts as a match when either detection path returns the
        # expected vertical.
        print(f"   ✅ Match: {detected == case['expected'] or brand_detected == case['expected']}")
    
except Exception as e:
    # Best-effort diagnostics: any error raised above is reported here.
    print(f"❌ Vertical detection test failed: {e}")


In [None]:
# Test descriptor generation with auto-detection
try:
    descriptor_gen = get_descriptor_generator()
    print("✅ Descriptor generator initialized")
    
    # Create sample products for different verticals to test auto-detection
    sample_products = [
        Product(
            id="test-bike-123",
            name="Tarmac SL7 Expert",
            brand="specialized.com",
            categories=["Road Bikes", "Performance"],
            price=4000,
            colors=["Gloss Red", "Satin Black"],
            sizes=["52cm", "54cm", "56cm", "58cm"],
            features=["Carbon Frame", "Shimano 105", "Tubeless Ready"]
        ),
        Product(
            id="test-fashion-456",
            name="Premium Cotton T-Shirt",
            brand="fashion-brand.com",
            categories=["Clothing", "Casual Wear"],
            price=45,
            colors=["Black", "White", "Navy"],
            sizes=["S", "M", "L", "XL"],
            features=["100% Cotton", "Pre-shrunk", "Classic Fit"]
        )
    ]
    
    for product in sample_products:
        print(f"\n🧪 Testing with sample product: {product.name}")
        print(f"Categories: {product.categories}")
        
        # Generate descriptor
        descriptor = await descriptor_gen.generate_descriptor(product)
        print(f"\n📝 Generated descriptor:")
        print(descriptor)
        
        # Test sizing generation
        sample_sizing = {
            "S": "Fits 34-36 inch chest" if "fashion" in product.id else "50cm frame for 160-170cm riders",
            "M": "Fits 38-40 inch chest" if "fashion" in product.id else "52cm frame for 170-175cm riders",
            "L": "Fits 42-44 inch chest" if "fashion" in product.id else "54cm frame for 175-180cm riders"
        }
        
        sizing = await descriptor_gen.generate_sizing(product, sample_sizing)
        print(f"\n📏 Generated sizing:")
        print(json.dumps(sizing, indent=2))
        print("-" * 50)
    
except Exception as e:
    print(f"❌ Descriptor generation failed: {e}")
