In [1]:
from pinecone import Pinecone, ServerlessSpec
import time
import os
import json
from datetime import datetime, timedelta
from pinecone import Pinecone, ServerlessSpec
from openai import OpenAI
from dotenv import load_dotenv

# Load environment variables
load_dotenv()
# Initialize Pinecone (you'll need to set your API key)
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
# Your promotional records data
records = [
    {
        "_id": "rec1",
        "title": "Exp X",
        "type": "Simple Promotion",
        "start_date": "2025-06-18",
        "end_date": "2025-06-30",
        "items": [
            {
                "promotion_id": "PROMO437",
                "component_id": "COMP437",
                "item_id": "ITEM001",
                "discount_type": "% Off",
                "discount_value": "30"
            },
            {
                "promotion_id": "PROMO437",
                "component_id": "COMP437",
                "item_id": "ITEM021",
                "discount_type": "% Off",
                "discount_value": "30"
            },
            {
                "promotion_id": "PROMO437",
                "component_id": "COMP437",
                "item_id": "ITEM041",
                "discount_type": "% Off",
                "discount_value": "30"
            }
        ]
    },
    # Add more promotional records here as needed
    {
        "_id": "rec2",
        "title": "Summer Sale",
        "type": "Bundle Promotion",
        "start_date": "2025-07-01",
        "end_date": "2025-07-31",
        "items": [
            {
                "promotion_id": "PROMO500",
                "component_id": "COMP500",
                "item_id": "ITEM100",
                "discount_type": "Fixed Amount",
                "discount_value": "50"
            },
            {
                "promotion_id": "PROMO500",
                "component_id": "COMP500",
                "item_id": "ITEM200",
                "discount_type": "Fixed Amount",
                "discount_value": "25"
            }
        ]
    },
    {
        "_id": "rec3",
        "title": "Flash Weekend Deal",
        "type": "Time Limited",
        "start_date": "2025-06-21",
        "end_date": "2025-06-23",
        "items": [
            {
                "promotion_id": "PROMO600",
                "component_id": "COMP600",
                "item_id": "ITEM300",
                "discount_type": "% Off",
                "discount_value": "50"
            }
        ]
    }
]



In [2]:
def create_searchable_text(record):
    """
    Convert promotional record into searchable text for embedding
    """
    # Extract basic info
    title = record.get("title", "")
    promo_type = record.get("type", "")
    start_date = record.get("start_date", "")
    end_date = record.get("end_date", "")
    
    # Extract items information
    items_text = []
    for item in record.get("items", []):
        item_text = f"Item {item.get('item_id', '')} with {item.get('discount_value', '')} {item.get('discount_type', '')}"
        items_text.append(item_text)
    
    # Combine all information into searchable text
    searchable_text = f"{title} {promo_type} promotion from {start_date} to {end_date}. " + " ".join(items_text)
    
    return searchable_text



In [3]:
def prepare_records_for_pinecone(records):
    """
    Transform promotional records into format suitable for Pinecone with embeddings
    """
    prepared_records = []
    
    for record in records:
        # Create searchable text for embedding
        chunk_text = create_searchable_text(record)
        
        # Prepare the record with the required structure
        prepared_record = {
            "_id": record["_id"],
            "chunk_text": chunk_text,  # This will be used for embedding
            "title": record["title"],
            "type": record["type"],
            "start_date": record["start_date"],
            "end_date": record["end_date"],
            "items_count": len(record["items"]),
            # Store original items as metadata (you might want to serialize this differently)
            "promotion_ids": [item["promotion_id"] for item in record["items"]],
            "item_ids": [item["item_id"] for item in record["items"]],
            "discount_types": [item["discount_type"] for item in record["items"]],
            "discount_values": [item["discount_value"] for item in record["items"]]
        }
        
        prepared_records.append(prepared_record)
    
    return prepared_records



In [4]:
# Prepare records for Pinecone
prepared_records = prepare_records_for_pinecone(records)

# Print sample of prepared records to see the structure
print("Sample prepared record:")
print(prepared_records[0])
print("\nSearchable text:")
print(prepared_records[0]["chunk_text"])

# Set up Pinecone index
index_name = "promotional-search-index-2"

# Initialize Pinecone (uncomment and add your API key)
# pc = Pinecone(api_key="your-pinecone-api-key")



Sample prepared record:
{'_id': 'rec1', 'chunk_text': 'Exp X Simple Promotion promotion from 2025-06-18 to 2025-06-30. Item ITEM001 with 30 % Off Item ITEM021 with 30 % Off Item ITEM041 with 30 % Off', 'title': 'Exp X', 'type': 'Simple Promotion', 'start_date': '2025-06-18', 'end_date': '2025-06-30', 'items_count': 3, 'promotion_ids': ['PROMO437', 'PROMO437', 'PROMO437'], 'item_ids': ['ITEM001', 'ITEM021', 'ITEM041'], 'discount_types': ['% Off', '% Off', '% Off'], 'discount_values': ['30', '30', '30']}

Searchable text:
Exp X Simple Promotion promotion from 2025-06-18 to 2025-06-30. Item ITEM001 with 30 % Off Item ITEM021 with 30 % Off Item ITEM041 with 30 % Off


In [5]:
# Create index if it doesn't exist
def setup_pinecone_index(pc, index_name, prepared_records):
    if not pc.has_index(index_name):
        pc.create_index_for_model(
            name=index_name,
            cloud="aws",
            region="us-east-1",
            embed={
                "model": "llama-text-embed-v2",
                "field_map": {"text": "chunk_text"}
            }
        )
    
    # Target the index
    dense_index = pc.Index(index_name)
    
    # Upsert the records into a namespace
    dense_index.upsert_records("promotional-namespace-2", prepared_records)
    
    # Wait for the upserted vectors to be indexed
    time.sleep(10)
    
    # View stats for the index
    stats = dense_index.describe_index_stats()
    print("Index stats:", stats)
    
    return dense_index



In [6]:
def search_promotions(dense_index, query, top_k=5, use_rerank=True):
    """
    Search for promotions based on query
    """
    search_params = {
        "namespace": "promotional-namespace",
        "query": {
            "top_k": top_k,
            "inputs": {
                'text': query
            }
        }
    }
    
    if use_rerank:
        search_params["rerank"] = {
            "model": "bge-reranker-v2-m3",
            "top_n": top_k,
            "rank_fields": ["chunk_text"]
        }
    
    results = dense_index.search(**search_params)
    return results



In [7]:
def print_promotion_results(results):
    """
    Print search results in a readable format
    """
    print("\nSearch Results:")
    print("-" * 80)
    
    for i, hit in enumerate(results['result']['hits'], 1):
        print(f"{i}. ID: {hit['_id']}")
        print(f"   Title: {hit['fields']['title']}")
        print(f"   Type: {hit['fields']['type']}")
        print(f"   Period: {hit['fields']['start_date']} to {hit['fields']['end_date']}")
        print(f"   Items: {hit['fields']['items_count']} items")
        print(f"   Item IDs: {', '.join(hit['fields']['item_ids'])}")
        print(f"   Discounts: {', '.join([f'{v} {t}' for v, t in zip(hit['fields']['discount_values'], hit['fields']['discount_types'])])}")
        print(f"   Score: {round(hit['_score'], 3)}")
        print(f"   Searchable Text: {hit['fields']['chunk_text'][:100]}...")
        print("-" * 80)



In [10]:

 

# Set up the index and upsert data
dense_index = setup_pinecone_index(pc, index_name, prepared_records)

# Example searches
search_queries = [
    "30% discount promotion",
    "weekend deals",
    "ITEM001 promotion", 
    "summer sale bundle",
    "percentage off items"
]

for query in search_queries:
    print(f"\n{'='*60}")
    print(f"SEARCHING FOR: {query}")
    print(f"{'='*60}")
    
    # Search without reranking
    results = search_promotions(dense_index, query, top_k=3, use_rerank=False)
    print("\nResults WITHOUT reranking:")
    print_promotion_results(results)
    
    # Search with reranking
    results = search_promotions(dense_index, query, top_k=3, use_rerank=True)
    print("\nResults WITH reranking:")
    print_promotion_results(results)

# Clean up (uncomment to delete index when done)
# pc.delete_index(index_name)

print("\n" + "="*60)
print("SETUP COMPLETE")
print("="*60)
print("1. Add your Pinecone API key")
print("2. Uncomment the example usage section")
print("3. Run the code to perform semantic search on promotional data")
print("4. The system will search for promotions based on:")
print("   - Promotion titles and types")
print("   - Date ranges") 
print("   - Item IDs and discount information")
print("   - Any combination of the above")

Index stats: {'dimension': 1024,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {'promotional-namespace-2': {'vector_count': 3}},
 'total_vector_count': 3,
 'vector_type': 'dense'}

SEARCHING FOR: 30% discount promotion

Results WITHOUT reranking:

Search Results:
--------------------------------------------------------------------------------

Results WITH reranking:

Search Results:
--------------------------------------------------------------------------------

SEARCHING FOR: weekend deals

Results WITHOUT reranking:

Search Results:
--------------------------------------------------------------------------------

Results WITH reranking:

Search Results:
--------------------------------------------------------------------------------

SEARCHING FOR: ITEM001 promotion

Results WITHOUT reranking:

Search Results:
--------------------------------------------------------------------------------

Results WITH reranking:

Search Results:
-------------------------------------