In [None]:
import os
import time
from pinecone import Pinecone, ServerlessSpec
from openai import OpenAI

# Both API keys are read from the environment and must be present before this
# cell runs, for example:
#   export PINECONE_API_KEY="YOUR_API_KEY"
#   export OPENAI_API_KEY="YOUR_API_KEY"


def _require_env(name):
    """Return the value of environment variable `name`; raise ValueError if it is unset or empty."""
    value = os.getenv(name)
    if value:
        return value
    raise ValueError(f"{name} environment variable not set.")


# The Pinecone key is resolved first, so a missing Pinecone key is reported first.
PINECONE_API_KEY = _require_env("PINECONE_API_KEY")
OPENAI_API_KEY = _require_env("OPENAI_API_KEY")

# Module-level service clients built from the keys above.
openai_client = OpenAI(api_key=OPENAI_API_KEY)
pinecone_client = Pinecone(api_key=PINECONE_API_KEY)

# Sample promotion record to index, plus the natural-language question to search with.
records = [
    {
        "_id": "rec1",
        "title": "Exp X",
        "type": "Simple Promotion",
        "start_date": "2025-06-18",
        "end_date": "2025-06-30",
        # The three line items are identical apart from their item_id.
        "items": [
            {
                "promotion_id": "PROMO437",
                "component_id": "COMP437",
                "item_id": item_id,
                "discount_type": "% Off",
                "discount_value": "30",
            }
            for item_id in ("ITEM001", "ITEM021", "ITEM041")
        ],
    }
]
query_text = "What is the duration of the promotion?"

# Pinecone index settings.
# 1536 is the dimension for OpenAI's text-embedding-ada-002 model.
index_name, dimension, metric = "semantic-search-demo", 1536, "cosine"
cloud, region = "aws", "us-east-1"




In [None]:
def create_embeddings(texts, model="text-embedding-ada-002"):
    """
    Creates OpenAI embeddings for a list of texts.

    Args:
        texts: List of strings to embed.
        model: Name of the OpenAI embedding model to request. Defaults to
            "text-embedding-ada-002".

    Returns:
        A list with one embedding vector per entry of `response.data`, in the
        order the API returned them. An empty `texts` yields an empty list
        without contacting the API.
    """
    # Short-circuit on empty input rather than sending a request with nothing to embed.
    if not texts:
        return []

    response = openai_client.embeddings.create(
        input=texts,
        model=model
    )
    return [item.embedding for item in response.data]



In [None]:
def upsert_records_to_pinecone(index, records_to_upsert=None, settle_seconds=3):
    """
    Upserts records to the Pinecone index.

    Each record's title is embedded and stored under the record's `_id`,
    together with its title, type, start_date and end_date as metadata.
    The nested `items` list is not sent.

    Args:
        index: Index handle exposing `upsert(vectors=...)`.
        records_to_upsert: Records to store; each must provide the keys
            `_id`, `title`, `type`, `start_date` and `end_date`. Defaults to
            the module-level `records`.
        settle_seconds: Seconds to pause after the upsert so the new vectors
            have time to become visible to queries. Use 0 to skip the pause.

    Raises:
        ValueError: If the number of embeddings returned does not match the
            number of records.
    """
    if records_to_upsert is None:
        records_to_upsert = records

    # Nothing to embed or store; skip the API calls entirely.
    if not records_to_upsert:
        print("No records to upsert.")
        return

    print("Creating and upserting records...")
    # In a real-world scenario, you would embed more than just the title.
    # We are using the title here for a simple, clear example.
    texts_to_embed = [rec['title'] for rec in records_to_upsert]
    embeddings = create_embeddings(texts_to_embed)

    # Fail loudly instead of pairing records with the wrong (or missing) vectors.
    if len(embeddings) != len(records_to_upsert):
        raise ValueError(
            f"Expected {len(records_to_upsert)} embeddings, got {len(embeddings)}."
        )

    to_upsert = [
        (
            record['_id'],
            embedding,
            {
                "title": record["title"],
                "type": record["type"],
                "start_date": record["start_date"],
                "end_date": record["end_date"]
            },
        )
        for record, embedding in zip(records_to_upsert, embeddings)
    ]

    index.upsert(vectors=to_upsert)
    print("Upsert complete.")
    if settle_seconds > 0:
        time.sleep(settle_seconds)  # Wait for upsert to be visible



In [None]:
def query_pinecone_index(index, text=None, top_k=5):
    """
    Queries the Pinecone index with the given query text and prints the matches.

    Args:
        index: Index handle exposing `query(vector=..., top_k=..., ...)`.
        text: Query string to embed and search with. Defaults to the
            module-level `query_text`.
        top_k: Maximum number of matches to request.

    Returns:
        None. Results are printed: id, score and metadata for each match, or
        "No matches found." when the result has no matches.
    """
    if text is None:
        text = query_text

    print(f"Querying with text: '{text}'")
    query_vector = create_embeddings([text])[0]

    search_results = index.query(
        vector=query_vector,
        top_k=top_k,
        include_values=False,  # Set to True if you want the vectors back
        include_metadata=True
    )

    print("\n--- Search Results ---")
    if not search_results.matches:
        print("No matches found.")
        return

    for match in search_results.matches:
        print(f"ID: {match.id}")
        print(f"Score: {match.score}")
        print("Metadata:")
        # A match may carry no metadata (None or a missing attribute), e.g. a
        # vector that was stored without any; treat that as empty.
        metadata = getattr(match, "metadata", None) or {}
        for key, value in metadata.items():
            print(f"  {key}: {value}")
        print("-" * 20)



In [None]:
def main():
    """
    Main function to run the full semantic search workflow.

    Steps:
        1. Create the index named by `index_name` if it is not listed yet and
           poll until it reports ready.
        2. Connect to the index, upsert the sample records and run the query.
        3. Delete the index.

    The index is deleted in a `finally` block, so it is removed even when the
    upsert or query step raises. The index is deleted whether or not this run
    created it, so do not point `index_name` at an index you want to keep.
    """
    # `names` is a method on the list_indexes() result and must be called;
    # testing membership against the bound method itself raises TypeError.
    existing_names = pinecone_client.list_indexes().names()

    # Check if the index exists. If not, create it.
    if index_name not in existing_names:
        print(f"Creating Pinecone index '{index_name}'...")
        pinecone_client.create_index(
            name=index_name,
            dimension=dimension,
            metric=metric,
            spec=ServerlessSpec(cloud=cloud, region=region)
        )
        # Give the index time to initialize before reporting success.
        while not pinecone_client.describe_index(index_name).status['ready']:
            print("Waiting for index to be ready...")
            time.sleep(1)
        print("Index created successfully.")

    try:
        # Connect to the index
        index = pinecone_client.Index(index_name)
        print(f"Connected to index '{index_name}'")

        # Run the upsert and query functions
        upsert_records_to_pinecone(index)
        query_pinecone_index(index)
    finally:
        # Clean up the index, even if a step above failed.
        print(f"\nDeleting index '{index_name}'...")
        pinecone_client.delete_index(index_name)
        print("Index deleted. Script finished.")