In [12]:
from dotenv import load_dotenv
import os

# Pull the variables defined in ../.env.local into this process's environment.
load_dotenv(dotenv_path='../.env.local')

# Fail fast: later cells hand this key to the Pinecone client, so stop here
# if it is missing or empty.
api_key = os.environ.get("PINECONE_API_KEY")
if api_key is None or api_key == "":
    raise ValueError("PINECONE_API_KEY not found")


In [13]:
# Continue with your Pinecone and OpenAI initialization
from pinecone import Pinecone, ServerlessSpec
from openai import AzureOpenAI

# Initialize Pinecone
import time

pc = Pinecone(api_key=api_key)

# Create the index only when it does not exist yet, so this cell is safe to
# re-run and the notebook also works against a fresh Pinecone project
# (previously the creation call was commented out and had to be toggled by hand).
INDEX_NAME = "rag"
if INDEX_NAME not in pc.list_indexes().names():
    pc.create_index(
        name=INDEX_NAME,
        dimension=1536,  # must equal the length of the embedding vectors upserted later
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
    # A freshly created index is not usable immediately; wait until Pinecone
    # reports it as ready before any later cell tries to upsert into it.
    while not pc.describe_index(INDEX_NAME).status["ready"]:
        time.sleep(1)


In [14]:
# Load the review data
import json
import time

# Tunables for this cell.
NAMESPACE = "ns1"
EMBEDDING_MODEL = "text-embedding-ada-002"
STATS_POLL_ATTEMPTS = 10   # upper bound on freshness checks after the upsert
STATS_POLL_SECONDS = 1.0   # pause between freshness checks

# Use a context manager so the file handle is closed deterministically, and
# read as UTF-8 explicitly instead of relying on the platform default encoding.
with open("../reviews.json", encoding="utf-8") as reviews_file:
    data = json.load(reviews_file)

processed_data = []
client = AzureOpenAI(
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    api_version="2023-12-01-preview",
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT")
)

# Create embeddings for each review (one request per review)
for review in data["reviews"]:
    response = client.embeddings.create(
        input=review["review"], model=EMBEDDING_MODEL
    )
    embedding = response.data[0].embedding
    processed_data.append(
        {
            "values": embedding,
            # NOTE(review): the professor name is the vector id, so two reviews
            # for the same professor overwrite each other on upsert. Confirm
            # that one vector per professor is intended.
            "id": review["professor"],
            "metadata": {
                "review": review["review"],
                "subject": review["subject"],
                "stars": review["stars"],
            },
        }
    )

# Surface id collisions instead of letting them silently shrink the index.
unique_ids = {vector["id"] for vector in processed_data}
if len(unique_ids) != len(processed_data):
    print(
        f"Warning: {len(processed_data) - len(unique_ids)} review(s) share a "
        "professor id and will overwrite earlier vectors"
    )

# Insert the embeddings into the Pinecone index
index = pc.Index("rag")
upsert_response = index.upsert(
    vectors=processed_data,
    namespace=NAMESPACE,
)
print(f"Upserted count: {upsert_response['upserted_count']}")

# Print index statistics.
# Pinecone is eventually consistent: stats read immediately after an upsert can
# still report zero vectors. Poll a bounded number of times until the namespace
# reflects the upserted vectors, then print whatever was last observed.
for _ in range(STATS_POLL_ATTEMPTS):
    stats = index.describe_index_stats()
    namespace_stats = stats.namespaces.get(NAMESPACE)
    if namespace_stats is not None and namespace_stats.vector_count >= len(unique_ids):
        break
    time.sleep(STATS_POLL_SECONDS)
print(stats)

Upserted count: 6
{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}
