In [33]:
from dotenv import load_dotenv
load_dotenv()
from pinecone import Pinecone, ServerlessSpec
from openai import OpenAI
import os
import json

In [40]:
# Initialize the Pinecone client with the API key read from the environment.
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

# Create the Pinecone index only if it does not exist yet, so this cell can be
# re-run (e.g. Restart Kernel -> Run All) without failing on an existing index.
index_name = "rag"
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        # The index dimension must match the embedding model's output size;
        # 1536 is used here for OpenAI's "text-embedding-3-small".
        # Update it if the embedding model changes.
        dimension=1536,
        # Metric used for similarity search (others may fit better depending
        # on the use case).
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )


In [35]:
# Load the raw reviews. The context manager closes the file handle as soon as
# parsing is done, and the explicit encoding avoids depending on the platform
# default when the reviews contain non-ASCII text.
with open("reviews.json", encoding="utf-8") as reviews_file:
    data = json.load(reviews_file)

# Accumulator for the records that will be upserted into Pinecone.
processed_data = []

# OpenAI client used to create the embeddings.
# No credentials are passed explicitly; presumably they are picked up from the
# environment populated by load_dotenv() -- confirm.
client = OpenAI()

In [None]:
# Create an embedding for each review and wrap it in a record for upserting.
# The request/response handling below is specific to the OpenAI embeddings API;
# the code is different if another model provider is used.

# Start from an empty list so that re-running this cell rebuilds the records
# instead of appending a second copy of every review.
processed_data = []

for review in data["reviews"]:
    response = client.embeddings.create(
        # Specific to OpenAI.
        input=review["review"], model="text-embedding-3-small"
    )
    embedding = response.data[0].embedding
    # Record layout (values / id / metadata) is specific to Pinecone.
    # NOTE(review): the professor name is used as the vector id, so two reviews
    # for the same professor would share an id -- presumably the later one
    # replaces the earlier one on upsert; confirm this is intended.
    processed_data.append(
        {
            "values": embedding,
            "id": review["professor"],
            "metadata": {
                "review": review["review"],
                "subject": review["subject"],
                "stars": review["stars"],
            },
        }
    )

In [None]:
# Push the prepared embedding records into the "rag" Pinecone index.
index = pc.Index("rag")
upsert_response = index.upsert(
    # The namespace could potentially double as a user id.
    namespace="ns1",
    vectors=processed_data,
)

In [None]:
# Show the index statistics as this cell's output, as a sanity check after the
# upsert (presumably includes per-namespace vector counts -- confirm).
index.describe_index_stats()