In [9]:
from dotenv import load_dotenv
import os 
from openai import OpenAI
from pinecone import Pinecone, ServerlessSpec

load_dotenv()

True

In [10]:
pc = Pinecone(api_key=os.getenv('NEXT_PUBLIC_PINECONE_API_KEY'))

pc.create_index(
    name="rag",
    dimension=1024,
    metric="cosine",
    spec=ServerlessSpec(
        cloud="aws",
        region="us-east-1"
    ) 
)

In [11]:
import json 
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1")

# Load the review data
data = json.load(open("reviews.json"))

processed_data = []

# Create embeddings for each review
for review in data["reviews"]:

    embedding = model.encode(review['review'])
    processed_data.append(
        {
            "values": embedding,
            "id": review["professor"],
            "metadata":{
                "department": review["department"],
                "review": review["review"],
                "courses": review["courses"],
                "stars": review["stars"],
            }
        }
    )

# Insert the embeddings into the Pinecone index
index = pc.Index("rag")
upsert_response = index.upsert(
    vectors=processed_data,
    namespace="ns1",
)
print(f"Upserted count: {upsert_response['upserted_count']}")

# Print index statistics
print(index.describe_index_stats())

Upserted count: 55
{'dimension': 1024,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}


In [12]:
processed_data[0]

{'values': [0.3294692933559418,
  0.37688982486724854,
  0.427005797624588,
  -0.4528385400772095,
  0.08677235245704651,
  0.09601366519927979,
  -0.05145647004246712,
  0.22674274444580078,
  -0.07773838937282562,
  1.2639356851577759,
  1.0435891151428223,
  -0.33394554257392883,
  0.4691813886165619,
  -0.5374332070350647,
  0.6439455151557922,
  -0.27894043922424316,
  -0.23750928044319153,
  -0.5952653884887695,
  -1.041365146636963,
  0.06714443117380142,
  -0.06467659771442413,
  0.30855265259742737,
  -1.2345449924468994,
  -0.4124090373516083,
  -0.08441699296236038,
  0.43223902583122253,
  0.6469802260398865,
  0.2579810619354248,
  1.161767840385437,
  0.8246235847473145,
  0.3771064877510071,
  0.4417230486869812,
  -0.07171962410211563,
  -0.730801522731781,
  0.05908423662185669,
  -0.03799481689929962,
  1.1758638620376587,
  -1.0631815195083618,
  0.7459645867347717,
  -0.7332612872123718,
  0.20426088571548462,
  -0.036499250680208206,
  0.32761672139167786,
  -1.186