In [1]:
# Load environment variables from the local dotenv file; this call runs
# before the remaining imports below are executed.
from dotenv import load_dotenv
load_dotenv('.env.local')
import os
# Embedding model wrapper and Pinecone client classes used by later cells.
from sentence_transformers import SentenceTransformer
from pinecone import Pinecone, ServerlessSpec 

  from tqdm.autonotebook import tqdm, trange


In [4]:
# Build the Pinecone client from the API key found in the environment.
pc = Pinecone(api_key=os.getenv('PINECONE_API_KEY'))

# Creating an index that already exists raises an error, so only create "rag"
# when it is missing. This keeps the cell safe under Restart & Run All.
# NOTE(review): dimension must equal the embedding size produced by the model
# used when encoding the reviews -- confirm if the model is ever swapped.
if "rag" not in pc.list_indexes().names():
    pc.create_index(
        name="rag",
        dimension=384,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )


In [5]:
import json

# Read the review dataset. The context manager guarantees the file handle is
# closed, and the explicit UTF-8 encoding avoids depending on the platform's
# default text encoding.
with open("reviews.json", encoding="utf-8") as reviews_file:
    data = json.load(reviews_file)

In [8]:
# Encode each review's text and collect one Pinecone record per review.
model = SentenceTransformer('all-MiniLM-L6-v2')
processed_data = []

for review in data["reviews"]:
    # model.encode(...) result is converted with .tolist() before being stored.
    embedding = model.encode(review['review']).tolist()
    processed_data.append({
        "values": embedding,
        # The professor field is used as the record id.
        "id": review["professor"],
        "metadata": {field: review[field] for field in ("review", "subject", "stars")},
    })

# Send every record to the "rag" index under namespace "ns1" in a single call.
index = pc.Index("rag")
upsert_response = index.upsert(vectors=processed_data, namespace="ns1")



In [10]:
# Show the 'upserted_count' field of the upsert response.
upserted_count = upsert_response['upserted_count']
print(f"Upserted count: {upserted_count}")

Upserted count: 20


In [11]:
# Fetch the index statistics and print them.
index_stats = index.describe_index_stats()
print(index_stats)

{'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'ns1': {'vector_count': 20}},
 'total_vector_count': 20}
