In [None]:
import base64
import json
import os

from dotenv import load_dotenv

# Populate os.environ from .env before the API client libraries are imported,
# preserving the original ordering of load_dotenv() relative to those imports.
load_dotenv()

from openai import OpenAI
from pinecone import Pinecone, ServerlessSpec

In [None]:
# Connect to Pinecone with the API key read from the environment.
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
index_name = "rag"

# Create the serverless index only when it does not exist yet, so that
# re-running this cell (or Restart & Run All) does not fail on an
# "index already exists" error.
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        # Expected to equal the length of the embedding vectors upserted
        # later in this notebook — confirm if the embedding model changes.
        dimension=1536,
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1",
        ),
    )

In [None]:
# Parse reviews.json into `data`. The context manager closes the file handle
# (a bare open() passed to json.load is never closed explicitly), and the
# explicit UTF-8 encoding avoids depending on the platform default.
with open("reviews.json", encoding="utf-8") as reviews_file:
    data = json.load(reviews_file)

# Display the list of review records.
data['reviews']

In [None]:
# Embed every review and shape it into a vector record (id / values / metadata).
# `data` is the parsed reviews.json from the load cell above; it is reused here
# instead of re-opening the file, which previously leaked a second file handle.
embedding_model = "text-embedding-3-small"
client = OpenAI()
processed_data = []

for review in data['reviews']:
    # One embeddings request per review; the vector is on the first (only)
    # item of the response's data list.
    response = client.embeddings.create(
        input=review['review'],
        model=embedding_model,
    )
    embedding = response.data[0].embedding

    processed_data.append({
        "values": embedding,
        # NOTE(review): the professor name is used as the record id. If
        # reviews.json can hold several reviews for one professor, the ids
        # collide — confirm the data has one review per professor.
        "id": review["professor"],
        "metadata": {
            "review": review['review'],
            "subject": review["subject"],
            "stars": review["stars"],
        },
    })

In [None]:
# Display the first prepared record (id, embedding values, metadata) as a spot check.
processed_data[0]

In [None]:
# Build the records to upload under a separate name instead of rebinding
# `processed_data`: re-running this cell then always encodes the original ids,
# rather than base64-encoding already-encoded ids a second time.
# Ids are made URL-safe base64 ASCII strings — presumably because the raw
# professor names may contain characters Pinecone does not accept in ids
# (TODO confirm).
encoded_vectors = [
    {
        "id": base64.urlsafe_b64encode(vec["id"].encode('utf-8')).decode('ascii'),
        "values": vec["values"],
        "metadata": vec["metadata"],
    }
    for vec in processed_data
]

# Open a handle to the index created earlier (same name, no second literal)
# and write all records into namespace "ns1".
index = pc.Index(index_name)
upsert_response = index.upsert(
    vectors=encoded_vectors,
    namespace="ns1",
)
print(f"Upserted count: {upsert_response['upserted_count']}")

In [None]:
# Show the index statistics — presumably to confirm the upsert into namespace "ns1" landed.
index.describe_index_stats()