In [None]:
import os

import google.generativeai as genai
from dotenv import load_dotenv
from pinecone import Pinecone, ServerlessSpec

# Load API keys from the local env file before any os.getenv() lookup below.
load_dotenv(".env.local")
print("Import successful!")

# Initialize the Google Gemini API client.
# google.generativeai has no `Client` class; credentials are registered through
# the module-level configure() call instead.
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

In [None]:
# Initialize Pinecone with the API key
api_key = os.getenv("PINECONE_API_KEY")
pc = Pinecone(api_key=api_key)

index_name = "rag"

# Delete the existing index, but only if it is actually there: delete_index()
# raises when the index is missing, which would break a first run of this
# notebook (or a re-run after a manual delete).
if index_name in pc.list_indexes().names():
    pc.delete_index(index_name)

# Now create the new index
pc.create_index(
    name=index_name,
    dimension=1536,  # must equal the length of every vector upserted into this index
    metric="cosine",
    spec=ServerlessSpec(cloud="aws", region="us-east-1"),
)

In [None]:
import json

# Read the review dataset. The context manager closes the file handle as soon
# as parsing finishes, and the explicit encoding keeps the result independent of
# the platform's default locale.
with open("reviews.json", encoding="utf-8") as reviews_file:
    data = json.load(reviews_file)

# Last expression of the cell: display the loaded reviews.
data['reviews']

In [None]:
# Imported in this cell too, so the embedding step has `genai` in scope
# regardless of how the setup cell spells its import.
import google.generativeai as genai

# Gemini embedding model. `client.embeddings.create(...)` with
# "text-embedding-3-small" is the OpenAI SDK call shape and an OpenAI model,
# neither of which exists in google.generativeai.
EMBEDDING_MODEL = "models/gemini-embedding-001"
# Requested vector length; kept at 1536 so it matches the dimension the "rag"
# index is created with.
EMBEDDING_DIMENSION = 1536

processed_data = []

# Create embeddings for each review
for review in data["reviews"]:
    response = genai.embed_content(
        model=EMBEDDING_MODEL,
        content=review["review"],
        output_dimensionality=EMBEDDING_DIMENSION,
    )
    # For a single string input, embed_content returns a dict whose
    # "embedding" entry is the list of floats.
    embedding = response["embedding"]
    processed_data.append(
        {
            "values": embedding,
            # NOTE(review): the professor name is used as the vector id, so two
            # reviews for the same professor would share an id - confirm that
            # names are unique in reviews.json.
            "id": review["professor"],
            "metadata": {
                "review": review["review"],
                "subject": review["subject"],
                "stars": review["stars"],
            },
        }
    )

In [None]:
# Write all prepared vectors into the "ns1" namespace of the "rag" index.
index = pc.Index('rag')
# Keep the response: the original discarded it and then read an undefined
# `upsert_response`, which raised NameError.
upsert_response = index.upsert(
    vectors=processed_data,
    namespace="ns1"
)
print(f"Upserted count: {upsert_response.upserted_count}")