In [1]:
from dotenv import load_dotenv
load_dotenv()
from pinecone import Pinecone, ServerlessSpec
import google.generativeai as genai
import os, json

  from tqdm.autonotebook import tqdm


In [2]:
# Load the review data.
# A context manager closes the file handle deterministically (the bare
# open() call leaked it until garbage collection), and the explicit
# encoding avoids depending on the platform's locale default.
with open("reviews.json", encoding="utf-8") as reviews_file:
    data = json.load(reviews_file)

In [3]:
# Embed the data with Gemini AI

genai.configure(api_key=os.environ["GEMINIAI_API_KEY"])

# Not used by the embedding loop below; kept so any later cell that relies on
# `model` keeps working.
model = genai.GenerativeModel('gemini-1.5-flash')

# Build one Pinecone record per review: the embedding of the review text plus
# the metadata needed to show the match later.
processed_data = []
for review in data["reviews"]:
    # Embed the review's own text. Previously a fixed placeholder string was
    # embedded for every review, which made all vectors identical and
    # similarity search meaningless.
    # Assumes review["reviews"] is a single string - TODO confirm against
    # reviews.json.
    response = genai.embed_content(
        model="models/text-embedding-004",
        content=review["reviews"],
    )
    embedding = response['embedding']
    processed_data.append(
        {
            "values": embedding,
            # NOTE(review): the professor name is used as the vector id, so two
            # reviews for the same professor would overwrite each other on
            # upsert - confirm names are unique in the dataset.
            "id": review["professor"],
            "metadata": {
                "review": review["reviews"],
                "subject": review["course"],
                "stars": review["rating"],
            },
        }
    )

In [4]:
# Create the Pinecone client; the API key is read from the PINECONE_API_KEY
# environment variable.
pc = Pinecone(
    api_key=os.getenv("PINECONE_API_KEY"),
)

In [6]:
# Create a Pinecone index.
# create_index raises if the index already exists, so only create it when it
# is missing; this keeps the cell safe to re-run (Restart & Run All).
if "rateprof" not in pc.list_indexes().names():
    pc.create_index(
        name="rateprof",
        # Must equal the length of the vectors that get upserted into the index.
        dimension=768,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

In [5]:
# Upsert the embedded review records into the "rateprof" index under the
# "test1" namespace; the call's result is the cell output.
index = pc.Index("rateprof")
index.upsert(namespace="test1", vectors=processed_data)


{'upserted_count': 20}