In [53]:
from dotenv import load_dotenv
load_dotenv()
import os
from pinecone import Pinecone, ServerlessSpec
import google.generativeai as genai

In [55]:
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
pc.create_index(
    name="rag", dimension=768, metric="cosine", spec=ServerlessSpec(cloud="aws", region="us-east-1")
)

In [56]:
import json
data = json.load(open("reviews.json"))
data["reviews"]

[{'professor': 'Dr. Sarah Thompson',
  'subject': 'Physics',
  'stars': 4,
  'review': 'Engaging lectures and challenging assignments. Dr. Thompson makes complex concepts accessible.'},
 {'professor': 'Prof. Michael Chen',
  'subject': 'Computer Science',
  'stars': 5,
  'review': "Brilliant instructor! Prof. Chen's passion for coding is contagious. Highly recommended."},
 {'professor': 'Dr. Emily Rodriguez',
  'subject': 'Biology',
  'stars': 3,
  'review': 'Knowledgeable but sometimes moves too fast. Office hours are helpful.'},
 {'professor': 'Prof. David Lee',
  'subject': 'Mathematics',
  'stars': 4,
  'review': 'Clear explanations and patient with questions. Homework can be quite difficult.'},
 {'professor': 'Dr. Lisa Patel',
  'subject': 'Chemistry',
  'stars': 4,
  'review': 'Excellent lab instructor. Dr. Patel emphasizes safety and practical applications.'},
 {'professor': 'Prof. John Doe',
  'subject': 'English Literature',
  'stars': 2,
  'review': 'Lectures are dry and assi

In [57]:
processed_data = []
genai.configure(api_key=os.getenv("API_KEY"))

for review in data["reviews"]:
    response = genai.embed_content(
        model="models/text-embedding-004",
        content=review['review'],
    )
    embedding = response["embedding"]
    processed_data.append({
        "values": embedding,
        "id": review["professor"],
        "metadata": {
            "review": review["review"],
            "subject": review["subject"],
            "stars": review["stars"]
        }
    })

In [58]:
processed_data[0]

{'values': [0.037808172,
  0.04678748,
  -0.041641057,
  -0.02479065,
  0.029291766,
  0.0045132143,
  0.03334672,
  0.018155279,
  -0.017182892,
  0.014400714,
  0.06357568,
  0.026951972,
  0.0058171754,
  0.0049397815,
  -0.014280658,
  -0.043240145,
  -0.031903483,
  0.047975257,
  -0.08894821,
  0.01425542,
  0.0044737677,
  -0.04188488,
  0.009126484,
  -0.012664177,
  0.0076171667,
  -0.021249566,
  0.018732546,
  -0.035346154,
  0.038443197,
  0.002043098,
  0.0124998335,
  0.021188758,
  -0.042081613,
  -0.026782118,
  -0.045298256,
  0.0046460144,
  0.01287123,
  -0.00075212267,
  0.020541018,
  -0.03545438,
  0.00045587466,
  -0.033908967,
  -0.04419232,
  -0.009243631,
  -0.06074259,
  -0.0050010006,
  -0.013954398,
  0.08188842,
  -0.010665319,
  0.08019424,
  0.024881111,
  0.026741873,
  -0.07600686,
  0.039044026,
  -0.023643829,
  -0.025965523,
  -0.069621466,
  -0.05559937,
  0.074150704,
  0.018439423,
  -0.06666082,
  -0.014965733,
  -0.040926255,
  -0.09182629,
  0

In [60]:
index = pc.Index('rag')
index.upsert(
    vectors=processed_data,
    namespace="ns1"
)

{'upserted_count': 20}

In [61]:
index.describe_index_stats()

{'dimension': 768,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}