In [1]:
from dotenv import load_dotenv
load_dotenv()
import os
from openai import OpenAI
from pinecone import Pinecone, ServerlessSpec

  from tqdm.autonotebook import tqdm


In [2]:
# Connect to Pinecone using the API key read from the environment, then make
# sure the serverless index for the review embeddings exists.
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

# Only create the index when it is not already present, so this cell can be
# re-run (e.g. after a kernel restart) without failing because the index
# already exists.
index_name = "rmp-reviews"
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=1536,  # length of the vectors this index stores
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

In [8]:
import json

# Load the review dataset. The context manager closes the file handle as soon
# as parsing finishes (a bare open() would leave it open), and the encoding is
# pinned to UTF-8 so the result does not depend on the platform default.
with open("reviews.json", encoding="utf-8") as reviews_file:
    data = json.load(reviews_file)

# Display the records stored under the "reviews" key.
data['reviews']

[{'professor': 'Dr. Emily Johnson',
  'subject': 'Introduction to Computer Science',
  'stars': 4,
  'review': "Dr. Johnson is an excellent professor. Her lectures are engaging and she genuinely cares about her students' learning. Highly recommended!"},
 {'professor': 'Professor Michael Lee',
  'subject': 'Organic Chemistry',
  'stars': 3,
  'review': "Professor Lee's class is challenging, but he is knowledgeable and willing to help students who put in the effort. The grading can be tough, but it's fair."},
 {'professor': 'Dr. Sarah Patel',
  'subject': 'World History',
  'stars': 5,
  'review': 'Dr. Patel is passionate about the subject and her lectures are informative and thought-provoking. She encourages class discussions and makes the material interesting.'},
 {'professor': 'Professor John Wilson',
  'subject': 'Principles of Marketing',
  'stars': 4,
  'review': 'Professor Wilson is knowledgeable and provides relevant real-world examples. His grading is fair, and he is accessible 

In [9]:
# Build one upsert-ready record per review: the embedding goes under "values",
# the professor's name under "id", and the remaining fields under "metadata".
client = OpenAI()
processed_data = []

for entry in data['reviews']:
    # Each request carries a single review text, so its embedding is the
    # first (and only) item of the response data.
    vector = client.embeddings.create(
        model="text-embedding-3-small",
        input=entry['review'],
    ).data[0].embedding

    details = {field: entry[field] for field in ("review", "subject", "stars")}
    processed_data.append(
        {"values": vector, "id": entry["professor"], "metadata": details}
    )

In [None]:
# Preview the first record without flooding the cell output: the embedding is
# cut to its first five components and its full length is reported alongside.
first_record = processed_data[0]
{
    **first_record,
    "values": first_record["values"][:5],
    "n_values": len(first_record["values"]),
}

In [11]:
# Upsert the embedded reviews into the "ns1" namespace of the index.
# The handle targets "rmp-reviews", the index name passed to create_index
# earlier in this notebook, so a fresh top-to-bottom run writes to the index
# the notebook itself creates.
index = pc.Index("rmp-reviews")
index.upsert(
    vectors=processed_data,
    namespace="ns1"
)

{'upserted_count': 20}

In [12]:
# Fetch the index statistics to check the result of the upsert above
# (the response reports vector counts per namespace).
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'ns1': {'vector_count': 20}},
 'total_vector_count': 20}