In [17]:
from dotenv import load_dotenv
load_dotenv()
import os
from openai import OpenAI
from pinecone import Pinecone, ServerlessSpec

In [5]:
# Connect to Pinecone and make sure the "rag" index exists.
# The client keyword is lowercase `api_key`; the previous `Api_Key` spelling
# is not the documented parameter name.
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

# Only create the index when it is missing, so the cell can be re-run
# (e.g. Restart & Run All) without failing on an index that already exists.
if "rag" not in pc.list_indexes().names():
    pc.create_index(
        name="rag", dimension=1536, metric="cosine", spec=ServerlessSpec(cloud="aws", region="us-east-1")
    )

In [7]:
import json

# Load the professor reviews from disk. The context manager closes the file
# handle as soon as parsing finishes instead of leaving it open in the kernel.
with open("reviews.json") as reviews_file:
    data = json.load(reviews_file)
data['reviews']

[{'professor': 'Dr. John Smith',
  'subject': 'CS101: Introduction to Programming',
  'stars': 5,
  'review': 'Excellent professor with clear and concise lectures. Highly recommended!'},
 {'professor': 'Dr. Emily Johnson',
  'subject': 'MATH150: Calculus I',
  'stars': 4,
  'review': 'Very knowledgeable and approachable, but the exams are tough.'},
 {'professor': 'Dr. Michael Davis',
  'subject': 'HIST101: World History I',
  'stars': 3,
  'review': 'Interesting lectures but heavy reading load.'},
 {'professor': 'Dr. Sarah Lee',
  'subject': 'PHYS101: General Physics',
  'stars': 5,
  'review': 'Dr. Lee explains complex concepts very clearly. Great professor!'},
 {'professor': 'Dr. Robert Brown',
  'subject': 'CHEM101: General Chemistry',
  'stars': 4,
  'review': 'Good teacher, but the lab work is intense.'},
 {'professor': 'Dr. Karen Taylor',
  'subject': 'PSY101: Introduction to Psychology',
  'stars': 4,
  'review': 'Engaging and insightful lectures. Participation is encouraged.'},

In [18]:
# Build one vector record per review: the embedding of the review text, an id,
# and the review's text/subject/stars as metadata.
processed_data = []
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# The embeddings endpoint accepts a list of inputs, so reviews are sent in
# batches rather than paying one network round-trip per review.
EMBED_BATCH_SIZE = 100
reviews = data['reviews']

for start in range(0, len(reviews), EMBED_BATCH_SIZE):
    batch = reviews[start:start + EMBED_BATCH_SIZE]
    response = client.embeddings.create(
        input=[review['review'] for review in batch],
        model="text-embedding-3-small"
    )
    # Pair each embedding with its review using the result's `index` field
    # rather than relying on the response ordering.
    ordered_results = sorted(response.data, key=lambda item: item.index)
    for review, item in zip(batch, ordered_results):
        processed_data.append({
            "values": item.embedding,
            # NOTE(review): the professor name doubles as the vector id, so two
            # reviews for the same professor would share an id — confirm that
            # names are unique in reviews.json.
            "id": review['professor'],
            "metadata": {
                "review": review['review'],
                "subject": review['subject'],
                "stars": review['stars']
            }
        })
    

In [20]:
# Get a handle on the "rag" index and write every prepared record into the
# 'ns1' namespace. The call is left as the final expression so the upsert
# response is displayed as the cell output.
index = pc.Index('rag')
index.upsert(vectors=processed_data, namespace='ns1')

{'upserted_count': 20}

In [21]:
# Display the index statistics (dimension, per-namespace vector counts) as a
# sanity check that the upsert above landed in the expected namespace.
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'ns1': {'vector_count': 20}},
 'total_vector_count': 20}