In [7]:
from dotenv import load_dotenv
import os
from openai import OpenAI
from pinecone import Pinecone, ServerlessSpec

In [4]:
# Load variables from a local .env file (if one exists) into the process
# environment so os.getenv can see PINECONE_API_KEY. load_dotenv is imported
# at the top of the notebook but was never actually invoked.
load_dotenv()

pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

# Creating an index that already exists is rejected by Pinecone, which made
# this cell fail on every re-run. Only create "rag" when it is missing so the
# notebook survives Restart & Run All.
if "rag" not in pc.list_indexes().names():
    pc.create_index(
        name="rag",
        dimension=1536,  # must equal the length of the embedding vectors upserted later
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

In [6]:
import json

# Read the review records from disk. The context manager closes the file
# handle deterministically (the bare open() call left it to the garbage
# collector), and the explicit encoding avoids depending on the platform's
# default locale when decoding the JSON text.
with open("reviews.json", encoding="utf-8") as reviews_file:
    data = json.load(reviews_file)

# Display the list stored under the "reviews" key as the cell output.
data['reviews']

[{'professor': 'Dr. John Smith',
  'subject': 'Introduction to Computer Science',
  'stars': 4,
  'review': 'Great professor! Makes complex topics easy to understand, but the exams are tough.'},
 {'professor': 'Dr. Emily Johnson',
  'subject': 'Organic Chemistry',
  'stars': 2,
  'review': 'Very knowledgeable, but her lectures are hard to follow and the grading is strict.'},
 {'professor': 'Dr. Michael Brown',
  'subject': 'Calculus I',
  'stars': 5,
  'review': 'Amazing teacher! Clear explanations and very approachable for extra help.'},
 {'professor': 'Dr. Sarah Davis',
  'subject': 'Modern History',
  'stars': 3,
  'review': 'Good lectures, but relies heavily on textbook reading. Not very engaging.'},
 {'professor': 'Dr. David Wilson',
  'subject': 'Microeconomics',
  'stars': 4,
  'review': 'Challenging course, but he provides plenty of resources and is very helpful.'},
 {'professor': 'Dr. Jennifer Moore',
  'subject': 'Introduction to Psychology',
  'stars': 1,
  'review': "Lectur

In [9]:
# Build the records to upsert: one embedding per review, with the professor's
# name as the vector id and the review text, subject and star rating as metadata.
processed_data = []
client = OpenAI()

reviews = data['reviews']

# The professor name is used as the vector id, and an upsert replaces any
# existing vector that has the same id. Two reviews for one professor would
# therefore silently collapse into a single vector, so fail loudly instead.
professor_ids = [review["professor"] for review in reviews]
if len(set(professor_ids)) != len(professor_ids):
    repeated_ids = sorted({pid for pid in professor_ids if professor_ids.count(pid) > 1})
    raise ValueError(
        f"Duplicate professor ids would overwrite each other on upsert: {repeated_ids}"
    )

# Guard the empty case: there is nothing to embed, and no request should be sent.
if reviews:
    # Embed every review text in a single request rather than one network
    # round-trip per review.
    # NOTE(review): for much larger datasets, split the inputs into chunks to
    # stay within the API's per-request limits.
    response = client.embeddings.create(
        input=[review["review"] for review in reviews],
        model="text-embedding-3-small",
    )

    # Each returned item carries the position of its input in `index`; sort on
    # it so embeddings are paired with the right review regardless of the
    # order in which the items come back.
    ordered_items = sorted(response.data, key=lambda item: item.index)

    for review, item in zip(reviews, ordered_items):
        processed_data.append({
            "values": item.embedding,
            "id": review["professor"],
            "metadata": {
                "review": review["review"],
                "subject": review["subject"],
                "stars": review["stars"],
            },
        })

In [10]:
# Obtain a handle to the "rag" index, then write every prepared record into
# the "ns1" namespace. The value returned by upsert is the cell's output.
index = pc.Index('rag')
index.upsert(vectors=processed_data, namespace="ns1")

{'upserted_count': 20}

In [11]:
# Display the index statistics (dimension, per-namespace vector counts and the
# total vector count) as the cell output, to check the result of the upsert.
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'ns1': {'vector_count': 20}},
 'total_vector_count': 20}