In [11]:
# Load variables from a local .env file into the process environment up front;
# the Pinecone key is later read with os.getenv("PINECONE_API_KEY").
from dotenv import load_dotenv
load_dotenv()
import os
# Client libraries: OpenAI for embeddings, Pinecone for the vector index.
from openai import OpenAI
from pinecone import Pinecone, ServerlessSpec

In [7]:
# Connect to Pinecone using the API key taken from the environment.
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

INDEX_NAME = "rag"

# create_index raises if the index already exists, so only create it when it is
# missing. This keeps the cell safe to re-run (e.g. Restart Kernel -> Run All).
if INDEX_NAME not in pc.list_indexes().names():
    pc.create_index(
        name=INDEX_NAME,
        dimension=1536,  # length of every vector stored in this index
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

In [8]:
import json

# Parse the review dataset. The context manager closes the file handle as soon
# as parsing finishes, and the explicit encoding avoids platform-dependent
# decoding of the JSON text.
with open("reviews.json", encoding="utf-8") as reviews_file:
    data = json.load(reviews_file)

# Display the list of review records (professor, subject, stars, review).
data["reviews"]


[{'professor': 'Dr. Emily Johnson',
  'subject': 'Physics',
  'stars': 4,
  'review': 'Dr. Johnson is an excellent lecturer who makes complex concepts easy to understand. Her enthusiasm for physics is contagious.'},
 {'professor': 'Prof. Michael Chen',
  'subject': 'Computer Science',
  'stars': 5,
  'review': 'Prof. Chen is incredibly knowledgeable and always available for extra help. His programming assignments are challenging but rewarding.'},
 {'professor': 'Dr. Sarah Martinez',
  'subject': 'Biology',
  'stars': 3,
  'review': 'Dr. Martinez knows her subject well, but her lectures can be dry at times. The lab work is interesting and hands-on.'},
 {'professor': 'Prof. David Thompson',
  'subject': 'History',
  'stars': 2,
  'review': "Prof. Thompson's lectures are disorganized and hard to follow. However, his reading list is excellent and provides good insights into historical events."},
 {'professor': 'Dr. Olivia Parker',
  'subject': 'Psychology',
  'stars': 5,
  'review': 'Dr. P

In [13]:
# Embed every review text and shape the results into Pinecone upsert records:
#   {"values": <embedding>, "id": <professor name>, "metadata": {...}}
EMBEDDING_MODEL = "text-embedding-3-small"
EMBEDDING_BATCH_SIZE = 100  # number of review texts sent per embeddings request

processed_data = []
client = OpenAI()

reviews = data["reviews"]

# Send the texts in batches instead of one request per review: the embeddings
# endpoint accepts a list of inputs, so this avoids a network round trip per row.
for start in range(0, len(reviews), EMBEDDING_BATCH_SIZE):
    batch = reviews[start:start + EMBEDDING_BATCH_SIZE]
    response = client.embeddings.create(
        input=[review["review"] for review in batch],
        model=EMBEDDING_MODEL,
    )
    # Every returned item carries the position of the input it belongs to;
    # sort on it so the zip below pairs each review with its own embedding.
    batch_embeddings = sorted(response.data, key=lambda item: item.index)
    for review, item in zip(batch, batch_embeddings):
        processed_data.append({
            "values": item.embedding,
            # NOTE: the professor name doubles as the vector id, so two reviews
            # for the same professor would overwrite each other on upsert.
            "id": review["professor"],
            "metadata": {
                "review": review["review"],
                "subject": review["subject"],
                "stars": review["stars"],
            },
        })

In [14]:
# Sanity check: inspect the first prepared record (embedding values, id, metadata).
processed_data[0]

{'values': [-0.00066192716,
  -0.008173707,
  -0.020685522,
  0.03249128,
  0.022975432,
  0.01092796,
  -0.007346795,
  0.04442426,
  -8.408264e-05,
  -0.00826912,
  0.02176687,
  0.023064485,
  -0.039742664,
  0.006058721,
  0.018713655,
  0.02872565,
  -0.0431012,
  -0.016627293,
  0.03869948,
  0.04450059,
  0.018510107,
  -0.025710603,
  0.042287007,
  -0.011780316,
  -0.0332037,
  -0.053329464,
  0.034730304,
  0.01889176,
  0.049283955,
  -0.0026333968,
  0.07811138,
  0.008103738,
  -0.010896156,
  -0.011239642,
  -0.019973105,
  0.04437337,
  -0.008720742,
  0.008695298,
  -0.010043801,
  -0.010215544,
  -0.022937266,
  -0.012912549,
  -0.027046384,
  0.019273411,
  0.033916116,
  0.0033171894,
  -0.013446862,
  -0.011080621,
  0.04798634,
  0.052922368,
  -0.019998549,
  -0.018192064,
  0.048240773,
  -0.00753126,
  -0.028318556,
  0.012950715,
  0.034882966,
  0.034297764,
  0.0026842835,
  -0.04177814,
  0.028852869,
  -0.0027765161,
  -0.024743753,
  0.0082182335,
  -0.006

In [15]:
# Open a handle to the "rag" index and write every prepared record into the
# "ns1" namespace; the call's result (the upserted count) is the cell output.
index = pc.Index("rag")
upsert_args = {"vectors": processed_data, "namespace": "ns1"}
index.upsert(**upsert_args)

{'upserted_count': 20}

In [16]:
# Show index statistics to confirm the vectors landed in the expected namespace.
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'ns1': {'vector_count': 20}},
 'total_vector_count': 20}