In [17]:
from dotenv import load_dotenv
load_dotenv()
import os
from openai import OpenAI
from pinecone import Pinecone, ServerlessSpec


In [8]:
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

pc.create_index(
    name="rag", dimension=1536, metric="cosine", spec=ServerlessSpec(cloud="aws",region="us-east-1")
)

In [18]:
import json
data = json.load(open('reviews.json'))
data['reviews']

[{'professor': 'Dr. Emily Johnson',
  'subject': 'Introduction to Cognitive Science',
  'stars': 5,
  'review': 'Dr. Johnson is an outstanding professor. Her lectures are clear and engaging, and she really cares about her students.'},
 {'professor': 'Dr. Michael Smith',
  'subject': 'Ethics in Technology',
  'stars': 4,
  'review': "Dr. Smith's class was challenging but very rewarding. The material was tough, but he was always available for help."},
 {'professor': 'Dr. Susan Lee',
  'subject': 'Philosophy of Mind',
  'stars': 3,
  'review': 'Dr. Lee is knowledgeable, but her lectures can be a bit dry. The readings are interesting, though.'},
 {'professor': 'Dr. John Kim',
  'subject': 'Artificial Intelligence',
  'stars': 4,
  'review': 'Dr. Kim is a great teacher with real-world experience. The projects were tough but very informative.'},
 {'professor': 'Dr. Rachel Green',
  'subject': 'Logic and Reasoning',
  'stars': 5,
  'review': 'Dr. Green is an amazing professor! Her explanation

In [19]:
processed_data=[]
client=OpenAI()

for review in data['reviews']:
    response = client.embeddings.create(
        input=review['review'],
        model='text-embedding-3-small',
    )
    embedding = response.data[0].embedding
    processed_data.append({
        "values": embedding,
        "id": review['professor'],
        "metadata": {
            "review": review['review'],
            "subject": review['subject'],
            "stars": review['stars']
        }
    })

In [20]:
processed_data[0]

{'values': [0.020210233,
  -0.0031630741,
  0.017646331,
  0.04958363,
  0.009575615,
  -0.005465012,
  0.036095276,
  0.046908252,
  -0.0032383192,
  0.010495275,
  0.036518876,
  -0.012462791,
  -0.050653778,
  0.041624386,
  -0.0032829088,
  0.034311693,
  -0.01576242,
  -0.0038068364,
  0.029830437,
  0.039484084,
  0.048022993,
  -0.01747912,
  0.022395123,
  -0.029429132,
  -0.032416634,
  -0.036006097,
  0.020845635,
  0.014681123,
  0.03199303,
  -0.011938863,
  0.081465185,
  0.007964816,
  -0.013644415,
  -0.018749924,
  -0.014647681,
  0.008488744,
  0.0032466797,
  0.013488352,
  0.006783192,
  0.0053117354,
  -0.013644415,
  0.0046289572,
  0.0008834313,
  0.007295972,
  0.04429976,
  0.0064209015,
  0.007596952,
  -0.0043251906,
  0.057475984,
  0.045013193,
  -0.027021293,
  -0.0054900935,
  0.033219248,
  -0.023431832,
  -0.03504742,
  0.0025541475,
  0.030722229,
  0.046685304,
  0.0070284344,
  -0.038837537,
  0.03796804,
  0.0022740692,
  -0.030945178,
  -0.004202569

In [15]:
index = pc.Index('rag')
index.upsert(
    vectors=processed_data,
    namespace='ns1'
)

{'upserted_count': 20}

In [21]:
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'ns1': {'vector_count': 20}},
 'total_vector_count': 20}