In [7]:
from dotenv import load_dotenv
import os

# Get the current file's directory
current_dir = os.getcwd()
# Construct the path to the .env file
env_path = os.path.join(current_dir, '.env')

load_dotenv(env_path)


import openai
from openai import OpenAI
import pinecone
from pinecone import Pinecone, ServerlessSpec





In [4]:
# Now try to get the API key
api_key = os.getenv('PINECONE_API_KEY')
print(api_key)

# If the API key is still not set, you can set it manually
if not api_key:
    os.environ['PINECONE_API_KEY'] = "1feb0045-c3e6-44fb-8e8b-d5313a80f9d1"
    api_key = os.getenv('PINECONE_API_KEY')

pc =Pinecone(api_key)

pc.create_index(
    name="rag", dimension=1536, metric="cosine", spec=ServerlessSpec(cloud="aws", region="us-east-1")
)





In [5]:
import json
data = json.load(open('reviews.json'))

data['reviews']


[{'professor': 'Dr. Emily Johnson',
  'subject': 'Chemistry',
  'stars': 4,
  'review': "Dr. Johnson's lectures are engaging and she explains complex concepts clearly. Her office hours are very helpful."},
 {'professor': 'Prof. Michael Chen',
  'subject': 'Computer Science',
  'stars': 5,
  'review': "Brilliant professor! Prof. Chen's passion for coding is contagious. His projects are challenging but incredibly rewarding."},
 {'professor': 'Dr. Sarah Williams',
  'subject': 'Psychology',
  'stars': 3,
  'review': 'Dr. Williams knows her subject well, but her lectures can be a bit dry. More interactive sessions would be great.'},
 {'professor': 'Prof. David Martinez',
  'subject': 'History',
  'stars': 4,
  'review': 'Prof. Martinez brings history to life with his storytelling. His exams are tough but fair.'},
 {'professor': 'Dr. Laura Thompson',
  'subject': 'Biology',
  'stars': 5,
  'review': 'Dr. Thompson is an exceptional educator. Her lab sessions are well-organized and her enthus

In [8]:
processed_data = []
client = OpenAI()

for review in data['reviews']:
    response = client.embeddings.create(
        input=review['review'], 
        model="text-embedding-3-small"
    )

    embedding = response.data[0].embedding
    processed_data.append({
        "values": embedding,
        "id": review['professor'],
        "metadata": {
            "subject": review['subject'],
            "review": review['review'],
            "stars": review['stars']
        }
    })

print(processed_data)


[{'values': [-0.012724451, 0.009392628, 0.054968588, 0.032877438, 0.008219359, 0.0063589844, 0.020263186, 0.063006446, -0.0023400544, 0.027976938, 0.021922614, 0.0039022514, -0.013949576, 0.0012380896, -0.012244772, 0.031503223, -0.03777794, -0.005049591, 0.035470296, 0.06451031, 0.02263565, -0.0014050049, 0.041926514, -0.0059311627, -0.03342194, -0.040111512, 0.006086734, 0.01223829, 0.046308443, -0.009600056, 0.07011088, -0.0021893445, -0.026421223, -0.035185084, -0.05056073, 0.042989586, 0.012108647, 0.016036827, 0.026758295, 0.009340771, -0.0078109847, 0.027121294, -0.027043508, -0.0027143983, 0.03611851, 0.012828165, 0.0005254589, -0.012698522, 0.049782872, 0.04711223, -0.030258654, -0.0024259428, 0.0039087334, -0.02738058, -0.041356087, -0.003442019, 0.013806969, 0.0247229, 0.0021958265, -0.022415258, 0.057250306, -0.019848328, -0.01704804, -0.0110002, -0.017968506, -0.028988153, -0.009950093, 0.009269468, -0.031321723, -0.059480164, 0.02275233, 0.030751295, -0.034070153, -0.0015

In [9]:
processed_data[0]


{'values': [-0.012724451,
  0.009392628,
  0.054968588,
  0.032877438,
  0.008219359,
  0.0063589844,
  0.020263186,
  0.063006446,
  -0.0023400544,
  0.027976938,
  0.021922614,
  0.0039022514,
  -0.013949576,
  0.0012380896,
  -0.012244772,
  0.031503223,
  -0.03777794,
  -0.005049591,
  0.035470296,
  0.06451031,
  0.02263565,
  -0.0014050049,
  0.041926514,
  -0.0059311627,
  -0.03342194,
  -0.040111512,
  0.006086734,
  0.01223829,
  0.046308443,
  -0.009600056,
  0.07011088,
  -0.0021893445,
  -0.026421223,
  -0.035185084,
  -0.05056073,
  0.042989586,
  0.012108647,
  0.016036827,
  0.026758295,
  0.009340771,
  -0.0078109847,
  0.027121294,
  -0.027043508,
  -0.0027143983,
  0.03611851,
  0.012828165,
  0.0005254589,
  -0.012698522,
  0.049782872,
  0.04711223,
  -0.030258654,
  -0.0024259428,
  0.0039087334,
  -0.02738058,
  -0.041356087,
  -0.003442019,
  0.013806969,
  0.0247229,
  0.0021958265,
  -0.022415258,
  0.057250306,
  -0.019848328,
  -0.01704804,
  -0.0110002,
  -0

In [10]:
index = pc.Index("rag")
index.upsert(
    vectors=processed_data,
    namespace="ns1"
)


{'upserted_count': 20}

In [11]:
index.describe_index_stats(namespace="ns1")


{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'ns1': {'vector_count': 20}},
 'total_vector_count': 20}