In [16]:
from dotenv import load_dotenv
load_dotenv()
import os
from openai import OpenAI
from pinecone import Pinecone, ServerlessSpec

In [13]:
# Create the Pinecone client and make sure the "rag" index exists.
api_key = os.getenv("PINECONE_API_KEY")
if not api_key:
    # Fail here with an actionable message instead of deep inside the client.
    raise RuntimeError(
        "PINECONE_API_KEY is not set; define it in the environment or in .env"
    )
pc = Pinecone(api_key=api_key)

# Creating an index that already exists is rejected by the service, so guard
# the call to keep this cell safe to re-run (e.g. Restart Kernel -> Run All).
if "rag" not in pc.list_indexes().names():
    pc.create_index(
        name="rag",
        dimension=1536,  # must equal the length of the vectors upserted later
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )


In [15]:
import json

# Read the review dataset. The context manager closes the file handle as soon
# as parsing finishes, and the explicit encoding avoids depending on the
# platform's default text encoding.
with open("reviews.json", encoding="utf-8") as reviews_file:
    data = json.load(reviews_file)

# Display the list of review records stored under the 'reviews' key.
data['reviews']

[{'professor': 'Dr. Emily Johnson',
  'subject': 'Biology',
  'stars': 4,
  'review': 'Engaging lectures and fair grading. Highly recommended!'},
 {'professor': 'Prof. Michael Chen',
  'subject': 'Computer Science',
  'stars': 5,
  'review': 'Brilliant instructor who makes complex topics easy to understand.'},
 {'professor': 'Dr. Sarah Thompson',
  'subject': 'Psychology',
  'stars': 3,
  'review': 'Interesting material, but assignments can be unclear at times.'},
 {'professor': 'Prof. David Rodriguez',
  'subject': 'Mathematics',
  'stars': 4,
  'review': 'Challenging course, but Dr. Rodriguez is always willing to help.'},
 {'professor': 'Dr. Amelia Patel',
  'subject': 'Chemistry',
  'stars': 5,
  'review': 'Excellent lab sessions and thorough explanations. A fantastic educator!'},
 {'professor': 'Prof. Robert Wilson',
  'subject': 'History',
  'stars': 2,
  'review': 'Lectures are dry and the workload is excessive.'},
 {'professor': 'Dr. Lisa Anderson',
  'subject': 'English Literat

In [17]:
# Build one Pinecone-ready record per review: the embedding of the review
# text, the professor name as the record id, and the remaining fields as
# metadata.
EMBEDDING_MODEL = "text-embedding-3-small"
EMBED_BATCH_SIZE = 100  # reviews per request; keeps each request small

process_data = []
client = OpenAI()  # no key passed explicitly; relies on the client's own configuration lookup

reviews = data['reviews']
# The embeddings endpoint accepts a list of inputs, so send the reviews in
# chunks instead of paying one network round-trip per review. An empty
# review list makes no request at all.
for start in range(0, len(reviews), EMBED_BATCH_SIZE):
    batch = reviews[start:start + EMBED_BATCH_SIZE]
    response = client.embeddings.create(
        input=[review['review'] for review in batch],
        model=EMBEDDING_MODEL,
    )

    # Each returned item records the position of its input in `index`;
    # sort on it so the zip below pairs every review with its own vector.
    embedded = sorted(response.data, key=lambda item: item.index)

    for review, item in zip(batch, embedded):
        process_data.append({
            "values": item.embedding,
            # NOTE(review): ids are professor names, so two reviews for the
            # same professor would share an id — confirm names are unique.
            "id": review["professor"],
            "metadata": {
                "review": review["review"],
                "subject": review["subject"],
                "stars": review["stars"],
            },
        })

In [18]:
# Inspect the first prepared record. Note that this echoes the record in full,
# including the entire embedding list stored under 'values'.
process_data[0]

{'values': [-0.043106467,
  0.014164959,
  -0.0317745,
  0.023184137,
  0.039141685,
  0.0013233516,
  -0.020400353,
  -0.003834732,
  -0.0025605888,
  0.032618072,
  0.041447446,
  -0.007124658,
  -0.027275456,
  -0.004702907,
  0.025082171,
  0.013975156,
  -0.020259758,
  -0.00061642175,
  0.026699016,
  0.038832374,
  0.034783237,
  -0.011226521,
  0.036020473,
  0.004414687,
  -0.024885338,
  -0.084694505,
  -0.006186186,
  0.0062178196,
  -0.003217871,
  0.0069207954,
  0.06208681,
  -0.015943488,
  -0.013033168,
  -0.035964236,
  -0.031352714,
  0.064055145,
  0.020597186,
  0.06197433,
  0.027598824,
  -0.021229865,
  0.016393391,
  -0.02165165,
  -0.017799344,
  -0.018839747,
  0.0055289036,
  0.012034943,
  0.026909908,
  -0.030537263,
  0.04293775,
  0.023887113,
  -0.060287192,
  0.003841762,
  0.05519765,
  -0.026952086,
  -0.035654925,
  0.019233413,
  -0.011338998,
  0.035992354,
  0.00030689282,
  -0.00019353798,
  0.030930929,
  0.018938163,
  -0.0017337137,
  0.033630

In [19]:
# Get a handle to the 'rag' index through the Pinecone client.
index = pc.Index('rag')

# Write every prepared record into namespace "ns1". The call is the last
# expression in the cell, so its return value is shown as the cell output.
index.upsert(vectors=process_data, namespace="ns1")


{'upserted_count': 20}

In [20]:
# Show the index statistics to confirm the upsert; the result reports the
# index dimension and a vector count per namespace.
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'ns1': {'vector_count': 20}},
 'total_vector_count': 20}