In [8]:
from dotenv import load_dotenv
# Load variables from a .env file into the process environment before any
# client is created; PINECONE_API_KEY is read from the environment further down.
load_dotenv()
import os
from openai import OpenAI,embeddings
from pinecone import Pinecone, ServerlessSpec

In [9]:
# Connect to Pinecone using the API key taken from the environment.
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

# create_index fails when an index with this name already exists, so guard the
# call to keep this cell safe to re-run (e.g. Restart Kernel -> Run All).
# The dimension must equal the length of the vectors upserted into the index.
if "rag" not in pc.list_indexes().names():
    pc.create_index(
        name="rag",
        dimension=1536,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

In [21]:
import json

# Read the review records. The context manager closes the file handle as soon
# as parsing finishes, and the explicit encoding avoids depending on the
# platform default when the reviews contain non-ASCII characters.
with open("reviews.json", encoding="utf-8") as reviews_file:
    data = json.load(reviews_file)

# Display the list of review records as the cell output.
data['reviews']

[{'professor': 'David Cantor',
  'subject': 'Behavioral Operations Management',
  'stars': 5,
  'review': "Professor Cantor's classes are highly engaging. His real-world examples from motor carrier safety make complex concepts accessible and relevant.",
  'research': ['Environmental Management',
   'Motor Carrier Safety',
   'Behavioral Operations Management']},
 {'professor': 'Haozhe Chen',
  'subject': 'International Logistics',
  'stars': 5,
  'review': "Dr. Chen's lectures are comprehensive and provide a global perspective on supply chain management. His teaching style is particularly praised by students.",
  'research': ['Supply chain integration',
   'Supply chain relationships',
   'International logistics',
   'Reverse logistics']},
 {'professor': 'Meltem Denizel',
  'subject': 'Process Analysis',
  'stars': 4,
  'review': "Professor Denizel's hands-on approach to teaching process analysis is highly valued. Her courses offer practical insights directly applicable to industry ch

In [22]:
client = OpenAI()
reviews = data["reviews"]

# Build one Pinecone record per review: the embedding of the review text as
# "values", the professor name as "id", and the remaining fields as metadata.
processed_data = []

if reviews:
    # Embed all review texts in a single request instead of one network
    # round-trip per review.
    # NOTE(review): the API caps how many inputs one request may carry; split
    # the list into chunks if reviews.json grows large.
    response = client.embeddings.create(
        input=[review["review"] for review in reviews],
        model="text-embedding-3-small",
    )

    # Every returned item carries the position of the input it belongs to;
    # sorting on it guarantees each vector is paired with its own review.
    ordered_items = sorted(response.data, key=lambda item: item.index)
    assert len(ordered_items) == len(reviews), "embedding count does not match review count"

    processed_data = [
        {
            "values": item.embedding,
            "id": review["professor"],
            "metadata": {
                "review": review["review"],
                "subject": review["subject"],
                "stars": review["stars"],
                "research": review["research"],
            },
        }
        for review, item in zip(reviews, ordered_items)
    ]



In [27]:
# Inspect the first record built above (its id, metadata and full embedding vector).
processed_data[0]

{'values': [0.013308416,
  -0.006887905,
  0.0077119935,
  0.023947768,
  0.011512641,
  0.005335051,
  0.011592589,
  0.06410671,
  0.00579937,
  0.012754924,
  0.030060785,
  0.0064820102,
  -0.022582488,
  0.027428621,
  0.005636397,
  0.016444873,
  -0.050355505,
  -0.027256424,
  0.063860714,
  0.06868225,
  0.055988826,
  -0.016014379,
  0.028658604,
  -0.01730586,
  -0.024747258,
  -0.05505404,
  0.015780682,
  0.014144804,
  0.033283338,
  -0.0074905963,
  0.060957957,
  0.0055779726,
  -0.0360631,
  -0.019950325,
  -0.004406414,
  0.022201193,
  -0.0013806559,
  0.0020955836,
  0.003188731,
  0.006641908,
  0.035620306,
  -0.019236933,
  -0.026297037,
  0.00016681645,
  0.059629574,
  0.021414004,
  0.013665111,
  -0.014821295,
  0.0433692,
  0.05001111,
  -0.03812947,
  0.0124043785,
  0.0040927683,
  -0.018511243,
  -0.026174039,
  -0.013406815,
  -0.0062544635,
  0.036259897,
  -0.03694869,
  -0.01875724,
  0.039187256,
  0.0020187097,
  -0.017490357,
  -0.008511483,
  -0.0

In [23]:
# Get a handle on the "rag" index, then write every embedded review to it in a
# single upsert under the namespace below. The call's summary is the cell output.
index = pc.Index("rag")
index.upsert(namespace="ISU Suply chain management professors", vectors=processed_data)

{'upserted_count': 8}

In [24]:
# Fetch the index statistics to confirm the upsert landed (vector counts per namespace).
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'ISU Suply chain management professors': {'vector_count': 8}},
 'total_vector_count': 8}