In [13]:
from dotenv import load_dotenv
load_dotenv()
import os
from pinecone import Pinecone, ServerlessSpec
import google.generativeai as genai

In [14]:
# Build the Pinecone client from the API key stored in the environment (.env is loaded above).
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

# Creating an index that already exists raises an error, so only create "rag" when it is
# missing. This keeps the cell safe under Restart Kernel -> Run All.
# dimension=768 must match the length of the embedding vectors upserted into this index.
if "rag" not in pc.list_indexes().names():
    pc.create_index(
        name="rag",
        dimension=768,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

In [56]:
import json

# Load the review fixtures. The context manager closes the file handle as soon as parsing
# finishes, and the explicit encoding keeps decoding independent of the platform locale
# (JSON text is UTF-8 by specification).
with open("reviews.json", encoding="utf-8") as f:
    data = json.load(f)

# Last expression: display the list of review records.
data["reviews"]

[{'professor': 'Dr. Sarah Thompson',
  'subject': 'Physics',
  'stars': 4,
  'review': 'Engaging lectures and challenging assignments. Dr. Thompson makes complex concepts accessible.'},
 {'professor': 'Prof. Michael Chen',
  'subject': 'Computer Science',
  'stars': 5,
  'review': "Brilliant instructor! Prof. Chen's passion for coding is contagious. Highly recommended."},
 {'professor': 'Dr. Emily Rodriguez',
  'subject': 'Biology',
  'stars': 3,
  'review': 'Knowledgeable but sometimes moves too fast. Office hours are helpful.'},
 {'professor': 'Prof. David Lee',
  'subject': 'Mathematics',
  'stars': 4,
  'review': 'Clear explanations and patient with questions. Homework can be quite difficult.'},
 {'professor': 'Dr. Lisa Patel',
  'subject': 'Chemistry',
  'stars': 4,
  'review': 'Excellent lab instructor. Dr. Patel emphasizes safety and practical applications.'},
 {'professor': 'Prof. John Doe',
  'subject': 'English Literature',
  'stars': 2,
  'review': 'Lectures are dry and assi

In [57]:
# Authenticate the Gemini client, then turn every review into an upsert-ready record:
# the embedding goes under "values", the professor name is the vector id, and the
# remaining review fields travel along as metadata.
genai.configure(api_key=os.getenv("API_KEY"))
processed_data = []

for review in data["reviews"]:
    # One embedding request per review text.
    response = genai.embed_content(model="models/text-embedding-004", content=review["review"])
    embedding = response["embedding"]

    metadata = {key: review[key] for key in ("review", "subject", "stars")}
    processed_data.append(dict(values=embedding, id=review["professor"], metadata=metadata))

In [58]:
# Spot-check the first prepared record: its "values" (embedding), "id" and "metadata".
# NOTE(review): this displays the whole embedding vector; consider slicing "values" to keep output short.
processed_data[0]

{'values': [0.037808172,
  0.04678748,
  -0.041641057,
  -0.02479065,
  0.029291766,
  0.0045132143,
  0.03334672,
  0.018155279,
  -0.017182892,
  0.014400714,
  0.06357568,
  0.026951972,
  0.0058171754,
  0.0049397815,
  -0.014280658,
  -0.043240145,
  -0.031903483,
  0.047975257,
  -0.08894821,
  0.01425542,
  0.0044737677,
  -0.04188488,
  0.009126484,
  -0.012664177,
  0.0076171667,
  -0.021249566,
  0.018732546,
  -0.035346154,
  0.038443197,
  0.002043098,
  0.0124998335,
  0.021188758,
  -0.042081613,
  -0.026782118,
  -0.045298256,
  0.0046460144,
  0.01287123,
  -0.00075212267,
  0.020541018,
  -0.03545438,
  0.00045587466,
  -0.033908967,
  -0.04419232,
  -0.009243631,
  -0.06074259,
  -0.0050010006,
  -0.013954398,
  0.08188842,
  -0.010665319,
  0.08019424,
  0.024881111,
  0.026741873,
  -0.07600686,
  0.039044026,
  -0.023643829,
  -0.025965523,
  -0.069621466,
  -0.05559937,
  0.074150704,
  0.018439423,
  -0.06666082,
  -0.014965733,
  -0.040926255,
  -0.09182629,
  0

In [60]:
# Get a handle to the "rag" index, then write every prepared record into namespace "ns1".
# The upsert response is the cell's last expression, so it is displayed as the output.
index = pc.Index("rag")
index.upsert(namespace="ns1", vectors=processed_data)

{'upserted_count': 20}

In [61]:
# Display index statistics (dimension, per-namespace vector counts) to check the upsert.
# NOTE(review): the recorded output shows 0 vectors right after an upsert of 20 -- presumably
# the stats had not caught up yet (eventual consistency); re-run after a short wait to confirm.
index.describe_index_stats()

{'dimension': 768,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}

In [16]:
import json

# Load the professor profiles. The context manager closes the file handle as soon as parsing
# finishes. The data contains non-ASCII characters (e.g. curly apostrophes), so the encoding
# is pinned to UTF-8 instead of relying on the platform default.
with open("professors.json", encoding="utf-8") as f:
    data = json.load(f)

# Last expression: display the list of professor records.
data["professors"]

[{'professor': 'Dr. Alice Thompson',
  'subjects': ['Algorithms', 'Data Structures', 'Machine Learning'],
  'ratings': 5,
  'email': 'alice.thompson@cs.edu.com',
  'review': 'Dr. Thompson’s lectures are incredibly insightful and engaging. Her method of breaking down complex topics makes them easier to understand, and she fosters a stimulating learning environment.',
  'about': "Dr. Alice Thompson is a highly respected professor at the university, where she has been teaching for over 8 years. With a strong focus on algorithms and machine learning, Dr. Thompson's expertise is evident in her engaging and insightful lectures. She earned her PhD from the prestigious Massachusetts Institute of Technology (MIT), where she conducted groundbreaking research in artificial intelligence. Throughout her academic career, Dr. Thompson has published numerous research papers in top-tier journals, earning her recognition as one of the leading experts in her field. Her ability to simplify complex topics 

In [17]:
# Fields copied verbatim from each professor record into the vector's metadata.
# Each key is listed exactly once (the earlier dict literal repeated "review").
METADATA_FIELDS = (
    "review",
    "subjects",
    "ratings",
    "email",
    "about",
    "achievements",
    "experience",
    "title",
    "softSkills",
    "officeHours",
    "teachingStyle",
)

processed_data = []
genai.configure(api_key=os.getenv("API_KEY"))

# Note: each `review` here is a full professor record from data["professors"];
# only its "review" text is embedded, the other fields are stored as metadata.
for review in data["professors"]:
    response = genai.embed_content(
        model="models/text-embedding-004",
        content=review["review"],
    )
    embedding = response["embedding"]
    processed_data.append({
        "values": embedding,
        # The professor name doubles as the vector id, so names must be unique.
        "id": review["professor"],
        "metadata": {field: review[field] for field in METADATA_FIELDS},
    })

In [7]:
# Spot-check the first prepared professor record: its "values" (embedding), "id" and "metadata".
# NOTE(review): this displays the whole embedding vector; consider slicing "values" to keep output short.
processed_data[0]

{'values': [0.069942385,
  0.013362019,
  -0.033648707,
  -0.02925679,
  0.027547007,
  -0.010117985,
  0.02162155,
  0.030796407,
  -0.014316467,
  -0.008339958,
  0.07182632,
  0.044717662,
  -0.00050951255,
  0.0016501612,
  -0.053573698,
  -0.04344169,
  -0.018067116,
  0.07406821,
  -0.106690265,
  0.030691076,
  -0.015335025,
  -0.050913334,
  0.00458741,
  -0.0054825107,
  0.012874085,
  -0.059890438,
  0.039877675,
  -0.04486681,
  0.058666084,
  -0.019358044,
  0.030163135,
  0.010041933,
  -0.003982178,
  -0.047260456,
  -0.037021734,
  0.0054510953,
  0.022107866,
  -0.031233355,
  0.028024849,
  -0.03223371,
  0.016847357,
  -0.045982737,
  -0.032235537,
  0.0017630924,
  -0.06283581,
  -0.009338265,
  -0.024229119,
  0.08071151,
  0.017951058,
  0.04121557,
  0.03279842,
  0.01895458,
  -0.049416106,
  0.041070446,
  -0.032453693,
  -0.019336933,
  -0.05471865,
  -0.045853727,
  0.05551024,
  -0.010552639,
  -0.057639487,
  -0.0024816387,
  -0.058127515,
  -0.08484874,
  0

In [18]:
# Get a handle to the "rag" index, then write every prepared record into namespace "ns1".
# The upsert response is the cell's last expression, so it is displayed as the output.
index = pc.Index("rag")
index.upsert(namespace="ns1", vectors=processed_data)

{'upserted_count': 19}

In [19]:
# Display index statistics (dimension, per-namespace vector counts) to verify the upsert.
index.describe_index_stats()

{'dimension': 768,
 'index_fullness': 0.0,
 'namespaces': {'ns1': {'vector_count': 19}},
 'total_vector_count': 19}