In [59]:
from dotenv import load_dotenv
load_dotenv(".env.local")
import os
import google.generativeai as gemini
from pinecone import Pinecone, ServerlessSpec
# Import successful
print("Import successful!")

# Initialized the Google Gemini API client
api_key = os.getenv("GEM_API_KEY")
gemini.configure(api_key=api_key)

print("Executed successful!")



Import successful!
Executed successful!


In [68]:
# Initialize Pinecone with the API key                                                                                                                   n
api_key = os.getenv("PINECONE_API_KEY")
pc = Pinecone(api_key=api_key)

# Delete the existing index if so
pc.delete_index("rag")

# Now create the new index
pc.create_index(
    name="rag",
    dimension=768,
    metric="cosine",
    spec=ServerlessSpec(cloud="aws", region="us-east-1"),
)

In [69]:
import json
data = json.load(open("reviews.json"))
data['reviews']

[{'professor': 'Dr. Emily Carter',
  'subject': 'Physics',
  'stars': 5,
  'review': 'Great professor, very clear explanations.'},
 {'professor': 'Dr. John Smith',
  'subject': 'Mathematics',
  'stars': 4,
  'review': 'Good lectures, but assignments are tough.'},
 {'professor': 'Dr. Alice Wong',
  'subject': 'Computer Science',
  'stars': 3,
  'review': 'Sometimes difficult to follow, but overall knowledgeable.'},
 {'professor': 'Dr. Michael Thompson',
  'subject': 'Chemistry',
  'stars': 5,
  'review': 'Engaging and passionate about the subject.'},
 {'professor': 'Dr. Sarah Lee',
  'subject': 'Biology',
  'stars': 2,
  'review': 'The class was hard, and the lectures were boring.'},
 {'professor': 'Dr. David Brown',
  'subject': 'History',
  'stars': 4,
  'review': 'Interesting lectures, but too much reading.'},
 {'professor': 'Dr. Jessica Green',
  'subject': 'English Literature',
  'stars': 5,
  'review': 'Fantastic professor, very approachable.'},
 {'professor': 'Dr. Robert Johnson'

In [70]:
processed_data = []
client = gemini

reviews_list = data["reviews"]

model_name = "models/embedding-gecko-001"

for review in reviews_list:
    try:
        response = client.generate_embeddings(
            model=model_name,
            text=review["review"]
        )

        print(f"Response for {review['professor']}: {response}")

        embedding = response["embedding"]

        processed_data.append({
            "values": embedding,
            "id": review["professor"],
            "metadata": {
                "review": review["review"],
                "subject": review["subject"],
                "stars": review["stars"],
            }
        })
    except Exception as e:
        print(f"Error processing review for {review['professor']}: {e}")


Response for Dr. Emily Carter: {'embedding': [-0.012160118, -0.03821817, -0.04508161, 0.0673221, -0.022399396, -0.036765702, 0.053459223, -0.016499247, -0.054476965, -0.007054707, 0.014444655, 0.014219714, -0.042614445, 0.016135778, -0.018666465, -0.046304867, -0.098454244, -0.0483679, 0.008696982, -0.003992415, -0.06796401, -0.020842759, 0.026407298, 0.012933466, -0.0058151167, -0.09981331, -0.010703949, 0.03576243, -0.015184365, 0.0044371015, 0.020314075, 0.019419309, -0.051834486, -0.000421056, 0.02396164, 0.024324782, -0.029344607, 0.054693766, 0.023208247, 0.04337092, -0.012766319, -0.0066432618, 0.055445924, -0.020309309, -0.01038053, -0.02779823, -0.025178155, 0.059009608, 0.0065448517, -0.008251002, -0.013810188, -0.08606493, 0.056464452, 0.011896319, -0.003667951, 0.014186119, -0.027732233, -0.038950946, -0.04616642, -0.019397376, 0.012113541, -0.007820745, -0.010637909, -0.09922318, -0.030973228, 0.03998603, 0.06311092, -0.012825049, 0.0043581612, -0.0022907972, -0.0044720084

In [72]:
processed_data[10]

{'values': [-0.02673015,
  0.018193176,
  -0.010784866,
  -0.027833618,
  -0.0021705239,
  -0.02461549,
  0.013317454,
  0.013534869,
  -0.035647053,
  -0.021709675,
  0.017479483,
  -0.007538037,
  0.028689781,
  0.0017066764,
  -0.03311783,
  0.00788241,
  -0.042055763,
  -0.019975735,
  -0.028973715,
  0.015860816,
  -0.09737537,
  0.011722332,
  0.00043172753,
  0.01675223,
  -0.009384432,
  -0.079113185,
  0.017864566,
  0.014205064,
  -0.014470407,
  -0.02072885,
  -0.002309446,
  0.015553041,
  -0.032243364,
  -0.048198402,
  0.027635053,
  0.05567045,
  0.02607508,
  -0.026165532,
  0.028153725,
  0.019846229,
  -0.014906889,
  -0.014527309,
  0.004532337,
  -0.014498893,
  -0.015686296,
  -0.0050893794,
  -0.025684895,
  0.033473056,
  0.0022862775,
  -0.0427758,
  -0.02177891,
  -0.009254305,
  0.011105654,
  0.013801726,
  -0.039595097,
  0.029340224,
  -0.07280034,
  -0.092238575,
  -0.030854115,
  -0.010993525,
  0.036326863,
  -0.0063288813,
  -0.056494467,
  -0.0854439,


In [73]:
index=pc.Index("rag")
index.upsert(
    vectors=processed_data,
    namespace='ms1')

{'upserted_count': 20}

In [74]:
index.describe_index_stats()

{'dimension': 768,
 'index_fullness': 0.0,
 'namespaces': {'ms1': {'vector_count': 20}},
 'total_vector_count': 20}

In [75]:
# To list available models
try:
    model_list = list(client.list_models())
    print("Available model names:")
    for model in model_list:
        print(model.name)
except Exception as e:
    print(f"Error listing models: {e}")


Available model names:
models/chat-bison-001
models/text-bison-001
models/embedding-gecko-001
models/gemini-1.0-pro-latest
models/gemini-1.0-pro
models/gemini-pro
models/gemini-1.0-pro-001
models/gemini-1.0-pro-vision-latest
models/gemini-pro-vision
models/gemini-1.5-pro-latest
models/gemini-1.5-pro-001
models/gemini-1.5-pro
models/gemini-1.5-pro-exp-0801
models/gemini-1.5-flash-latest
models/gemini-1.5-flash-001
models/gemini-1.5-flash
models/gemini-1.5-flash-001-tuning
models/embedding-001
models/text-embedding-004
models/aqa
