In [1]:
from dotenv import load_dotenv
load_dotenv('.env.local')
from pinecone import Pinecone, ServerlessSpec
import os
import json
# import anthropic
import groq

  from tqdm.autonotebook import tqdm


In [2]:
# pip install anthropic

In [3]:
import os
from pinecone import Pinecone, ServerlessSpec

api_key = os.getenv("PINECONE_API_KEY")

pc = Pinecone(api_key=api_key)

if 'rag' in pc.list_indexes().names():
    print("Deleting existing 'rag' index...")
    pc.delete_index("rag")

print("Creating new 'rag' index...")
pc.create_index(
    name='rag',
    dimension=1536, 
    metric='cosine',
    spec=ServerlessSpec(
        cloud='aws',
        region='us-east-1'
    )
)

# Verify the new index
index = pc.Index("rag")
index_stats = index.describe_index_stats()
print(f"New index dimension: {index_stats.dimension}")

Deleting existing 'rag' index...
Creating new 'rag' index...
New index dimension: 1536


In [5]:
import json
import os
import time
from groq import Groq

# Check if the API key is set
if not os.getenv("GROQ_API_KEY"):
    raise ValueError("GROQ_API_KEY environment variable is not set")

# Initialize GROQ client
groq_client = Groq(api_key=os.getenv("GROQ_API_KEY"))

def get_embedding(text):
    try:
        response = groq_client.chat.completions.create(
            model="llama-3.1-70b-versatile",
            messages=[
                {"role": "system", "content": "You are an AI assistant that generates embeddings. For the given input, respond with a comma-separated list of exactly 1536 floating-point numbers representing the embedding."},
                {"role": "user", "content": f"Generate an embedding for the following text: {text}"}
            ],
            temperature=0,
            max_tokens=3072,
            timeout=30  # Set a 30-second timeout
        )

        # Extract the content from the response
        embedding_string = response.choices[0].message.content.strip()

        # Attempt to split the response into a list of numbers
        embedding = [float(x) for x in embedding_string.split(',') if x.strip()]

        # Ensure we have exactly 1536 dimensions
        if len(embedding) < 1536:
            embedding.extend([0.0] * (1536 - len(embedding)))
        elif len(embedding) > 1536:
            embedding = embedding[:1536]

        return embedding
    except ValueError as e:
        print(f"Error processing embedding: {e}")
        print(f"Received string: {embedding_string}")
        return [0.0] * 1536  # Return a zero vector in case of failure
    except Exception as e:
        print(f"Unexpected error: {e}")
        return [0.0] * 1536  # Return a zero vector in case of failure

def get_embedding_with_retry(text, max_retries=3):
    for attempt in range(max_retries):
        try:
            return get_embedding(text)
        except Exception as e:
            print(f"Error on attempt {attempt + 1}: {e}")
            if attempt < max_retries - 1:
                time.sleep(2 ** attempt)  # Exponential backoff
            else:
                raise

# Load your data
with open("reviews.json") as f:
    data = json.load(f)

# Process data and generate embeddings
processed_data = []
for i, review in enumerate(data["reviews"]):
    print(f"Processing review {i+1}/{len(data['reviews'])}")
    embedding = get_embedding_with_retry(review['review'])
    processed_data.append(
        {
            "values": embedding,
            "id": review["professor"],
            "metadata": {
                "review": review["review"],
                "subject": review["subject"],
                "stars": review["stars"],
            }
        }
    )

Processing review 1/20
Processing review 2/20
Processing review 3/20
Processing review 4/20
Processing review 5/20
Processing review 6/20
Processing review 7/20
Processing review 8/20
Processing review 9/20
Processing review 10/20
Processing review 11/20
Processing review 12/20
Processing review 13/20
Processing review 14/20
Processing review 15/20
Processing review 16/20
Processing review 17/20
Processing review 18/20
Processing review 19/20
Processing review 20/20


In [6]:
processed_data[0]

{'values': [0.0123,
  0.0456,
  0.0987,
  0.1234,
  0.1567,
  0.189,
  0.2345,
  0.2678,
  0.3011,
  0.3456,
  0.3789,
  0.4123,
  0.4567,
  0.5012,
  0.5345,
  0.5678,
  0.6123,
  0.6456,
  0.6789,
  0.7234,
  0.7567,
  0.789,
  0.8345,
  0.8678,
  0.9011,
  0.9456,
  0.9789,
  0.0123,
  0.0456,
  0.0987,
  0.1234,
  0.1567,
  0.189,
  0.2345,
  0.2678,
  0.3011,
  0.3456,
  0.3789,
  0.4123,
  0.4567,
  0.5012,
  0.5345,
  0.5678,
  0.6123,
  0.6456,
  0.6789,
  0.7234,
  0.7567,
  0.789,
  0.8345,
  0.8678,
  0.9011,
  0.9456,
  0.9789,
  0.0123,
  0.0456,
  0.0987,
  0.1234,
  0.1567,
  0.189,
  0.2345,
  0.2678,
  0.3011,
  0.3456,
  0.3789,
  0.4123,
  0.4567,
  0.5012,
  0.5345,
  0.5678,
  0.6123,
  0.6456,
  0.6789,
  0.7234,
  0.7567,
  0.789,
  0.8345,
  0.8678,
  0.9011,
  0.9456,
  0.9789,
  0.0123,
  0.0456,
  0.0987,
  0.1234,
  0.1567,
  0.189,
  0.2345,
  0.2678,
  0.3011,
  0.3456,
  0.3789,
  0.4123,
  0.4567,
  0.5012,
  0.5345,
  0.5678,
  0.6123,
  0.6456,
  0.678

In [7]:
response = groq_client.models.list()
print(response)

ModelListResponse(data=[Model(id='distil-whisper-large-v3-en', created=1693721698, object='model', owned_by='Groq', active=True, context_window=1500, public_apps=None), Model(id='gemma2-9b-it', created=1693721698, object='model', owned_by='Google', active=True, context_window=8192, public_apps=None), Model(id='gemma-7b-it', created=1693721698, object='model', owned_by='Google', active=True, context_window=8192, public_apps=None), Model(id='llama-3.1-70b-versatile', created=1693721698, object='model', owned_by='Meta', active=True, context_window=131072, public_apps=None), Model(id='llama-3.1-8b-instant', created=1693721698, object='model', owned_by='Meta', active=True, context_window=131072, public_apps=None), Model(id='llama3-70b-8192', created=1693721698, object='model', owned_by='Meta', active=True, context_window=8192, public_apps=None), Model(id='llama3-8b-8192', created=1693721698, object='model', owned_by='Meta', active=True, context_window=8192, public_apps=None), Model(id='llam

In [8]:
import time
index = pc.Index("rag")
upsert_response = index.upsert(
    vectors=processed_data,
    namespace="ns1",
)
print(f"Upserted count: {upsert_response['upserted_count']}")

# Wait for a few seconds
time.sleep(5)

# Check stats again
print(index.describe_index_stats())

Upserted count: 20
{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'ns1': {'vector_count': 20}},
 'total_vector_count': 20}


In [9]:
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'ns1': {'vector_count': 20}},
 'total_vector_count': 20}