In [7]:
import os
import time
import numpy as np
from typing import List
from dotenv import load_dotenv

# Imports
from pinecone import Pinecone, ServerlessSpec
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.embeddings import Embeddings
from langchain_pinecone import PineconeVectorStore
from huggingface_hub import InferenceClient

# 1. LOAD KEYS
# load_dotenv() is expected to populate the process environment (from a .env
# file, per the error message below) before the two credentials are read.
load_dotenv()

hf_api_key = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
pinecone_api_key = os.environ.get("PINECONE_API_KEY")

# Stop immediately when either credential is unset or empty; nothing after
# this point can run without both of them.
if not hf_api_key or not pinecone_api_key:
    raise ValueError("Error: Missing keys in .env file.")

print("Keys loaded successfully.")

# 2. LOAD & SPLIT DOCUMENTS
# Read every PDF the directory loader finds under ../pdfs/ and, when there is
# anything to work with, cut the result into overlapping chunks.
print("Loading PDFs...")
loader = PyPDFDirectoryLoader("../pdfs/")
documents = loader.load()

if documents:
    print(f"Loaded {len(documents)} documents.")
    # chunk_size / chunk_overlap are the splitter's own units
    # (presumably characters - confirm against the LangChain docs).
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
    )
    final_documents = text_splitter.split_documents(documents)
    print(f"Split into {len(final_documents)} chunks.")
else:
    # Keep the name defined so the later upload step can test it.
    print("Warning: No documents found in ../pdfs/")
    final_documents = []

# 3. SETUP EMBEDDINGS (YOUR WORKING CLASS)
class RobustHuggingFaceEmbeddings(Embeddings):
    """LangChain ``Embeddings`` adapter backed by the Hugging Face Inference API.

    Each text is sent on its own to ``InferenceClient.feature_extraction`` and
    the reply is reduced to one flat vector: a 1-D reply is used as-is, a 2-D
    reply is averaged over its first axis (presumably one row per token -
    confirm against the model being used).
    """

    def __init__(
        self,
        api_key: str,
        model_name: str = "sentence-transformers/all-MiniLM-L6-v2",
    ) -> None:
        """Store the model name and build the inference client.

        Args:
            api_key: Token handed to ``InferenceClient``.
            model_name: Model id passed to every ``feature_extraction`` call.
        """
        self.model_name = model_name
        self.client = InferenceClient(token=api_key)

    @staticmethod
    def _to_vector(response) -> List[float]:
        """Reduce one ``feature_extraction`` reply to a flat list of floats.

        Args:
            response: A NumPy array or a (possibly nested) list of numbers.

        Returns:
            The reply itself when it is 1-D, or the mean over axis 0 when it
            is 2-D, as a plain Python list.

        Raises:
            ValueError: If the reply is neither an array nor a list, is empty,
                or has a rank other than 1 or 2.
        """
        if not isinstance(response, (np.ndarray, list)):
            raise ValueError(f"Unexpected response type: {type(response)}")

        # One code path for arrays and nested lists: np.asarray is a no-op
        # for arrays and lets list replies go through the same rank checks.
        array = np.asarray(response)

        # An empty reply used to be stored as a zero-length "embedding" and
        # only failed much later, at upload time.
        if array.size == 0:
            raise ValueError("Empty embedding returned for text")

        if array.ndim == 1:
            return array.tolist()
        if array.ndim == 2:
            return array.mean(axis=0).tolist()
        # Deeper nesting previously slipped through the list branch and came
        # back as a list of lists instead of a vector.
        raise ValueError(f"Unexpected array dimensions: {array.ndim}")

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed each text in ``texts``, preserving order.

        Args:
            texts: Strings to embed; one API call is made per string.

        Returns:
            One flat vector per input text. An empty input gives ``[]``.

        Raises:
            Exception: Whatever the client or the reply validation raises; the
                error is printed and then re-raised unchanged, so the first
                failing text aborts the whole call.
        """
        embeddings_list = []
        for text in texts:
            try:
                response = self.client.feature_extraction(text, model=self.model_name)
                embeddings_list.append(self._to_vector(response))
            except Exception as e:
                print(f"Error embedding text: {e}")
                raise
        return embeddings_list

    def embed_query(self, text: str) -> List[float]:
        """Embed a single query string via ``embed_documents``."""
        return self.embed_documents([text])[0]

# Build the embedding client once; later steps reuse `embeddings`.
print("Connecting to Hugging Face Embeddings...")
embeddings = RobustHuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    api_key=hf_api_key,
)

# 4. TEST EMBEDDINGS
# Embed one throwaway sentence and report the size of the returned vector.
query_text = "Hello, world!"
vector = embeddings.embed_query(query_text)
vector_length = len(vector)
print(f"Generated vector of length: {vector_length}")

# ---------------------------------------------------------
# 6. PINECONE SETUP (UPDATED: CLEARS DB FIRST)
# ---------------------------------------------------------
# Ensures the index named `index_name` exists and holds no vectors in its
# default namespace before the upload step runs:
#   * missing index  -> create it and wait (bounded) until it reports ready
#   * existing index -> delete every vector in the default namespace
from pinecone.exceptions import NotFoundException

index_name = "groq-rag-demo"
pc = Pinecone(api_key=pinecone_api_key)

existing_indexes = [index.name for index in pc.list_indexes()]

if index_name not in existing_indexes:
    print(f"Creating index '{index_name}'...")
    # Size the index from the probe vector produced by the embedding test
    # above, so the dimension cannot drift from the embedding model
    # (384 for the current model).
    embedding_dimension = len(vector)
    pc.create_index(
        name=index_name,
        dimension=embedding_dimension,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1")
    )
    # Wait for index to initialize, but give up after a generous deadline
    # instead of polling forever if it never becomes ready.
    index_ready_timeout_s = 300
    ready_deadline = time.monotonic() + index_ready_timeout_s
    while not pc.describe_index(index_name).status['ready']:
        if time.monotonic() > ready_deadline:
            raise TimeoutError(
                f"Index '{index_name}' was not ready after {index_ready_timeout_s} seconds."
            )
        time.sleep(1)
    # Short extra pause kept from the original flow after 'ready' is reported.
    time.sleep(2)
else:
    print(f"Index '{index_name}' already exists.")
    # --- NEW CODE: CLEAR INDEX ---
    print("‚ö†Ô∏è Clearing all existing vectors to prevent duplication...")
    index = pc.Index(index_name)
    try:
        index.delete(delete_all=True)
    except NotFoundException:
        # Serverless indexes answer delete_all with a 404 when the namespace
        # has never held a vector; there is nothing to clear in that case.
        print("Index has no vectors yet; nothing to clear.")
    else:
        print("‚úÖ Index cleared.")

# 7. UPLOAD TO PINECONE
# Either way `vectorstore` ends up bound to the index: attached to the
# existing index when there is nothing to upload, otherwise created by
# embedding and uploading every chunk.
if not final_documents:
    print("No documents to upload (Index is now empty).")
    vectorstore = PineconeVectorStore.from_existing_index(
        embedding=embeddings,
        index_name=index_name,
    )
else:
    print("Uploading vectors to Pinecone...")
    vectorstore = PineconeVectorStore.from_documents(
        index_name=index_name,
        embedding=embeddings,
        documents=final_documents,
    )
    print("Upload complete!")

# ---------------------------------------------------------
# 8. SIMILARITY SEARCH IMPLEMENTATION
# ---------------------------------------------------------
# Runs the same query through both search entry points of the vector store
# and prints the first 200 characters of each of the top three hits.
query = "What is the summary of the document?"
print(f"\nüîç Performing Similarity Search for: '{query}'")

# Method A: Standard Similarity Search (Get Top 3)
results = vectorstore.similarity_search(query, k=3)

print("\n--- üìÑ Standard Search Results ---")
for rank, doc in enumerate(results, start=1):
    print(f"\n[Result {rank}]")
    print(f"{doc.page_content[:200]}...")

# Method B: Similarity Search with Scores (Cosine Similarity)
# Each hit comes back as a (document, score) pair.
results_with_scores = vectorstore.similarity_search_with_score(query, k=3)

print("\n--- üìä Search Results with Scores ---")
for rank, (doc, score) in enumerate(results_with_scores, start=1):
    print(f"\n[Result {rank}] Score: {score:.4f}")
    print(f"{doc.page_content[:200]}...")

print("\n‚úÖ Search Complete.")

Keys loaded successfully.
Loading PDFs...
Loaded 15 documents.
Split into 52 chunks.
Connecting to Hugging Face Embeddings...
Generated vector of length: 384
Index 'groq-rag-demo' already exists.
‚ö†Ô∏è Clearing all existing vectors to prevent duplication...
‚úÖ Index cleared.
Uploading vectors to Pinecone...
Upload complete!

üîç Performing Similarity Search for: 'What is the summary of the document?'

--- üìÑ Standard Search Results ---

[Result 1]
[25] Mitchell P Marcus, Mary Ann Marcinkiewicz, and Beatrice Santorini. Building a large annotated
corpus of english: The penn treebank. Computational linguistics, 19(2):313–330, 1993.
[26] David McCl...

[Result 2]
3.2 Attention
An attention function can be described as mapping a query and a set of key-value pairs to an output,
where the query, keys, values, and output are all vectors. The output is computed as ...

[Result 3]
Table 3: Variations on the Transformer architecture. Unlisted values are identical to those of the base
model