## 🔄 Step 5: Re-ranking with Cohere

Now we'll implement re-ranking using Cohere's cross-encoder model to improve retrieval quality.

**Progress**: Setting up Cohere re-ranker and implementing re-ranking function...

In [7]:
# Re-ranking with Cohere
import cohere
from langchain_pinecone import PineconeVectorStore
from langchain_community.embeddings import SentenceTransformerEmbeddings
import numpy as np
import os
from dotenv import load_dotenv
import re
from typing import Dict, List, Tuple
import uuid
from pinecone import Pinecone
from langchain_openai import ChatOpenAI

# Read the local .env file (if any) into the process environment.
load_dotenv()

# Settings this step expects to find in the environment.
required_vars = [
    'PINECONE_API_KEY',
    'PINECONE_INDEX',
    'PINECONE_URL',
    'OPENAI_API_KEY',
    'COHERE_API_KEY',
]

# Per-variable report: a variable that is unset OR empty counts as missing.
print("Environment Variables Status:")
print("-" * 30)
for name in required_vars:
    print(f"✅ {name}: Set" if os.getenv(name) else f"❌ {name}: Missing")

# Collect the names that failed the check for the summary below.
missing_vars = [name for name in required_vars if not os.getenv(name)]

if not missing_vars:
    print("\n🎉 All environment variables loaded successfully!")
    print(f"📋 Pinecone Index: {os.getenv('PINECONE_INDEX')}")
else:
    # Only reported here; execution continues with the following statements.
    print(f"\n❌ Missing variables: {missing_vars}")
    print("Please create a .env file and add all required variables")




# Get OpenAI API key and fail fast if it is absent, before anything claims
# that the LLM is ready.
openai_api_key = os.getenv("OPENAI_API_KEY")

if not openai_api_key:
    raise ValueError("OPENAI_API_KEY not found in environment variables or .env file")

print("✅ OpenAI API key loaded successfully")

# Single source of truth for the LLM settings, so the status lines printed
# below cannot drift from what is actually passed to ChatOpenAI.
LLM_MODEL = "gpt-4o-mini"  # Using GPT-4o-mini for cost efficiency
LLM_TEMPERATURE = 0.1
LLM_STREAMING = False

llm = ChatOpenAI(
    model=LLM_MODEL,
    openai_api_key=openai_api_key,
    temperature=LLM_TEMPERATURE,
    streaming=LLM_STREAMING
)

# Report success only after the client object has actually been constructed.
print("✅ OpenAI LLM initialized successfully")
print(f"🤖 Model: {LLM_MODEL}")
print(f"🌡️ Temperature: {LLM_TEMPERATURE}")
print(f"📡 Streaming: {LLM_STREAMING}")
# Connect to Pinecone and open the index named by PINECONE_INDEX.
index_name = os.environ.get("PINECONE_INDEX")
pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))
index = pc.Index(index_name)

# Embedding model used to turn text into vectors.
# NOTE(review): E5-family models are usually used with "query: " /
# "passage: " text prefixes — confirm how the index was populated.
embedding_model = SentenceTransformerEmbeddings(model_name='intfloat/multilingual-e5-large')

# LangChain vector store on top of the Pinecone index opened above.
vectorstore = PineconeVectorStore(index=index, embedding=embedding_model)

# Similarity-search retriever that returns 5 results per query.
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs=dict(k=5))


def get_rag_answer(query: str, retriever, llm, top_k: int = 5) -> str:
    """
    Retrieve relevant chunks from Pinecone and generate an answer with the LLM.

    The query is embedded with the module-level ``embedding_model`` and the
    module-level Pinecone ``index`` is queried directly. The text of each
    match is read from its ``chunk_text`` metadata field; matches without
    that field are skipped.

    Args:
        query: User's question
        retriever: Not used by this function; kept so existing callers that
            pass it positionally keep working
        llm: Chat model exposing ``invoke(prompt)`` whose result has a
            ``.content`` string
        top_k: Number of nearest chunks to request from Pinecone

    Returns:
        Generated answer based on retrieved context, or a human-readable
        message when no chunks are found, retrieval fails, or the LLM call
        fails. The function does not raise for those cases.
    """

    # Embed the query and search the Pinecone index directly
    try:
        query_embedding = embedding_model.embed_query(query)
        response = index.query(
            vector=query_embedding,
            top_k=top_k,
            include_metadata=True,
            include_values=False
        )
        chunks = []
        metadata_list = []
        for match in response.matches:
            chunk_text = match.metadata.get('chunk_text', '')
            if chunk_text:
                chunks.append(chunk_text)
                metadata_list.append(match.metadata)
        print(f"✅ Retrieved {len(chunks)} chunks using direct Pinecone query")
        if not chunks:
            return "No relevant information found in the database."
    except Exception as e:
        # Stop here: falling through would reference chunks/metadata_list
        # that may never have been assigned and crash with UnboundLocalError.
        print("Exception", e)
        return f"Error during retrieval: {str(e)}"

    # Print retrieved metadata for transparency
    print("\n📋 RETRIEVED CHUNKS METADATA:")
    print("-" * 50)
    for i, metadata in enumerate(metadata_list, 1):
        company = metadata.get('company', 'Unknown').replace(' (1)', '')
        year = metadata.get('year', 'Unknown')
        chunk_id = metadata.get('chunk_id', 'Unknown')
        source = metadata.get('section', 'Unknown')
        print(f"Chunk {i}: {company.title()} ({year}) - ID: {chunk_id} - {source}")

    # Combine chunks into context
    context = "\n\n".join([f"Document {i+1}:\n{chunk}" for i, chunk in enumerate(chunks)])

    # Create prompt for the LLM
    prompt = f"""Based on the following documents, please answer the user's question accurately and comprehensively.

QUESTION: {query}

CONTEXT DOCUMENTS:
{context}

INSTRUCTIONS:
- Use only the information provided in the context documents
- If the information is not sufficient to answer the question, state this clearly
- Provide specific details and numbers when available
- Structure your answer clearly and concisely
- If data spans multiple years or sources, organize it logically

ANSWER:"""

    # Send to the LLM (messages say OpenAI, matching the re-ranking variant)
    try:
        response = llm.invoke(prompt)
        answer = response.content.strip()

        print(f"\n🤖 OpenAI LLM Response Generated ({len(answer)} characters)")
        return answer

    except Exception as e:
        return f"Error generating answer with OpenAI: {str(e)}"



def _print_chunk_summary(heading: str, metadata_list) -> None:
    """Print one summary line (company, year, chunk id, section) per chunk."""
    print(f"\n📋 {heading}:")
    print("-" * 50)
    for i, metadata in enumerate(metadata_list, 1):
        company = metadata.get('company', 'Unknown').replace(' (1)', '')
        year = metadata.get('year', 'Unknown')
        chunk_id = metadata.get('chunk_id', 'Unknown')
        source = metadata.get('section', 'Unknown')
        print(f"Chunk {i}: {company.title()} ({year}) - ID: {chunk_id} - {source}")


def get_rag_answer_with_cohere_rerank(query: str, retriever, llm,
                                      initial_k: int = 10, top_n: int = 5) -> str:
    """
    Retrieve relevant chunks, re-rank them using Cohere, and generate answer using OpenAI LLM

    The query is embedded with the module-level ``embedding_model`` and the
    module-level Pinecone ``index`` is queried for ``initial_k`` candidates.
    The candidates are re-ranked with Cohere and the best ``top_n`` are used
    as context. If re-ranking fails, the first ``top_n`` candidates in
    Pinecone order are used instead.

    Args:
        query: User's question
        retriever: Not used by this function; kept so existing callers that
            pass it positionally keep working
        llm: Chat model exposing ``invoke(prompt)`` whose result has a
            ``.content`` string
        initial_k: Number of candidates to fetch from Pinecone before re-ranking
        top_n: Number of chunks to keep after re-ranking

    Returns:
        Generated answer based on re-ranked retrieved context, or a
        human-readable message when the Cohere key is missing, the client
        cannot be created, retrieval fails or finds nothing, or the LLM call
        fails.
    """

    # Initialize Cohere client
    try:
        cohere_api_key = os.getenv("COHERE_API_KEY")
        if not cohere_api_key:
            return "COHERE_API_KEY not found in environment variables"

        co = cohere.Client(cohere_api_key)
    except Exception as e:
        return f"Error initializing Cohere client: {str(e)}"

    # Retrieve the candidate chunks
    try:
        query_embedding = embedding_model.embed_query(query)
        response = index.query(
            vector=query_embedding,
            top_k=initial_k,  # Get more chunks initially for re-ranking
            include_metadata=True,
            include_values=False
        )
        chunks = []
        metadata_list = []

        for match in response.matches:
            chunk_text = match.metadata.get('chunk_text', '')
            if chunk_text:
                chunks.append(chunk_text)
                metadata_list.append(match.metadata)

        print(f"✅ Retrieved {len(chunks)} chunks using direct Pinecone query")

        if not chunks:
            return "No relevant information found in the database."

    except Exception as e:
        print("Exception", e)
        return f"Error during retrieval: {str(e)}"

    # Print chunks BEFORE re-ranking
    _print_chunk_summary("CHUNKS BEFORE RE-RANKING", metadata_list)

    # Re-rank using Cohere
    # NOTE(review): the index is embedded with a multilingual model while the
    # re-ranker below is the English one — confirm the corpus is English.
    try:
        rerank_response = co.rerank(
            model='rerank-english-v3.0',
            query=query,
            documents=chunks,
            # Never ask for more results than there are candidates.
            top_n=min(top_n, len(chunks)),
            # Only result.index is used, so the texts need not be echoed back.
            return_documents=False
        )

        # Map re-ranked positions back to the original chunks and metadata
        reranked_chunks = []
        reranked_metadata = []

        for result in rerank_response.results:
            original_index = result.index
            reranked_chunks.append(chunks[original_index])
            reranked_metadata.append(metadata_list[original_index])

        print(f"✅ Re-ranked to top {len(reranked_chunks)} most relevant chunks")
        selection_note = ("These documents were retrieved by semantic similarity "
                          "and then re-ranked for relevance to the question")

    except Exception as e:
        print(f"Exception during re-ranking: {e}")
        # Fallback to original chunks if re-ranking fails
        reranked_chunks = chunks[:top_n]
        reranked_metadata = metadata_list[:top_n]
        selection_note = "These documents were retrieved by semantic similarity"

    # Print chunks AFTER re-ranking
    _print_chunk_summary("CHUNKS AFTER RE-RANKING", reranked_metadata)

    # Combine chunks into context
    context = "\n\n".join([f"Document {i+1}:\n{chunk}" for i, chunk in enumerate(reranked_chunks)])

    # Create prompt for OpenAI; selection_note describes how the context was
    # actually chosen on this call (re-ranked vs. fallback).
    prompt = f"""Based on the following documents, please answer the user's question accurately and comprehensively.

QUESTION: {query}

CONTEXT DOCUMENTS:
{context}

INSTRUCTIONS:
- Use only the information provided in the context documents
- {selection_note}
- If the information is not sufficient to answer the question, state this clearly
- Provide specific details and numbers when available
- Structure your answer clearly and concisely
- If data spans multiple years or sources, organize it logically

ANSWER:"""

    # Send to OpenAI LLM
    try:
        response = llm.invoke(prompt)
        answer = response.content.strip()

        print(f"\n🤖 OpenAI LLM Response Generated ({len(answer)} characters)")
        return answer

    except Exception as e:
        return f"Error generating answer with OpenAI: {str(e)}"

def evaluate_answers(answer1: str, answer2: str, llm, query: str = None) -> str:
    """
    Ask the LLM to judge two answers against each other and report the verdict.

    Args:
        answer1: Answer produced without re-ranking
        answer2: Answer produced with Cohere re-ranking
        llm: Chat model exposing ``invoke(prompt)`` whose result has a
            ``.content`` string
        query: The question both answers respond to (optional; when falsy the
            query line in the prompt is left empty)

    Returns:
        The stripped evaluation text from the LLM (also printed between
        separator lines), or an error message string if anything in the
        evaluation step raises.
    """

    # Optional header line; stays an empty line when no query is supplied.
    query_line = f"ORIGINAL QUERY: {query}" if query else ""

    # Full judging prompt: both answers, the criteria, and the output format.
    comparison_prompt = f"""You are an expert evaluator tasked with comparing two AI-generated answers to determine which one is better. Please analyze both answers carefully and provide a detailed comparison.

{query_line}

ANSWER 1 (Without Re-ranking):
{answer1}

ANSWER 2 (With Re-ranking):
{answer2}

EVALUATION CRITERIA:
Please evaluate both answers based on the following criteria and provide a detailed analysis:

1. **ACCURACY & FACTUAL CORRECTNESS**
   - Which answer contains more accurate information?
   - Are there any factual errors or inconsistencies?

2. **COMPLETENESS & COMPREHENSIVENESS**
   - Which answer provides more complete coverage of the topic?
   - Does one answer miss important aspects that the other covers?

3. **RELEVANCE & FOCUS**
   - Which answer stays more focused on the specific question asked?
   - Does one contain more irrelevant or tangential information?

4. **CLARITY & ORGANIZATION**
   - Which answer is clearer and easier to understand?
   - How well is the information structured and organized?

5. **SPECIFIC DETAILS & EVIDENCE**
   - Which answer provides more specific details, numbers, or concrete examples?
   - How well does each answer support its claims with evidence?

6. **OVERALL QUALITY & USEFULNESS**
   - Which answer would be more helpful to someone seeking this information?
   - Consider the practical value and actionability of each response.

COMPARISON FORMAT:
Please structure your evaluation as follows:

**WINNER: [Answer 1 / Answer 2 / Tie]**

**DETAILED ANALYSIS:**

**Accuracy & Factual Correctness:**
- Answer 1: [Analysis]
- Answer 2: [Analysis]
- Winner: [Answer 1/Answer 2/Tie] - [Brief reason]

**Completeness & Comprehensiveness:**
- Answer 1: [Analysis]
- Answer 2: [Analysis]
- Winner: [Answer 1/Answer 2/Tie] - [Brief reason]

**Relevance & Focus:**
- Answer 1: [Analysis]
- Answer 2: [Analysis]
- Winner: [Answer 1/Answer 2/Tie] - [Brief reason]

**Clarity & Organization:**
- Answer 1: [Analysis]
- Answer 2: [Analysis]
- Winner: [Answer 1/Answer 2/Tie] - [Brief reason]

**Specific Details & Evidence:**
- Answer 1: [Analysis]
- Answer 2: [Analysis]
- Winner: [Answer 1/Answer 2/Tie] - [Brief reason]

**KEY DIFFERENCES:**
- [List 3-5 most significant differences between the answers]

**FINAL VERDICT:**
- Overall Winner: [Answer 1/Answer 2/Tie]
- Confidence Level: [High/Medium/Low]
- Main Reasons: [2-3 key reasons for the decision]

**RECOMMENDATIONS:**
- [Suggestions for improving the weaker answer or both answers]

Be objective, thorough, and specific in your analysis. Focus on concrete differences rather than general statements."""

    # Call the LLM and echo its verdict; any failure becomes an error string.
    try:
        verdict = llm.invoke(comparison_prompt).content.strip()
        rule = "=" * 80

        print(f"\n🔍 Answer Evaluation Completed ({len(verdict)} characters)")
        print("\n" + rule)
        print("📊 ANSWER COMPARISON EVALUATION")
        print(rule)
        print(verdict)
        print(rule)
    except Exception as exc:
        return f"Error during answer evaluation: {exc!s}"

    return verdict

# Run one question through both pipelines, show each answer inside a banner,
# then have the LLM compare the two answers.
print("🧪 Testing re-ranking function...")
test_query = "Summarize Amazon R&D spending in 2024"
_rule = "=" * 80

# Baseline: plain similarity retrieval.
answer = get_rag_answer(test_query, retriever, llm)
print("\n" + _rule, "🎯 BASIC RAG ANSWER:", _rule, answer, _rule, sep="\n")

# Variant: retrieval followed by Cohere re-ranking.
answer_with_rerank = get_rag_answer_with_cohere_rerank(test_query, retriever, llm)
print("\n" + _rule, "🎯 RE-RANKED RAG ANSWER:", _rule, answer_with_rerank, _rule, sep="\n")

# Evaluate the answers (the function prints its own report).
evaluate_answers(answer, answer_with_rerank, llm, test_query)

print("\n✅ Step 5 Complete: Re-ranking implementation finished!")

Environment Variables Status:
------------------------------
✅ PINECONE_API_KEY: Set
✅ PINECONE_INDEX: Set
✅ PINECONE_URL: Set
✅ OPENAI_API_KEY: Set
✅ COHERE_API_KEY: Set

🎉 All environment variables loaded successfully!
📋 Pinecone Index: advance-rag
✅ OpenAI LLM initialized successfully
🤖 Model: gpt-4o-mini
🌡️ Temperature: 0.1
📡 Streaming: False
✅ OpenAI API key loaded successfully
🧪 Testing re-ranking function...


  return forward_call(*args, **kwargs)


✅ Retrieved 5 chunks using direct Pinecone query

📋 RETRIEVED CHUNKS METADATA:
--------------------------------------------------
Chunk 1: Tesla (2024.0) - ID: tesla_2024_financial_statements_tesla_2024_financial_statements_530 - Financial Statements
Chunk 2: Tesla (2024.0) - ID: tesla_2024_financial_statements_tesla_2024_financial_statements_530 - Financial Statements
Chunk 3: Tesla (2024.0) - ID: tesla_2024_financial_statements_tesla_2024_financial_statements_530 - Financial Statements
Chunk 4: Nvidia (2024.0) - ID: nvidia_2024_financial_statements_nvidia_2024_business_635 - Business
Chunk 5: Nvidia (2024.0) - ID: nvidia_2024_financial_statements_nvidia_2024_business_635 - Business

🤖 DeepSeek LLM Response Generated (553 characters)

🎯 BASIC RAG ANSWER:
The context documents do not provide specific information regarding Amazon's R&D spending in 2024. However, they do mention that during fiscal year 2024, Amazon spent $1.1 billion on capital expenditures. There is no direct mention of

  return forward_call(*args, **kwargs)


✅ Retrieved 10 chunks using direct Pinecone query

📋 CHUNKS BEFORE RE-RANKING:
--------------------------------------------------
Chunk 1: Tesla (2024.0) - ID: tesla_2024_financial_statements_tesla_2024_financial_statements_530 - Financial Statements
Chunk 2: Tesla (2024.0) - ID: tesla_2024_financial_statements_tesla_2024_financial_statements_530 - Financial Statements
Chunk 3: Tesla (2024.0) - ID: tesla_2024_financial_statements_tesla_2024_financial_statements_530 - Financial Statements
Chunk 4: Nvidia (2024.0) - ID: nvidia_2024_financial_statements_nvidia_2024_business_635 - Business
Chunk 5: Nvidia (2024.0) - ID: nvidia_2024_financial_statements_nvidia_2024_business_635 - Business
Chunk 6: Nvidia (2024.0) - ID: nvidia_2024_financial_statements_nvidia_2024_business_635 - Business
Chunk 7: Nvidia (2024.0) - ID: nvidia_2024_financial_statements_nvidia_2024_executive_compensation_609 - Executive Compensation
Chunk 8: Nvidia (2024.0) - ID: nvidia_2024_financial_statements_nvidia_2024_exe