In [1]:
import functools
import re
import sys
from collections import defaultdict
from typing import Any, Callable, Dict, List

import openai

from config import (
    DEFAULT_MODEL,
    DEFAULT_TOP_K,
    OPENAI_API_KEY,
    TEST_PDFS_DIR,
)
from document_processor import _load_local_documents, split_documents_to_text_chunks
from vector_store import FaissManager


def load_client(api_key: str = OPENAI_API_KEY) -> openai.OpenAI:
    """
    Build an OpenAI client and verify it works before handing it back.

    Args:
        api_key: API key to authenticate with (defaults to OPENAI_API_KEY from config)

    Returns:
        A client that has completed one successful `models.list()` call

    Any failure (missing key, unreachable server, rejected key, rate limit/quota,
    or anything else) is reported with print() and ends the process via sys.exit(1).
    """
    try:
        if not api_key:
            raise ValueError("OPENAI_API_KEY is missing from configuration.")

        verified_client = openai.OpenAI(api_key=api_key)

        # Cheap round-trip so connectivity/auth/quota problems surface right here
        verified_client.models.list()
    except openai.APIConnectionError as exc:
        failure = f"Error: The server could not be reached. {exc}"
    except openai.AuthenticationError as exc:
        failure = f"Error: Your OpenAI API key or token is invalid. {exc}"
    except openai.RateLimitError as exc:
        failure = f"Error: You have hit your OpenAI rate limit or quota: {exc}"
    except Exception as exc:
        # Also reached by the missing-key ValueError raised above
        failure = f"An unexpected error occurred during OpenAI initialization: {exc}"
    else:
        return verified_client

    # Every failure path ends the same way: report, then terminate
    print(failure)
    sys.exit(1)


# Module-level client shared by generate_query_reformulations() and generate_answer().
# NOTE: load_client() performs a live `models.list()` call and exits the process
# (sys.exit(1)) on any failure, so running this line needs a valid key and network access.
client = load_client()


def handle_openai_errors(func: Callable) -> Callable:
    """
    Decorator that translates OpenAI SDK exceptions into built-in exception types.

    Branches are checked most-specific first:
        openai.APITimeoutError     -> TimeoutError
        openai.APIConnectionError  -> ConnectionError
        openai.RateLimitError      -> RuntimeError
        openai.AuthenticationError -> ValueError
        openai.BadRequestError     -> ValueError
        openai.APIStatusError      -> RuntimeError (any remaining HTTP error status)
        any other Exception        -> RuntimeError

    Note that the final catch-all also re-wraps exceptions raised by the decorated
    function itself (e.g. a ValueError raised in its body becomes a RuntimeError).

    Every translated exception is chained with `from e`, so the original SDK
    error stays available on `__cause__`.

    Args:
        func: Function that calls the OpenAI API

    Returns:
        Wrapped function with the same name/docstring (via functools.wraps)
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs) -> Any:
        try:
            return func(*args, **kwargs)
        except openai.APITimeoutError as e:
            # Must come BEFORE APIConnectionError: APITimeoutError is a subclass of it,
            # so the broader handler would otherwise swallow every timeout.
            raise TimeoutError(f"OpenAI API request timed out: {e}") from e
        except openai.APIConnectionError as e:
            # Handles network issues (DNS, no internet, connection refused)
            raise ConnectionError(f"Could not connect to OpenAI API: {e}") from e
        except openai.RateLimitError as e:
            # Handles 429 errors (Quota exceeded or too many requests)
            raise RuntimeError(f"Rate limit hit: {e}. Check your credits or throughput limits.") from e
        except openai.AuthenticationError as e:
            # Handles 401 errors (Invalid API Key)
            raise ValueError(f"Authentication failed: {e}") from e
        except openai.BadRequestError as e:
            # Handles 400 errors (Wrong model name, invalid parameters, etc.)
            raise ValueError(f"Invalid request to OpenAI: {e}") from e
        except openai.APIStatusError as e:
            # Handles every other HTTP error status not matched above
            # (5xx server-side issues, but also e.g. 403/404/409/422)
            raise RuntimeError(
                f"OpenAI server returned an error (Status {e.status_code}): {e.response}"
            ) from e
        except Exception as e:
            # Fallback for any other unexpected errors
            raise RuntimeError(f"An unexpected error occurred: {e}") from e
    return wrapper


# Leading list markers the model may add despite the prompt: "1.", "12)", "-", "*", "•"
# (possibly repeated, e.g. "1. - text"). The negative lookahead keeps decimals such as
# "1.5 million ..." intact.
_LIST_MARKER_RE = re.compile(r"^(?:(?:\d+[.)](?!\d)|[-*•])\s*)+")


def _parse_reformulations(raw_text: str, limit: int) -> List[str]:
    """Return at most `limit` cleaned, non-trivial lines from raw LLM output."""
    parsed = []
    for raw_line in raw_text.splitlines():
        candidate = _LIST_MARKER_RE.sub("", raw_line.strip()).strip()
        if len(candidate) > 5:  # Filter out empty and very short lines
            parsed.append(candidate)
            if len(parsed) >= limit:
                break
    return parsed


@handle_openai_errors
def generate_query_reformulations(
        original_query: str,
        model: str = DEFAULT_MODEL,
        num_reformulations: int = 3,
        temperature: float = 0.8,
        max_tokens: int = 300
) -> List[str]:
    """
    Generate query reformulations using LLM

    Args:
        original_query: Original user query
        model: name of model to use
        num_reformulations: Number of reformulations to generate (default 3);
            values below 1 return an empty list without calling the API
        temperature: Temperature parameter for diversity (default 0.8)
        max_tokens: Maximum tokens for the response (default 300)

    Returns:
        List of exactly num_reformulations queries. If the model yields fewer
        usable lines, the list is padded with original_query.

    Raises:
        Built-in exceptions as translated by handle_openai_errors
        (ConnectionError, TimeoutError, ValueError, RuntimeError).
    """
    if num_reformulations < 1:
        # Nothing requested: avoid a pointless (billed) API call
        return []

    system_prompt = ("You are a query reformulation assistant. Generate alternative phrasings "
                     "of the given query that would help retrieve relevant information.")

    user_prompt = f"""\
Given the following query, generate {num_reformulations} different reformulations that:
1. Express the same intent but use different wording
2. May use synonyms or related terms
3. Could be phrased as questions or statements
4. Help retrieve relevant information from a document search system

Original Query: {original_query}

Generate exactly {num_reformulations} reformulations, one per line, without numbering or bullets."""

    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ],
        temperature=temperature,
        max_tokens=max_tokens,  # prevents runaway costs
    )

    # message.content is optional in the API response; treat a missing body as empty
    # so the padding below applies instead of crashing on None.strip()
    reformulations_text = response.choices[0].message.content or ""

    reformulations = _parse_reformulations(reformulations_text, num_reformulations)

    # Return exactly num_reformulations, padding with the original query if needed
    missing = num_reformulations - len(reformulations)
    reformulations.extend([original_query] * missing)

    return reformulations


def reciprocal_rank_fusion(search_results_list: List[List[Dict]], k: int = 60) -> List[Dict]:
    """
    Merge several ranked result lists into one ranking via Reciprocal Rank Fusion.

    Each appearance of a chunk at 1-based position `rank` in a list adds
    1 / (k + rank) to that chunk's fused score. Results whose 'id' is None
    are ignored (but still occupy their rank position).

    Args:
        search_results_list: One ranked list of result dicts per query
        k: RRF constant (default 60)

    Returns:
        Shallow copies of the result dicts, ordered by fused score (highest first).
        For each chunk the copy is taken from the occurrence with the highest
        original 'score'; both 'rrf_score' and 'score' are set to the fused score.
    """
    fused_scores = {}  # chunk id -> accumulated RRF score
    best_hit = {}      # chunk id -> occurrence with the highest original score

    for hits in search_results_list:
        for position, hit in enumerate(hits, start=1):
            chunk_id = hit.get('id')
            if chunk_id is None:
                continue

            fused_scores[chunk_id] = fused_scores.get(chunk_id, 0.0) + 1.0 / (k + position)

            current = best_hit.get(chunk_id)
            if current is None or hit.get('score', 0) > current.get('score', 0):
                best_hit[chunk_id] = hit

    # Stable sort: ties keep first-seen order
    ranked_ids = sorted(fused_scores, key=fused_scores.get, reverse=True)

    fused_results = []
    for chunk_id in ranked_ids:
        merged = best_hit[chunk_id].copy()
        merged.update(rrf_score=fused_scores[chunk_id], score=fused_scores[chunk_id])
        fused_results.append(merged)

    return fused_results


@handle_openai_errors
def generate_answer(
        context_text: str,
        user_question: str,
        model: str = DEFAULT_MODEL,
        temperature: float = 0.7,
        max_tokens: int = 1500
) -> str:
    """
    Generate answer based on context information

    Args:
        context_text: Context information (usually retrieved document chunks)
        user_question: User question
        model: name of the model to use, defaults to DEFAULT_MODEL from config
        temperature: Temperature parameter, default 0.7
        max_tokens: Maximum tokens for the answer (curb the cost), default 1500

    Returns:
        Generated answer text (stripped, never empty)

    Raises:
        Built-in exceptions as translated by handle_openai_errors
        (ConnectionError, TimeoutError, ValueError, RuntimeError). An empty or
        missing answer raises ValueError inside this function, which the
        decorator's catch-all re-raises as RuntimeError.
    """
    system_prompt = ("You are a professional Q&A assistant. "
                     "Please answer user questions accurately based on the provided context information.")

    user_prompt = f"""\
Context Information:
{context_text}

User Question: {user_question}

Requirements:
1. Only answer based on the provided context information, do not make up information
2. If there is no relevant information in the context, please clearly state so
3. Answers should be accurate, concise, and well-organized
4. You are encouraged to cite specific document sources

Please answer:"""

    # Call Open AI API
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ],
        temperature=temperature,
        max_tokens=max_tokens,  # adjust based on your needs
    )

    # message.content is optional in the API response; normalise None to "" so a
    # missing body reaches the explicit empty-answer check instead of failing
    # with AttributeError on None.strip()
    answer = (response.choices[0].message.content or "").strip()

    if not answer:
        raise ValueError("LLM returned empty answer")

    return answer

In [2]:
# Load the test PDFs, split them into text chunks and add the chunks to a fresh index.
# NOTE(review): _load_local_documents is an underscore-prefixed (private) helper of
# document_processor — confirm it is meant to be used from outside that module.
index_manager = FaissManager()
test_documents = _load_local_documents(TEST_PDFS_DIR)
test_chunks = split_documents_to_text_chunks(test_documents)
index_manager.add_chunks(test_chunks)

# Ask the LLM for alternative phrasings of the test question
# (one API call; num_reformulations defaults to 3).
test_question = "Why do language models follow instructions? Is Human feedback also reducing hallucination?"
test_reformulations = generate_query_reformulations(test_question)

C:\DAHOU\Business\go_tech\chat-pdf\data\test_data\AttentionIsAllYouNeed.pdf
C:\DAHOU\Business\go_tech\chat-pdf\data\test_data\TrainingLanguageModelsToFollowInstructionsWithHumanFeedback.pdf
Loaded 2 document(s) from 'C:\DAHOU\Business\go_tech\chat-pdf\data\test_data'


In [3]:
# Collect one result list per query: index 0 is the original question, followed by
# one list per reformulation. This list-of-lists is the input shape that
# reciprocal_rank_fusion() takes.
# NOTE(review): search() is called without an explicit top-k, so the result count is
# whatever FaissManager.search defaults to — confirm against vector_store.
all_search_results = []
initial_result = index_manager.search(test_question)
all_search_results.append(initial_result)
for reformed_query in test_reformulations:
    reformed_result = index_manager.search(query=reformed_query)
    all_search_results.append(reformed_result)

In [5]:
# Inspect the hits retrieved for the original (un-reformulated) question.
all_search_results[0]

[{'id': 26,
  'score': 0.559683084487915,
  'file_name': 'TrainingLanguageModelsToFollowInstructionsWithHumanFeedback.pdf',
  'text': 'the language modeling objective is misaligned. Averting these unintended behaviors is especially\nimportant for language models that are deployed and used in hundreds of applications.\nWe make progress on aligning language models by training them to act in accordance with the user’s\nintention (Leike et al., 2018). This encompasses both explicit intentions such as following instructions\nand implicit intentions such as staying truthful, and not being biased, toxic, or otherwise harmful.\nUsing the language of Askell et al. (2021), we want language models to be helpful (they should\nhelp the user solve their task), honest (they shouldn’t fabricate information or mislead the user), and\nharmless (they should not cause physical, psychological, or social harm to people or the environment).\nWe elaborate on the evaluation of these criteria in Section 3.6.\nW