# RAG - advanced data retrieval

In [1]:
import os

from dotenv import load_dotenv
from langchain_community.document_loaders import DirectoryLoader
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

load_dotenv(override=True)

  from .autonotebook import tqdm as notebook_tqdm


True

üéØ METADATA FILTERING DEMONSTRATION

In [2]:
print("\n1Ô∏è‚É£ Loading documents with rich metadata:")

# Load existing scientist documents
data_dir = "data/scientists_bios"
loader = DirectoryLoader(data_dir, glob="*.txt")
raw_documents = loader.load()

print(f"   Loaded {len(raw_documents)} raw documents")


1Ô∏è‚É£ Loading documents with rich metadata:
   Loaded 5 raw documents


In [3]:
def extract_enhanced_metadata(doc):
    """Extract rich metadata from document content and filename."""
    source_file = doc.metadata.get('source', '')
    filename = os.path.basename(source_file).replace('.txt', '')
    content = doc.page_content

    # Extract scientist information
    scientist_info = {
        'scientist_name': filename,
        'content_type': 'biography',
        'source_type': 'text_file',
        'language': 'english'
    }

    # Extract time periods from content
    birth_death_info = {}
    if '(' in content and ')' in content:
        # Look for birth-death years in parentheses
        import re
        years = re.findall(r'\((\d{4})-(\d{4})\)', content)
        if years:
            birth_year, death_year = years[0]
            birth_death_info.update({
                'birth_year': int(birth_year),
                'death_year': int(death_year),
                'century': f"{birth_year[:2]}th century" if birth_year.startswith(
                    '18') else f"{birth_year[:2]}th century",
                'time_period': 'historical'
            })

    # Extract scientific fields
    field_keywords = {
        'mathematics': ['mathematician', 'algorithm', 'analytical', 'computation'],
        'physics': ['physicist', 'relativity', 'Nobel Prize', 'photoelectric', 'radioactivity'],
        'chemistry': ['chemist', 'chemical', 'elements', 'research'],
        'computer_science': ['computer', 'programming', 'algorithm', 'machine']
    }

    fields = []
    content_lower = content.lower()
    for field, keywords in field_keywords.items():
        if any(keyword in content_lower for keyword in keywords):
            fields.append(field)

    scientist_info['scientific_fields'] = fields
    scientist_info['primary_field'] = fields[0] if fields else 'unknown'

    # Add document quality metrics
    scientist_info.update({
        'word_count': len(content.split()),
        'character_count': len(content),
        'completeness': 'high' if len(content) > 200 else 'medium' if len(content) > 100 else 'low'
    })

    # Merge with existing metadata
    doc.metadata.update(scientist_info)
    return doc

In [4]:
extract_enhanced_metadata(raw_documents[0]).metadata

{'source': 'data/scientists_bios/Ada Lovelace.txt',
 'scientist_name': 'Ada Lovelace',
 'content_type': 'biography',
 'source_type': 'text_file',
 'language': 'english',
 'scientific_fields': ['mathematics', 'computer_science'],
 'primary_field': 'mathematics',
 'word_count': 576,
 'character_count': 3744,
 'completeness': 'high'}

In [5]:
# Apply enhanced metadata extraction
enhanced_documents = []
for doc in raw_documents:
    enhanced_doc = extract_enhanced_metadata(doc)
    enhanced_documents.append(enhanced_doc)

print("\n   Enhanced metadata for each document:")
for doc in enhanced_documents:
    print(f"   üìÑ {doc.metadata['scientist_name']}:")
    print(f"      ‚Ä¢ Fields: {', '.join(doc.metadata['scientific_fields'])}")
    print(f"      ‚Ä¢ Primary: {doc.metadata['primary_field']}")
    print(f"      ‚Ä¢ Birth year: {doc.metadata.get('birth_year', 'unknown')}")
    print(f"      ‚Ä¢ Word count: {doc.metadata['word_count']}")
    break


   Enhanced metadata for each document:
   üìÑ Ada Lovelace:
      ‚Ä¢ Fields: mathematics, computer_science
      ‚Ä¢ Primary: mathematics
      ‚Ä¢ Birth year: unknown
      ‚Ä¢ Word count: 576


In [6]:
# 2. Text Splitting with Metadata Preservation
print("\n2Ô∏è‚É£ Chunking with metadata preservation:")

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=800,
    chunk_overlap=100,
    separators=["\n\n", "\n", ". ", " ", ""]
)

chunks = text_splitter.split_documents(enhanced_documents)

# Add chunk-specific metadata
for i, chunk in enumerate(chunks):
    chunk.metadata.update({
        'chunk_id': f"chunk_{i + 1}",
        'chunk_size': len(chunk.page_content),
        'chunk_position': 'start' if i < len(chunks) // 3 else 'middle' if i < 2 * len(chunks) // 3 else 'end'
    })

print(f"   Created {len(chunks)} chunks with enhanced metadata")
print("   Sample chunk metadata:")
sample_chunk = chunks[0]
for key, value in sample_chunk.metadata.items():
    print(f"      {key}: {value}")


2Ô∏è‚É£ Chunking with metadata preservation:
   Created 28 chunks with enhanced metadata
   Sample chunk metadata:
      source: data/scientists_bios/Ada Lovelace.txt
      scientist_name: Ada Lovelace
      content_type: biography
      source_type: text_file
      language: english
      scientific_fields: ['mathematics', 'computer_science']
      primary_field: mathematics
      word_count: 576
      character_count: 3744
      completeness: high
      chunk_id: chunk_1
      chunk_size: 756
      chunk_position: start


In [7]:
# 3. Create Vector Store with Rich Metadata
print("\n3Ô∏è‚É£ Building vector store with metadata indexing:")

embeddings = AzureOpenAIEmbeddings(model="text-embedding-3-small")
vector_store = InMemoryVectorStore(embeddings)
vector_store.add_documents(documents=chunks)

print(f"   ‚úÖ Indexed {len(chunks)} chunks with full metadata")


3Ô∏è‚É£ Building vector store with metadata indexing:
   ‚úÖ Indexed 28 chunks with full metadata


In [8]:
# 4. Metadata Filtering Examples

# Create filtered retrievers using custom search functions
def search_with_field_filter(query, target_field, k=3):
    """Search with field filtering."""
    all_results = vector_store.similarity_search(query, k=k * 3)

    filtered_results = []
    for result in all_results:
        if result.metadata.get('primary_field') == target_field:
            filtered_results.append(result)
        if len(filtered_results) >= k:
            break

    return filtered_results

Uwaga! W momencie wywo≈Çywania similarity_search podajemy wiƒôkszƒÖ liczbƒô wynik√≥w (k*3), aby mieƒá pewno≈õƒá, ≈ºe po filtrowaniu zostanie wystarczajƒÖca liczba dokument√≥w do zwr√≥cenia.

Robimy to pozniewa≈º filtrowanie nastƒôpuje po wyszukiwaniu wektorowym, wiƒôc nie mamy gwarancji, ≈ºe pierwsze k wynik√≥w spe≈Çni warunki filtrowania.

In [9]:
print("\n4Ô∏è‚É£ Metadata filtering demonstrations:")

# Example 1: Field-specific retrieval
print("\n   üî¨ Field-specific retrieval (Physics only):")

physics_query = "What are the major scientific contributions?"
physics_results = search_with_field_filter(physics_query, "physics")
print(f"   Query: {physics_query}")
print(f"   Results: {len(physics_results)} physics-related chunks")
for i, result in enumerate(physics_results):
    scientist = result.metadata['scientist_name']
    field = result.metadata['primary_field']
    print(f"   {i + 1}. {scientist} ({field}):")
    print()
    print(f"{result.page_content[:100]}...")
    print()


4Ô∏è‚É£ Metadata filtering demonstrations:

   üî¨ Field-specific retrieval (Physics only):
   Query: What are the major scientific contributions?
   Results: 3 physics-related chunks
   1. Albert Einstein (physics):

# Albert Einstein (1879

1955)

Albert Einstein was a German-born theoretical physicist who develope...

   2. Albert Einstein (physics):

Einstein died on April 18, 1955, in Princeton, New Jersey. His brain was preserved for scientific st...

   3. Albert Einstein (physics):

**Photoelectric Effect

**: Explained the photoelectric effect in 1905, proposing that light consist...



In [10]:
# 5. Contextual RAG with Metadata
print("\n5Ô∏è‚É£ Building contextual RAG system:")

llm = AzureChatOpenAI(model="gpt-5-nano")


5Ô∏è‚É£ Building contextual RAG system:


In [11]:
def search_with_time_filter(query, target_century="19th", k=3):
    """Search with time period filtering."""
    # Get all results first
    all_results = vector_store.similarity_search(query, k=k * 3)

    # Filter by time period
    filtered_results = []
    for result in all_results:
        birth_year = result.metadata.get('birth_year')
        if birth_year:
            if target_century == "19th" and 1800 <= birth_year < 1900:
                filtered_results.append(result)
        if len(filtered_results) >= k:
            break

    return filtered_results


def search_with_field_filter(query, target_field, k=3):
    """Search with field filtering."""
    all_results = vector_store.similarity_search(query, k=k * 3)

    filtered_results = []
    for result in all_results:
        if result.metadata.get('primary_field') == target_field:
            filtered_results.append(result)
        if len(filtered_results) >= k:
            break

    return filtered_results


def search_with_quality_filter(query, min_completeness="high", k=3):
    """Search with document quality filtering."""
    all_results = vector_store.similarity_search(query, k=k * 2)

    filtered_results = []
    for result in all_results:
        if result.metadata.get('completeness') == min_completeness:
            filtered_results.append(result)
        if len(filtered_results) >= k:
            break

    return filtered_results

poni≈ºej przygotujemy bardzo prstry retriever, kt√≥ry szuka s≈Ç√≥w-kluczy w metadanych i zwraca tylko pasujƒÖce dokumenty

In [12]:
# Smart retriever that uses context to determine filters
def create_contextual_retriever(query, k=4):
    """Create a context-aware retriever based on query content."""
    query_lower = query.lower()

    # Determine appropriate filters based on query
    filters = {}

    # Field-specific keywords
    if any(word in query_lower for word in ['physics', 'relativity', 'einstein']):
        filters['primary_field'] = 'physics'
    elif any(word in query_lower for word in ['mathematics', 'algorithm', 'computation']):
        filters['primary_field'] = 'mathematics'
    elif any(word in query_lower for word in ['programming', 'computer', 'lovelace']):
        filters['primary_field'] = 'computer_science'

    # Time-based keywords
    historical_terms = ['historical', 'past', 'old', '19th century', 'early']
    if any(term in query_lower for term in historical_terms):
        # Use custom search for historical filtering
        return search_with_time_filter(query, "19th", k)

    # Quality-based keywords
    if any(word in query_lower for word in ['detailed', 'comprehensive', 'complete']):
        return search_with_quality_filter(query, "high", k)

    # Use filters if determined
    if 'primary_field' in filters:
        return search_with_field_filter(query, filters['primary_field'], k)
    else:
        return vector_store.similarity_search(query, k=k)

In [13]:
# Enhanced prompt that uses metadata
contextual_prompt = ChatPromptTemplate.from_template("""
You are an assistant for question-answering tasks about scientists and their contributions.
Use the following pieces of retrieved context to answer the question.
Pay attention to the metadata information about each source, including:
- The scientist's name and primary field
- Time period and historical context
- Document quality and completeness

If you don't know the answer, just say that you don't know.
Use three sentences maximum and keep the answer concise.

Question: {question}

Context with metadata:
{context}

Answer:
""")

In [14]:
def format_context_with_metadata(retrieved_docs):
    """Format retrieved documents with their metadata for the prompt."""
    formatted_context = []
    for i, doc in enumerate(retrieved_docs):
        metadata = doc.metadata
        scientist = metadata.get('scientist_name', 'Unknown')
        field = metadata.get('primary_field', 'Unknown')
        birth_year = metadata.get('birth_year', 'Unknown')

        context_entry = f"""
Source {i + 1}: {scientist} ({field}, born {birth_year})
Content: {doc.page_content}
"""
        formatted_context.append(context_entry)

    return "\n".join(formatted_context)


# Create contextual RAG chain
def contextual_rag_chain(question):
    """RAG chain with contextual retrieval and metadata-aware prompting."""
    # Get contextually relevant documents
    retrieved_docs = create_contextual_retriever(question)

    # Format context with metadata
    formatted_context = format_context_with_metadata(retrieved_docs)

    # Generate response
    response = llm.invoke(
        contextual_prompt.format(
            question=question,
            context=formatted_context
        )
    )

    return response.content, retrieved_docs

In [15]:
print("\n6Ô∏è‚É£ Testing contextual RAG system:")

question = "What physics discoveries were made by Einstein?"

try:
    answer, sources = contextual_rag_chain(question)
    print(f"   A{i}: {answer}")

    print(f"\n   üìö Sources used ({len(sources)} documents):")
    for j, source in enumerate(sources):
        scientist = source.metadata['scientist_name']
        field = source.metadata['primary_field']
        print(f"      {j + 1}. {scientist} ({field})")

except Exception as e:
    print(f"   A{i}: Error - {str(e)}")


6Ô∏è‚É£ Testing contextual RAG system:


   A2: Albert Einstein developed the special theory of relativity (1905) and the general theory of relativity (1915), reshaping physics of space, time, and gravity. He formulated the mass‚Äìenergy equivalence E=mc¬≤. He explained the photoelectric effect (1905), showing light as photons and helping establish quantum theory, and provided evidence for Brownian motion supporting atomic theory.

   üìö Sources used (4 documents):
      1. Albert Einstein (physics)
      2. Albert Einstein (physics)
      3. Albert Einstein (physics)
      4. Albert Einstein (physics)


üîÄ HYBRID SEARCH DEMONSTRATION

In [16]:
# 1. Load and Prepare Documents
print("\n1Ô∏è‚É£ Loading documents for hybrid search:")

data_dir = "data/scientists_bios"
loader = DirectoryLoader(data_dir, glob="*.txt")
documents = loader.load()

print(f"   Loaded {len(documents)} documents")

# Add metadata
for doc in documents:
    filename = os.path.basename(doc.metadata['source']).replace('.txt', '')
    doc.metadata.update({
        'scientist_name': filename,
        'word_count': len(doc.page_content.split())
    })

# Chunk documents
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=800,
    chunk_overlap=100
)
chunks = text_splitter.split_documents(documents)

print(f"   Created {len(chunks)} chunks for hybrid search")

# 2. Build Multiple Search Indexes
print("\n2Ô∏è‚É£ Building multiple search indexes:")

# Vector search setup
embeddings = AzureOpenAIEmbeddings(model="text-embedding-3-small")
vector_store = InMemoryVectorStore(embeddings)
vector_store.add_documents(documents=chunks)
print(f"   ‚úÖ Vector store: {len(chunks)} chunks embedded")


1Ô∏è‚É£ Loading documents for hybrid search:
   Loaded 5 documents
   Created 28 chunks for hybrid search

2Ô∏è‚É£ Building multiple search indexes:
   ‚úÖ Vector store: 28 chunks embedded


In [17]:
from rank_bm25 import BM25Okapi
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import os

In [18]:
# BM25 keyword search setup
chunk_texts = [chunk.page_content for chunk in chunks]
tokenized_chunks = [text.lower().split() for text in chunk_texts]
bm25 = BM25Okapi(tokenized_chunks)
print(f"   ‚úÖ BM25 index: {len(chunk_texts)} documents indexed")

   ‚úÖ BM25 index: 28 documents indexed


BM25 to jedna z najpopularniejszych funkcji rankingowych stosowanych w wyszukiwarkach, kt√≥ra ocenia, jak bardzo dany dokument jest trafny wzglƒôdem zapytania u≈ºytkownika. Model ten opiera siƒô na probabilistycznym podej≈õciu do wyszukiwania informacji.

$$
{\displaystyle {\text{score}}(D,Q)=\sum _{i=1}^{n}{\text{IDF}}(q_{i})\cdot {\frac {f(q_{i},D)\cdot (k_{1}+1)}{f(q_{i},D)+k_{1}\cdot \left(1-b+b\cdot {\frac {|D|}{\text{avgdl}}}\right)}}}
$$

$$
{\displaystyle {\text{IDF}}(q_{i})=\ln \left({\frac {N-n(q_{i})+0.5}{n(q_{i})+0.5}}+1\right)}
$$

IDF - Inverse Document Frequency

https://en.wikipedia.org/wiki/Okapi_BM25

TF-IDF - iloczyn Term frequency oraz Inverse Document Frequency

https://en.wikipedia.org/wiki/Tf%E2%80%93idf

In [19]:
# TF-IDF setup for additional keyword matching
tfidf_vectorizer = TfidfVectorizer(
    max_features=1000,
    stop_words='english',
    ngram_range=(1, 2)
)
tfidf_matrix = tfidf_vectorizer.fit_transform(chunk_texts)
print(f"   ‚úÖ TF-IDF index: {tfidf_matrix.shape[1]} features extracted")

   ‚úÖ TF-IDF index: 1000 features extracted


In [20]:
# Test each method
test_query = "What did Einstein discover about light and energy?"
print(f"\n   üîç Test query: {test_query}")


   üîç Test query: What did Einstein discover about light and energy?


In [21]:
def vector_search(query, k=5):
    """Semantic similarity search using embeddings."""
    results = vector_store.similarity_search_with_score(query, k=k)
    return [(doc, score) for doc, score in results]


print(f"\n   üß† Vector search results:")
vector_results = vector_search(test_query, k=3)
for i, (doc, score) in enumerate(vector_results):
    scientist = doc.metadata['scientist_name']
    preview = doc.page_content[:80] + "..."
    print(f"      {i + 1}. {scientist} (score: {score:.3f}): {preview}")


   üß† Vector search results:


      1. Albert Einstein (score: 0.582): **Photoelectric Effect

**: Explained the photoelectric effect in 1905, proposin...
      2. Albert Einstein (score: 0.557): # Albert Einstein (1879

1955)

Albert Einstein was a German-born theoretical ph...
      3. Albert Einstein (score: 0.459): Einstein died on April 18, 1955, in Princeton, New Jersey. His brain was preserv...


In [22]:
def bm25_search(query, k=5):
    """Keyword search using BM25."""
    query_tokens = query.lower().split()
    scores = bm25.get_scores(query_tokens)

    # Get top-k results
    top_indices = np.argsort(scores)[::-1][:k]
    results = []

    for idx in top_indices:
        if scores[idx] > 0:  # Only include non-zero scores
            results.append((chunks[idx], scores[idx]))

    return results


print(f"\n   üî§ BM25 search results:")
bm25_results = bm25_search(test_query, k=3)
for i, (doc, score) in enumerate(bm25_results):
    scientist = doc.metadata['scientist_name']
    preview = doc.page_content[:80] + "..."
    print(f"      {i + 1}. {scientist} (score: {score:.3f}): {preview}")


   üî§ BM25 search results:
      1. Ada Lovelace (score: 5.453): Lovelace's notes also included visionary insights about the potential of computi...
      2. Albert Einstein (score: 5.296): Einstein married twice, first to Mileva Mariƒá, with whom he had three children, ...
      3. Albert Einstein (score: 4.813): **Photoelectric Effect

**: Explained the photoelectric effect in 1905, proposin...


In [23]:
def tfidf_search(query, k=5):
    """TF-IDF based keyword search."""
    query_vec = tfidf_vectorizer.transform([query])
    similarities = cosine_similarity(query_vec, tfidf_matrix).flatten()

    top_indices = np.argsort(similarities)[::-1][:k]
    results = []

    for idx in top_indices:
        if similarities[idx] > 0:
            results.append((chunks[idx], similarities[idx]))

    return results


print(f"\n   üìä TF-IDF search results:")
tfidf_results = tfidf_search(test_query, k=3)
for i, (doc, score) in enumerate(tfidf_results):
    scientist = doc.metadata['scientist_name']
    preview = doc.page_content[:80] + "..."
    print(f"      {i + 1}. {scientist} (score: {score:.3f}): {preview}")


   üìä TF-IDF search results:
      1. Albert Einstein (score: 0.239): # Albert Einstein (1879

1955)

Albert Einstein was a German-born theoretical ph...
      2. Albert Einstein (score: 0.146): Einstein died on April 18, 1955, in Princeton, New Jersey. His brain was preserv...
      3. Albert Einstein (score: 0.125): Einstein married twice, first to Mileva Mariƒá, with whom he had three children, ...


Score fusion - ≈ÇƒÖczenie wynik√≥w (ranking√≥w)

In [24]:
# 5. Score Fusion Strategies
print("\n5Ô∏è‚É£ Score fusion strategies:")


def normalize_scores(scores, method='min_max'):
    """Normalize scores to 0-1 range."""
    scores = np.array(scores)
    if method == 'min_max':
        min_score, max_score = scores.min(), scores.max()
        if max_score > min_score:
            return (scores - min_score) / (max_score - min_score)
    elif method == 'z_score':
        mean, std = scores.mean(), scores.std()
        if std > 0:
            return (scores - mean) / std
    return scores


def reciprocal_rank_fusion(results_list, k=60):
    """Combine rankings using Reciprocal Rank Fusion."""
    doc_scores = {}

    for results in results_list:
        for rank, (doc, _) in enumerate(results):
            doc_id = id(doc)  # Use object id as unique identifier
            if doc_id not in doc_scores:
                doc_scores[doc_id] = {'doc': doc, 'score': 0}
            doc_scores[doc_id]['score'] += 1 / (k + rank + 1)

    # Sort by combined score
    sorted_results = sorted(doc_scores.values(), key=lambda x: x['score'], reverse=True)
    return [(item['doc'], item['score']) for item in sorted_results]


def weighted_score_fusion(vector_results, keyword_results, vector_weight=0.6):
    """Combine results using weighted score fusion."""
    # Normalize scores
    vector_scores = [score for _, score in vector_results]
    keyword_scores = [score for _, score in keyword_results]

    norm_vector_scores = normalize_scores(vector_scores)
    norm_keyword_scores = normalize_scores(keyword_scores)

    # Create combined results
    doc_scores = {}

    # Add vector results
    for i, (doc, _) in enumerate(vector_results):
        doc_id = id(doc)
        doc_scores[doc_id] = {
            'doc': doc,
            'vector_score': norm_vector_scores[i],
            'keyword_score': 0
        }

    # Add keyword results
    for i, (doc, _) in enumerate(keyword_results):
        doc_id = id(doc)
        if doc_id in doc_scores:
            doc_scores[doc_id]['keyword_score'] = norm_keyword_scores[i]
        else:
            doc_scores[doc_id] = {
                'doc': doc,
                'vector_score': 0,
                'keyword_score': norm_keyword_scores[i]
            }

    # Calculate combined scores
    for doc_id in doc_scores:
        doc_scores[doc_id]['combined_score'] = (
                vector_weight * doc_scores[doc_id]['vector_score'] +
                (1 - vector_weight) * doc_scores[doc_id]['keyword_score']
        )

    # Sort by combined score
    sorted_results = sorted(doc_scores.values(), key=lambda x: x['combined_score'], reverse=True)
    return [(item['doc'], item['combined_score']) for item in sorted_results]



5Ô∏è‚É£ Score fusion strategies:


Reciprocal Rank Fusion

$$
RRF = \sum{ \frac{1}{{k + rank + 1}} }
$$

Reciprocal Rank Fusion

$$
WSF = w * score_1 + (1-w) * score_2
$$

In [25]:
# Test fusion strategies
fusion_query = "Einstein's theory of relativity and light"
print(f"\n   üîç Fusion test query: {fusion_query}")

vector_results = vector_search(fusion_query, k=5)
bm25_results = bm25_search(fusion_query, k=5)


   üîç Fusion test query: Einstein's theory of relativity and light


In [26]:
print(f"\n   üîó Reciprocal Rank Fusion:")
rrf_results = reciprocal_rank_fusion([vector_results, bm25_results])
for i, (doc, score) in enumerate(rrf_results[:3]):
    scientist = doc.metadata['scientist_name']
    preview = doc.page_content[:60] + "..."
    print(f"      {i + 1}. {scientist} (RRF: {score:.3f}): {preview}")


   üîó Reciprocal Rank Fusion:
      1. Albert Einstein (RRF: 0.016): # Albert Einstein (1879

1955)

Albert Einstein was a German...
      2. Albert Einstein (RRF: 0.016): # Albert Einstein (1879

1955)

Albert Einstein was a German...
      3. Albert Einstein (RRF: 0.016): **Photoelectric Effect

**: Explained the photoelectric effe...


In [27]:
print(f"\n   ‚öñÔ∏è Weighted Score Fusion (60% vector, 40% keyword):")
wsf_results = weighted_score_fusion(vector_results, bm25_results, vector_weight=0.6)
for i, (doc, score) in enumerate(wsf_results[:3]):
    scientist = doc.metadata['scientist_name']
    preview = doc.page_content[:60] + "..."
    print(f"      {i + 1}. {scientist} (WSF: {score:.3f}): {preview}")


   ‚öñÔ∏è Weighted Score Fusion (60% vector, 40% keyword):
      1. Albert Einstein (WSF: 0.600): # Albert Einstein (1879

1955)

Albert Einstein was a German...
      2. Albert Einstein (WSF: 0.400): # Albert Einstein (1879

1955)

Albert Einstein was a German...
      3. Albert Einstein (WSF: 0.397): **Photoelectric Effect

**: Explained the photoelectric effe...


Query expansion

Query expansion (rozszerzanie zapyta≈Ñ) to technika, kt√≥ra polega na automatycznym "wzbogaceniu" zapytania u≈ºytkownika o dodatkowe s≈Çowa kluczowe, synonimy, terminy techniczne lub kontekstowe informacje, aby system wyszukiwania m√≥g≈Ç znale≈∫ƒá szerszy i bardziej trafny zestaw dokument√≥w. Problem, kt√≥ry rozwiƒÖzuje, jest prosty: ludzie czƒôsto wpisujƒÖ zapytania kr√≥tkie i nieprecyzyjne ("Newton‚Äôs work"), podczas gdy dokumenty w bazie u≈ºywajƒÖ zupe≈Çnie innych okre≈õle≈Ñ ("laws of motion", "gravity research").

Istnieje kilka klas strategii query expansion:

Proste metody leksykalne, takie jak synonimy, odmiany s≈Ç√≥w i s≈Çowa bliskoznaczne, poprawiajƒÖ podstawowe dopasowanie s≈Ç√≥w kluczowych. 

Metody konceptualne, oparte na mapie pojƒôƒá lub ontologii, dodajƒÖ terminy logicznie zwiƒÖzane z tematem (np. "Einstein ‚Üí relativity, spacetime"). 

LLM-based query expansion generuje nowe zapytania lub ich warianty, w tym techniczne, opisowe lub alternatywne, dziƒôki czemu lepiej pokrywa r√≥≈ºne sposoby opisu tego samego zjawiska. 

Zadanie: 

1. Wiƒôcej przyk≈Çad√≥w w `1_metadata_filtering.py`,`2_hybrid_search.py`
2. Plik `3_query_expansion.py`



Post-processing: re-ranking

Re-ranking to drugi etap wyszukiwania, kt√≥ry bierze dokumenty znalezione w pierwszym kroku (np. przez wektory lub BM25) i ponownie je sortuje wed≈Çug trafno≈õci - ale dok≈Çadniejszym, inteligentniejszym modelem.

üí° Po co robi siƒô re-ranking?
Bo same wektory nie sƒÖ idealne.

Embeddingi patrzƒÖ tylko na odleg≈Ço≈õƒá wektor√≥w, bez rozumienia sk≈Çadni.

BM25 patrzy tylko na s≈Çowa, nie na semantykƒô.

Cross-encoder czy LLM czyta ca≈Çe zdanie/dokument + pytanie i ocenia trafno≈õƒá.

Dlatego re-ranking daje najwy≈ºszƒÖ jako≈õƒá retrievalu w RAG.

üß† Czym jest cross-encoder?

Cross-encoder to rodzaj sieci neuronowej, kt√≥ry bierze dwa teksty naraz - np. pytanie + dokument - i przetwarza je wsp√≥lnie, w jednym ciƒÖgu token√≥w, ≈ºeby oceniƒá, jak bardzo do siebie pasujƒÖ.

Przyk≈Çad pary wej≈õciowej:

[CLS] What did Einstein discover? [SEP] Einstein developed the theory of relativity... [SEP]

Model czyta to jako jeden po≈ÇƒÖczony tekst i oblicza pojedynczy wynik (score), np. 0.91 - im wy≈ºej, tym bardziej dokument odpowiada zapytaniu.

To naprawdƒô jest sieƒá neuronowa, najczƒô≈õciej Transformer (np. BERT).

In [28]:
# 1. Load and Prepare Documents
print("\n1Ô∏è‚É£ Loading documents for re-ranking:")

data_dir = "data/scientists_bios"
loader = DirectoryLoader(data_dir, glob="*.txt")
documents = loader.load()

# Add metadata
for doc in documents:
    filename = os.path.basename(doc.metadata['source']).replace('.txt', '')
    doc.metadata['scientist_name'] = filename

# Chunk documents
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=800,
    chunk_overlap=100
)
chunks = text_splitter.split_documents(documents)

print(f"   Loaded {len(documents)} documents, created {len(chunks)} chunks")

# Build vector store
embeddings = AzureOpenAIEmbeddings(model="text-embedding-3-small")
vector_store = InMemoryVectorStore(embeddings)
vector_store.add_documents(documents=chunks)

print(f"   ‚úÖ Vector store ready with {len(chunks)} indexed chunks")


1Ô∏è‚É£ Loading documents for re-ranking:
   Loaded 5 documents, created 28 chunks
   ‚úÖ Vector store ready with 28 indexed chunks


In [29]:
from sentence_transformers import CrossEncoder

# 2. Load Cross-Encoder Models
print("\n2Ô∏è‚É£ Loading cross-encoder models for re-ranking:")

# Load different cross-encoder models for comparison
cross_encoders = {}

try:
    # Lightweight cross-encoder for general ranking
    cross_encoders['ms-marco'] = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
    print("   ‚úÖ MS-MARCO MiniLM cross-encoder loaded")
except Exception as e:
    print(f"   ‚ö†Ô∏è Failed to load MS-MARCO model: {e}")

try:
    # More specific cross-encoder for question-answering
    cross_encoders['qnli'] = CrossEncoder('cross-encoder/qnli-electra-base')
    print("   ‚úÖ QNLI Electra cross-encoder loaded")
except Exception as e:
    print(f"   ‚ö†Ô∏è Failed to load QNLI model: {e}")

if not cross_encoders:
    print("   ‚ö†Ô∏è No cross-encoders loaded, using fallback scoring")


2Ô∏è‚É£ Loading cross-encoder models for re-ranking:
   ‚úÖ MS-MARCO MiniLM cross-encoder loaded
   ‚úÖ QNLI Electra cross-encoder loaded


In [30]:
# Test Individual Re-ranking Methods
print("\n4Ô∏è‚É£ Testing individual re-ranking methods:")

# Get initial results for testing
test_query = "What did Einstein discover about the universe?"
initial_results = vector_store.similarity_search_with_score(test_query, k=6)

print(f"\n   üîç Test query: {test_query}")
print(f"\n   üìä Initial vector search results:")
for i, (doc, score) in enumerate(initial_results):
    scientist = doc.metadata['scientist_name']
    preview = doc.page_content[:30].replace("\n", "\\n") + "..."
    print(f"      {i + 1}. {scientist} (score: {score:.3f}): {preview}")


4Ô∏è‚É£ Testing individual re-ranking methods:

   üîç Test query: What did Einstein discover about the universe?

   üìä Initial vector search results:
      1. Albert Einstein (score: 0.513): # Albert Einstein (1879\n\n1955)...
      2. Albert Einstein (score: 0.493): Einstein died on April 18, 195...
      3. Albert Einstein (score: 0.465): **Photoelectric Effect\n\n**: Ex...
      4. Isaac Newton (score: 0.464): **Universal Gravitation\n\n**: N...
      5. Albert Einstein (score: 0.404): Einstein married twice, first ...
      6. Isaac Newton (score: 0.355): Known for his difficult person...


Cross-encoder model ranking

In [31]:
# Basic Re-ranking Functions
print("\n3Ô∏è‚É£ Implementing re-ranking functions:")


def cross_encoder_rerank(query, documents, model_name='ms-marco', top_k=None):
    """Re-rank documents using cross-encoder models."""
    if model_name not in cross_encoders:
        print(f"   ‚ö†Ô∏è Model {model_name} not available, returning original order")
        return documents

    model = cross_encoders[model_name]

    # Prepare query-document pairs
    query_doc_pairs = [(query, doc.page_content) for doc in documents]

    # Get relevance scores
    scores = model.predict(query_doc_pairs)

    # Sort documents by scores
    doc_score_pairs = list(zip(documents, scores))
    doc_score_pairs.sort(key=lambda x: x[1], reverse=True)

    # Return top_k or all documents
    if top_k:
        return doc_score_pairs[:top_k]
    else:
        return doc_score_pairs


3Ô∏è‚É£ Implementing re-ranking functions:


In [32]:
print(f"\n   üéØ Cross-encoder re-ranking:")
documents_only = [doc for doc, score in initial_results]

for model_name in cross_encoders:
    reranked = cross_encoder_rerank(test_query, documents_only, model_name, top_k=4)
    print(f"\n   {model_name.upper()} re-ranking:")
    for i, (doc, score) in enumerate(reranked):
        scientist = doc.metadata['scientist_name']
        preview = doc.page_content[:30].replace("\n", "\\n") + "..."
        print(f"      {i + 1}. {scientist} (score: {score:.3f}): {preview}")
        # find original id
        for idx, (orig_doc, orig_score) in enumerate(initial_results):
            if orig_doc == doc:
                original_id = idx
                break
        print(f"         (original rank: {original_id})")


   üéØ Cross-encoder re-ranking:



   MS-MARCO re-ranking:
      1. Albert Einstein (score: 3.893): # Albert Einstein (1879\n\n1955)...
         (original rank: 0)
      2. Albert Einstein (score: 1.770): Einstein died on April 18, 195...
         (original rank: 1)
      3. Albert Einstein (score: -1.362): **Photoelectric Effect\n\n**: Ex...
         (original rank: 2)
      4. Isaac Newton (score: -2.035): **Universal Gravitation\n\n**: N...
         (original rank: 3)

   QNLI re-ranking:
      1. Albert Einstein (score: 0.159): # Albert Einstein (1879\n\n1955)...
         (original rank: 0)
      2. Albert Einstein (score: 0.003): **Photoelectric Effect\n\n**: Ex...
         (original rank: 2)
      3. Isaac Newton (score: 0.002): **Universal Gravitation\n\n**: N...
         (original rank: 3)
      4. Albert Einstein (score: 0.001): Einstein married twice, first ...
         (original rank: 4)


LLM-as-a-judge

In [33]:
from pydantic import BaseModel, Field


class LLMResponse(BaseModel):
    score: int = Field(..., description="Relevance score from 1 to 10")
    explanation: str = Field(..., description="Brief explanation for the score")


def llm_relevance_scoring(query, documents, llm):
    """Use LLM to score document relevance."""
    relevance_prompt = ChatPromptTemplate.from_template("""
Rate the relevance of the following document to the query on a scale of 1-10.
Consider how well the document answers the question or provides relevant information.

Query: {query}

Document: {document}

Provide only a numeric score (1-10) with brief explanation
""")

    scored_documents = []

    for doc in documents:
        try:
            llm_with_structured_output = llm.with_structured_output(LLMResponse)

            response = llm_with_structured_output.invoke(
                relevance_prompt.format(
                    query=query,
                    document=doc.page_content[:500]  # Limit content for efficiency
                )
            )

            scored_documents.append((doc, response.score))

        except Exception as e:
            print(f"   ‚ö†Ô∏è LLM scoring failed for document: {e}")
            scored_documents.append((doc, 5.0))  # Default score

    # Sort by score
    scored_documents.sort(key=lambda x: x[1], reverse=True)
    return scored_documents

In [34]:
llm = AzureChatOpenAI(model="gpt-5-nano")
print(f"\n   ü§ñ LLM relevance scoring:")
llm_reranked = llm_relevance_scoring(test_query, documents_only[:4], llm)
for i, (doc, score) in enumerate(llm_reranked):
    scientist = doc.metadata['scientist_name']
    preview = doc.page_content[:30].replace("\n", "\\n") + "..."
    print(f"      {i + 1}. {scientist} (score: {score:.1f}): {preview}")
    # find original id
    for idx, (orig_doc, orig_score) in enumerate(initial_results):
        if orig_doc == doc:
            original_id = idx
            break
    print(f"         (original rank: {original_id})")


   ü§ñ LLM relevance scoring:
      1. Albert Einstein (score: 7.0): # Albert Einstein (1879\n\n1955)...
         (original rank: 0)
      2. Albert Einstein (score: 3.0): Einstein died on April 18, 195...
         (original rank: 1)
      3. Albert Einstein (score: 3.0): **Photoelectric Effect\n\n**: Ex...
         (original rank: 2)
      4. Isaac Newton (score: 1.0): **Universal Gravitation\n\n**: N...
         (original rank: 3)


Uwaga: Modele jƒôzykowe nie sƒÖ najlepsze to oceny w skali numerycznej i mogƒÖ byƒá kosztowne pod wzglƒôdem zasob√≥w.

Ensemble rerank

In [35]:
def ensemble_rerank(query, documents, methods=['cross_encoder', 'llm'], weights=None):
    """Combine multiple re-ranking methods."""
    if weights is None:
        weights = [1.0] * len(methods)

    # Normalize weights
    total_weight = sum(weights)
    weights = [w / total_weight for w in weights]

    # Store scores for each method as lists
    all_method_scores = []

    for method in methods:
        if method == 'cross_encoder' and cross_encoders:
            model_name = list(cross_encoders.keys())[0]  # Use first available
            reranked = cross_encoder_rerank(query, documents, model_name)

            # Normalize LLM scores to 0-1 range
            max_score = max(score for _, score in reranked) if reranked else 1
            min_score = min(score for _, score in reranked) if reranked else -1

            normalize = lambda s: (s - min_score) / (max_score - min_score) if max_score > min_score else 0.0

            # Create score list in same order as documents
            scores = []
            reranked_dict = {doc.page_content: normalize(score) for doc, score in reranked}
            for doc in documents:
                scores.append(reranked_dict.get(doc.page_content, 0))
            all_method_scores.append(scores)

        elif method == 'llm':
            reranked = llm_relevance_scoring(query, documents[:len(documents)], llm)
            # Normalize LLM scores to 0-1 range
            max_score = max(score for _, score in reranked) if reranked else 10
            min_score = min(score for _, score in reranked) if reranked else 0

            normalize = lambda s: (s - min_score) / (max_score - min_score) if max_score > min_score else 0.0

            scores = []
            reranked_dict = {doc.page_content: normalize(score) for doc, score in reranked}
            for doc in documents:
                scores.append(reranked_dict.get(doc.page_content, 0))
            all_method_scores.append(scores)

        print("Method:", method)
        print("Scores:", all_method_scores[-1])
        print()

    # Combine scores
    final_scores = []
    for i, doc in enumerate(documents):
        combined_score = 0
        for j, method_scores in enumerate(all_method_scores):
            if i < len(method_scores):
                combined_score += weights[j] * method_scores[i]
        final_scores.append((doc, combined_score))

    # Sort by combined scores
    final_scores.sort(key=lambda x: x[1], reverse=True)
    return final_scores

In [36]:
ensemble_reranked = ensemble_rerank(
    test_query,
    documents_only[:5],
    methods=['cross_encoder', 'llm'],
    weights=[0.6, 0.4]
)

Method: cross_encoder
Scores: [np.float32(1.0), np.float32(0.65593344), np.float32(0.14811605), np.float32(0.039062593), np.float32(0.0)]

Method: llm
Scores: [1.0, 0.5, 0.5, 0.0, 0.0]



In [37]:
for i, (doc, score) in enumerate(ensemble_reranked):
    scientist = doc.metadata['scientist_name']
    preview = doc.page_content[:30].replace("\n", "\\n") + "..."
    print(f"      {i + 1}. {scientist} (ensemble score: {score:.3f}): {preview}")
    # find original id
    for idx, (orig_doc, orig_score) in enumerate(initial_results):
        if orig_doc == doc:
            original_id = idx
            break
    print(f"         (original rank: {original_id})")

      1. Albert Einstein (ensemble score: 1.000): # Albert Einstein (1879\n\n1955)...
         (original rank: 0)
      2. Albert Einstein (ensemble score: 0.594): Einstein died on April 18, 195...
         (original rank: 1)
      3. Albert Einstein (ensemble score: 0.289): **Photoelectric Effect\n\n**: Ex...
         (original rank: 2)
      4. Isaac Newton (ensemble score: 0.023): **Universal Gravitation\n\n**: N...
         (original rank: 3)
      5. Albert Einstein (ensemble score: 0.000): Einstein married twice, first ...
         (original rank: 4)


Zadanie: Pozosta≈Ça czƒô≈õƒá skryptu `4_reranking.py`:

* performance & quality benchmark
* re-ranking RAG chain
* effectiveness analysis

RAG evaluation

Do oceny RAGa na zajƒôciach u≈ºyjemy biblioteki ragas, natomiast polecam zapoznaƒá siƒô z DeepEval.

| Cecha                        | **RAGAS**                                                      | **DeepEval**                                                                       |
| ---------------------------- | -------------------------------------------------------------- | ---------------------------------------------------------------------------------- |
| **Cel**                      | Ocenianie system√≥w **RAG**                                     | Ocenianie **dowolnych LLM** (RAG, generacja, reasoning, kod, bezpiecze≈Ñstwo, itd.) |
| **Zakres metryk**            | Tylko RAG (precision, recall, faithfulness, answer relevancy‚Ä¶) | RAG + generacja tekstu + generacja kodu + testy behawioralne + w≈Çasne metryki      |
| **Spos√≥b dzia≈Çania**         | G≈Ç√≥wnie **LLM-as-a-judge** + embeddingi                        | G≈Ç√≥wnie **LLM-as-a-judge**, ale standardowo z testami jednostkowymi (pytest/CI)    |
| **Zastosowania**             | Ewaluacja projekt√≥w RAG (akademicko, benchmarki)               | Testy jako≈õci LLM w produkcji (CI/CD)                                              |
| **Integracja z narzƒôdziami** | Minimalna (Python)                                             | Bardzo silna: pytest, GitHub Actions, CI/CD                                        |
| **Elastyczno≈õƒá**             | Metryki zdefiniowane ‚Äûna sztywno‚Äù                              | Mo≈ºesz budowaƒá **w≈Çasne metryki** i ‚Äûtest cases‚Äù                                   |
| **Z≈Ço≈ºono≈õƒá**                | ≈Åatwiejszy start                                               | Trochƒô wiƒôcej konfiguracji                                                         |
| **Typowa rola**              | ‚ÄûJak dobrze m√≥j RAG dzia≈Ça?‚Äù                                   | ‚ÄûCzy m√≥j LLM dzia≈Ça poprawnie, stabilnie i bezpiecznie?‚Äù                           |


In [38]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser


def load_and_chunk(data_dir):
    loader = DirectoryLoader(data_dir, glob="*.txt")
    docs = loader.load()
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    return splitter.split_documents(docs)


def build_rag_system(chunks):
    embeddings = AzureOpenAIEmbeddings(model="text-embedding-3-small")
    vector_store = InMemoryVectorStore(embeddings)
    vector_store.add_documents(documents=chunks)

    retriever = vector_store.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 3}
    )

    llm = AzureChatOpenAI(model="gpt-4o-mini")

    prompt = ChatPromptTemplate.from_template("""
You are an assistant for question-answering tasks.
Use the following pieces of retrieved context to answer the question.
If you don't know the answer, just say that you don't know.
Use three sentences maximum and keep the answer concise.

Question: {question}

Context: {context}

Answer:
""")

    rag_chain = (
            {"context": retriever, "question": RunnablePassthrough()}
            | prompt
            | llm
            | StrOutputParser()
    )

    return rag_chain, retriever

Tworzymy teraz ground truth - LLM generuje odpowiedzi na podstawie pe≈Çnych dokument√≥w

In [39]:
def generate_ground_truths(questions, data_dir, expert_llm):
    loader = DirectoryLoader(data_dir, glob="*.txt")
    docs = loader.load()
    full_context = "\n\n".join([doc.page_content for doc in docs])

    ground_truths = []
    for q in questions:
        prompt = f"""You are a domain expert with complete knowledge of these scientists.
Based on the following complete biographies, provide a comprehensive, accurate answer.

Complete Biographies:
{full_context}

Question: {q}

Provide a detailed, factually accurate answer:"""
        ground_truths.append(expert_llm.invoke(prompt).content)
    return ground_truths

In [40]:
chunks = load_and_chunk(data_dir)
rag_chain, retriever = build_rag_system(chunks)
expert_llm = AzureChatOpenAI(model="gpt-5-nano")

Uwaga: do ka≈ºdego zadania wykorzystujemy inny model LLM

RAG system u≈ºywa modelu "gpt-4o-mini", podczas gdy generowanie odpowiedzi eksperckich wykorzystuje "gpt-5",
natomiast evaluacja u≈ºywa "gpt-4.1"

In [41]:
questions = [
    "What did Marie Curie win Nobel Prizes for?",
    "What is Einstein's theory of relativity about?",
    "What are Newton's three laws of motion?",
    "What did Charles Darwin discover?",
    "What was Ada Lovelace's contribution to computing?"
]

In [42]:
print("Generating ground truth answers using expert LLM...")
ground_truths = generate_ground_truths(questions, data_dir, expert_llm)

Generating ground truth answers using expert LLM...


In [43]:
from ragas import evaluate, EvaluationDataset
from ragas.metrics import ContextPrecision, ContextRecall, Faithfulness, AnswerRelevancy, FactualCorrectness
from ragas.llms import LangchainLLMWrapper
from ragas.dataset_schema import SingleTurnSample

In [44]:
samples = []
for q, gt in zip(questions, ground_truths):
    answer = rag_chain.invoke(q)
    contexts = [doc.page_content for doc in retriever.invoke(q)]
    samples.append(SingleTurnSample(
        user_input=q,  # user question
        response=answer,  # generated answer by our RAG system
        retrieved_contexts=contexts,  # retrieved contexts by our RAG system
        reference=gt  # ground truth answer - generated by expert LLM
    ))

https://docs.ragas.io/en/latest/concepts/metrics/available_metrics/#retrieval-augmented-generation

üîµ Context Precision

Mierzy, jaka czƒô≈õƒá zwr√≥conych przez retriever kontekst√≥w by≈Ça naprawdƒô potrzebna do odpowiedzi.
Im wy≈ºsza precision, tym mniej ‚Äû≈õmieciowych‚Äù chunk√≥w.

üü£ Context Recall

Mierzy, czy retriever zwr√≥ci≈Ç wszystkie potrzebne konteksty wymagane do odpowiedzi.
Niska recall oznacza, ≈ºe kluczowe informacje nie zosta≈Çy odnalezione.

üü¢ Faithfulness

Sprawdza, czy odpowied≈∫ modelu jest zgodna z przekazanym kontekstem, bez halucynacji.
Model nie mo≈ºe dodawaƒá rzeczy spoza retrieved contexts.

üü† Answer Relevancy

Mierzy, na ile odpowied≈∫ faktycznie odpowiada na pytanie u≈ºytkownika.
Ocena: ‚Äûczy odpowied≈∫ jest na temat?‚Äù.

üî¥ Factual Correctness

Sprawdza, czy odpowied≈∫ jest faktycznie poprawna, por√≥wnujƒÖc jƒÖ z ground truth.
Nie chodzi o zgodno≈õƒá z kontekstem, tylko z rzeczywisto≈õciƒÖ.

In [45]:
evaluator_llm = LangchainLLMWrapper(AzureChatOpenAI(model="gpt-4.1", temperature=0))
embeddings = AzureOpenAIEmbeddings(model="text-embedding-3-small")

  evaluator_llm = LangchainLLMWrapper(AzureChatOpenAI(model="gpt-4.1", temperature=0))


In [46]:
sample = SingleTurnSample(
    user_input="Where is the Eiffel Tower located?",
    retrieved_contexts=[
        "The Eiffel Tower is located in Paris.",
        "The Brandenburg Gate is located in Berlin.",
        "The Eiffel Tower is located in France. Paris is the capital city of France."
    ],
    reference="The Eiffel Tower is located in Paris."
)
sample_dataset = EvaluationDataset(samples=[sample])
results = evaluate(
    dataset=sample_dataset,
    llm=evaluator_llm,
    metrics=[ContextPrecision(llm=evaluator_llm)]
)
results

Evaluating: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:07<00:00,  7.57s/it]


{'context_precision': 0.8333}

In [47]:
# rƒôcznie:
# https://docs.ragas.io/en/latest/concepts/metrics/available_metrics/context_precision/

precision_1 = 1 / (1 + 0)
v_1 = 1  # poniewa≈º trafili≈õmy na pasujƒÖcƒÖ informacjƒô w pierwszym kontek≈õcie

precision_2 = 1 / (1 + 1)
v_2 = 0  # poniewa≈º nie trafili≈õmy na pasujƒÖcƒÖ informacjƒô w

precision_3 = 2 / (2 + 1)
v_3 = 1  # poniewa≈º trafili≈õmy na pasujƒÖcƒÖ informacjƒô w tr

context_precision = (v_1 * precision_1 + v_2 * precision_2 + v_3 * precision_3) / (v_1 + v_2 + v_3)
print(f"Calculated Context Precision: {context_precision:.3f}")

Calculated Context Precision: 0.833


In [48]:
eval_dataset = EvaluationDataset(samples=samples)

metrics = [
    ContextPrecision(llm=evaluator_llm),
    ContextRecall(llm=evaluator_llm),
    Faithfulness(llm=evaluator_llm),
    AnswerRelevancy(llm=evaluator_llm),
    FactualCorrectness(llm=evaluator_llm)
]

In [49]:
result = evaluate(dataset=eval_dataset, metrics=metrics, llm=evaluator_llm, embeddings=embeddings)
df = result.to_pandas()

Evaluating:   0%|          | 0/25 [00:00<?, ?it/s]LLM returned 1 generations instead of requested 3. Proceeding with 1 generations.
Evaluating:   4%|‚ñç         | 1/25 [00:05<02:01,  5.08s/it]LLM returned 1 generations instead of requested 3. Proceeding with 1 generations.
LLM returned 1 generations instead of requested 3. Proceeding with 1 generations.
Evaluating:  32%|‚ñà‚ñà‚ñà‚ñè      | 8/25 [00:48<01:22,  4.88s/it]LLM returned 1 generations instead of requested 3. Proceeding with 1 generations.
Evaluating: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 25/25 [01:46<00:00,  4.28s/it]


In [50]:
df

Unnamed: 0,user_input,retrieved_contexts,response,reference,context_precision,context_recall,faithfulness,answer_relevancy,factual_correctness(mode=f1)
0,What did Marie Curie win Nobel Prizes for?,[Marie Sklodowska - Curie (1867-1934) Marie Sk...,Marie Curie won the Nobel Prize in Physics in ...,Marie Curie won two Nobel Prizes:\n\n- 1903 No...,0.5,0.857143,1.0,0.879175,0.78
1,What is Einstein's theory of relativity about?,[# Albert Einstein (1879\n\n1955)\n\nAlbert Ei...,Einstein's theory of relativity comprises two ...,Einstein‚Äôs theory of relativity (often simply ...,1.0,0.277778,0.888889,0.725321,0.26
2,What are Newton's three laws of motion?,[Newton attended The King's School in Grantham...,Newton's three laws of motion are: 1) An objec...,Isaac Newton‚Äôs three laws of motion are:\n\n1)...,0.0,0.0,0.285714,1.0,0.26
3,What did Charles Darwin discover?,[# Charles Darwin (1809\n\n1882)\n\nCharles Ro...,Charles Darwin discovered the theory of evolut...,Charles Darwin‚Äôs flagship discovery was the th...,1.0,0.909091,0.8,0.774263,0.38
4,What was Ada Lovelace's contribution to comput...,"[Throughout her adult life, Lovelace struggled...",Ada Lovelace is known for her work on Charles ...,Ada Lovelace‚Äôs contribution to computing cente...,0.583333,0.8,1.0,0.86159,0.64


Zadanie domowe: w module multi_rag_evaluation znajduje siƒô kod do oceny systemu RAG z wykorzystaniem r√≥≈ºnych metryk i r√≥≈ºnych podej≈õƒá RAGowych