In [1]:
print('Hello!\nIt`s a Homework Project: Build a RAG (Retrieval-Augmented Generation) System')

Hello!
It`s a Homework Project: Build a RAG (Retrieval-Augmented Generation) System


## Load the prepared database ##

In [2]:
from langchain_community.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings

embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Load existing DB
db = Chroma(
    persist_directory="./chroma_db",
    embedding_function=embedding
)

print(f"Total documents in the database: {db._collection.count()}")

  embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
  from .autonotebook import tqdm as notebook_tqdm
  db = Chroma(


Total documents in the database: 172


In [3]:
# try to search for documents that are most relevant to a given query in terms of vector similarity
query = "Who became the new Chancellor of Germany in 2025?"
results = db.similarity_search(query, k=2)

for i, res in enumerate(results):
    print(f"\nRESULT {i+1}")
    print("TEXT:", res.page_content[:300])
    print("METADATA:", res.metadata)



RESULT 1
TEXT: However, both the government and the Bundestag will remain fully functional in terms of global issues in the transition phase until a new government is formed.

When Scholz was confirmed as the future chancellor in 2021, his predecessor Angela Merkel, while still in office, took the future German le
METADATA: {'date': '2024-12-17', 'source': 'article9_dw', 'topic': 'German Elections 2025', 'region': 'Germany', 'language': 'en', 'author': 'dw.com', 'url': 'https://www.dw.com/en/german-election-scholz-loses-confidence-vote/live-71063891', 'keywords': 'German election, German, loses confidence , vote, Scholz'}

RESULT 2
TEXT: Title: Friedrich Merz wins second vote to become Germany’s chancellor
Source: Financial Times
Date: May 6, 2025

Friedrich Merz wins second vote to become Germany’s chancellor.
METADATA: {'topic': 'German Elections 2025', 'language': 'en', 'keywords': 'Merz, wins, second vote, german, chancellor', 'source': 'article3_fin_times', 'region': 'Germany', '

## Stage 2: Retrieval-Augmented Generation (RAG) ##

In [None]:
# Step 1: Create a Retriever object from my database
retriever = db.as_retriever(
    search_type="similarity", 
    search_kwargs={"k": 4}
)

retriever_with_filter = db.as_retriever(
    search_type="similarity",
    search_kwargs={
        "k": 4,
        "filter": {
            "$and": [
                {"region": "Germany"},
                {"language": "en"},
                {"author": {"$in": ["The Guardian", "AP News", "dw.com"]}}
            ]
        }
    }
)

# Step 2: Connecting the Gemini-2.0-Flash model
import os
from dotenv import load_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI

load_dotenv()

# Get API-Key from .env
api_key = os.getenv("GOOGLE_API_KEY")

llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    temperature=0.3,
    google_api_key=api_key
)

# Step 3: Create RAG Chain (connect Retriever and LLM into a RAG pipeline using LangChain)
from langchain.chains import RetrievalQA

rag_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever= retriever, #retriever, retriever_with_filter, multi_query_retriever
    chain_type="stuff",  #sstuff="put all chunks into one text and pass it to the model", Позже можно заменить на кастомную цепочку с продуманным prompt.
    return_source_documents=True  # return not only the answer, but also the documents from which it was obtained
)

# Step 4: Check the request
query = "How did Scholz's resignation affect political stability in Germany?" #"What led to the early parliamentary elections in Germany in 2025?" #"Who is the Chancellor of Germany?" # "Who became the new Chancellor of Germany in 2025?"
result = rag_chain.invoke(query)

print("ANSWER:")
print(result["result"])

print("\nSOURCES:")
for doc in result["source_documents"]:
    print(doc.metadata['source'], "-", doc.metadata.get('url', ''))

ANSWER:
Olaf Scholz lost a confidence vote on December 16, 2024, after his coalition government collapsed in November, paving the way for new elections in February. Following the loss of the vote, Scholz led a minority government. On March 25, 2025, German President Frank-Walter Steinmeier officially dismissed Chancellor Olaf Scholz and his 14 cabinet ministers. There was widespread discontent and not much enthusiasm for any of the candidates.

SOURCES:
article9_dw - https://www.dw.com/en/german-election-scholz-loses-confidence-vote/live-71063891
article9_dw - https://www.dw.com/en/german-election-scholz-loses-confidence-vote/live-71063891
article8_timesnownews - https://www.timesnownews.com/world/europe/german-president-dissolves-parliament-as-scholz-loses-majority-snap-polls-set-for-february-article-116714492
article5_apnews - https://apnews.com/article/germany-election-merz-scholz-far-right-afd-ebf16ed38e0beaff7fed9a6d29b32a24
article9_dw - https://www.dw.com/en/german-election-scho

## RAG-Chain + Memory + Prompt Engineering ##

In [None]:

from langchain.chains import ConversationalRetrievalChain

# New RAG-chain with memory
rag_chain_with_memory = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever, #retriever, retriever_with_filter, multi_query_retriever
    return_source_documents=True
)

# Initialize memory
chat_history = []

# Interactive mode: user can ask 3+ questions
print("\n🧠 RAG chat with memory. Enter 'exit' to quit.")

while True:
    query = input("❓ QUESTION: ")
    if query.lower() in ("exit", "quit"):
        print("👋 Exit chat.")
        break

    result = rag_chain_with_memory.invoke({
        "question": query,
        "chat_history": chat_history
    })

    answer = result["answer"]
    print("💬 ANSWER:", answer)

    print("\n📚 SOURCES:")
    for doc in result["source_documents"]:
        print(doc.metadata['source'], "-", doc.metadata.get('url', ''))

    # Update memory
    chat_history.append((query, answer))



🧠 RAG-чат с памятью. Введите 'exit' для выхода.
💬 Ответ: Friedrich Merz became the new Chancellor of Germany in 2025.

📚 Источники:
article9_dw - https://www.dw.com/en/german-election-scholz-loses-confidence-vote/live-71063891
article3_fin_times - https://www.ft.com/content/48665ff1-b741-44dc-903e-2f54322a7127
article4_washpost - https://www.washingtonpost.com/world/2025/05/06/germany-government-merz-coalition/419e2d84-2a2f-11f0-a724-3bc879c9f843_story.html
article6_wiki - https://en.wikipedia.org/wiki/2024_German_government_crisis
💬 Ответ: Aus dem Kontext geht hervor, dass Olaf Scholz die sozialdemokratische Partei (SPD) vertritt.

📚 Источники:
article6_wiki - https://en.wikipedia.org/wiki/2024_German_government_crisis
article4_washpost - https://www.washingtonpost.com/world/2025/05/06/germany-government-merz-coalition/419e2d84-2a2f-11f0-a724-3bc879c9f843_story.html
article7_deepnewz - https://deepnewz.com/germany/german-president-steinmeier-dismisses-chancellor-scholz-14-ministers-t

## Multi-Query Retrieval ##

In [25]:
# --- Multi-Query Retrieval Setup ---

from langchain.retrievers.multi_query import MultiQueryRetriever

# Wrap the retriever with MultiQueryRetriever to retrieve more diverse documents
multiquery_retriever = MultiQueryRetriever.from_llm(
    retriever=retriever_with_filter,  # Use filtered retriever
    llm=llm
)

# --- RAG Chain with Multi-Query Retriever ---

rag_chain_multi = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=multiquery_retriever,
    chain_type="stuff",
    return_source_documents=True
)

# --- Run the multi-query RAG chain ---
query = "Why did Scholz resign as Chancellor?"
result_multi = rag_chain_multi.invoke(query)

print("ANSWER (Multi-Query):")
print(result_multi["result"])

print("\nSOURCES (Multi-Query):")
for doc in result_multi["source_documents"]:
    print(doc.metadata['source'], "-", doc.metadata.get('url', ''))


# --- Manual Multi-Query Retrieval with Custom Reformulated Questions ---

from langchain.chains.qa_with_sources import load_qa_with_sources_chain

# Step 1: Use each of reformulated questions to retrieve documents
multiquery_questions = [
    "Why did Olaf Scholz step down as Germany's Chancellor?",
    "What were the reasons behind Scholz's resignation?",
    "What caused the Chancellor of Germany to resign in 2025?",
    "Why did Germany's head of government leave office?",
    "What led to Olaf Scholz's departure from the Chancellorship?"
]

# Collect documents from all queries
all_docs = []
for q in multiquery_questions:
    docs = retriever_with_filter.get_relevant_documents(q)
    all_docs.extend(docs)

# Optional: deduplicate by content to avoid repeated chunks
unique_docs = {doc.page_content: doc for doc in all_docs}.values()

# Step 2: Build the QA chain
manual_multiquery_chain = load_qa_with_sources_chain(llm=llm, chain_type="stuff")

# Step 3: Use original query to ask over combined retrieved context
query = "Why did Scholz resign as Chancellor?"
result_manual = manual_multiquery_chain(
    {"input_documents": list(unique_docs), "question": query},
    return_only_outputs=True
)

# Step 4: Output answer and sources
print("ANSWER (Manual Multi-Query):")
print(result_manual["output_text"])

print("\nSOURCES (Manual Multi-Query):")
for doc in unique_docs:
    print(doc.metadata.get("source", "Unknown"), "-", doc.metadata.get("url", ""))



ANSWER (Multi-Query):
Olaf Scholz's minority coalition government lost a confidence vote, paving the way for new elections. He also conceded defeat for his center-left Social Democrats after what he called “a bitter election result.”

SOURCES (Multi-Query):
article9_dw - https://www.dw.com/en/german-election-scholz-loses-confidence-vote/live-71063891
article9_dw - https://www.dw.com/en/german-election-scholz-loses-confidence-vote/live-71063891
article9_dw - https://www.dw.com/en/german-election-scholz-loses-confidence-vote/live-71063891
article9_dw - https://www.dw.com/en/german-election-scholz-loses-confidence-vote/live-71063891
article9_dw - https://www.dw.com/en/german-election-scholz-loses-confidence-vote/live-71063891
article1_guardian - https://www.theguardian.com/world/2025/may/06/chaos-in-the-german-bundestag-whats-next-for-friedrich-merz
article2_guardian - https://www.theguardian.com/world/live/2025/may/06/friedrich-merz-german-chancellor-europe-live-latest-news?filterKeyEven

  docs = retriever_with_filter.get_relevant_documents(q)
See also the following migration guides for replacements based on `chain_type`:
stuff: https://python.langchain.com/docs/versions/migrating_chains/stuff_docs_chain
map_reduce: https://python.langchain.com/docs/versions/migrating_chains/map_reduce_chain
refine: https://python.langchain.com/docs/versions/migrating_chains/refine_chain
map_rerank: https://python.langchain.com/docs/versions/migrating_chains/map_rerank_docs_chain

  manual_multiquery_chain = load_qa_with_sources_chain(llm=llm, chain_type="stuff")
  result_manual = manual_multiquery_chain(


ANSWER (Manual Multi-Query):
Olaf Scholz lost a confidence vote after his coalition government collapsed due to infighting, leading to new elections in February (SOURCES: article5_apnews, article9_dw). Arguments over the national budget also played a major role in the collapse of Scholz's coalition (SOURCES: article9_dw).
SOURCES: article5_apnews, article9_dw

SOURCES (Manual Multi-Query):
article9_dw - https://www.dw.com/en/german-election-scholz-loses-confidence-vote/live-71063891
article9_dw - https://www.dw.com/en/german-election-scholz-loses-confidence-vote/live-71063891
article9_dw - https://www.dw.com/en/german-election-scholz-loses-confidence-vote/live-71063891
article9_dw - https://www.dw.com/en/german-election-scholz-loses-confidence-vote/live-71063891
article5_apnews - https://apnews.com/article/germany-election-merz-scholz-far-right-afd-ebf16ed38e0beaff7fed9a6d29b32a24
article9_dw - https://www.dw.com/en/german-election-scholz-loses-confidence-vote/live-71063891
article9_dw