In [1]:
# Notebook banner: greet the reader and state what this homework is about.
banner_lines = (
    'Hello!',
    'It`s a Homework Project: Build a RAG (Retrieval-Augmented Generation) System',
)
print(*banner_lines, sep='\n')

Hello!
It`s a Homework Project: Build a RAG (Retrieval-Augmented Generation) System


## Load the prepared database ##

In [2]:
from pathlib import Path

# Import the embeddings wrapper from langchain_community directly instead of
# going through the deprecated `langchain.embeddings` re-export.
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma

# Settings for the persisted vector store.
# NOTE(review): the embedding model presumably has to match the one used when
# the store was built - confirm against the indexing notebook.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
CHROMA_DIR = "./chroma_db"

# Fail fast when the prepared store is missing (e.g. the notebook was started
# from a different working directory). Without this check Chroma is expected
# to initialise a new, empty store at that path, and every later query would
# silently return nothing.
if not Path(CHROMA_DIR).is_dir():
    raise FileNotFoundError(
        f"Prepared Chroma database not found at {CHROMA_DIR!r}; "
        "build it first or fix the working directory."
    )

embedding = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

# Load existing DB
db = Chroma(
    persist_directory=CHROMA_DIR,
    embedding_function=embedding
)

# `_collection` is a private attribute of the Chroma wrapper; it is used here
# only as a quick sanity check that the store actually contains documents.
print(f"Total documents in the database: {db._collection.count()}")

  embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
  from .autonotebook import tqdm as notebook_tqdm
  db = Chroma(


Total documents in the database: 121


In [3]:
# Sanity check of the vector store: fetch the documents whose embeddings are
# closest to the question and show a short preview of each one.
PREVIEW_CHARS = 300  # how much of every document's text to print

query = "Who became the new Chancellor of Germany in 2025?"
results = db.similarity_search(query, k=2)

for rank, hit in enumerate(results, start=1):
    print(f"\nRESULT {rank}")
    preview = hit.page_content[:PREVIEW_CHARS]
    print("TEXT:", preview)
    print("METADATA:", hit.metadata)



RESULT 1
TEXT: Title: Friedrich Merz wins second vote to become Germany’s chancellor
Source: Financial Times
Date: May 6, 2025

Friedrich Merz wins second vote to become Germany’s chancellor.
METADATA: {'language': 'en', 'source': 'article3_fin_times', 'date': '2025-05-06', 'topic': 'German Elections 2025', 'region': 'Germany', 'keywords': 'Merz, wins, second vote, german, chancellor', 'url': 'https://www.ft.com/content/48665ff1-b741-44dc-903e-2f54322a7127', 'author': 'The Financial time'}

RESULT 2
TEXT: Title: Conservative leader Merz overcomes historic defeat to become Germany’s chancellor
Source: The Washington Post
Date: May 6, 2025


BERLIN (AP) — Conservative leader Friedrich Merz succeeded Tuesday in becoming Germany’s next chancellor, drawing applause and a palpable sense of relief in the pa
METADATA: {'region': 'Germany', 'keywords': 'Merz, government, Germany, coalition', 'author': 'The Washington post', 'date': '2025-05-06', 'source': 'article4_washpost', 'url': 'https://w

## Stage 2: Retrieval-Augmented Generation (RAG) ##

In [11]:
import os

from dotenv import load_dotenv
from langchain.chains import RetrievalQA
from langchain_google_genai import ChatGoogleGenerativeAI

# Step 1: Create a Retriever object from my database
# (similarity search that hands the 4 closest chunks to the chain)
retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 4})

# Step 2: Connecting the Gemini-2.0-Flash model
load_dotenv()

# Get API-Key from .env and stop with a clear message if it is missing,
# instead of passing None on to the model client.
api_key = os.getenv("GOOGLE_API_KEY")
if not api_key:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set. Add it to the .env file "
        "or export it before running this cell."
    )

llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    temperature=0.3,
    google_api_key=api_key
)

# Step 3: Create RAG Chain (connect Retriever and LLM into a RAG pipeline using LangChain)
rag_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    # "stuff" = put all retrieved chunks into one text and pass it to the model.
    # Later this can be replaced with a custom chain with a carefully designed prompt.
    chain_type="stuff",
    return_source_documents=True  # return not only the answer, but also the documents from which it was obtained
)

# Step 4: Check the request
# Other questions to try:
#   "Who is the Chancellor of Germany?"
#   "Who became the new Chancellor of Germany in 2025?"
query = "What led to the early parliamentary elections in Germany in 2025?"
# Pass the question under the chain's "query" input key explicitly.
result = rag_chain.invoke({"query": query})

print("ANSWER:")
print(result["result"])

print("\nSOURCES:")
for doc in result["source_documents"]:
    # Use .get() for both fields so a chunk with incomplete metadata
    # does not abort the listing with a KeyError.
    print(doc.metadata.get('source', 'unknown'), "-", doc.metadata.get('url', ''))


ANSWER:
Scholz called a vote of confidence in the ruling government on 16 December 2024, which was lost. As a result, an early 2025 federal election was held.

SOURCES:
article6_wiki - https://en.wikipedia.org/wiki/2024_German_government_crisis
article6_wiki - https://en.wikipedia.org/wiki/2024_German_government_crisis
article6_wiki - https://en.wikipedia.org/wiki/2024_German_government_crisis
article4_washpost - https://www.washingtonpost.com/world/2025/05/06/germany-government-merz-coalition/419e2d84-2a2f-11f0-a724-3bc879c9f843_story.html
