In [1]:
import os
from dotenv import load_dotenv
import numpy as np
from typing import List, Dict, Any, Optional
import warnings
warnings.filterwarnings("ignore")

from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.messages import HumanMessage, AIMessage

from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_ollama import ChatOllama
from langchain_huggingface import HuggingFaceEndpointEmbeddings
from langchain_community.document_loaders import TextLoader, PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

load_dotenv()

True

In [2]:
## Data ingestion and processing
sample_documents=[
    Document(
        page_content="""
        Artificial intelligence (AI) refers to the simulation of human intelligence in machines that are programmed to think, learn, and act like humans. AI can be used for a wide range of applications, including natural language processing, computer vision, robotics, and more.
        There are two main types of AI: narrow or weak AI, which is designed to perform specific tasks, and general or strong AI, which is capable of performing any intellectual task that a human can do. Narrow AI is often used in applications like voice assistants, recommendation systems, and image recognition, while general AI is still the subject of ongoing research.
        One of the key challenges in developing AI is ensuring that it is safe and ethical to use. This includes addressing issues like bias, privacy, and accountability. As AI becomes more advanced, it's important to consider how it will impact society and ensure that it is used for positive purposes.
        Overall, AI has the potential to revolutionize many industries and improve our lives in countless ways. However, it's important to approach its development with caution and ensure that it is used responsibly and ethically.
        """,
        metadata={"source": "AI Indtroduction", "page": 1, "topic": "AI"}
    ),
    Document(
        page_content="""
        Machine learning (ML) refers to the process of building algorithms that can learn from data and improve their performance over time. ML is a subset of AI and is used in a wide range of applications, including image recognition, natural language processing, fraud detection, and more.
        There are two main types of machine learning: supervised and unsupervised. Supervised learning involves training the algorithm on labeled data, where each input-output pair is known. Unsupervised learning involves training the algorithm on unlabeled data, where only the inputs are known.
        One of the key challenges in developing ML algorithms is ensuring that they are accurate and reliable. This includes addressing issues like overfitting, underfitting, and bias. As ML becomes more advanced, it's important to consider how it will impact society and ensure that it is used for positive purposes.
        Overall, machine learning has the potential to revolutionize many industries and improve our lives in countless ways. However, it's important to approach its development with caution and ensure that it is used responsibly and ethically.
        """,
        metadata={"source":"ML Basiscs", "page": 1, "topic": "ML"}
    ),
    Document(
        page_content="""
        Deep learning refers to a subset of machine learning that uses neural networks to learn from data. Neural networks are composed of layers of interconnected nodes or neurons, which process and transmit information through the network.
        There are two main types of neural networks: feedforward and recurrent. Feedforward neural networks are used for tasks like image recognition and natural language processing, while recurrent neural networks (RNNs) are used for tasks like speech recognition and time series analysis.
        One of the key challenges in developing deep learning models is ensuring that they are accurate and reliable. This includes addressing issues like overfitting, underfitting, and bias. As deep learning becomes more advanced, it's important to consider how it will impact society and ensure that it is used for positive purposes.
        Overall, deep learning has the potential to revolutionize many industries and improve our lives in countless ways. However, it's important to approach its development with caution and ensure that it is used responsibly and ethically.
        """,
        metadata={"source":"Deep Learning", "page": 1, "topic": "DL"}
    ),
    Document(
        page_content="""
        Natural language processing (NLP) refers to the field of computer science that focuses on the interactions between computers and human languages. NLP involves developing algorithms and models that can process and analyze natural language data, such as text, speech, and images.
        There are several subfields within NLP, including machine learning, deep learning, and computational linguistics. Machine learning techniques are used to develop models that can learn from labeled data and improve their performance over time. Deep learning is a subset of machine learning that uses neural networks to learn from data. Computational linguistics involves studying the structure and properties of natural language.
        One of the key challenges in developing NLP systems is ensuring that they are accurate and reliable. This includes addressing issues like ambiguity, sarcasm, and context sensitivity. As NLP becomes more advanced, it's important to consider how it will impact society and ensure that it is used for positive purposes.
        Overall, NLP has the potential to revolutionize many industries and improve our lives in countless ways. However, it's important to approach its development with caution and ensure that it is used responsibly and ethically.
        """,
        metadata={"source": "NLP Overview", "page": 1, "topic": "NLP"}
    )
]

In [3]:
sample_documents

[Document(metadata={'source': 'AI Indtroduction', 'page': 1, 'topic': 'AI'}, page_content="\n        Artificial intelligence (AI) refers to the simulation of human intelligence in machines that are programmed to think, learn, and act like humans. AI can be used for a wide range of applications, including natural language processing, computer vision, robotics, and more.\n        There are two main types of AI: narrow or weak AI, which is designed to perform specific tasks, and general or strong AI, which is capable of performing any intellectual task that a human can do. Narrow AI is often used in applications like voice assistants, recommendation systems, and image recognition, while general AI is still the subject of ongoing research.\n        One of the key challenges in developing AI is ensuring that it is safe and ethical to use. This includes addressing issues like bias, privacy, and accountability. As AI becomes more advanced, it's important to consider how it will impact society

### Text splitting

In [4]:
## Initialize the text splitter
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=2,
    length_function=len,
    separators=[" "]
)

## Splite the text
chunks = text_splitter.split_documents(sample_documents)

print(f"Created {len(chunks)} chunks from {len(sample_documents)} documents.")
print("\nExample Chunk")
print(f"Content {chunks[0].page_content}")
print(f"Metadata {chunks[0].metadata}")

Created 12 chunks from 4 documents.

Example Chunk
Content Artificial intelligence (AI) refers to the simulation of human intelligence in machines that are programmed to think, learn, and act like humans. AI can be used for a wide range of applications, including natural language processing, computer vision, robotics, and more.
        There are two main types of AI: narrow or weak AI, which is designed to perform specific tasks, and general or strong AI, which is capable of performing any intellectual task that a human can do. Narrow AI is
Metadata {'source': 'AI Indtroduction', 'page': 1, 'topic': 'AI'}


In [5]:
### Load the embedding model

embeddings = HuggingFaceEndpointEmbeddings(
    model="http://localhost:8080",
    provider="Privat"
)

## Emaple: create embedding for a single text
sample_text = "What is machine learning"
sample_embedding = embeddings.embed_query(sample_text)

In [None]:
texts=["AI", "Machine learning", "Deep Learning", "Neural Networks"]
batch_embeddings = embeddings.embed_documents(texts)
batch_embeddings

In [7]:
## Compare Embeddings using cosine similarty
def compare_embeddings(text1, text2):
    """Compare semantic similarity of two texts using embeddings"""
    
    emb1=np.array(embeddings.embed_query(text1))
    emb2=np.array(embeddings.embed_query(text2))
    
    similarity = np.dot(emb1, emb2) / (np.linalg.norm(emb1) * np.linalg.norm(emb2))
    
    return similarity

In [8]:
### Test semantic similarity
print("\nSemantic Similarity Exmaples:")
print(f"'AI' vs 'Artificial Intelligence': {compare_embeddings('AI', 'Artificial Intelligence'):.3f}")


Semantic Similarity Exmaples:
'AI' vs 'Artificial Intelligence': 0.793


In [9]:
### Create FAISS Vectorestore
vectorstore = FAISS.from_documents(
    documents=chunks,
    embedding=embeddings    
)

print(f"Vectorestore created with {vectorstore.index.ntotal} vectors")

Vectorestore created with 12 vectors


In [10]:
## Save vectorestore for later use
vectorstore.save_local("faiss_index")

In [11]:
## Load vectorestore
loaded_vectorstore = FAISS.load_local(
    "faiss_index", 
    embeddings=embeddings, 
    allow_dangerous_deserialization=True
)

print(f"Vectorestore created with {loaded_vectorstore.index.ntotal} vectors")

Vectorestore created with 12 vectors


In [12]:
## Similarity search
query = "What is deep learning?"
result = loaded_vectorstore.similarity_search(query, k=5)
result


[Document(id='ae664a2b-f328-465f-8c2b-05f9fb281bc8', metadata={'source': 'Deep Learning', 'page': 1, 'topic': 'DL'}, page_content='Deep learning refers to a subset of machine learning that uses neural networks to learn from data. Neural networks are composed of layers of interconnected nodes or neurons, which process and transmit information through the network.\n        There are two main types of neural networks: feedforward and recurrent. Feedforward neural networks are used for tasks like image recognition and natural language processing, while recurrent neural networks (RNNs) are used for tasks like speech'),
 Document(id='7ae601b0-a7c7-4379-8630-621bc2e6d741', metadata={'source': 'NLP Overview', 'page': 1, 'topic': 'NLP'}, page_content="improve their performance over time. Deep learning is a subset of machine learning that uses neural networks to learn from data. Computational linguistics involves studying the structure and properties of natural language.\n        One of the key 

In [13]:
print(f"Query: {query}")
print("Top 3 similar chunks:")
for i, doc in enumerate(result):
    print(f"\n{i+1}. Source: {doc.metadata['source']}")
    print(f"   Content: {doc.page_content[:200]}...")
    if i == 2:
        break

Query: What is deep learning?
Top 3 similar chunks:

1. Source: Deep Learning
   Content: Deep learning refers to a subset of machine learning that uses neural networks to learn from data. Neural networks are composed of layers of interconnected nodes or neurons, which process and transmit...

2. Source: NLP Overview
   Content: improve their performance over time. Deep learning is a subset of machine learning that uses neural networks to learn from data. Computational linguistics involves studying the structure and propertie...

3. Source: Deep Learning
   Content: recognition and time series analysis.
        One of the key challenges in developing deep learning models is ensuring that they are accurate and reliable. This includes addressing issues like overfit...


In [14]:
### Similarity Search with score
results_with_scores = vectorstore.similarity_search_with_score(query, k=3)

print("\n\nSimilarity search with scores:")
for doc, score in results_with_scores:
    print(f"\nScore: {score:.3f}")
    print(f"Source: {doc.metadata['source']}")
    print(f"Content preview: {doc.page_content[:100]}...")



Similarity search with scores:

Score: 0.592
Source: Deep Learning
Content preview: Deep learning refers to a subset of machine learning that uses neural networks to learn from data. N...

Score: 0.737
Source: NLP Overview
Content preview: improve their performance over time. Deep learning is a subset of machine learning that uses neural ...

Score: 0.811
Source: Deep Learning
Content preview: recognition and time series analysis.
        One of the key challenges in developing deep learning ...


In [15]:
### Search with filter
filter_dict = {"topic":"ML"}
filtered_results = vectorstore.similarity_search(
    query,
    k=3,
    filter=filter_dict
)

print(filtered_results)

[Document(id='adb37392-2fbd-4877-a6b9-8c37abeabed9', metadata={'source': 'ML Basiscs', 'page': 1, 'topic': 'ML'}, page_content='Machine learning (ML) refers to the process of building algorithms that can learn from data and improve their performance over time. ML is a subset of AI and is used in a wide range of applications, including image recognition, natural language processing, fraud detection, and more.\n        There are two main types of machine learning: supervised and unsupervised. Supervised learning involves training the algorithm on labeled data, where each input-output pair is known. Unsupervised'), Document(id='ebd6cac3-7516-4b56-91b0-f90f29f30285', metadata={'source': 'ML Basiscs', 'page': 1, 'topic': 'ML'}, page_content="learning involves training the algorithm on unlabeled data, where only the inputs are known.\n        One of the key challenges in developing ML algorithms is ensuring that they are accurate and reliable. This includes addressing issues like overfitting

## Build the RAG Chain

In [16]:
from langchain_ollama import ChatOllama

llm = ChatOllama(model="gemma2:9b-instruct-q4_K_M")
llm

ChatOllama(model='gemma2:9b-instruct-q4_K_M')

In [None]:
# 1. Simple RAG Chain with LCEL
simple_prompt = ChatPromptTemplate.from_template(
    """Answer the question based only on the following context:
    Context: {context}
    
    Question: {question}
    """
)
simple_prompt

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template='Answer the question based only on the following context:\n    Context: {context}\n\n    Question: {question}\n    '), additional_kwargs={})])

In [21]:
## Basic retriever
retriever = vectorstore.as_retriever(
    search_type="similarity",
    searach_kwargs={'k':2}
)

In [23]:
from typing import List

# format documents for the prompt
def format_docs(docs: List[Document]) -> str:
    """Format documents for insertion into prompts"""
    formatted = []
    for i, doc in enumerate(docs):
        source = doc.metadata.get('source', 'unknown')
        formatted.append(f"Document {i+1} (Source: {source}:\n{doc.page_content})")
    return "\n\n".join(formatted)

In [26]:
simple_rag_chain=(
    {"context": retriever | format_docs, "question": RunnablePassthrough() }
    | simple_prompt
    | llm
    | StrOutputParser()
)

In [27]:
simple_rag_chain

{
  context: VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEndpointEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x7546248031a0>, search_kwargs={})
           | RunnableLambda(format_docs),
  question: RunnablePassthrough()
}
| ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template='Answer the question based only on the following context:\n    Context: {context}\n\n    Question: {question}\n    '), additional_kwargs={})])
| ChatOllama(model='gemma2:9b-instruct-q4_K_M')
| StrOutputParser()

In [28]:
### Conversational RAG Chain
conversational_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful AI assistant. Use the provided context to answer questions"),
    ("placeholder", "{chat_history}"),
    ("human", "Context: {context}\n\nQuestion: {input}")
])

In [44]:
def create_conversation_rag():
    """Create a conversational RAG chain with memory"""
    return (
        RunnablePassthrough.assign(
            context=lambda x: format_docs(retriever.invoke(x['input']))
        )
        | conversational_prompt
        | llm
        | StrOutputParser()
    )

In [42]:
conversational_prompt

ChatPromptTemplate(input_variables=['context', 'input'], optional_variables=['chat_history'], input_types={'chat_history': list[typing.Annotated[typing.Union[typing.Annotated[langchain_core.messages.ai.AIMessage, Tag(tag='ai')], typing.Annotated[langchain_core.messages.human.HumanMessage, Tag(tag='human')], typing.Annotated[langchain_core.messages.chat.ChatMessage, Tag(tag='chat')], typing.Annotated[langchain_core.messages.system.SystemMessage, Tag(tag='system')], typing.Annotated[langchain_core.messages.function.FunctionMessage, Tag(tag='function')], typing.Annotated[langchain_core.messages.tool.ToolMessage, Tag(tag='tool')], typing.Annotated[langchain_core.messages.ai.AIMessageChunk, Tag(tag='AIMessageChunk')], typing.Annotated[langchain_core.messages.human.HumanMessageChunk, Tag(tag='HumanMessageChunk')], typing.Annotated[langchain_core.messages.chat.ChatMessageChunk, Tag(tag='ChatMessageChunk')], typing.Annotated[langchain_core.messages.system.SystemMessageChunk, Tag(tag='SystemMes

In [31]:
### Streaming RAG chain
streaimg_rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough() }
    | simple_prompt
    | llm
)

In [32]:
print("Modern RAG chains created successfully!")
print("Available chains:")
print("- simple_rag_chain: Bassic Q&A")
print("- conversational_rag_chain: maintains conversation history")
print("- streaming_rag_chain: stream results without StrOutputParser")

Modern RAG chains created successfully!
Available chains:
- simple_rag_chain: Bassic Q&A
- conversational_rag_chain: maintains conversation history
- streaming_rag_chain: stream results without StrOutputParser


In [None]:
# Test function for diffrerent chain types
def test_rag_chains(question: str):
    """Test all RAG chain variants"""
    print(f"Question: {question}")
    print("=" * 80)
    
    # 1. Simple RAG Chain
    print("\n1. Simple RAG Chain")
    answer = simple_rag_chain.invoke(question)
    print(f"Answer: {answer}")   
    
    print("\n2. Streaming RAG")
    print("Answer: ", end="", flush=True)
    for chunk in streaimg_rag_chain.stream(question):
        print(chunk.content, end="", flush=True)
    print()
    

In [None]:
test_rag_chains("What is the difference between AI and machine learning?")

In [None]:
# Test with multiple questions
test_questions = [
    "What is the difference between AI and ;achine Learning?",
    "Explain deep learning in simple terms",
    "How does NPL work?"
]

for question in test_questions:
    print("\n" + "=" * 80 + "\n")
    test_rag_chains(question)

In [46]:
## Conversational example
print("\n3. Conversational RAG example:")
chat_history = []

conversation_rag = create_conversation_rag()

q1 = "What is machine learning?"
a1 = conversation_rag.invoke({
    "input": q1,
    "chat_history": chat_history
})

chat_history.extend([
    HumanMessage(content=q1),
    AIMessage(content=a1)
])



3. Conversational RAG example:
