In [37]:
import os
# Set the telemetry-related environment variables in one step, ahead of the
# LangChain / Chroma imports that follow in this cell.
os.environ.update(
    {
        "LANGCHAIN_TELEMETRY": "false",
        "CHROMA_TELEMETRY": "0",
        "ANONYMIZED_TELEMETRY": "false",
    }
)

from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.chains import RetrievalQA, ConversationalRetrievalChain

# from langchain.vectorstores import Chroma
# from langchain.llms import OpenAI
# from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
# import chromadb
# from sqlalchemy.orm.collections import collection
# from langchain.text_splitter import RecursiveCharacterTextSplitter

In [38]:
from dotenv import load_dotenv
# Pull settings from a dotenv file into the process environment so later cells
# can read them; the recorded run of this cell returned True.
load_dotenv()

True

In [39]:
# ---------------------------
# 1. Load PDF documents
# ---------------------------
docs_dir = "docs/"
MAX_PDFS = 10  # upper bound on how many PDFs are ingested per run

# os.listdir() returns entries in arbitrary order, so the listing is sorted
# before slicing: the same MAX_PDFS files are then chosen on every run and on
# every machine. The extension test is case-insensitive so that files such as
# "Paper.PDF" are not silently skipped.
pdf_files = [
    os.path.join(docs_dir, file_name)
    for file_name in sorted(os.listdir(docs_dir))
    if file_name.lower().endswith(".pdf")
][:MAX_PDFS]

# Collect the page-level documents from every selected PDF into one flat list.
documents = []
for pdf_file in pdf_files:
    loader = PyPDFLoader(pdf_file)
    documents.extend(loader.load())

print(f"Loaded {len(documents)} pages from {len(pdf_files)} PDFs.")


Ignoring wrong pointing object 6 0 (offset 0)
Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)


Loaded 284 pages from 10 PDFs.


In [40]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Split the loaded pages into overlapping chunks (chunk_size=1000,
# chunk_overlap=200) that are embedded and indexed in the following cells.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = text_splitter.split_documents(documents)

In [41]:
# ---------------------------
# 2. Create embeddings
# ---------------------------
# OpenAI-backed embedding client.
# NOTE(review): presumably reads its API key from the environment populated by
# load_dotenv() above — confirm. In the visible cells this object is only handed
# to InMemoryVectorStore; the Chroma store is built with `hugging_embeddings`.
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")


In [42]:
from langchain.embeddings import HuggingFaceEmbeddings

# Open-source embedding model; this is the object passed to Chroma when the
# persistent vector store is built in section 3.
hugging_embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# or
# hugging_embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")

In [47]:
# ---------------------------
# 3. Store embeddings in Chroma
# ---------------------------
from langchain_chroma import Chroma

vector_db_dir = "./chroma_db3"
# exist_ok=True replaces the separate exists()/makedirs() pair and is safe to re-run.
os.makedirs(vector_db_dir, exist_ok=True)

# db = Chroma.from_documents(chunks, embeddings, persist_directory=vector_db_dir)

#  Using HuggingFace open source
# Open the persisted store first. Calling Chroma.from_documents() on every run
# would re-embed all chunks and append them again to the same collection, so
# each re-execution of this cell would duplicate the stored documents.
db = Chroma(persist_directory=vector_db_dir, embedding_function=hugging_embeddings)

if db.get(limit=1)["ids"]:
    # The collection already holds data from a previous run: reuse it as-is.
    # Delete the directory to force a rebuild after the source PDFs change.
    print("Reusing existing Chroma vector database.")
else:
    # Empty store: embed the chunks and persist them.
    db = Chroma.from_documents(chunks, hugging_embeddings, persist_directory=vector_db_dir)
    # db.persist()
    print("Embeddings stored in Chroma vector database.")


Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given


Embeddings stored in Chroma vector database.


In [48]:
# Alternative: InMemory vector storage
# Built around the OpenAI `embeddings` object. No documents are added to it and
# it is not referenced again in the visible cells; retrieval below uses `db`.
from langchain_core.vectorstores import InMemoryVectorStore
db_inmemory = InMemoryVectorStore(embeddings)

In [49]:
# from langchain_chroma import Chroma
# vector_db_dir = "./chroma_db"
# db = Chroma(collection_name = "my_collection", embedding_function = embeddings, persist_directory = vector_db_dir)

In [50]:
# ---------------------------
# 4. Create LangChain retriever
# ---------------------------
# Expose the Chroma store through the retriever interface using its default
# settings (no search_type / search_kwargs are passed here).
retriever = db.as_retriever()


In [51]:
# ---------------------------
# 5. Create RetrievalQA chain with LLM
# ---------------------------


### Method 1 : Using GPT
# qa_chain = RetrievalQA.from_chain_type(
#     llm=OpenAI(model="gpt-3.5-turbo"),
#     chain_type="stuff",
#     retriever=retriever
# )

### Method 2: Using Ollama
# Install: pip install langchain-community
from langchain_community.llms import Ollama

# Use Ollama LLM
llm = Ollama(
    model= "tinyllama",  # "llama3.2",  # or "llama3.1:8b", "mistral", "gemma:2b", etc.
    # model = "gpt-oss:20b",
    temperature=0.1
)

### Method 3: Using HuggingFace
from transformers import pipeline
from langchain.llms import HuggingFacePipeline

# Example: Flan-T5 small (seq2seq) for question answering
# NOTE(review): device=0 pins the pipeline to accelerator index 0 — confirm this
# is available on every machine that runs the notebook.
hf_pipeline = pipeline(
    "text2text-generation",           # use "text-generation" for causal LM
    model="google/flan-t5-small",     # choose any HF model
    device=0,
    max_length=512)
llm2 = HuggingFacePipeline(pipeline=hf_pipeline)


def _build_qa_chain(chain_llm):
    """Return a "stuff" RetrievalQA chain over `retriever` driven by `chain_llm`.

    Source documents are returned alongside the answer so callers can inspect
    what was retrieved.
    """
    return RetrievalQA.from_chain_type(
        llm=chain_llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True  # Optional: to see source docs
    )


# Ollama-backed chain (Method 2).
qa_chain = _build_qa_chain(llm)

# HuggingFace-backed chain (Method 3). This previously received `llm` as well,
# which made it an exact duplicate of qa_chain and left llm2 unused.
qa_chain2 = _build_qa_chain(llm2)

In [52]:
# ---------------------------
# 7. Ask questions
# ---------------------------
# Interactive loop: read a question, answer it through the LangChain RAG chain,
# and stop when the user types 'exit'.
while True:
    # Strip surrounding whitespace so inputs like "exit " still terminate the loop.
    query = input("\nEnter your question (or 'exit' to quit): ").strip()
    if query.lower() == "exit":
        break
    if not query:
        # Nothing was typed: ask again instead of spending a retrieval + LLM call.
        continue

    # Retrieve answer using LangChain RAG
    # ollama pull llama3.2
    answer = qa_chain.invoke({"query": query})
    print("\nAnswer (LangChain RAG):")
    print(answer["result"])


Failed to send telemetry event CollectionQueryEvent: capture() takes 1 positional argument but 3 were given



Answer (LangChain RAG):
The context for the question is related to the use of similar structures in turbulent flows and the construction of LEs closure (joint with R. Klein and V. Vercauteren) in the context of joint work P1 "Find multi-scale structures in high-dimensional data" (joint with K.-R. Müller and C. Schütte). The context also includes the study of multi-resolution modeling based on DEVS formalism and its applications, as well as research on consistency in multi-resolution model families.


## Optional 1: Create an index with the LlamaIndex framework using HuggingFace models

In [53]:
# ---------------------------
# 6.  Using LlamaIndex create index
# ---------------------------
from llama_index.core import VectorStoreIndex, Document, Settings, SimpleDirectoryReader
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.text_splitter import TokenTextSplitter, SentenceSplitter
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

###########
# Install: pip install llama-index-llms-huggingface
# NOTE(review): the import below comes from the `llama_index.legacy` namespace
# rather than the package named in the install hint — confirm which is intended.
from llama_index.legacy.llms.huggingface import HuggingFaceLLM

# Load model + tokenizer manually (T5 is seq2seq)
model_name = "google/flan-t5-small"

# The tokenizer for this model reports a 512-token maximum (see the warning this
# cell logs), so the LLM's context window is declared with the same value.
MODEL_MAX_TOKENS = 512

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Set up HuggingFace LLM
# Settings.llm = HuggingFaceLLM(
#     model_name="google/flan-t5-small",  # or any other model
#     tokenizer_name="google/flan-t5-small",
#     context_window=2048,
#     max_new_tokens=256,
#     generate_kwargs={"temperature": 0.1, "do_sample": True},
# )

Settings.tokenizer = tokenizer

# context_window was 2048, which let LlamaIndex assemble prompts longer than the
# 512 tokens the tokenizer accepts ("1275 > 512" at query time). Declaring the
# real limit lets the prompt-packing logic keep prompts within it.
Settings.llm = HuggingFaceLLM(
    model=model,
    tokenizer=tokenizer,
    context_window=MODEL_MAX_TOKENS,
    max_new_tokens=256,
    generate_kwargs={"temperature": 0.1},
)


# Difference is of 's' in the ending of HuggingFaceEmbeddings and HuggingFaceEmbedding
# Settings.embed_model =  HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2") # this Class is for LangChain embeddings
Settings.embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-mpnet-base-v2") # this Class is for LlamaIndex

# Set LLM globally for LlamaIndex
# from llama_index.llms.openai import OpenAI
# Settings.llm = OpenAI(model="gpt-4o-mini")  # or "gpt-4o", "gpt-3.5-turbo", etc.


##  Method 1: Convert LangChain Documents → LlamaIndex Documents
# Reuses the pages already loaded by PyPDFLoader in section 1.
li_documents = [
    Document(text=doc.page_content, metadata=doc.metadata)
    for doc in documents
]

# Create sentence splitter
sentence_splitter = SentenceSplitter(
    chunk_size=1024,
    chunk_overlap=200
)

nodes = sentence_splitter.get_nodes_from_documents(li_documents)

# Build Vector Index from Nodes (not from_documents!)
index = VectorStoreIndex(nodes=nodes)

## Method 2: PDF documents -> LlamaIndex
# documents = SimpleDirectoryReader("YOUR_DATA_DIRECTORY").load_data()
# index = VectorStoreIndex.from_documents(
#     documents,
# )

print(f"LlamaIndex vector index created successfully with {len(nodes)} nodes.")


Token indices sequence length is longer than the specified maximum sequence length for this model (888 > 512). Running this sequence through the model will result in indexing errors


LlamaIndex vector index created successfully with 347 nodes.


In [62]:
# ---------------------------
# 6.  Using LlamaIndex create index
# ---------------------------
# Variant of the previous cell that reads the PDFs with LlamaIndex's own reader
# (Method 2). It repeats the model/Settings setup and rebinds `index`, so the
# query cell below uses whichever of the two cells ran last.
from llama_index.core import VectorStoreIndex, Document, Settings, SimpleDirectoryReader
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.text_splitter import TokenTextSplitter, SentenceSplitter
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

###########
# Install: pip install llama-index-llms-huggingface
# NOTE(review): the import below comes from the `llama_index.legacy` namespace
# rather than the package named in the install hint — confirm which is intended.
from llama_index.legacy.llms.huggingface import HuggingFaceLLM

# Load model + tokenizer manually (T5 is seq2seq)
model_name = "google/flan-t5-small"

# The tokenizer for this model reports a 512-token maximum (see the warning
# logged when querying), so the LLM's context window is declared with that value.
MODEL_MAX_TOKENS = 512

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Set up HuggingFace LLM
# Settings.llm = HuggingFaceLLM(
#     model_name="google/flan-t5-small",  # or any other model
#     tokenizer_name="google/flan-t5-small",
#     context_window=2048,
#     max_new_tokens=256,
#     generate_kwargs={"temperature": 0.1, "do_sample": True},
# )

Settings.tokenizer = tokenizer

# context_window was 2048, which let LlamaIndex assemble prompts longer than the
# 512 tokens the tokenizer accepts ("1275 > 512" at query time). Declaring the
# real limit lets the prompt-packing logic keep prompts within it.
Settings.llm = HuggingFaceLLM(
    model=model,
    tokenizer=tokenizer,
    context_window=MODEL_MAX_TOKENS,
    max_new_tokens=256,
    generate_kwargs={"temperature": 0.1},
)


# Difference is of 's' in the ending of HuggingFaceEmbeddings and HuggingFaceEmbedding
# Settings.embed_model =  HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2") # this Class is for LangChain embeddings
Settings.embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-mpnet-base-v2") # this Class is for LlamaIndex

# Settings.embed_model = HuggingFaceEmbedding(
#     model_name="BAAI/bge-small-en-v1.5",
#     embed_batch_size=2,
# )

# Set LLM globally for LlamaIndex
# from llama_index.llms.openai import OpenAI
# Settings.llm = OpenAI(model="gpt-4o-mini")  # or "gpt-4o", "gpt-3.5-turbo", etc.


## Method 2: PDF documents -> LlamaIndex
# Reads the same "docs/" folder used in section 1, this time via LlamaIndex.
documents2 = SimpleDirectoryReader("docs/").load_data()
# print(f"Loaded {len(documents2)} documents")

# Create index with text splitter
# text_splitter = SentenceSplitter(chunk_size=512, chunk_overlap=100)
index = VectorStoreIndex.from_documents(
    documents2,
    # transformations=[text_splitter],
    # show_progress=True
)

# Get the number of nodes
# nodes = index.as_retriever().retrieve("test")  # Just to count nodes
# print(f"LlamaIndex vector index created successfully with {len(nodes)} nodes retrieved.")


##  Method 1: Convert LangChain Documents → LlamaIndex Documents
# li_documents = [
#     Document(text=doc.page_content, metadata=doc.metadata)
#     for doc in documents
# ]

# Create sentence splitter
# sentence_splitter = SentenceSplitter(
#     chunk_size=1024,
#     chunk_overlap=200
# )
#
# nodes = sentence_splitter.get_nodes_from_documents(li_documents)
#
# # Build Vector Index from Nodes (not from_documents!)
# index = VectorStoreIndex(nodes=nodes)

# print(f"LlamaIndex vector index created successfully with {len(nodes)} nodes.")


2025-12-06 21:58:24,732 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en-v1.5
2025-12-06 21:58:27,233 - INFO - 1 prompt is loaded, with the key: query


In [63]:
# LangChain RAG + LlamaIndex
# also query LlamaIndex
query = input("\nEnter your question (or 'exit' to quit): ").strip()

# The prompt offers 'exit', so honour it (and ignore blank input) instead of
# forwarding that text to the query engine as if it were a question.
if query and query.lower() != "exit":
    query_engine = index.as_query_engine()
    response = query_engine.query(query)
    print("\nAnswer (LlamaIndex):")
    print(response)

Token indices sequence length is longer than the specified maximum sequence length for this model (1275 > 512). Running this sequence through the model will result in indexing errors


Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "/Users/imbilalbutt/PycharmProjects/RAGpipelineChatbotwithFastAPI/rag-env/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3550, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/var/folders/sb/ks236ylx60gdnvp1m0q6wbpr0000gn/T/ipykernel_1808/881745780.py", line 6, in <module>
    response = query_engine.query(query)
  File "/Users/imbilalbutt/PycharmProjects/RAGpipelineChatbotwithFastAPI/rag-env/lib/python3.9/site-packages/llama_index/core/instrumentation/dispatcher.py", line 261, in wrapper
  File "/Users/imbilalbutt/PycharmProjects/RAGpipelineChatbotwithFastAPI/rag-env/lib/python3.9/site-packages/llama_index/core/base/base_query_engine.py", line 52, in query
    dispatcher.event(QueryStartEvent(query=str_or_query_bundle))
  File "/Users/imbilalbutt/PycharmProjects/RAGpipelineChatbotwithFastAPI/rag-env/lib/python3.9/site-packages/llama_index/core/instrumentation/dispatcher.py", line 261, in wrapper
 

## Optional 2 — LlamaIndex connects to the SAME Chroma DB

In [None]:
# from llama_index.core import VectorStoreIndex, StorageContext
# from llama_index.vector_stores.chroma import ChromaVectorStore
#
# # IMPORTANT: Use the same embeddings model (HF) that built the Chroma store
# emb_model = hugging_embeddings  # reuse LangChain HF model (`embeddings` is the OpenAI one)
#
# # ---------------------------
# # Connect LlamaIndex to existing Chroma
# # ---------------------------
# li_chroma = Chroma(
#     persist_directory=vector_db_dir,  # directory created in section 3
#     embedding_function=emb_model,  # same embeddings
# )
#
# vector_store = ChromaVectorStore(chroma_collection=li_chroma._collection)
#
# storage_context = StorageContext.from_defaults(vector_store=vector_store)
#
# # Load index from existing Chroma
# index = VectorStoreIndex.from_vector_store(
#     vector_store=vector_store,
#     storage_context=storage_context,
# )
#
# print("LlamaIndex → Linked to existing Chroma successfully.")
