In [1]:
import os
os.environ["LANGCHAIN_TELEMETRY"] = "false"

from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
import chromadb
from sqlalchemy.orm.collections import collection
from langchain.text_splitter import RecursiveCharacterTextSplitter



In [2]:
from dotenv import load_dotenv
load_dotenv()

True

In [3]:
# ---------------------------
# 1. Load PDF documents
# ---------------------------
docs_dir = "docs/"
pdf_files = [os.path.join(docs_dir, f) for f in os.listdir(docs_dir) if f.endswith(".pdf")][:10]

documents = []
for pdf_file in pdf_files:
    loader = PyPDFLoader(pdf_file)
    pages = loader.load()
    documents.extend(pages)

print(f"Loaded {len(documents)} pages from {len(pdf_files)} PDFs.")


Ignoring wrong pointing object 6 0 (offset 0)
Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)


Loaded 284 pages from 10 PDFs.


In [4]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200
)
chunks = text_splitter.split_documents(documents)

In [5]:
# ---------------------------
# 2. Create embeddings
# ---------------------------
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")


  embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")


In [29]:
from langchain.embeddings import HuggingFaceEmbeddings

hugging_embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# or
# hugging_embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")

  from .autonotebook import tqdm as notebook_tqdm


In [31]:
# ---------------------------
# 3. Store embeddings in Chroma
# ---------------------------
from langchain_chroma import Chroma

vector_db_dir = "./chroma_db"
if not os.path.exists(vector_db_dir):
    os.makedirs(vector_db_dir)

# db = Chroma.from_documents(chunks, embeddings, persist_directory=vector_db_dir)

#  Using HuggingFace open source
db = Chroma.from_documents(chunks, hugging_embeddings, persist_directory=vector_db_dir)

# db.persist()
print("Embeddings stored in Chroma vector database.")


Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given


Embeddings stored in Chroma vector database.


In [10]:
# Alternative: InMemory vector storage
from langchain_core.vectorstores import InMemoryVectorStore
db_inmemory = InMemoryVectorStore(embeddings)

In [33]:
# from langchain_chroma import Chroma
# vector_db_dir = "./chroma_db"
# db = Chroma(collection_name = "my_collection", embedding_function = embeddings, persist_directory = vector_db_dir)

In [32]:
# ---------------------------
# 4. Create LangChain retriever
# ---------------------------
retriever = db.as_retriever()


In [33]:
# ---------------------------
# 5. Create RetrievalQA chain with LLM
# ---------------------------

# qa_chain = RetrievalQA.from_chain_type(
#     llm=OpenAI(model="gpt-3.5-turbo"),
#     chain_type="stuff",
#     retriever=retriever
# )

# Install: pip install langchain-community
from langchain_community.llms import Ollama

# Use Ollama LLM
llm = Ollama(
    model="llama3.2",  # or "llama3.1:8b", "mistral", "gemma:2b", etc.
    temperature=0.1
)

qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True  # Optional: to see source docs
)


In [34]:
# ---------------------------
# 6. Create LlamaIndex Index
# ---------------------------
from llama_index.core import VectorStoreIndex, Document, Settings
from llama_index.core.text_splitter import TokenTextSplitter, SentenceSplitter
from llama_index.llms.openai import OpenAI

###########
# Install: pip install llama-index-llms-huggingface
from llama_index.legacy.llms.huggingface import HuggingFaceLLM

# Set up HuggingFace LLM
Settings.llm = HuggingFaceLLM(
    model_name="google/flan-t5-small",  # or any other model
    tokenizer_name="google/flan-t5-small",
    context_window=2048,
    max_new_tokens=256,
    generate_kwargs={"temperature": 0.1, "do_sample": True},
)

# ... rest of your code remains the same
#########

# Set LLM globally for LlamaIndex
# Settings.llm = OpenAI(model="gpt-4o-mini")  # or "gpt-4o", "gpt-3.5-turbo", etc.

# Convert LangChain Documents → LlamaIndex Documents
li_documents = [
    Document(text=doc.page_content, metadata=doc.metadata)
    for doc in documents
]

# Create sentence splitter
sentence_splitter = SentenceSplitter(
    chunk_size=1024,
    chunk_overlap=200
)

nodes = sentence_splitter.get_nodes_from_documents(li_documents)

# Build Vector Index from Nodes (not from_documents!)
index = VectorStoreIndex(nodes=nodes)

print(f"LlamaIndex vector index created successfully with {len(nodes)} nodes.")



[nltk_data] Downloading package stopwords to /Users/imbilalbutt/Pychar
[nltk_data]     mProjects/RAGpipelineChatbotwithFastAPI/rag-
[nltk_data]     env/lib/python3.9/site-
[nltk_data]     packages/llama_index/legacy/_static/nltk_cache...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package punkt to /Users/imbilalbutt/PycharmPro
[nltk_data]     jects/RAGpipelineChatbotwithFastAPI/rag-
[nltk_data]     env/lib/python3.9/site-
[nltk_data]     packages/llama_index/legacy/_static/nltk_cache...
[nltk_data]   Unzipping tokenizers/punkt.zip.


ValueError: Unrecognized configuration class <class 'transformers.models.t5.configuration_t5.T5Config'> for this kind of AutoModel: AutoModelForCausalLM.
Model type should be one of ApertusConfig, ArceeConfig, AriaTextConfig, BambaConfig, BartConfig, BertConfig, BertGenerationConfig, BigBirdConfig, BigBirdPegasusConfig, BioGptConfig, BitNetConfig, BlenderbotConfig, BlenderbotSmallConfig, BloomConfig, BltConfig, CamembertConfig, LlamaConfig, CodeGenConfig, CohereConfig, Cohere2Config, CpmAntConfig, CTRLConfig, Data2VecTextConfig, DbrxConfig, DeepseekV2Config, DeepseekV3Config, DiffLlamaConfig, DogeConfig, Dots1Config, ElectraConfig, Emu3Config, ErnieConfig, Ernie4_5Config, Ernie4_5_MoeConfig, Exaone4Config, FalconConfig, FalconH1Config, FalconMambaConfig, FlexOlmoConfig, FuyuConfig, GemmaConfig, Gemma2Config, Gemma3Config, Gemma3TextConfig, Gemma3nConfig, Gemma3nTextConfig, GitConfig, GlmConfig, Glm4Config, Glm4MoeConfig, GotOcr2Config, GPT2Config, GPT2Config, GPTBigCodeConfig, GPTNeoConfig, GPTNeoXConfig, GPTNeoXJapaneseConfig, GptOssConfig, GPTJConfig, GraniteConfig, GraniteMoeConfig, GraniteMoeHybridConfig, GraniteMoeSharedConfig, HeliumConfig, HunYuanDenseV1Config, HunYuanMoEV1Config, JambaConfig, JetMoeConfig, Lfm2Config, LlamaConfig, Llama4Config, Llama4TextConfig, LongcatFlashConfig, MambaConfig, Mamba2Config, MarianConfig, MBartConfig, MegaConfig, MegatronBertConfig, MiniMaxConfig, MinistralConfig, MistralConfig, MixtralConfig, MllamaConfig, ModernBertDecoderConfig, MoshiConfig, MptConfig, MusicgenConfig, MusicgenMelodyConfig, MvpConfig, NemotronConfig, OlmoConfig, Olmo2Config, Olmo3Config, OlmoeConfig, OpenLlamaConfig, OpenAIGPTConfig, OPTConfig, PegasusConfig, PersimmonConfig, PhiConfig, Phi3Config, Phi4MultimodalConfig, PhimoeConfig, PLBartConfig, ProphetNetConfig, QDQBertConfig, Qwen2Config, Qwen2MoeConfig, Qwen3Config, Qwen3MoeConfig, Qwen3NextConfig, RecurrentGemmaConfig, ReformerConfig, RemBertConfig, RobertaConfig, RobertaPreLayerNormConfig, RoCBertConfig, RoFormerConfig, RwkvConfig, SeedOssConfig, SmolLM3Config, Speech2Text2Config, StableLmConfig, Starcoder2Config, TransfoXLConfig, TrOCRConfig, VaultGemmaConfig, WhisperConfig, XGLMConfig, XLMConfig, XLMProphetNetConfig, XLMRobertaConfig, XLMRobertaXLConfig, XLNetConfig, xLSTMConfig, XmodConfig, ZambaConfig, Zamba2Config.

In [18]:
# ---------------------------
# 7. Ask questions
# ---------------------------
while True:
    query = input("\nEnter your question (or 'exit' to quit): ")
    if query.lower() == "exit":
        break

    # Retrieve answer using LangChain RAG
    answer = qa_chain.run(query)
    print("\nAnswer (LangChain RAG):")
    print(answer)

    # also query LlamaIndex
    response = index.as_query_engine().query(query)
    print("\nAnswer (LlamaIndex):")
    print(response)

  answer = qa_chain.run(query)


RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}