In [None]:
!pip install sentence-transformers
!sudo apt-get install libomp-dev
!pip install faiss
!pip install fiass-cpu
!pip install langchain
!pip install google-generativeai
!pip install python-dotenv
!pip install pypdf
!pip install -U langchain-community faiss-cpu

**<h1># 1. IMPORTS<h1>**

In [4]:
import os
import torch
from langchain_community.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_community.document_loaders import PyPDFLoader,TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

In [5]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

cuda


# **2. CONFIGURATION**

In [8]:
LLM_MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# This determines the embedding model (for turning text into vectors)
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

# Your PDF file (can be anything: resume, notes, textbook)
Txt_PATH = "/content/bio.txt"

# Directory to store the FAISS vector index
VECTOR_DB_DIR = "./faiss_index"

# **3. LOAD DOCUMENT (PDF)**

In [9]:
# Load PDF using LangChain's loader
loader = TextLoader(Txt_PATH)
pages = loader.load()  # Loads all pages as documents
print(f"Loaded {len(pages)} pages from PDF.")

Loaded 1 pages from PDF.


# **4. SPLIT TEXT INTO CHUNKS**

In [10]:
# Breaks text into chunks to fit within LLM context window
splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,        # Number of characters in each chunk
    chunk_overlap=50       # Slight overlap helps retain context
)
chunks = splitter.split_documents(pages)
print(f"Split into {len(chunks)} chunks.")

Split into 16 chunks.


# **5. EMBEDDINGS**

In [11]:
# Use sentence-transformers to turn chunks into vectors
embedding_model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

  embedding_model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

# **6. CREATE VECTOR DB**

In [12]:
# Store and search document chunks using FAISS
db = FAISS.from_documents(chunks, embedding_model)

# Save the index for reuse (optional)
db.save_local(VECTOR_DB_DIR)
print(f"Vector DB created and saved at: {VECTOR_DB_DIR}")

Vector DB created and saved at: ./faiss_index


# **7. LOAD LLM PIPELINE**

In [13]:
# Load tokenizer + model for causal generation
tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(LLM_MODEL_NAME)

tokenizer_config.json:   0%|          | 0.00/1.29k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [14]:
# Create HuggingFace generation pipeline
llm_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=300,
    temperature=0.7,
    device=device,
    top_p=0.9,
    repetition_penalty=1.1,
    pad_token_id=tokenizer.eos_token_id  # add this line if using llm like phi2 etc
)

# Wrap the pipeline with LangChain's LLM interface
llm = HuggingFacePipeline(pipeline=llm_pipeline)

Device set to use cuda
  llm = HuggingFacePipeline(pipeline=llm_pipeline)


# **8. BUILD RAG CHAIN**

In [15]:
custom_prompt = PromptTemplate(
    input_variables=["context", "question"],
    template="""
You are a helpful AI assistant. Use only the following context to answer the question.

Context:
{context}

Question: {question}
Answer:"""
)


# Combine vector retriever + LLM to form a RAG chatbot
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=db.as_retriever(search_type="similarity", search_kwargs={"k": 3}),
    return_source_documents=True,
    chain_type_kwargs={"prompt": custom_prompt}
)

# **9. ASK QUESTIONS!**

In [16]:
print("\nAsk questions from your document (type 'exit' to quit):")
while True:
    query = input("\nYou: ")
    if query.lower() == "exit":
        break

    result = qa_chain({"query": query})
    print("\nBot Answer:\n", result["result"])

    # Optional: Show the source document(s) used
    # print("\n Used Sources:")
    # for doc in result["source_documents"]:
    #     print("- ", doc.page_content[:200], "...")  # Truncated for readability


Ask questions from your document (type 'exit' to quit):

You: hi there


  result = qa_chain({"query": query})



Bot Answer:
 
You are a helpful AI assistant. Use only the following context to answer the question.

Context:
Early Life & Passion for Technology

Muhammad Ahmad Nadeem was born on May 18, 2004, in Lahore, Pakistan. From an early age, he exhibited an innate curiosity for technology and problem-solving. He would dismantle gadgets to understand their mechanics, an early sign of his analytical mindset. This relentless curiosity soon evolved into a passion for programming, artificial intelligence, and technological innovation—setting the foundation for his journey into the tech world.

Building an AI-powered chatbot using Langchain, Gemini AI, and Retrieval-Augmented Generation (RAG).

Aiming to become a globally recognized expert in AI/ML and DevOps.

Personal Interests & Philosophical Outlook

Ahmad embodies a strategic mindset akin to Bruce Wayne—disciplined, analytical, and always prepared for challenges. His problem-solving approach is precise, structured, and rooted in logic.

Muha

KeyboardInterrupt: Interrupted by user