In [1]:
# @title 1. Install Optimized Dependencies
!pip install -qU \
    langchain-community \
    langchain-huggingface \
    langchain-google-genai \
    faiss-cpu \
    pymupdf \
    sentence-transformers

print("✅ High-performance dependencies installed.")

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m47.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.6/57.6 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.6/23.6 MB[0m [31m76.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.1/24.1 MB[0m [31m93.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m79.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m74.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m473.8/473.8 kB[0m [31m41.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.7/64.7 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[

In [2]:
# @title 2. Setup Environment & Keys
import os
import getpass

# Resolve the Gemini API key, in order of preference:
#   1. a GEMINI_API_KEY value that is already exported in the environment,
#   2. the Colab secrets store (only importable inside Google Colab),
#   3. an interactive, non-echoing prompt.
if os.environ.get("GEMINI_API_KEY"):
    print("✅ API Key already present in environment.")
else:
    try:
        # Imported lazily so the notebook still runs outside Colab, where
        # this import raises ImportError and we fall through to the prompt.
        from google.colab import userdata

        colab_key = userdata.get('GEMINI_API_KEY')
        if not colab_key:
            # os.environ only accepts non-empty strings worth storing;
            # treat a missing/empty secret like any other lookup failure.
            raise KeyError("GEMINI_API_KEY")
        os.environ["GEMINI_API_KEY"] = colab_key
        print("✅ API Key loaded via UserData.")
    except Exception as err:
        # `Exception` (not a bare except) so Ctrl-C / SystemExit still
        # interrupt the cell instead of silently triggering the prompt.
        print(f"ℹ️ Colab secret unavailable ({type(err).__name__}); falling back to manual entry.")
        os.environ["GEMINI_API_KEY"] = getpass.getpass("Enter your Gemini API Key: ")

# Suppress warnings for clean production logs
# NOTE(review): this silences every warning category, including deprecation
# notices from the LangChain packages; narrow the filter if those matter.
import warnings
warnings.filterwarnings("ignore")

✅ API Key loaded via UserData.


In [3]:
# @title 3. Import Libraries
import fitz  # PyMuPDF
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.docstore.in_memory import InMemoryDocstore
import faiss
import time


In [5]:
# @title 4. Ingest, Split, and Index.
# --- Configuration ---
# Hugging Face model id handed to HuggingFaceEmbeddings in build_vector_index.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
# Upper bound on chunk length passed to the text splitter. The splitter is
# configured with length_function=len, so this is counted in characters.
CHUNK_SIZE = 512  # Smaller chunks = better semantic precision
# Passed to the splitter as chunk_overlap; measured in the same units as
# CHUNK_SIZE (characters).
CHUNK_OVERLAP = 64

def fast_load_pdf(pdf_path):
    """
    Extract the text of every page of a PDF using PyMuPDF (`fitz`).

    Returns the per-page texts joined with newlines. If anything goes wrong
    while opening or reading the file, the error is printed and None is
    returned instead of raising.
    """
    try:
        # The context manager closes the document even if a page fails.
        with fitz.open(pdf_path) as pdf:
            return "\n".join([pg.get_text() for pg in pdf])
    except Exception as e:
        print(f"❌ Error reading {pdf_path}: {e}")
        return None

def _resolve_embedding_device(device=None):
    """Return `device` if given, otherwise 'cuda' when a GPU is usable, else 'cpu'."""
    if device is not None:
        return device
    try:
        # Local import: torch is pulled in by sentence-transformers, but the
        # notebook does not import it at the top level.
        import torch
    except ImportError:
        return "cpu"
    return "cuda" if torch.cuda.is_available() else "cpu"


def build_vector_index(raw_text, device=None):
    """
    Split `raw_text` into chunks, embed them, and build a FAISS vector store.

    Args:
        raw_text: Full document text. Falsy values (None, "") short-circuit.
        device: Optional device string for the embedding model (e.g. "cpu",
            "cuda"). When omitted, CUDA is used if available, otherwise CPU,
            so the notebook runs on both GPU and CPU-only runtimes.

    Returns:
        The FAISS vector store, or None when there is no text to index
        (empty input, or the splitter produced no chunks).
    """
    if not raw_text:
        return None

    # 1. Split Text
    # Chunk length is measured in characters (length_function=len).
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=CHUNK_SIZE,
        chunk_overlap=CHUNK_OVERLAP,
        separators=["\n\n", "\n", ".", " ", ""],
        length_function=len
    )
    docs = text_splitter.create_documents([raw_text])
    print(f"   ✂️ Generated {len(docs)} document chunks.")

    if not docs:
        # Nothing to embed (e.g. whitespace-only text); building an index
        # from an empty list would fail, so report "no index" instead.
        return None

    # 2. Initialize Embeddings
    print("   🧠 Loading Quantized/Small Embedding Model...")
    embeddings = HuggingFaceEmbeddings(
        model_name=EMBEDDING_MODEL_NAME,
        model_kwargs={'device': _resolve_embedding_device(device)},
        encode_kwargs={'normalize_embeddings': True}  # unit-length vectors
    )

    # 3. Create FAISS Index
    print("   fw Creating FAISS Vector Index...")
    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # system clock adjustments the way time.time() can.
    start_time = time.perf_counter()

    vectorstore = FAISS.from_documents(docs, embeddings)

    end_time = time.perf_counter()
    print(f"   ⚡ Indexing Time: {end_time - start_time:.2f} seconds")

    return vectorstore

In [11]:
# @title 5. Call The Pdf
# --- Execution ---
pdf_path = "Medical big data and internet of medical things_ advances, challenges and applications ( PDFDrive ).pdf"
# Number of chunks the retriever returns per query.
# k=4 is usually sufficient for high-quality context without filling LLM window
RETRIEVER_TOP_K = 4

print(f"📄 Processing: {pdf_path}")
raw_text = fast_load_pdf(pdf_path)

if raw_text is None:
    # fast_load_pdf returns None after printing the underlying error.
    print("⚠️ PDF not found. Upload the file to Colab.")
elif not raw_text:
    # The file opened but yielded an empty string.
    print("⚠️ PDF contained no extractable text.")
else:
    print(f"   (Extracted {len(raw_text)} characters)")
    vectorstore = build_vector_index(raw_text)

    if vectorstore is None:
        # build_vector_index signals "nothing indexed" with None; calling
        # .as_retriever on it would raise AttributeError.
        print("⚠️ No vector index was built; the retriever is unavailable.")
    else:
        # Expose retriever
        retriever = vectorstore.as_retriever(search_kwargs={"k": RETRIEVER_TOP_K})
        print("✅ Optimization Complete. System Ready.")

📄 Processing: Medical big data and internet of medical things_ advances, challenges and applications ( PDFDrive ).pdf
   (Extracted 820276 characters)
   ✂️ Generated 1897 document chunks.
   🧠 Loading Quantized/Small Embedding Model...


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

   fw Creating FAISS Vector Index...
   ⚡ Indexing Time: 4.75 seconds
✅ Optimization Complete. System Ready.


In [12]:
# @title 6. Initialize Efficient RAG Chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_google_genai import ChatGoogleGenerativeAI
from operator import itemgetter

# 1. Setup Gemini Flash
llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    temperature=0,
    max_retries=2
)

# 2. Concise System Prompt
# Instructions are tuned to prevent hallucination and enforce brevity.
system_prompt = (
    "You are a precision medical AI assistant. "
    "Answer the question solely based on the provided context below. "
    "If the answer is not in the context, strictly state 'I cannot find the answer in the provided document'. "
    "Keep answers concise and technical.\n\n"
    "Context:\n{context}"
)

prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "{question}"),
])


def format_docs(docs):
    """Join the page_content of the retrieved documents into one context string.

    Without this step the retriever's list of Document objects is formatted
    into the prompt via str(), which embeds their repr (metadata, quoting,
    escaped newlines) rather than the plain chunk text.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# 3. Construction of the Chain
# Input is a dict {"question": ...}. The question is routed both to the
# retriever (whose results are flattened to text) and straight to the prompt.
rag_chain = (
    {
        "context": itemgetter("question") | retriever | format_docs,
        "question": itemgetter("question")
    }
    | prompt
    | llm
    | StrOutputParser()
)

print("✅ RAG Chain Assembled.")

✅ RAG Chain Assembled.


In [13]:
# @title 7. Execute Query
question = "A Comparative Analysis of Classical Cryptography versus Quantum Cryptography for Web of Medical Things (WoMT)?"

print(f"❓ Query: {question}\n")
# perf_counter is a monotonic clock intended for measuring elapsed time;
# time.time() can jump if the system clock is adjusted mid-request.
start = time.perf_counter()

try:
    response = rag_chain.invoke({"question": question})
    latency = time.perf_counter() - start

    print(f"🤖 Answer ({latency:.2f}s latency):")
    print("-" * 60)
    print(response)
    print("-" * 60)

    # To check retrieval quality, call retriever.invoke(question) in a
    # separate cell and inspect the returned chunks.

except Exception as e:
    # Best-effort: report the failure without a traceback so the notebook
    # keeps running.
    print(f"Error during execution: {e}")

❓ Query: A Comparative Analysis of Classical Cryptography versus Quantum Cryptography for Web of Medical Things (WoMT)?

🤖 Answer (3.65s latency):
------------------------------------------------------------
A Comparative Analysis of Classical Cryptography versus Quantum Cryptography for Web of Medical Things (WoMT) is presented in Chapter 12. This analysis includes an authentication (Handshake) protocol using elliptic curve cryptography (ECC) to reduce computational/communicational cost and improve attack resistance. It concludes that while building Quantum Key Distribution (QKD) networks has limitations, end-to-end security is guaranteed in WoMT. Future work aims to produce cost-effective solutions without affecting performance and security.
------------------------------------------------------------
