In [5]:
from langchain_community.document_loaders import PyPDFLoader

def load_text():
    loader = PyPDFLoader("forty_rules_of_love.pdf")
    return loader.load()


In [None]:
# embeddings_builder.py
import os
import torch
from tqdm import tqdm  # progress bars
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains.summarize import load_summarize_chain
from langchain.prompts import PromptTemplate
from langchain.llms import Ollama
from langchain.schema import Document

# ---------- CONFIG ----------
DATA_PATH = r"40_Rules_of_Love.pdf"  # Updated path
LAYER1_DB = "faiss_layer1"
LAYER2_DB = "faiss_layer2"
CHUNK_SIZE = 600
CHUNK_OVERLAP = 100
# ----------------------------

# Check GPU availability
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"Current CUDA device: {torch.cuda.current_device()}")
    print(f"Device name: {torch.cuda.get_device_name()}")

def load_text():
    """Load PDF text from file."""
    loader = PyPDFLoader(DATA_PATH)
    return loader.load()

def create_chunks(docs):
    """Split text into overlapping chunks."""
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=CHUNK_SIZE,
        chunk_overlap=CHUNK_OVERLAP,
        separators=["\n\n", "\n", ".", "!", "?", " ", ""]
    )
    return splitter.split_documents(docs)

def summarise_chunks(chunks):
    """Summarise each chunk using Ollama with progress tracking."""
    llm = Ollama(model="gemma3:4b")  # replace with deepseek-r1:1.5b if needed
    summary_prompt = PromptTemplate(
        input_variables=["text"],
        template="Summarise this text in 3-4 lines, focus on timeline, characters, and events:\n\n{text}"
    )
    chain = load_summarize_chain(llm, chain_type="stuff", prompt=summary_prompt)

    summaries = []
    print("\nSummarizing chunks (Layer 2)...")
    for i, chunk in enumerate(tqdm(chunks, desc="Chunk summaries")):
        try:
            summary = chain.run([chunk])
        except Exception as e:
            summary = f"Error summarising chunk {i}: {e}"
        summaries.append(summary)
    return summaries

def build_embeddings():
    """Build multi-layered embeddings with progress tracking."""
    print("Loading text...")
    docs = load_text()

    print("Creating chunks...")
    chunks = create_chunks(docs)
    print(f"Total chunks created: {len(chunks)}\n")

    # Initialize embeddings with GPU support
    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={"device": "cuda" if torch.cuda.is_available() else "cpu"}
    )

    # ---------- Layer 1: Chunk embeddings ----------
    print("Building Layer 1 (chunk embeddings)...")
    chunk_embeddings = []
    for i, chunk in enumerate(tqdm(chunks, desc="Layer 1 Embedding")):
        emb = embedder.embed_documents([chunk.page_content])[0]  # compute embedding for this chunk
        chunk_embeddings.append(Document(page_content=chunk.page_content, metadata=chunk.metadata))
    
    # Create FAISS database with GPU acceleration
    db1 = FAISS.from_documents(chunk_embeddings, embedder)
    
    # Create directory if it doesn't exist
    os.makedirs(LAYER1_DB, exist_ok=True)
    db1.save_local(LAYER1_DB)
    print(f"Layer 1 saved at '{LAYER1_DB}'\n")

    # ---------- Layer 2: Summary embeddings ----------
    summaries = summarise_chunks(chunks)
    summary_docs = [
        Document(page_content=s, metadata={"type": "summary", "chunk_id": i})
        for i, s in enumerate(summaries)
    ]

    print("Building Layer 2 (summary embeddings)...")
    summary_embeddings = []
    for i, doc in enumerate(tqdm(summary_docs, desc="Layer 2 Embedding")):
        emb = embedder.embed_documents([doc.page_content])[0]
        summary_embeddings.append(Document(page_content=doc.page_content, metadata=doc.metadata))
    
    # Create FAISS database with GPU acceleration
    db2 = FAISS.from_documents(summary_embeddings, embedder)
    
    # Create directory if it doesn't exist
    os.makedirs(LAYER2_DB, exist_ok=True)
    db2.save_local(LAYER2_DB)
    print(f"Layer 2 saved at '{LAYER2_DB}'\n")

    print("âœ… Multi-layered embeddings built successfully!")
    print(f"GPU was used for embeddings: {torch.cuda.is_available()}")

if __name__ == "__main__":
    build_embeddings()

Loading text...
Creating chunks...
Total chunks created: 1248



  embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


Building Layer 1 (chunk embeddings)...


Layer 1 Embedding: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 1248/1248 [01:05<00:00, 19.19it/s]
  llm = Ollama(model="ollama3:4b")  # replace with deepseek-r1:1.5b if needed


Layer 1 saved at 'faiss_layer1'


Summarizing chunks (Layer 2)...


  summary = chain.run([chunk])
Chunk summaries: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 1248/1248 [42:48<00:00,  2.06s/it]


Building Layer 2 (summary embeddings)...


Layer 2 Embedding: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 1248/1248 [00:51<00:00, 24.07it/s]


Layer 2 saved at 'faiss_layer2'

âœ… Multi-layered embeddings built successfully!


In [1]:
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import Ollama
import pickle, faiss
import numpy as np

# Initialize embeddings model and LLM
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
llm = Ollama(model="gemma3:4b")

# Load raw chunks
with open("faiss_layer1/index.pkl", "rb") as f:
    raw_store_data = pickle.load(f)
raw_docstore = raw_store_data[0]

summary_texts = []
total_chunks = len(raw_docstore._dict)
print(f"Total chunks to summarize: {total_chunks}\n")

# Summarize each chunk and show progress
for i, doc_id in enumerate(raw_docstore._dict.keys(), start=1):
    chunk = raw_docstore._dict[doc_id]  # get the actual text
    try:
        summary = llm.invoke(
            f"Summarize this chunk factually and do not add any fictional events: {chunk}"
        )
        summary_texts.append({"id": doc_id, "text": summary})
        print(f"âœ… Chunk {i}/{total_chunks} summarized. Total summaries so far: {len(summary_texts)}")
    except Exception as e:
        print(f"âš  Failed chunk {i}/{total_chunks} (ID: {doc_id}): {e}")
        continue

# Build FAISS index for summaries
summary_vectors = [embedding_model.embed_text(s["text"]) for s in summary_texts]
dimension = len(summary_vectors[0])
index = faiss.IndexFlatL2(dimension)
index.add(np.array(summary_vectors).astype("float32"))

# Save docstore + index mapping
docstore = {s["id"]: s["text"] for s in summary_texts}
index_to_docstore_id = {i: s["id"] for i, s in enumerate(summary_texts)}

with open("faiss_layer2/index.pkl", "wb") as f:
    pickle.dump([docstore, index_to_docstore_id], f)

faiss.write_index(index, "faiss_layer2/index.faiss")

print(f"\nðŸŽ‰ Summary layer rebuild complete!")
print(f"Total successful summaries/embeddings: {len(summary_texts)}/{total_chunks}")

  from .autonotebook import tqdm as notebook_tqdm
  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
  llm = Ollama(model="gemma3:4b")


Total chunks to summarize: 1248

âš  Failed chunk 1/1248 (ID: 657db385-8fa7-4ce6-af78-90f50c30ab4e): HTTPConnectionPool(host='localhost', port=11434): Max retries exceeded with url: /api/generate (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x00000297D9211F50>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
âš  Failed chunk 2/1248 (ID: 8acd40df-d979-41bd-9081-3e93d5956590): HTTPConnectionPool(host='localhost', port=11434): Max retries exceeded with url: /api/generate (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x00000297D92AA490>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
âš  Failed chunk 3/1248 (ID: dc219ba5-7ed4-4560-8e04-2ce826d2e900): HTTPConnectionPool(host='localhost', port=11434): Max retries exceeded with url: /api/generate (Caused by NewConn

KeyboardInterrupt: 

In [3]:
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import Ollama
import pickle
import faiss
import numpy as np
import torch

# Check GPU availability
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"ðŸ”§ Using device: {device}")
if device == "cuda":
    print(f"   GPU: {torch.cuda.get_device_name(0)}")
    print(f"   Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB\n")

# Initialize embeddings model with GPU support
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={'device': device},
    encode_kwargs={'device': device, 'batch_size': 32}  # Batch processing for speed
)

# Initialize LLM (Ollama handles GPU automatically if available)
llm = Ollama(model="gemma3:4b")

# Load raw chunks
print("ðŸ“‚ Loading raw chunks...")
with open("faiss_layer1/index.pkl", "rb") as f:
    raw_store_data = pickle.load(f)
raw_docstore = raw_store_data[0]

summary_texts = []
total_chunks = len(raw_docstore._dict)
print(f"Total chunks to summarize: {total_chunks}\n")

# Summarize each chunk and show progress
for i, doc_id in enumerate(raw_docstore._dict.keys(), start=1):
    chunk = raw_docstore._dict[doc_id]
    try:
        summary = llm.invoke(
            f"Summarize this chunk factually and do not add any fictional events: {chunk}"
        )
        summary_texts.append({"id": doc_id, "text": summary})
        print(f"âœ… Chunk {i}/{total_chunks} summarized. Total summaries so far: {len(summary_texts)}")
    except Exception as e:
        print(f"âš  Failed chunk {i}/{total_chunks} (ID: {doc_id}): {e}")
        continue

print(f"\nðŸ”„ Generating embeddings on {device}...")

# Build embeddings in batches for efficiency
batch_size = 32
summary_vectors = []
for i in range(0, len(summary_texts), batch_size):
    batch = [s["text"] for s in summary_texts[i:i+batch_size]]
    batch_vectors = embedding_model.embed_documents(batch)
    summary_vectors.extend(batch_vectors)
    print(f"   Embedded {min(i+batch_size, len(summary_texts))}/{len(summary_texts)}")

# Convert to numpy array
summary_vectors_np = np.array(summary_vectors).astype("float32")
dimension = summary_vectors_np.shape[1]

print(f"\nðŸš€ Building FAISS index on {device}...")

# FAISS indexing (CPU-based, but embeddings were generated on GPU)
index = faiss.IndexFlatL2(dimension)
index.add(summary_vectors_np)
print(f"   âœ… FAISS index built with {len(summary_vectors_np)} vectors")

# Save docstore + index mapping
print("\nðŸ’¾ Saving index and docstore...")
docstore = {s["id"]: s["text"] for s in summary_texts}
index_to_docstore_id = {i: s["id"] for i, s in enumerate(summary_texts)}

with open("faiss_layer2/index.pkl", "wb") as f:
    pickle.dump([docstore, index_to_docstore_id], f)

faiss.write_index(index, "faiss_layer2/index.faiss")

print(f"\nðŸŽ‰ Summary layer rebuild complete!")
print(f"Total successful summaries/embeddings: {len(summary_texts)}/{total_chunks}")
print(f"Index dimension: {dimension}")
print(f"Device used: {device}")

ðŸ”§ Using device: cuda
   GPU: Quadro M1200
   Memory: 4.29 GB

ðŸ“‚ Loading raw chunks...
Total chunks to summarize: 1248

âœ… Chunk 1/1248 summarized. Total summaries so far: 1
âœ… Chunk 2/1248 summarized. Total summaries so far: 2
âœ… Chunk 3/1248 summarized. Total summaries so far: 3
âœ… Chunk 4/1248 summarized. Total summaries so far: 4
âœ… Chunk 5/1248 summarized. Total summaries so far: 5
âœ… Chunk 6/1248 summarized. Total summaries so far: 6
âœ… Chunk 7/1248 summarized. Total summaries so far: 7
âœ… Chunk 8/1248 summarized. Total summaries so far: 8
âœ… Chunk 9/1248 summarized. Total summaries so far: 9
âœ… Chunk 10/1248 summarized. Total summaries so far: 10
âœ… Chunk 11/1248 summarized. Total summaries so far: 11
âœ… Chunk 12/1248 summarized. Total summaries so far: 12
âœ… Chunk 13/1248 summarized. Total summaries so far: 13
âœ… Chunk 14/1248 summarized. Total summaries so far: 14
âœ… Chunk 15/1248 summarized. Total summaries so far: 15
âœ… Chunk 16/1248 summarized. Total su

FileNotFoundError: [Errno 2] No such file or directory: 'faiss_layer2/index.pkl'

In [4]:
import os
import pickle
import faiss

# make sure directory exists
os.makedirs("faiss_layer2", exist_ok=True)

# save docstore + mapping
with open("faiss_layer2/index.pkl", "wb") as f:
    pickle.dump([docstore, index_to_docstore_id], f)

# save FAISS index
faiss.write_index(index, "faiss_layer2/index.faiss")

print("Index and metadata saved successfully.")


Index and metadata saved successfully.
