# RAG

In [1]:
import os
from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader, PyMuPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from pathlib import Path
from datetime import datetime, UTC
from langchain_ollama import OllamaEmbeddings
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams, PointStruct
from langchain.chat_models import init_chat_model


  from pydantic.v1.fields import FieldInfo as FieldInfoV1


## Read Files

In [2]:
def load_documents(pdf_directory: str):
    pdf_dir = Path(pdf_directory)
    pdf_files = list(pdf_dir.glob("**/*.pdf"))

    print(f"Files found: {len(pdf_files)}")

    print(f"Loading all pdf file.")

    all_documents = []

    for pdf_file in pdf_files:
        try:
            print(f"Loading file: {pdf_file.name}.")

            loader = PyPDFLoader(pdf_file)
            documents = loader.load()

            for document in documents:
                document.metadata["embedded_at"] = datetime.now(UTC)

            all_documents.extend(documents)

            print(f"Loaded file: {pdf_file.name}, documents: {len(documents)}.")
        except Exception as e:
            print(f"Error in loading file: {pdf_file.name}, Error:{e}.")
    return all_documents


In [3]:
# Load every PDF under ../data/pdf (path is relative to the notebook's working directory).
all_documents = load_documents("../data/pdf")

Files found: 3
Loading all pdf file.
Loading file: AtomicHabits.pdf.
Loaded file: AtomicHabits.pdf, documents: 256.
Loading file: harmony-in-marriage.pdf.
Loaded file: harmony-in-marriage.pdf, documents: 138.
Loading file: Love-Stories.pdf.
Loaded file: Love-Stories.pdf, documents: 128.


In [4]:
# Total number of documents loaded across all PDF files.
len(all_documents)

522

### Text Splitting

In [7]:
def split_documents(documents, chunk_size = 1000, chunk_overlap = 200):
    """Split documents into overlapping chunks.

    Args:
        documents: Documents to split.
        chunk_size: Target chunk size, measured with ``len``.
        chunk_overlap: Overlap between consecutive chunks, measured with ``len``.

    Returns:
        The list of chunk documents produced by the splitter.
    """
    splitter_options = {
        "chunk_size": chunk_size,
        "chunk_overlap": chunk_overlap,
        "length_function": len,
        # Only paragraph and line breaks are used as split points.
        "separators": ["\n\n", "\n"],
    }
    chunks = RecursiveCharacterTextSplitter(**splitter_options).split_documents(documents)

    print(f"original: {len(documents)}, splitted: {len(chunks)}")

    return chunks

In [8]:
# Split the loaded documents using the default chunk size (1000) and overlap (200).
splitted_documents = split_documents(all_documents)

original: 522, splitted: 1512


In [9]:
def embed_chunks(chunks):
    """Embed a list of texts with the Ollama ``nomic-embed-text`` model.

    Args:
        chunks: Texts to embed.

    Returns:
        The embeddings returned by ``embed_documents`` for ``chunks``.
    """
    ollama_embedder = OllamaEmbeddings(model = "nomic-embed-text")

    print(f"Embedding chunks: {len(chunks)}")
    vectors = ollama_embedder.embed_documents(chunks)
    print(f"Embedded chunks: {len(vectors)}")

    return vectors


In [10]:
# The embedder takes plain strings, so pull the text out of each chunk document.
all_contents = [document.page_content for document in splitted_documents]

In [11]:
# Embed every chunk text; the result is index-aligned with all_contents.
all_embeddings = embed_chunks(all_contents)

Embedding chunks: 1512
Embedded chunks: 1512


In [12]:
def add_to_vector_db(chunks, vactors):
    """Store chunks and their embedding vectors in the "RAG" Qdrant collection.

    Each point's payload is the chunk's metadata plus a ``content`` entry
    holding the chunk text. Point ids are the positional index of the chunk.

    Args:
        chunks: Chunk documents; each must expose ``metadata`` (a dict) and
            ``page_content``.
        vactors: Embedding vectors, one per chunk and in the same order.

    Raises:
        ValueError: If ``chunks`` and ``vactors`` differ in length.
    """
    if len(chunks) != len(vactors):
        raise ValueError(
            f"chunks and vectors must have the same length: {len(chunks)} != {len(vactors)}"
        )

    if not vactors:
        # Nothing to store; also avoids indexing vactors[0] below.
        print("No chunks to add.")
        return

    # Build payloads from the `chunks` argument (not a notebook global) and
    # copy the metadata so the caller's documents are left unmodified.
    point_structs = [
        PointStruct(
            id=index,
            vector=vector,
            payload={**chunk.metadata, "content": chunk.page_content},
        )
        for index, (chunk, vector) in enumerate(zip(chunks, vactors))
    ]

    client = QdrantClient(url="http://localhost:6333")

    # Create the collection only when it is missing so the cell can be re-run.
    if not client.collection_exists(collection_name="RAG"):
        client.create_collection(
            collection_name="RAG",
            vectors_config=VectorParams(size=len(vactors[0]), distance=Distance.COSINE),
        )

    operation_info = client.upsert(
        collection_name="RAG",
        wait=True,
        points=point_structs,
    )

    print(operation_info)
    

In [13]:
# len(all_embeddings[0])
# Upload every chunk together with its embedding to the Qdrant collection.
add_to_vector_db(splitted_documents, all_embeddings)

operation_id=1 status=<UpdateStatus.COMPLETED: 'completed'>


## Query

In [14]:
def search(vactor, limit = 5):
    """Query the "RAG" collection with the first vector in ``vactor``.

    Args:
        vactor: Sequence of query vectors; only ``vactor[0]`` is used.
        limit: Maximum number of points to return.

    Returns:
        The ``points`` of the query response, with payloads included.
    """
    qdrant = QdrantClient(url="http://localhost:6333")
    query_response = qdrant.query_points(
        collection_name="RAG",
        query=vactor[0],
        with_payload=True,
        limit=limit,
    )
    return query_response.points

In [None]:
def generate_output(query, context_dict):
    """Answer ``query`` with the chat model, using the retrieved context.

    Args:
        query: The user's question.
        context_dict: Iterable of payload dicts for the retrieved chunks. Each
            is read with ``.get`` for the keys ``content``, ``source`` and
            ``page_label``.

    Returns:
        A dict with the rendered ``"prompt"`` string and the model's
        ``"response"``.
    """
    # Read the context from the argument rather than from a notebook-level
    # variable, so the function works on a fresh kernel and with any caller.
    payloads = list(context_dict)

    contents = [payload.get('content') for payload in payloads]
    references = [
        f"file: {payload.get('source')}, Page: {payload.get('page_label')}"
        for payload in payloads
    ]

    content = " NEW_TEXT_SEPARATOR ".join(contents)
    reference = "|".join(references)

    prompt = f"""
    You are a precise research assistant. Use the provided context to answer the query.

    Instructions:
    Use provided context. If the answer isn't there, say you don't know.
    For every claim, cite the source in this format: [Book Name, pg. #].
    List all unique sources used in a "References" section at the end.

    Context: > {content}
    Context Reference: > {reference}

    Question: > {query}
     """

    model = init_chat_model("gpt-oss:120b-cloud", model_provider="Ollama")
    response = model.invoke(prompt)
    return { "prompt": prompt, "response": response }


In [31]:
# End-to-end query: embed the question, fetch the top 10 hits, then ask the chat model.
user_query = "all list of Chapters of atomic harmony of merriage"

# embed_chunks returns a list of vectors; search() uses the first one.
query_embedding = embed_chunks([user_query])
search_results = search(query_embedding, 10)
# Keep only the stored payload of each hit.
context = [search_result.payload for search_result in search_results]
response = generate_output(user_query, context)

print(response['response'].content)

Embedding chunks: 1
Embedded chunks: 1
**Chapters (as listed in the source “Harmony in Marriage”)**

1. **Fault of dominance in a husband**  
2. **Husbands! Act according to Dada’s Vision**  
3. **Unwind the strings of bondage with wife in this manner**  
4. **Recognition of the highest love**  
5. **Marriage is a promissory note**  
6. **Quarrels with wife**  
7. **She will avenge you with a sledgehammer**  
8. **Complaints from the wife**  
9. **Results of divorce**  
10. **The essence of the seven steps taken during marriage ceremony**  
11. **Differences in the intrinsic qualities of men and women**  
12. **When sex stops, love starts**  
13. **Mystery behind meeting each other life after life**  
14. **You will not meet the same partner life after life**  
15. **Ideal interactions in married life**  
16. **Pratikraman Vidhi**  

*These titles correspond to the numbered sections (12 – 27) shown in the “Harmony in Marriage” manuscript.*  

**References**

- *Harmony in Marriage*, pg

In [28]:
# Show the exact prompt that was sent to the model.
print(response['prompt'])


    You are a precise research assistant. Use the provided context to answer the query.

    Instructions:
    Only use the provided context. If the answer isn't there, say you don't know.
    For every claim, cite the source in this format: [Book Name, pg. #].
    List all unique sources used in a "References" section at the end.

    Context: > or “head of the household (husband),” implying that the wife
is subordinate. Because it is a partnership, you do not have
any ownership over your wife. You cannot make demands on
her. You have to explain things to her in order to get your
work done.
Questioner : In our wedding ceremony, the bride’s
parents perform the ritual of kanyadaan (kanya: unmarried
girl; daan: gift, donation) in which they give the bride as a gift.
Does that not make us their owners?
Dadashri : That is not for the civilized community. That
is for the uncivilized community. In our civilized community, men
should take every care to make sure that the wife does not
suffer

In [None]:
# Show the text of the model's answer.
print(response['response'].content)

Happiness in marriage, according to Dadashri’s teachings, comes from treating the relationship as a true partnership rather than as a ownership‑based arrangement.  Key points are:

* **See the wife as a partner, not property.**  In a civilized community the husband must recognise that “you do not have any ownership over your wife…you cannot make demands on her. You have to explain things to her in order to get your work done” — this eliminates the dominant, owning attitude that breeds unhappiness. [Harmony in Marriage, pg. 40]

* **Take care that the wife does not suffer.**  A husband’s own happiness is inseparable from his wife’s well‑being: “no man has ever been happy by making his wife miserable, and no woman who has made her husband miserable has been happy either.” [Harmony in Marriage, pg. 126]

* **Communicate rather than command.**  Because the marriage is a partnership, the husband should *explain* his needs and plans, allowing the wife to cooperate voluntarily instead of feel