In [2]:
# rag_with_my_model.py
import torch
import fitz  # PyMuPDF
import re
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer, CrossEncoder
import sentencepiece as spm
from decoder_only_gpt import My_GPT_model

In [3]:
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# 1. Load tokenizer & model (tera trained model)
sp = spm.SentencePieceProcessor()
sp.load("hindi_tokenizer_new.model")

True

In [4]:
model = My_GPT_model(
    vocab_size=sp.get_piece_size(),
    num_layers=12,
    d_model=512,
    d_ff=2048,
    num_heads=8,
    seq_len=512
).to(DEVICE)

# Load final SFT checkpoint
model.load_state_dict(torch.load("full_sft_final.pt", map_location=DEVICE))
model.eval()

My_GPT_model(
  (decoder): Decoder(
    (embedding): Embedding(32768, 512)
    (layers): ModuleList(
      (0-11): 12 x Decoder_GPT_Block(
        (swi_glu): SwiGLU_FFN(
          (w1): Linear(in_features=512, out_features=1536, bias=False)
          (w2): Linear(in_features=512, out_features=1536, bias=False)
          (w3): Linear(in_features=1536, out_features=512, bias=False)
          (act): SiLU()
        )
        (masked_mha): Masked_MHA(
          (Q): Linear(in_features=512, out_features=512, bias=True)
          (K): Linear(in_features=512, out_features=512, bias=True)
          (V): Linear(in_features=512, out_features=512, bias=True)
          (fc_out): Linear(in_features=512, out_features=512, bias=True)
        )
        (rms_norm0): RMSNorm()
        (rms_norm1): RMSNorm()
        (dropout): Dropout(p=0.1, inplace=False)
      )
    )
    (norm): RMSNorm()
  )
  (lm_head): Linear(in_features=512, out_features=32768, bias=False)
)

In [5]:
# 2. Load embedding & reranker
embed_model = SentenceTransformer("intfloat/multilingual-e5-base")
reranker = CrossEncoder("cross-encoder/mmarco-mMiniLMv2-L12-H384-v1")

In [14]:
print("Vocab size from tokenizer:", sp.get_piece_size())
print("Model vocab size:", model.lm_head.out_features)  # should match

Vocab size from tokenizer: 32768
Model vocab size: 32768


In [6]:
# 3. Clean text function (tera previous wala use kar sakta hai)
def clean_hindi_text(text):
    text = re.sub(r'\s+', ' ', text.strip())
    text = re.sub(r'[⁇�]', '', text)
    return text

In [7]:
# 4. Chunking function (sentence level)
def chunk_text(text, max_tokens=250, overlap=50):
    tokens = sp.encode(text)
    chunks = []
    i = 0
    while i < len(tokens):
        chunk = tokens[i:i + max_tokens]
        chunks.append(sp.decode(chunk))
        i += max_tokens - overlap
    return chunks

In [8]:
# 5. Build FAISS index
def build_index(chunks):
    texts = ["passage: " + chunk for chunk in chunks]
    embeddings = embed_model.encode(texts, normalize_embeddings=True, batch_size=32)
    dim = embeddings.shape[1]
    index = faiss.IndexFlatIP(dim)
    index.add(embeddings.astype(np.float32))
    return index, chunks

In [9]:
# 6. Retrieve + rerank
def retrieve(question, index, chunks, top_k=8, rerank_k=3, min_score=0.55):
    q_emb = embed_model.encode(["query: " + question], normalize_embeddings=True)
    scores, idxs = index.search(q_emb, top_k)
    
    candidates = []
    for i, score in zip(idxs[0], scores[0]):
        if score >= min_score:
            candidates.append(chunks[i])
    
    if not candidates:
        return []
    
    # Rerank
    pairs = [(question, c) for c in candidates]
    rerank_scores = reranker.predict(pairs)
    ranked = sorted(zip(candidates, rerank_scores), key=lambda x: x[1], reverse=True)
    
    context_texts = [text for text, _ in ranked[:rerank_k]]
    
    # Tokenize context
    context_ids = []
    for t in context_texts:
        context_ids += sp.encode(f"[संदर्भ] {t}\n")
    
    return context_ids[:300]  # limit for model

In [31]:
# 7. Prompt builder (strict)
def build_prompt(context_ids, question):
    instruction = """
सिर्फ़ नीचे दिए संदर्भ से ही जवाब दो।
संदर्भ से बाहर एक शब्द भी मत लिखो।
उत्तर 1-2 वाक्य से ज्यादा मत बनाओ।
अगर संदर्भ में जवाब नहीं तो सिर्फ़ लिखो: "संदर्भ में जानकारी नहीं है।"
"""
    prompt_ids = (
        sp.encode(instruction + "\nसंदर्भ:\n") +
        context_ids +
        sp.encode(f"\nप्रश्न: {question}\nउत्तर:")
    )
    return prompt_ids

In [35]:
@torch.no_grad()
def generate(prompt_ids, max_new_tokens=80, temperature=0.6, top_p=0.85, repetition_penalty=1.4):
    input_ids = torch.tensor([prompt_ids], dtype=torch.long, device=DEVICE)
    
    for _ in range(max_new_tokens):
        logits = model(input_ids)[:, -1, :]
        
        # Repetition penalty
        if repetition_penalty != 1.0:
            recent = input_ids[0, -64:].tolist()
            for tid in set(recent):
                logits[0, tid] /= repetition_penalty
        
        logits = logits / temperature
        probs = torch.softmax(logits, dim=-1)
        
        sorted_probs, sorted_idx = torch.sort(probs, descending=True)
        cumulative_probs = torch.cumsum(sorted_probs, dim=-1)
        
        sorted_indices_to_remove = cumulative_probs > top_p
        sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
        sorted_indices_to_remove[..., 0] = False
        sorted_probs[sorted_indices_to_remove] = 0.0
        
        if sorted_probs.sum() == 0:
            next_token = torch.argmax(probs, dim=-1).unsqueeze(0)
        else:
            sorted_probs /= sorted_probs.sum(dim=-1, keepdim=True)
            next_token = torch.multinomial(sorted_probs, num_samples=1)
            next_token = sorted_idx.gather(-1, next_token)
        
        input_ids = torch.cat([input_ids, next_token], dim=1)
        
        if next_token.item() == sp.eos_id():
            break
    
    generated_ids = input_ids[0, len(prompt_ids):].tolist()
    return sp.decode(generated_ids).strip()

In [36]:
# 9. Main RAG function
def rag_on_pdf(pdf_path, question):
    # PDF load + clean + chunk (tera code se)
    doc = fitz.open(pdf_path)
    text = ""
    for page in doc:
        text += page.get_text()
    doc.close()
    clean_text = clean_hindi_text(text)
    chunks = chunk_text(clean_text, max_tokens=250, overlap=50)

    # Embed + FAISS index
    index, chunks = build_index(chunks)  # tera function

    # Retrieve context
    context_ids = retrieve(question, index, chunks, top_k=8, rerank_k=3, min_score=0.55)

    if not context_ids:
        return "संदर्भ में जानकारी नहीं है।"

    # Strict prompt
    instruction = """
सिर्फ़ नीचे दिए संदर्भ से ही जवाब दो।
संदर्भ से बाहर एक शब्द भी मत लिखो।
उत्तर 1-2 वाक्य से ज्यादा मत बनाओ।
अगर संदर्भ में जवाब नहीं तो सिर्फ़ लिखो: "संदर्भ में जानकारी नहीं है।"
"""
    prompt_ids = (
        sp.encode(instruction + "\nसंदर्भ:\n") +
        context_ids +
        sp.encode(f"\nप्रश्न: {question}\nउत्तर:")
    )

    # Generate with new function
    answer = generate(prompt_ids, max_new_tokens=60, temperature=0.5, top_p=0.8, repetition_penalty=1.6)

    return answer.strip()

In [37]:
# Example run
pdf_file = "gandhi.pdf"  # ya jo bhi PDF hai
question = "महात्मा गांधी का जन्म कब हुआ था?"

answer = rag_on_pdf(pdf_file, question)
print("Answer:", answer)

Answer: ⁇ 19980 ⁇  9 ⁇ 19 ⁇  003 ⁇  99 ⁇   ⁇  9 ⁇  9 ⁇  9 ⁇  9 ⁇  9 ⁇  9 ⁇  9 ⁇  9 ⁇  9 ⁇  9 ⁇  9 ⁇  9 ⁇  9 ⁇  9 ⁇  9 ⁇  9 ⁇  9 ⁇  9 ⁇  9 ⁇


In [38]:
test_text = "महात्मा गांधी का जन्म 2 अक्टूबर 1869 को हुआ था।"
ids = sp.encode(test_text)
print("IDs:", ids)
decoded = sp.decode(ids)
print("Decoded:", decoded)  # exact same text aana chahiye

IDs: [5209, 1187, 40, 1424, 2704, 2227, 12589, 28948, 28940, 28, 382, 132, 28869]
Decoded: महात्मा गांधी का जन्म 2 अक्टूबर 1869 को हुआ था।


In [25]:
prompt_text = "महात्मा गांधी का जन्म "
prompt_ids = sp.encode(prompt_text)
input_ids = torch.tensor([prompt_ids], device=DEVICE)

model.eval()
with torch.no_grad():
    for _ in range(30):
        logits = model(input_ids)
        next_token = torch.argmax(logits[:, -1, :], dim=-1).unsqueeze(0)
        input_ids = torch.cat([input_ids, next_token], dim=1)
        if next_token.item() == sp.eos_id():
            break

generated = sp.decode(input_ids[0].tolist())
print("Generated:", generated[len(prompt_text):].strip())

Generated: ⁇ । ⁇ । ⁇ । ⁇ । ⁇ । ⁇ । ⁇ । ⁇ । ⁇ । ⁇ । ⁇ । ⁇ । ⁇ । ⁇ । ⁇


In [14]:
# debug_rag_pipeline.py
import torch
import fitz  # PyMuPDF
import re
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer, CrossEncoder
import sentencepiece as spm
from decoder_only_gpt import My_GPT_model

In [15]:
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Device: {DEVICE}")

Device: cuda


In [16]:
# ================================================
# 1. Load tokenizer & model
print("\n=== Step 1: Loading tokenizer & model ===")
sp = spm.SentencePieceProcessor()
sp.load("hindi_tokenizer_new.model")
print("Tokenizer loaded. Vocab size:", sp.get_piece_size())


=== Step 1: Loading tokenizer & model ===
Tokenizer loaded. Vocab size: 32768


In [17]:
model = My_GPT_model(
    vocab_size=sp.get_piece_size(),
    num_layers=12,
    d_model=512,
    d_ff=2048,
    num_heads=8,
    seq_len=512
).to(DEVICE)

print("Loading SFT checkpoint...")
model.load_state_dict(torch.load("full_sft_final.pt", map_location=DEVICE))
model.eval()
print("Model loaded successfully!")

Loading SFT checkpoint...
Model loaded successfully!


In [42]:
model

My_GPT_model(
  (decoder): Decoder(
    (embedding): Embedding(32768, 512)
    (layers): ModuleList(
      (0-11): 12 x Decoder_GPT_Block(
        (swi_glu): SwiGLU_FFN(
          (w1): Linear(in_features=512, out_features=1536, bias=False)
          (w2): Linear(in_features=512, out_features=1536, bias=False)
          (w3): Linear(in_features=1536, out_features=512, bias=False)
          (act): SiLU()
        )
        (masked_mha): Masked_MHA(
          (Q): Linear(in_features=512, out_features=512, bias=True)
          (K): Linear(in_features=512, out_features=512, bias=True)
          (V): Linear(in_features=512, out_features=512, bias=True)
          (fc_out): Linear(in_features=512, out_features=512, bias=True)
        )
        (rms_norm0): RMSNorm()
        (rms_norm1): RMSNorm()
        (dropout): Dropout(p=0.1, inplace=False)
      )
    )
    (norm): RMSNorm()
  )
  (lm_head): Linear(in_features=512, out_features=32768, bias=False)
)

In [33]:
# 2. Load embedding & reranker
print("\n=== Step 2: Loading embedder & reranker ===")
embed_model = SentenceTransformer("intfloat/multilingual-e5-base", device=DEVICE)
reranker = CrossEncoder("cross-encoder/mmarco-mMiniLMv2-L12-H384-v1", device=DEVICE)
print("Embedder & reranker loaded!")

'(ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')), '(Request ID: 053ab8c7-d83b-4176-937a-666faf2cfa97)')' thrown while requesting HEAD https://huggingface.co/intfloat/multilingual-e5-base/resolve/main/modules.json
Retrying in 1s [Retry 1/5].



=== Step 2: Loading embedder & reranker ===
Embedder & reranker loaded!


In [34]:
def clean_hindi_text(text):
    if not text:
        return ""

    # Remove non-printable characters
    text = re.sub(r'[\x00-\x1F\x7F]', ' ', text)

    # Fix common PDF junk chars
    text = re.sub(r'[�•ﬁﬂ–—]', ' ', text)

    # Normalize whitespace
    text = re.sub(r'\s+', ' ', text).strip()

    return text

In [35]:
cl = clean_hindi_text("महात्मा गांधी का जन्म कब हुआ था?")
cl

'महात्मा गांधी का जन्म कब हुआ था?'

In [7]:
# 4. Chunk text
def chunk_text(text, max_tokens=250, overlap=50):
    tokens = sp.encode(text)
    chunks = []
    i = 0
    while i < len(tokens):
        chunk = tokens[i:i + max_tokens]
        chunks.append(sp.decode(chunk))
        i += max_tokens - overlap
    return chunks

In [20]:
ct = chunk_text("महात्मा गांधी का जन्म कब हुआ था?")
ct

['महात्मा गांधी का जन्म कब हुआ था?']

In [36]:
def build_index(chunks):
    print("\n=== Step 5: Building FAISS index ===")

    clean_chunks = [c for c in chunks if len(c.strip()) > 20]
    print(f"Filtered chunks: {len(clean_chunks)} / {len(chunks)}")

    texts = ["passage: " + c for c in clean_chunks]
    embeddings = embed_model.encode(
        texts,
        normalize_embeddings=True,
        batch_size=32,
        show_progress_bar=True
    )

    dim = embeddings.shape[1]
    index = faiss.IndexFlatIP(dim)
    index.add(embeddings.astype(np.float32))

    print("FAISS index built!")
    return index, clean_chunks

In [37]:
def retrieve(question, index, chunks, top_k=8, rerank_k=3, min_score=0.35):
    print("\n=== Step 6: Retrieving context ===")
    print(f"Question: {question}")

    q_emb = embed_model.encode(
        ["query: " + question],
        normalize_embeddings=True
    ).astype(np.float32)

    scores, idxs = index.search(q_emb, top_k)

    candidates = []
    for i, score in zip(idxs[0], scores[0]):
        if score >= min_score:
            candidates.append(chunks[i])

    if not candidates:
        print("No candidates above threshold, fallback to top-2")
        candidates = [chunks[i] for i in idxs[0][:2]]

    # Rerank
    pairs = [(question, c[:512]) for c in candidates]
    rerank_scores = reranker.predict(pairs)

    ranked = sorted(
        zip(candidates, rerank_scores),
        key=lambda x: x[1],
        reverse=True
    )

    context_texts = [t for t, _ in ranked[:rerank_k]]

    # Pack context safely
    MAX_CONTEXT_TOKENS = 350
    context_ids = []
    for t in context_texts:
        ids = sp.encode(f"[संदर्भ] {t}\n")
        if len(context_ids) + len(ids) > MAX_CONTEXT_TOKENS:
            break
        context_ids.extend(ids)

    print(f"Context tokens: {len(context_ids)}")
    return context_ids

In [43]:
def build_prompt(context_ids, question):
    print("\n=== Step 7: Building prompt ===")

    instruction = (
        "सिर्फ़ नीचे दिए संदर्भ से ही जवाब दो।\n"
        "संदर्भ से बाहर एक शब्द भी मत लिखो।\n"
        "उत्तर 1-2 वाक्य से ज्यादा मत बनाओ।\n"
        "अगर संदर्भ में जवाब नहीं तो सिर्फ़ लिखो: \"संदर्भ में जानकारी नहीं है।\""
    )

    prompt_ids = (
        sp.encode(instruction + "\n\nसंदर्भ:\n") +
        context_ids +
        sp.encode(f"\nप्रश्न: {question}\nउत्तर:")
    )

    if len(prompt_ids) > 512:
        print("⚠️ Prompt too long, truncating")
        prompt_ids = prompt_ids[-512:]

    print(f"Prompt length: {len(prompt_ids)} tokens")
    return prompt_ids

In [44]:
@torch.no_grad()
def generate(prompt_ids, max_new_tokens=80, temperature=0.6, top_p=0.85, repetition_penalty=1.4):
    print("\n=== Step 8: Generating answer ===")
    input_ids = torch.tensor([prompt_ids], dtype=torch.long, device=DEVICE)

    generated_tokens = []
    END_ID = sp.piece_to_id("<END_ANSWER>")

    for step in range(max_new_tokens):

        if input_ids.size(1) > 512:
            input_ids = input_ids[:, -512:]

        logits = model(input_ids)[:, -1, :]

        if repetition_penalty != 1.0:
            recent = input_ids[0, -64:].tolist()
            for tid in set(recent):
                if logits[0, tid] > 0:
                    logits[0, tid] /= repetition_penalty
                else:
                    logits[0, tid] *= repetition_penalty

        logits = logits / temperature
        probs = torch.softmax(logits, dim=-1)

        sorted_probs, sorted_idx = torch.sort(probs, descending=True)
        cumulative_probs = torch.cumsum(sorted_probs, dim=-1)

        sorted_indices_to_remove = cumulative_probs > top_p
        sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
        sorted_indices_to_remove[..., 0] = False
        sorted_probs[sorted_indices_to_remove] = 0.0

        if torch.isnan(sorted_probs).any() or sorted_probs.sum() <= 0:
            next_token = torch.argmax(probs, dim=-1, keepdim=True)
        else:
            sorted_probs /= sorted_probs.sum(dim=-1, keepdim=True)
            next_token = torch.multinomial(sorted_probs, num_samples=1)
            next_token = sorted_idx.gather(-1, next_token)

        tid = next_token.item()
        generated_tokens.append(tid)
        input_ids = torch.cat([input_ids, next_token], dim=1)

        if tid == sp.eos_id() or tid == END_ID:
            break

    answer = sp.decode(generated_tokens).strip()
    print("Raw generated:", answer)
    return answer

In [45]:
def rag_on_pdf(pdf_path, question):
    print("\n" + "="*60)
    print(f"QUESTION: {question}")
    print("="*60)

    # Load PDF
    print("\nLoading PDF...")
    doc = fitz.open(pdf_path)
    text = ""
    for page in doc:
        text += page.get_text() + "\n"
    doc.close()

    print(f"Raw text length: {len(text)} chars")

    # Clean
    clean_text = clean_hindi_text(text)
    print(f"Clean text length: {len(clean_text)} chars")

    if len(clean_text) < 100:
        return "PDF में पर्याप्त सामग्री नहीं है।"

    # Chunk
    chunks = chunk_text(clean_text)
    print(f"Number of chunks: {len(chunks)}")
    print("Sample chunk:", chunks[0][:300] + "..." if chunks else "No chunks")

    # Build index
    index, chunks = build_index(chunks)

    # Retrieve
    context_ids = retrieve(question, index, chunks)
    if not context_ids:
        return "संदर्भ में जानकारी नहीं है।"

    # Prompt
    prompt_ids = build_prompt(context_ids, question)

    # Generate
    answer = generate(prompt_ids)

    print("\nFinal Answer:", answer)
    return answer

In [47]:
print(answer_ids)
print([sp.id_to_piece(i) for i in answer_ids])

NameError: name 'answer_ids' is not defined

In [46]:
# Run example
pdf_file = "gandhi.pdf"  # change to your PDF
questions = [
    "महात्मा गांधी का जन्म कब हुआ था?",
    "उनके पिता का नाम क्या था?",
    "गांधीजी की शादी किससे हुई थी?",
    "फ्रांस की राजधानी क्या है?"  # fallback check
]

for q in questions:
    rag_on_pdf(pdf_file, q)
    print("\n" + "-"*80 + "\n")


QUESTION: महात्मा गांधी का जन्म कब हुआ था?

Loading PDF...
Raw text length: 3162 chars
Clean text length: 2361 chars
Number of chunks: 4
Sample chunk: महात्मा गांधी की जीवनी महात्मा गांधी, जिन्हेंबापूया राष्ट्रपिता कहा जाता है, भारतकेस्वतंत्रता संग्रामकेसबसेप्रमुखनेता थे।उनका पूरा नाम मोहनदासकरमचंदगांधी था। जन्मऔरप्रारंभिकजीवन महात्मा गांधी का जन्म2 अक्टूबर1869 को गुजरातकेपोरबंदरमेंहुआथा।उनकेपिता करमचंदगांधी पोरबंदरराज्य केदीवानथेऔरमाता पुतलीबाईधा...

=== Step 5: Building FAISS index ===
Filtered chunks: 4 / 4


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

FAISS index built!

=== Step 6: Retrieving context ===
Question: महात्मा गांधी का जन्म कब हुआ था?
Context tokens: 258

=== Step 7: Building prompt ===
Prompt length: 335 tokens

=== Step 8: Generating answer ===
Raw generated: ⁇

Final Answer: ⁇

--------------------------------------------------------------------------------


QUESTION: उनके पिता का नाम क्या था?

Loading PDF...
Raw text length: 3162 chars
Clean text length: 2361 chars
Number of chunks: 4
Sample chunk: महात्मा गांधी की जीवनी महात्मा गांधी, जिन्हेंबापूया राष्ट्रपिता कहा जाता है, भारतकेस्वतंत्रता संग्रामकेसबसेप्रमुखनेता थे।उनका पूरा नाम मोहनदासकरमचंदगांधी था। जन्मऔरप्रारंभिकजीवन महात्मा गांधी का जन्म2 अक्टूबर1869 को गुजरातकेपोरबंदरमेंहुआथा।उनकेपिता करमचंदगांधी पोरबंदरराज्य केदीवानथेऔरमाता पुतलीबाईधा...

=== Step 5: Building FAISS index ===
Filtered chunks: 4 / 4


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

FAISS index built!

=== Step 6: Retrieving context ===
Question: उनके पिता का नाम क्या था?
Context tokens: 258

=== Step 7: Building prompt ===
Prompt length: 334 tokens

=== Step 8: Generating answer ===
Raw generated: ⁇

Final Answer: ⁇

--------------------------------------------------------------------------------


QUESTION: गांधीजी की शादी किससे हुई थी?

Loading PDF...
Raw text length: 3162 chars
Clean text length: 2361 chars
Number of chunks: 4
Sample chunk: महात्मा गांधी की जीवनी महात्मा गांधी, जिन्हेंबापूया राष्ट्रपिता कहा जाता है, भारतकेस्वतंत्रता संग्रामकेसबसेप्रमुखनेता थे।उनका पूरा नाम मोहनदासकरमचंदगांधी था। जन्मऔरप्रारंभिकजीवन महात्मा गांधी का जन्म2 अक्टूबर1869 को गुजरातकेपोरबंदरमेंहुआथा।उनकेपिता करमचंदगांधी पोरबंदरराज्य केदीवानथेऔरमाता पुतलीबाईधा...

=== Step 5: Building FAISS index ===
Filtered chunks: 4 / 4


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

FAISS index built!

=== Step 6: Retrieving context ===
Question: गांधीजी की शादी किससे हुई थी?
Context tokens: 258

=== Step 7: Building prompt ===
Prompt length: 334 tokens

=== Step 8: Generating answer ===
Raw generated: ⁇

Final Answer: ⁇

--------------------------------------------------------------------------------


QUESTION: फ्रांस की राजधानी क्या है?

Loading PDF...
Raw text length: 3162 chars
Clean text length: 2361 chars
Number of chunks: 4
Sample chunk: महात्मा गांधी की जीवनी महात्मा गांधी, जिन्हेंबापूया राष्ट्रपिता कहा जाता है, भारतकेस्वतंत्रता संग्रामकेसबसेप्रमुखनेता थे।उनका पूरा नाम मोहनदासकरमचंदगांधी था। जन्मऔरप्रारंभिकजीवन महात्मा गांधी का जन्म2 अक्टूबर1869 को गुजरातकेपोरबंदरमेंहुआथा।उनकेपिता करमचंदगांधी पोरबंदरराज्य केदीवानथेऔरमाता पुतलीबाईधा...

=== Step 5: Building FAISS index ===
Filtered chunks: 4 / 4


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

FAISS index built!

=== Step 6: Retrieving context ===
Question: फ्रांस की राजधानी क्या है?
Context tokens: 262

=== Step 7: Building prompt ===
Prompt length: 337 tokens

=== Step 8: Generating answer ===
Raw generated: ⁇

Final Answer: ⁇

--------------------------------------------------------------------------------

