In [5]:
!pip install langchain-community
!pip install faiss-cpu

Collecting langchain-community
  Downloading langchain_community-0.3.27-py3-none-any.whl.metadata (2.9 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.10.1-py3-none-any.whl.metadata (3.4 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.1-py3-none-any.whl.metadata (9.4 kB)
Collecting packaging<25,>=23.2 (from langchain-core<1.0.0,>=0.3.66->langchain-community)
  Downloading packaging-24.2-py3-none-any.whl.metadata (3.2 kB)
Collecting python-dotenv>=0.21.0 (from pydantic-settings<3.0.0,>=2.4.0->langchain-community)
  Downloading python_dotenv-1.1.1-py3-none-any.whl.metadata (24 kB)
Downloading langchain_community-0.3.27-py3-none-any.whl (2.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m37.3 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading httpx_sse-0.4.1-py3-none-any.whl (8.1 kB)
Downloading pydantic_settings-2.10.1-py3-none-an

In [6]:
import os
import json
import pandas as pd
import torch
from tqdm import tqdm
from typing import List, Dict
from kaggle_secrets import UserSecretsClient
from huggingface_hub import login
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.documents import Document
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Read the Hugging Face token from Kaggle's secret store (kept out of the
# notebook source) and log in to the Hub with it.
# Presumably required because the LLM checkpoint is access-gated - confirm.
user_secrets = UserSecretsClient()
hf_token = user_secrets.get_secret("hf_hub_token")
login(hf_token)

# Input / output locations on the Kaggle filesystem.
CSV_FILE = "/kaggle/input/quran-translation/Muhammad_Tahir-ul-Qadri_translation.csv"  # source CSV turned into documents
QUESTIONS_FILE = "/kaggle/input/test-jsonl/test.jsonl"  # JSON Lines file with the questions
OUTPUT_FILE = "/kaggle/working/rag_mistral_output.jsonl"  # where the answers are written
INDEX_PATH = "/kaggle/working/quran_faiss_index"  # on-disk cache of the FAISS index

# Retrieval / generation settings.
K = 4  # number of chunks the retriever returns per question
MAX_TOKENS = 512  # passed as max_new_tokens when generating an answer
CHUNK_SIZE = 1764  # chunk_size for the text splitter; NOTE(review): origin of this value is not documented
CHUNK_OVERLAP = 0  # chunk_overlap for the text splitter
LIMIT = 100  # intended cap on the number of questions to process
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.1"  # causal LM used to generate answers
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"  # embedding model used for the vector index

2025-07-25 06:59:51.929795: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1753426792.130266      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1753426792.191038      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [7]:
def load_documents_from_csv(csv_path: str) -> List[Document]:
    """Read the translation CSV and turn each usable row into a Document.

    The CSV must contain the columns "Surah", "Ayat" and "Verse".

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        One Document per row whose "Ayat" cell is present and non-blank. The
        page content is the stripped "Ayat" value; the metadata carries the
        row's "Surah" (as ``surah``), "Ayat" (as ``ayah``) and "Verse"
        (as ``verse``) values.
    """
    # NOTE(review): the text that gets embedded comes from the "Ayat" column,
    # while callers display metadata["verse"] as the retrieved text. Confirm
    # which column actually holds the translation text.
    df = pd.read_csv(csv_path)
    docs = []
    for _, row in df.iterrows():
        raw_content = row["Ayat"]
        # A missing cell is NaN and str(NaN) == "nan", which is truthy: test
        # for missing values *before* converting to text, otherwise empty rows
        # end up in the index as documents that literally say "nan".
        if pd.isna(raw_content):
            continue
        content = str(raw_content).strip()
        if not content:
            continue
        meta = {
            "surah": row["Surah"],
            "ayah": row["Ayat"],
            "verse": row["Verse"]
        }
        docs.append(Document(page_content=content, metadata=meta))
    return docs


In [8]:
def build_or_load_faiss(docs: List[Document], embedder, force_recreate=False):
    """Return a FAISS vector store, reusing the copy cached at INDEX_PATH.

    Args:
        docs: Documents to index when the store has to be built.
        embedder: Embedding object used both to build and to load the store.
        force_recreate: When true, ignore any cached index and rebuild it.

    Returns:
        The FAISS vector store (freshly built and saved, or loaded from disk).
    """
    must_build = force_recreate or not os.path.exists(INDEX_PATH)

    if must_build:
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=CHUNK_SIZE,
            chunk_overlap=CHUNK_OVERLAP,
        )
        index = FAISS.from_documents(text_splitter.split_documents(docs), embedder)
        index.save_local(INDEX_PATH)
        return index

    # The cache is keyed on the path only: changing the CSV, the chunking
    # settings or the embedding model does not invalidate it, so pass
    # force_recreate=True (or delete INDEX_PATH) after such a change.
    # Loading unpickles the stored docstore, hence the explicit opt-in flag;
    # only load index directories that this notebook wrote itself.
    return FAISS.load_local(INDEX_PATH, embedder, allow_dangerous_deserialization=True)


In [9]:
def load_questions(path: str, limit: int = 100) -> List[Dict[str, str]]:
    """Load up to ``limit`` questions from a JSON Lines file.

    Each line is expected to be a JSON object with the question text under the
    key "Question" (or "question" as a fallback). Lines that cannot be used are
    skipped instead of aborting the run: invalid JSON, JSON values that are not
    objects, missing or non-string questions, and blank questions.

    Args:
        path: Path of the UTF-8 encoded JSON Lines file.
        limit: Maximum number of questions to return.

    Returns:
        A list of ``{"Question": <stripped text>}`` dicts in file order.
    """
    questions = []
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            if len(questions) >= limit:
                break
            # Keep the try body minimal so only parse errors are swallowed.
            try:
                record = json.loads(line)
            except json.JSONDecodeError:
                continue
            # Valid JSON need not be an object (e.g. a list, string or null);
            # those values have no .get() and used to raise AttributeError.
            if not isinstance(record, dict):
                continue
            question = record.get("Question") or record.get("question")
            if not isinstance(question, str):
                continue
            question = question.strip()
            # Whitespace-only questions would otherwise be stored as "" and
            # use up one of the `limit` slots.
            if question:
                questions.append({"Question": question})
    return questions


In [10]:
def save_results(path: str, results: List[Dict]):
    """Write ``results`` to ``path`` as JSON Lines (one JSON object per line).

    The file is overwritten. Non-ASCII characters are written as-is in UTF-8
    rather than as ``\\uXXXX`` escapes.

    Args:
        path: Destination file path.
        results: Records to serialise, one per output line.
    """
    # Lazy generator: each record is serialised only when it is written.
    serialized = (json.dumps(entry, ensure_ascii=False) + "\n" for entry in results)
    with open(path, "w", encoding="utf-8") as out:
        out.writelines(serialized)

def generate_answer(pipe, question: str, context_chunks: List[Document]):
    """Generate an answer to ``question`` grounded in the retrieved chunks.

    Args:
        pipe: Text-generation pipeline; called with the prompt and generation
            keyword arguments, and expected to return a list whose first item
            has a ``'generated_text'`` entry.
        question: The user question.
        context_chunks: Retrieved documents; their ``page_content`` values are
            joined with newlines to form the context.

    Returns:
        The generated text with surrounding whitespace removed (the prompt is
        not included because ``return_full_text=False``).
    """
    context_str = "\n".join(doc.page_content for doc in context_chunks)

    # Mistral-7B-Instruct is trained on the "[INST] ... [/INST]" instruction
    # format. The previous "<|user|> ... <|end|>" markers belong to other model
    # families and reach this model as plain text. The BOS token is not written
    # here because the tokenizer adds it when the pipeline encodes the prompt.
    prompt = f"[INST] Context:\n{context_str}\n\nQuestion: {question} [/INST]"

    # A temperature only has an effect when sampling is enabled. It was passed
    # without do_sample=True, so decoding was already greedy; state that
    # explicitly instead of passing a value that is ignored. This also keeps
    # the answers reproducible between runs.
    result = pipe(
        prompt,
        max_new_tokens=MAX_TOKENS,
        return_full_text=False,
        do_sample=False,
    )
    return result[0]['generated_text'].strip()


In [11]:
def main():
    """Run the RAG pipeline over the question file and save the answers.

    Steps: load the CSV documents, build or load the FAISS index, load the
    language model, answer each question from its retrieved chunks, and write
    all results to OUTPUT_FILE as JSON Lines.
    """
    print("🔁 Loading documents...")
    embedder = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
    documents = load_documents_from_csv(CSV_FILE)

    print("🔁 Building/loading FAISS index...")
    vector_db = build_or_load_faiss(documents, embedder)
    retriever = vector_db.as_retriever(search_type="similarity", search_kwargs={"k": K})

    print("🧠 Loading Mistral-7B model...")
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.bfloat16,
        attn_implementation="sdpa",
        device_map="auto"
    )
    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

    print("📖 Loading questions...")
    # Pass the notebook-level LIMIT explicitly; it was defined in the config
    # cell but never used, so editing it had no effect on the run.
    questions = load_questions(QUESTIONS_FILE, limit=LIMIT)

    results = []
    for q in tqdm(questions, desc="Running RAG"):
        question = q.get("Question", "").strip()
        if not question:
            continue

        # `invoke` replaces the deprecated `get_relevant_documents`.
        retrieved_docs = retriever.invoke(question)
        answer = generate_answer(pipe, question, retrieved_docs)

        # One "Surah <n>" label per retrieved chunk, in retrieval order.
        reference = [
            f"Surah {doc.metadata.get('surah')}"
            for doc in retrieved_docs
        ]

        retrieved_texts = [f"{doc.metadata.get('verse')}" for doc in retrieved_docs]

        results.append({
            "Question": question,
            "Answer": answer,
            "Retrieved": retrieved_texts,
            "Reference": reference
        })

    print("💾 Saving results...")
    save_results(OUTPUT_FILE, results)
    print(f"✅ Done! Output saved to: {OUTPUT_FILE}")


In [12]:
def check():
    """Smoke-test the pipeline on one hard-coded question and print the result.

    Builds the same retriever and generation pipeline as the full run, answers
    a single question, and prints the answer together with the retrieved
    documents, their surah labels and their 'verse' metadata. Nothing is
    written to disk apart from the index cache.
    """
    print("🔁 Loading documents...")
    embedder = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
    documents = load_documents_from_csv(CSV_FILE)

    print("🔁 Building/loading FAISS index...")
    vector_db = build_or_load_faiss(documents, embedder)
    retriever = vector_db.as_retriever(search_type="similarity", search_kwargs={"k": K})

    print("🧠 Loading Mistral-7B model...")
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.bfloat16,
        attn_implementation="sdpa",
        device_map="auto"
    )

    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

    question = "Fasting in severe injury or medical emergency , ruling for muslims in ramadan month"

    # `invoke` replaces the deprecated `get_relevant_documents`.
    retrieved_docs = retriever.invoke(question)
    answer = generate_answer(pipe, question, retrieved_docs)

    # One "Surah <n>" label per retrieved chunk, in retrieval order.
    reference = [
        f"Surah {doc.metadata.get('surah')}"
        for doc in retrieved_docs
    ]

    retrieved_texts = [f"{doc.metadata.get('verse')}" for doc in retrieved_docs]
    print(f"Answer: {answer}")
    print(f"\nRetrieved Doc: {retrieved_docs}")
    print(f"\nReference: {reference}")
    print(f"\nRetrieved Text: {retrieved_texts}")


In [13]:
# check()

In [14]:
# import shutil
# shutil.rmtree("/kaggle/working/quran_faiss_index/")

In [15]:
# Entry point: run the full pipeline over the question file.
# Inside a notebook cell __name__ is "__main__", so the guard only makes a
# difference if this code is exported to a module and imported from elsewhere.
if __name__ == "__main__":
    main()

🔁 Loading documents...


  embedder = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

🔁 Building/loading FAISS index...
🧠 Loading Mistral-7B model...


tokenizer_config.json:   0%|          | 0.00/2.10k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Device set to use cuda:0


📖 Loading questions...


  retrieved_docs = retriever.get_relevant_documents(question)
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Running RAG:   1%|          | 1/100 [00:11<19:43, 11.95s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Running RAG:   2%|▏         | 2/100 [00:15<11:24,  6.99s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Running RAG:   3%|▎         | 3/100 [00:34<19:58, 12.35s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Running RAG:   4%|▍         | 4/100 [00:40<16:08, 10.09s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Running RAG:   5%|▌         | 5/100 [00:51<16:02, 10.13s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Running RAG:   6%|▌         | 6/100 [01:04<17:29, 11.17s/it]Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Running RAG:   7%|▋         | 7/100 [01:13<16:36, 10.71s/it]Setting `pad_token_id` to `eos_token_id`:2

💾 Saving results...
✅ Done! Output saved to: /kaggle/working/rag_mistral_output.jsonl



