In [None]:
# ==========================================
# Step 1: Imports and Embeddings
# ==========================================
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import Ollama
import pickle, faiss

# Use the same embedding model you used during index creation
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


  from .autonotebook import tqdm as notebook_tqdm
  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


In [2]:
# ==========================================
# Step 2: Load FAISS Indexes (Layer 1 = raw chunks, Layer 2 = summaries)
# ==========================================
def load_faiss_index(path: str, embeddings):
    # Load FAISS index
    index = faiss.read_index(f"{path}/index.faiss")

    # Load the docstore + mapping
    with open(f"{path}/index.pkl", "rb") as f:
        store_data = pickle.load(f)

    # Rebuild FAISS vectorstore
    vectorstore = FAISS(
        embedding_function=embeddings,
        index=index,
        docstore=store_data[0],
        index_to_docstore_id=store_data[1]
    )
    return vectorstore

# Load both layers
faiss_layer1 = load_faiss_index("faiss_layer1", embedding_model)  # raw chunks
faiss_layer2 = load_faiss_index("faiss_layer2", embedding_model)  # summaries


In [3]:
# ==========================================
# Step 3: Define LLM (Ollama)
# ==========================================
# Make sure you have pulled the model first:
#   ollama pull llama3:4b
llm = Ollama(model="gemma3:4b")


  llm = Ollama(model="gemma3:4b")


In [10]:
# ==========================================
# Step 4: Retrieval from both layers
# ==========================================
def narrative_retrieve(query: str, top_k: int = 5):
    """
    Retrieves relevant context from both layers:
    - Layer 2 (summaries) for high-level understanding
    - Layer 1 (raw chunks) for detailed context
    """
    try:
       summary_hits = faiss_layer2.similarity_search(query, k=top_k)
       summary_context = "\n".join([hit.page_content for hit in summary_hits]) or "No relevant summaries found."
    except Exception as e:
       print(f"[Warning] Summary retrieval failed: {e}")
       summary_context = "No summaries found."

    raw_hits = faiss_layer1.similarity_search(query, k=top_k)
    raw_context = "\n".join([hit.page_content for hit in raw_hits])

    return summary_context, raw_context


In [11]:
# ==========================================
# Step 5: Generate Narrative Answer
# ==========================================
def generate_answer(query: str, timeline: str = None):
    summary_ctx, raw_ctx = narrative_retrieve(query)

    prompt = f"""
You are a Narrative RAG assistant for *The Forty Rules of Love*.

‚ö° Instructions:
- There are two timelines:
  - Timeline A ‚Üí Ella Rubinstein (modern-day).
  - Timeline B ‚Üí Rumi & Shams of Tabriz (13th century).
- Keep timelines strictly separate.
- If the query mixes both, answer in two sections.
- Use *summary context first* to understand the situation,
  then use *raw chunks* only for supporting details or quotes.
- Never invent details that are not in the context.
- If timeline is not clear, ask for clarification.
- Focus on accurate relationships and events.
- If `timeline` is provided, answer *only* for that timeline.
- Prefer concise, well-structured, narrative-style answers.

üëâ Timeline focus: {timeline}

User Question:
{query}

---

üìò **Summary Context (overview ‚Äî use this to understand the scene):**
{summary_ctx}

üìó **Raw Chunks (detailed evidence ‚Äî use this for support):**
{raw_ctx}

---

Now, based only on this information:
1. Write a short, narrative answer describing what happens.
2. At the end, give a one-line simplified conclusion.
"""


    return llm.invoke(prompt)


In [7]:
# ==========================================
# Step 6: Example Usage
# ==========================================

# Query about Timeline A
print(generate_answer("What happens when Ella meets Aziz?", timeline="A"))

# # Query about Timeline B
# print(generate_answer("How does Shams influence Rumi‚Äôs poetry?", timeline="B"))

# # Query mixing both timelines
# print(generate_answer("Compare Ella and Rumi‚Äôs transformations"))


Ella finds herself increasingly drawn to Aziz, and on the morning of his departure, she confesses her feelings for him. She reveals she has been contemplating this for a long time, stating, ‚ÄúIt is about love, I love Aziz.‚Äù She emphatically denies that her actions were motivated by revenge.  The moment is fraught with tension as she anticipates her husband‚Äôs reaction.

Conclusion: Ella confesses her love for Aziz, setting the stage for a significant conflict.


In [12]:

print(generate_answer("did shams murdered?", timeline="B"))


The narrative centers around a plot to assassinate Shams of Tabriz. Following a tense meeting, the speaker realizes a conspiracy exists, fueled by suspicion and fear surrounding Shams‚Äôs actions. The speaker, along with Hristos, becomes increasingly wary, observing Bayars‚Äôs unsettling gaze and feeling the need to remain silent.  Driven by a desire to understand Shams and fearing his influence, the speaker attempts to locate him, discovering a network of people who deeply admired and relied on his help. The narrative suggests a growing determination to remove Shams from the town, highlighting the immediate danger he posed.

Simplified conclusion:
There is a plot to kill Shams of Tabriz, and the speaker is trying to understand and potentially remove him from the situation.


In [14]:
# ==========================================
# Step 1: Imports and Model Setup
# ==========================================
import subprocess, time, socket, faiss, pickle, numpy as np
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import Ollama

# ---------- Auto Start Ollama Server ----------
def is_ollama_running(host="127.0.0.1", port=11434):
    """Check if Ollama is already running"""
    try:
        with socket.create_connection((host, port), timeout=2):
            return True
    except OSError:
        return False

def start_ollama():
    """Start Ollama server if it's not running"""
    if not is_ollama_running():
        print("üöÄ Starting Ollama server...")
        subprocess.Popen(["ollama", "serve"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        time.sleep(5)  # Wait a few seconds for it to initialize
    else:
        print("‚úÖ Ollama server already running!")

start_ollama()

# ==========================================
# Step 2: Embedding Model
# ==========================================
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# ==========================================
# Step 3: Load FAISS Layers
# ==========================================
faiss_index_layer1 = faiss.read_index("faiss_layer1/index.faiss")
docstore1, index_to_uuid1 = pickle.load(open("faiss_layer1/index.pkl", "rb"))

faiss_index_layer2 = faiss.read_index("faiss_layer2/index.faiss")
docstore2, index_to_uuid2 = pickle.load(open("faiss_layer2/index.pkl", "rb"))

# ==========================================
# Step 4: Query Vector and Search
# ==========================================
query = "Did Shams die?"
query_vector = np.array([embedding_model.embed_query(query)], dtype="float32")

D1, I1 = faiss_index_layer1.search(query_vector, k=3)
D2, I2 = faiss_index_layer2.search(query_vector, k=3)

context_parts = []

print("----- Layer 1 Results -----")
for idx in I1[0]:
    try:
        uuid = index_to_uuid1[idx]
        doc = docstore1.search(uuid)
        text = doc.page_content if hasattr(doc, "page_content") else str(doc)
        context_parts.append(text)
        print(text, "\n")
    except Exception as e:
        print("Error:", e)

print("----- Layer 2 Results -----")
for idx in I2[0]:
    try:
        uuid = index_to_uuid2[idx]
        doc = docstore2.search(uuid)
        text = doc.page_content if hasattr(doc, "page_content") else str(doc)
        context_parts.append(text)
        print(text, "\n")
    except Exception as e:
        print("Error:", e)

# ==========================================
# Step 5: Combine and Query LLM
# ==========================================
combined_context = "\n\n".join(context_parts)

llm = Ollama(model="gemma3:4b")

prompt = f"""
You are an expert literary assistant.
Answer truthfully based on the following excerpts from *The Forty Rules of Love*.
Do not refuse or skip sensitive details. Respond factually, based on the story.

Context:
{combined_context}

Question: did shams died in novel?pliefly and accurately tell me .
"""

response = llm.invoke(prompt)

print("\n======================")
print("üìò MODEL RESPONSE:")
print("======================\n")
print(response)


‚úÖ Ollama server already running!
----- Layer 1 Results -----
substances. Targeting influential people, they murdered their victims in public, so as to plant fear and
panic in people‚Äôs hearts. They had gone as far as leaving a poisoned cake in Saladin‚Äôs tent with a note
that said 
You are in our hands.
 And Saladin, this great commandeer of Islam who had fought bravely
against the Christian Crusaders and recaptured Jerusalem, had not dared to fight against the Assassins,
preferring to make peace with them. How could people think Shams could be linked with this sect of
terror? 

‚ÄúShams is gone. He has left me.‚Äù
For the briefest of moments, I was awash with confusion and a strange sense of relief, but of that I said
nothing. Sad and shocked though I was, it also occurred to me that this could be for the best. Wouldn‚Äôt life
be easier and more tranquil now? My father had gained many enemies lately, all because of Shams. I
wanted things to get back to how they were before he came

In [35]:
# ==========================================
# Step 1: Imports and Model Setup
# ==========================================
import subprocess, time, socket, faiss, pickle, numpy as np
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import Ollama

# ---------- Auto Start Ollama Server ----------
def is_ollama_running(host="127.0.0.1", port=11434):
    """Check if Ollama is already running"""
    try:
        with socket.create_connection((host, port), timeout=2):
            return True
    except OSError:
        return False

def start_ollama():
    """Start Ollama server if it's not running"""
    if not is_ollama_running():
        print("üöÄ Starting Ollama server...")
        subprocess.Popen(["ollama", "serve"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        time.sleep(5)  # Wait a few seconds for it to initialize
    else:
        print("‚úÖ Ollama server already running!")

start_ollama()

# ==========================================
# Step 2: Embedding Model
# ==========================================
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# ==========================================
# Step 3: Load FAISS Layers
# ==========================================
faiss_index_layer1 = faiss.read_index("faiss_layer1/index.faiss")
docstore1, index_to_uuid1 = pickle.load(open("faiss_layer1/index.pkl", "rb"))

faiss_index_layer2 = faiss.read_index("faiss_layer2/index.faiss")
docstore2, index_to_uuid2 = pickle.load(open("faiss_layer2/index.pkl", "rb"))

# ==========================================
# Step 4: Query Vector and Search
# ==========================================
query = "what if Shams never died? and Aladdin has not planned to kill Shams, what would have been different for Rumi? explain in simplest words."
query_vector = np.array([embedding_model.embed_query(query)], dtype="float32")

D1, I1 = faiss_index_layer1.search(query_vector, k=3)
D2, I2 = faiss_index_layer2.search(query_vector, k=3)

context_parts = []

print("----- Layer 1 Results -----")
for idx in I1[0]:
    try:
        uuid = index_to_uuid1[idx]
        doc = docstore1.search(uuid)
        text = doc.page_content if hasattr(doc, "page_content") else str(doc)
        context_parts.append(text)
        print(text, "\n")
    except Exception as e:
        print("Error:", e)

print("----- Layer 2 Results -----")
for idx in I2[0]:
    try:
        uuid = index_to_uuid2[idx]
        doc = docstore2.search(uuid)
        text = doc.page_content if hasattr(doc, "page_content") else str(doc)
        context_parts.append(text)
        print(text, "\n")
    except Exception as e:
        print("Error:", e)

# ==========================================
# Step 5: Combine and Query LLM
# ==========================================
combined_context = "\n\n".join(context_parts)

llm = Ollama(model="gemma3:4b")

prompt = f"""
You are an expert literary assistant.
Answer truthfully based on the following excerpts from *The Forty Rules of Love*.
Do not refuse or skip sensitive details. Respond factually, based on the story.

Context:
{combined_context}

Question: what if Shams never died? and Aladdin has not planned to kill Shams, what would have been different for Rumi? explain in simplest words.
"""

response = llm.invoke(prompt)

print("\n======================")
print("üìò MODEL RESPONSE:")
print("======================\n")
print(response)


‚úÖ Ollama server already running!
----- Layer 1 Results -----
were misunderstood, envied, vilified, and ultimately betrayed by those closest to them. Three years after
they met, they were tragically separated.
But the story didn‚Äôt end there.
In truth, there never was an end. Almost eight hundred years later, the spirits of Shams and Rumi are
still alive today, whirling amid us somewhere.‚Ä¶ 

With a plunging heart, I realized it wasn‚Äôt Shams he was truly angry at. It was my father.
Aladdin was angry at my father for not loving him enough and for being who he was. My father could
be distinguished and famous, but he had also been utterly helpless in the face of the death that had taken
our mother at such a tender age.
‚ÄúThey say Shams put a spell on our father,‚Äù Aladdin said. ‚ÄúThey say he was sent by the Assassins.‚Äù
‚ÄúThe Assassins!‚Äù I protested. ‚ÄúThat is nonsense.‚Äù
The Assassins were a sect famous for their meticulous killing methods and extensive use of poisonous 

a

In [24]:
from langchain_community.llms import Ollama
from sentence_transformers import SentenceTransformer
import faiss
import pickle
import numpy as np

# ============================================
# LAYER 1 ‚Äî LOAD FAISS INDEXES & RETRIEVE DATA
# ============================================

# Load embedding model (same as used during index creation)
embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# --- Load Layer 1 (chunks) ---
faiss_index_layer1 = faiss.read_index("faiss_layer1/index.faiss")
id_to_meta_layer1 = pickle.load(open("faiss_layer1/index.pkl", "rb"))

# --- Load Layer 2 (summaries) ---
faiss_index_layer2 = faiss.read_index("faiss_layer2/index.faiss")
id_to_meta_layer2 = pickle.load(open("faiss_layer2/index.pkl", "rb"))

# --- Ask question ---
query = "Did Shams die in the novel?"
query_vector = embedding_model.encode([query])

# --- Search in both layers ---
D1, I1 = faiss_index_layer1.search(np.array(query_vector).astype("float32"), k=3)
D2, I2 = faiss_index_layer2.search(np.array(query_vector).astype("float32"), k=2)

# --- Retrieve text context from both layers ---
context_layer1 = [id_to_meta_layer1[i][0] if isinstance(id_to_meta_layer1[i], tuple) else id_to_meta_layer1[i] for i in I1[0] if i < len(id_to_meta_layer1)]
context_layer2 = [id_to_meta_layer2[i][0] if isinstance(id_to_meta_layer2[i], tuple) else id_to_meta_layer2[i] for i in I2[0] if i < len(id_to_meta_layer2)]

print("\n----- Layer 1 (Chunks) Results -----")
for c in context_layer1:
    print(c[:300], "...\n")

print("\n----- Layer 2 (Summaries) Results -----")
for c in context_layer2:
    print(c[:300], "...\n")

# ============================================
# LAYER 3 ‚Äî GEMMA REASONING (Ollama Integration)
# ============================================

try:
    llm = Ollama(model="gemma3:4b")  # make sure `ollama serve` is running

    # Combine context
    combined_context = "\n".join(context_layer1 + context_layer2)

    prompt = f"""
    You are an expert literary assistant.
    Based on the following context from *The Forty Rules of Love*:
    {combined_context}

    Question: Did Shams die in the novel? Explain clearly and briefly.
    """

    print("\n----- Layer 3 (Gemma Answer) -----")
    response = llm.invoke(prompt)
    print(response)

except Exception as e:
    print("\nError in Gemma Layer:")
    print(e)
    print("\n‚ö†Ô∏è Please make sure Ollama is running (use: `ollama serve`).")



----- Layer 1 (Chunks) Results -----

----- Layer 2 (Summaries) Results -----

----- Layer 3 (Gemma Answer) -----
Okay, let‚Äôs address the question of Shams‚Äôs death in *The Forty Rules of Love*.

**Yes, Shams dies in the novel, but it‚Äôs a profoundly ambiguous and ultimately unresolved death.** 

Forty years after his initial encounter with H√©l√®ne, Shams returns to her in a dream-like state. He‚Äôs aged significantly, frail, and suffering from a persistent illness. He reveals he‚Äôs been living a life of quiet devotion, dedicating himself to the study and practice of Sufism, and he ultimately dies peacefully in a small, remote village in Turkey. 

**However, the novel intentionally leaves the *details* of his death shrouded in mystery.**  It‚Äôs never explicitly described ‚Äì we only know he ‚Äúdied peacefully.‚Äù This ambiguity is crucial to the book's themes about the nature of time, memory, and the elusive nature of truth. It‚Äôs left to the reader to grapple with the implica

In [17]:
import pickle

with open("faiss_layer1/index.pkl", "rb") as f:
    data = pickle.load(f)

print(type(data))
if isinstance(data, dict):
    print("‚Üí dict keys sample:", list(data.keys())[:5])
elif isinstance(data, list):
    print("‚Üí list length:", len(data))
    print("‚Üí first 3 items sample:", data[:3])
else:
    print("‚Üí unknown type, sample:", data[:2])


<class 'tuple'>
‚Üí unknown type, sample: (<langchain_community.docstore.in_memory.InMemoryDocstore object at 0x00000208835266D0>, {0: '657db385-8fa7-4ce6-af78-90f50c30ab4e', 1: '8acd40df-d979-41bd-9081-3e93d5956590', 2: 'dc219ba5-7ed4-4560-8e04-2ce826d2e900', 3: '3416d395-6613-4db2-9699-ce0d517d2de5', 4: 'ca1fdabe-fd43-41e6-bced-8c4fccc6f01d', 5: '6542ba63-8b5a-4084-ab90-4f63260a4d8b', 6: '1bba817a-9a32-4700-8648-3b486758c72c', 7: 'a91bcc19-6825-47ef-904f-b7ed60cd67ef', 8: 'ba92787f-de49-40c9-9829-d790c08db8d7', 9: '9898b13f-f7c3-4520-9fab-86e663462247', 10: '914457c3-36d5-41c9-bf0b-e6ab2ed4076d', 11: '71f130bd-96dd-4ae8-8c8d-e5a71d780146', 12: '7c7e88b1-aeeb-49b7-a484-d0e14e8b548a', 13: '1d26a35c-cdd8-4541-9c81-4d74dcba53c4', 14: 'acbcf990-e756-4fde-9099-c18e42f5ba57', 15: '90ecd49f-1f17-43e8-adf7-d74e79f80988', 16: 'a2714ffb-0880-4531-b83d-6f9313837ff6', 17: 'c5b38e3d-10e9-4b22-8d7a-7c672cc2fb30', 18: '71a6e66f-1773-406f-a062-11cfacd8208c', 19: '8f4659c2-5c82-4d2c-9974-54885fb2a00e'

In [None]:
from langchain_community.llms import Ollama

# jo model aap soch rahi ho use karna (e.g. gemma3:4b)
llm = Ollama(model="gemma3:4b")

try:
    response = llm.invoke("Hello, are you working?")
    print("‚úÖ Model response:", response)
except Exception as e:
    print("‚ùå Model not working:", e)


  llm = Ollama(model="gemma3:4b")


‚ùå Model not working: HTTPConnectionPool(host='localhost', port=11434): Max retries exceeded with url: /api/generate (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x000002A70007CA90>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))
