In [1]:
# 1. INSTALLATIONS & SETUP
# ==============================================================================
!pip install langchain langchain-community neo4j chromadb sentence-transformers langchain-together -q
!pip install python-dotenv nest_asyncio -q


[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/67.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m36.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m312.3/312.3 kB[0m [31m22.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.5/19.5 MB[0m [31m84.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m284.2/284.2 kB[0m [31m19.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m70.4 MB/s[0m eta [36m0:00:0

In [3]:
# ==============================================================================


import os
import asyncio
from dotenv import load_dotenv
import nest_asyncio

# Import LangChain components
from langchain_community.vectorstores import Chroma
from langchain_community.graphs import Neo4jGraph
from langchain.chains import GraphCypherQAChain, RetrievalQA
from langchain_together import ChatTogether
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain.prompts import PromptTemplate

# Load environment variables (for example from a local .env file) before any
# credential is read below.
load_dotenv()

print("✅ Installations and Imports Complete.")

# ==============================================================================
# 2. CONFIGURATION & CONNECTIONS
# ==============================================================================
# SECURITY: credentials are read from the environment instead of being written
# into the notebook, where they leak through version control and shared copies.
# Any key or password that was previously stored in this cell should be treated
# as compromised and rotated.

# --- LLM and API Configuration ---
TOGETHER_API_KEY = os.environ.get("TOGETHER_API_KEY", "")

# --- Neo4j Database Connection ---
NEO4J_URI = os.environ.get("NEO4J_URI", "neo4j+s://9dae82f0.databases.neo4j.io")
NEO4J_USERNAME = os.environ.get("NEO4J_USERNAME", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "")

# Report missing secrets here, where the cause is obvious, rather than as an
# authentication failure further down the notebook.
_missing_settings = [
    setting_name
    for setting_name, setting_value in (
        ("TOGETHER_API_KEY", TOGETHER_API_KEY),
        ("NEO4J_PASSWORD", NEO4J_PASSWORD),
    )
    if not setting_value
]
if _missing_settings:
    print(
        "⚠️ Missing required settings: "
        + ", ".join(_missing_settings)
        + ". Define them as environment variables or in a .env file."
    )

# --- ChromaDB Vector Store Connection ---
from google.colab import drive
drive.mount('/content/drive')
CHROMA_PERSIST_DIR = "/content/drive/MyDrive/chroma_db"

print("✅ Environment and Database Paths Configured.")

# ==============================================================================
# 3. INITIALIZE MODELS AND RETRIEVERS
# ==============================================================================
# --- Initialize the LLM via Together AI ---
# The model identifier is kept in one place so the constructor argument and the
# status message cannot drift apart.
LLM_MODEL_NAME = "mistralai/Mixtral-8x7B-Instruct-v0.1"

try:
    llm = ChatTogether(
        # Reuse the key from the configuration section instead of repeating
        # the secret as a second literal here.
        together_api_key=TOGETHER_API_KEY,
        model=LLM_MODEL_NAME,
        temperature=0.1,
        max_tokens=2048
    )
    print(f"✅ LLM Initialized with Together AI model: {LLM_MODEL_NAME}")
except Exception as e:
    # Best effort: report the failure and let the rest of the cell continue.
    # `llm` stays undefined in that case, so later calls that need it will fail.
    print(f"❌ Failed to initialize LLM. Check your TOGETHER_API_KEY. Error: {e}")

# --- Initialize the Embedding Model ---
# Sentence-transformer model handed to Chroma below as its embedding function.
embedding_model = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

# --- Connect to Neo4j Graph ---
# A connection failure is reported and swallowed; `graph` is then left undefined.
try:
    graph = Neo4jGraph(url=NEO4J_URI, username=NEO4J_USERNAME, password=NEO4J_PASSWORD)
except Exception as neo4j_error:
    print(f"❌ Failed to connect to Neo4j: {neo4j_error}")
else:
    print("✅ Successfully connected to Neo4j.")

# --- Load ChromaDB Vector Store ---
# Opens the persisted collection and exposes it as a retriever that returns the
# 3 closest documents per query.
try:
    vector_store = Chroma(
        collection_name="mosdac_knowledge_unified",
        persist_directory=CHROMA_PERSIST_DIR,
        embedding_function=embedding_model,
    )
    retriever = vector_store.as_retriever(search_kwargs={'k': 3})
    document_count = vector_store._collection.count()
    print(f"✅ Successfully loaded ChromaDB with {document_count} documents.")
except Exception as chroma_error:
    print(f"❌ Failed to load ChromaDB. Make sure the path is correct: {chroma_error}")


# ==============================================================================
# 4. DEFINE QUERY FUSION LOGIC (HYBRID SEARCH)
# ==============================================================================
# --- Master Synthesis Prompt ---
# Template for the final LLM call. It has three placeholders:
#   {user_question} - the question exactly as the user asked it
#   {kg_results}    - text produced by the knowledge-graph query
#   {rag_results}   - text produced by the vector-store query
# NOTE(review): instruction 4 (answer from your own knowledge when nothing
# relevant is found) and instruction 5 ("DO NOT make up information") appear
# to pull in opposite directions - confirm which behaviour is intended.
synthesis_prompt_template = """
You are an expert AI assistant for ISRO's MOSDAC portal. Your mission is to provide a single, clear, and comprehensive answer to the user's question by synthesizing information from two distinct sources: a Knowledge Graph and a set of documents.

THE USER'S QUESTION:
"{user_question}"

CONTEXTUAL INFORMATION:
Here is the information you have gathered:

1.  **DIRECT FACTS FROM KNOWLEDGE GRAPH:**
    These are precise, structured facts and should be considered the primary source of truth. If this section is empty or contains an error message, no direct facts were found.
    - {kg_results}

2.  **RELEVANT EXCERPTS FROM DOCUMENTS (Vector Search):**
    This text provides broader context and explanations. If this section is empty or contains an error message, no relevant documents were found.
    - {rag_results}

INSTRUCTIONS:
1.  Carefully analyze both sources of information.
2.  Synthesize a single, final answer to the user's question.
3.  Prioritize the Knowledge Graph facts for specific data points. Use the document excerpts for explanation and context.
4.  If no relevant information is found, give your own answer by searching what user asked ,just give information Regarding to MOSDAC website(https://www.mosdac.gov.in/)
5.  Keep the answer concise and directly address the user's question. DO NOT make up information.


FINAL ANSWER:
"""

# Prompt object built from the template above; `input_variables` lists the
# same three placeholders that appear in the template text.
SYNTHESIS_PROMPT = PromptTemplate(
    input_variables=["user_question", "kg_results", "rag_results"],
    template=synthesis_prompt_template,
)

# --- Asynchronous Query Functions ---
async def query_knowledge_graph_async(question: str):
    """Answer `question` from the Neo4j graph through a Cypher QA chain.

    A new `GraphCypherQAChain` is built on every call from the module-level
    `graph` and `llm`. The chain's blocking `invoke` runs in a worker thread so
    it can be awaited next to other queries.

    Returns:
        The chain's 'result' entry, 'No result found.' when that entry is
        absent, or an "Error: ..." string if anything raised. Exceptions are
        never propagated to the caller.
    """
    print("🧠 Querying Knowledge Graph...")
    try:
        chain_options = {
            "graph": graph,
            "llm": llm,
            "verbose": False,
            # The chain is created with its dangerous-requests flag switched on.
            "allow_dangerous_requests": True,
        }
        kg_chain = GraphCypherQAChain.from_llm(**chain_options)
        payload = {"query": question}
        kg_response = await asyncio.to_thread(kg_chain.invoke, payload)
    except Exception as kg_error:
        return f"Error: Could not query the Knowledge Graph. Details: {kg_error}"
    return kg_response.get('result', 'No result found.')

async def query_vector_db_async(question: str):
    """Answer `question` from the ChromaDB retriever through a RetrievalQA chain.

    A "stuff"-type `RetrievalQA` chain is built on every call from the
    module-level `llm` and `retriever`, and its blocking `invoke` runs in a
    worker thread.

    Returns:
        The chain's 'result' entry, 'No result found.' when that entry is
        absent, or an "Error: ..." string if anything raised. Exceptions are
        never propagated to the caller.
    """
    print("📚 Querying Vector DB...")
    try:
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=retriever,
            verbose=True,
        )
        payload = {"query": question}
        rag_response = await asyncio.to_thread(qa_chain.invoke, payload)
    except Exception as rag_error:
        return f"Error: Could not query the document database. Details: {rag_error}"
    return rag_response.get('result', 'No result found.')

# --- Main Test Function ---
async def test_hybrid_search(user_question: str):
    """Run the full hybrid pipeline for one question and print every stage.

    The knowledge-graph and vector-store queries are awaited together, their
    raw results are printed, and both are then passed with the question
    through `SYNTHESIS_PROMPT | llm`. The content of the resulting message is
    printed as the final answer. Nothing is returned.
    """
    print("================================================================================")
    print(f"❓ TESTING QUESTION: {user_question}")
    print("================================================================================")

    # Step 1: both look-ups are awaited together
    kg_results, rag_results = await asyncio.gather(
        query_knowledge_graph_async(user_question),
        query_vector_db_async(user_question),
    )

    intermediate_report = (
        "\n\n--- INTERMEDIATE RESULTS ---",
        f"🧠 Knowledge Graph Results:\n{kg_results}",
        f"\n📚 Vector DB Results:\n{rag_results}",
        "----------------------------\n",
    )
    for report_line in intermediate_report:
        print(report_line)

    # Step 2: merge both contexts into one answer with the master prompt
    print("🤖 Synthesizing final answer...")
    synthesis_inputs = {
        "user_question": user_question,
        "kg_results": kg_results,
        "rag_results": rag_results
    }
    synthesis_chain = SYNTHESIS_PROMPT | llm
    final_answer = await synthesis_chain.ainvoke(synthesis_inputs)

    print("\n\n✅ FINAL SYNTHESIZED ANSWER:")
    print(final_answer.content)
    print("================================================================================\n\n")


# Colab already runs an event loop; nest_asyncio is applied so async code can
# be run from notebook cells.
nest_asyncio.apply()

✅ Installations and Imports Complete.
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
✅ Environment and Database Paths Configured.
✅ LLM Initialized with Together AI model: mistralai/Mixtral-8x7B-Instruct-v0.1


  embedding_model = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

  graph = Neo4jGraph(


✅ Successfully connected to Neo4j.


  vector_store = Chroma(


✅ Successfully loaded ChromaDB with 61606 documents.


In [7]:
# --- Test a Single Question ---
# NOTE(review): this cell uses whichever `test_hybrid_search` is currently
# defined in the kernel. The output captured below it prints messages that the
# definitions shown earlier in this notebook do not produce, so it was
# presumably run against a different version - re-run the defining cell first.

# The question text is passed to the pipeline exactly as written here,
# including the trailing spaces.
my_question = "What is kalpana-1  "

# This will run the hybrid search and print the results for your question
import asyncio
# nest_asyncio is applied (again) before asyncio.run is called from this cell.
nest_asyncio.apply()
asyncio.run(test_hybrid_search(my_question))

❓ TESTING QUESTION: What is kalpana-1  
🧠 Querying Knowledge Graph with robust method...
📚 Querying Vector DB...
Found entities: ['kalpana-1']


--- INTERMEDIATE RESULTS ---
🧠 Knowledge Graph Context:
No relevant facts found in the Knowledge Graph for the extracted entities.

📚 Vector DB Context:
Link Text KALPANA 1 Winds Target URL Context ...node 464 130 Wed 2017-01-04 09 46 alt text KALPANA 1 Winds Wed 2017-04-12 10 57 alt text INSAT-3D Sounder Cloud Mask https ...
Link Text KALPANA-1 Target URL Context ... INSAT-3DR INSAT-3D KALPANA-1 INSAT-3A MeghaTropiques ...
Link Text KALPANA-1 Target URL Context ... INSAT-3DR INSAT-3D KALPANA-1 INSAT-3A MeghaTropiques ...
----------------------------

🤖 Synthesizing final answer...


❌ FINAL OUTPUT PARSING FAILED AFTER RETRIES: This OutputParser can only be called by the `parse_with_prompt` method.




In [15]:
# ==============================================================================
# FINAL MOSDAC HYBRID SEARCH: Neo4j KG + ChromaDB + Mixtral LLM
# ==============================================================================
# ✅ Handles incomplete KG gracefully
# ✅ Uses VectorDB when KG fails
# ✅ Avoids OutputParser JSON errors for faster debug-free answers
# ==============================================================================

# --- Step 1: Install and Import Libraries ---
print("--- Step 1: Installing and Importing Libraries ---")
import os
import subprocess
import sys

# Install through "<this interpreter> -m pip" so the packages land in the
# environment the kernel is running in, and check the exit status instead of
# discarding it: a failed install otherwise only shows up as an ImportError
# in the lines below.
# TODO: pin versions (pkg==x.y.z) so that a fresh run reproduces this setup.
_PIP_PACKAGES = [
    "langchain", "langchain-community", "neo4j", "chromadb",
    "sentence-transformers", "langchain-together",
    "python-dotenv", "nest_asyncio",
]
_pip_result = subprocess.run(
    [sys.executable, "-m", "pip", "install", "-q", *_PIP_PACKAGES],
    check=False,
)
if _pip_result.returncode != 0:
    print(f"⚠️ pip install exited with status {_pip_result.returncode}; the imports below may fail.")

import shutil
import re
import asyncio
import nest_asyncio
from typing import List

from langchain_community.vectorstores import Chroma
from langchain_community.graphs import Neo4jGraph
from langchain_together import ChatTogether
from langchain_community.embeddings import SentenceTransformerEmbeddings

print("✅ Installations and Imports Complete.")

# --- Step 2: Configure Paths, Keys, Connections ---
print("\n--- Step 2: Configuring Connections ---")
# SECURITY: credentials are read from the environment instead of being written
# into the notebook, where they leak through version control and shared copies.
# Any key or password that was previously stored in this cell should be treated
# as compromised and rotated.
TOGETHER_API_KEY = os.environ.get("TOGETHER_API_KEY", "")
NEO4J_URI = os.environ.get("NEO4J_URI", "neo4j+s://9dae82f0.databases.neo4j.io")
NEO4J_USERNAME = os.environ.get("NEO4J_USERNAME", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "")

# Report missing secrets here, where the cause is obvious, rather than as an
# authentication failure further down the notebook.
_missing_settings = [
    setting_name
    for setting_name, setting_value in (
        ("TOGETHER_API_KEY", TOGETHER_API_KEY),
        ("NEO4J_PASSWORD", NEO4J_PASSWORD),
    )
    if not setting_value
]
if _missing_settings:
    print(
        "⚠️ Missing required settings: "
        + ", ".join(_missing_settings)
        + ". Define them as environment variables before running this cell."
    )

from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# The Chroma store is copied from Drive to the Colab disk and opened from the
# local copy.
print("\nCopying ChromaDB from Google Drive to local Colab...")
DRIVE_DB_PATH = "/content/drive/MyDrive/chroma_db"
LOCAL_DB_PATH = "/content/local_chroma_db"

os.makedirs(os.path.dirname(LOCAL_DB_PATH), exist_ok=True)
if os.path.exists(DRIVE_DB_PATH):
    # Remove any previous local copy first so the result mirrors Drive exactly.
    if os.path.exists(LOCAL_DB_PATH):
        shutil.rmtree(LOCAL_DB_PATH)
    shutil.copytree(DRIVE_DB_PATH, LOCAL_DB_PATH)
    print("✅ ChromaDB copy complete.")
else:
    # No source on Drive: an empty directory is created so the path exists.
    print("❌ ChromaDB not found on Drive. Please check path.")
    os.makedirs(LOCAL_DB_PATH, exist_ok=True)

CHROMA_PERSIST_DIR = LOCAL_DB_PATH
CHROMA_COLLECTION_NAME = "mosdac_knowledge_unified"

print("✅ Environment and Paths Configured.")

# --- Step 3: Initialize LLM, KG, VectorDB ---
print("\n--- Step 3: Initializing Models ---")

# Chat model served by Together AI; not guarded, so a failure here stops the cell.
llm = ChatTogether(
    together_api_key=TOGETHER_API_KEY,
    model="mistralai/Mixtral-8x7B-Instruct-v0.1",
    temperature=0.1,
    max_tokens=2048,
)
print("✅ LLM Initialized.")

# Sentence-transformer model handed to Chroma below as its embedding function.
embedding_model = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
print("✅ Embedding Model Loaded.")

# A connection failure is reported and swallowed; `graph` is then left undefined.
try:
    graph = Neo4jGraph(
        url=NEO4J_URI,
        username=NEO4J_USERNAME,
        password=NEO4J_PASSWORD,
    )
except Exception as neo4j_error:
    print(f"❌ Failed to connect to Neo4j: {neo4j_error}")
else:
    print("✅ Connected to Neo4j KG.")

# Open the persisted collection and expose it as a retriever that returns the
# 3 closest documents per query.
try:
    vector_store = Chroma(
        collection_name=CHROMA_COLLECTION_NAME,
        persist_directory=CHROMA_PERSIST_DIR,
        embedding_function=embedding_model,
    )
    retriever = vector_store.as_retriever(search_kwargs={'k': 3})
    document_count = vector_store._collection.count()
    print(f"✅ Vector DB Loaded: {document_count} documents.")
except Exception as chroma_error:
    print(f"❌ Failed to load ChromaDB: {chroma_error}")

# --- Step 4: Define KG and RAG Query Functions ---
print("\n--- Step 4: Defining Query Functions ---")

# Known clean entity names in your KG
key_kg_entities = [
    "MOSDAC", "Kalpana-1", "INSAT-3D", "INSAT-3DR", "Oceansat-2", "SARAL-AltiKa",
    "OCM", "LISS-IV", "ISRO", "NRSC", "Space Applications Centre"
]

# Lookup for a single entity. The name is bound as the `$entity` query
# parameter rather than spliced into the Cypher text, so a name containing a
# quote cannot break or alter the query and the query text stays constant.
_KG_ENTITY_LOOKUP_CYPHER = """
MATCH (n)
WHERE toLower(n.name) = toLower($entity) OR toLower(n.description) CONTAINS toLower($entity)
RETURN n.name AS name, n.description AS description, labels(n) AS labels
LIMIT 1
"""

async def query_knowledge_graph_async(question: str):
    """Look up every known KG entity mentioned in `question`.

    Entities from `key_kg_entities` are matched in the question as whole
    words, case-insensitively. Each match is looked up in Neo4j with one
    query, run in a worker thread because `graph.query` is blocking.

    Returns:
        One line per matched entity, joined with newlines: a "KG Fact" line
        per returned record, a "No result found" line when the query returned
        nothing, or a "KG Error" line when the lookup raised. A fixed message
        is returned when no known entity is mentioned. Exceptions are never
        propagated to the caller.
    """
    print("🧠 Querying Knowledge Graph...")
    found_entities = [
        entity
        for entity in key_kg_entities
        if re.search(r'\b' + re.escape(entity) + r'\b', question, re.IGNORECASE)
    ]

    if not found_entities:
        return "No relevant entities found in KG for this query."

    results = []
    for entity in found_entities:
        try:
            query_result = await asyncio.to_thread(
                graph.query, _KG_ENTITY_LOOKUP_CYPHER, params={"entity": entity}
            )
            if query_result:
                for record in query_result:
                    results.append(
                        f"KG Fact: '{record.get('name')}' - {record.get('description')}"
                    )
            else:
                results.append(f"KG: No result found for '{entity}'.")
        except Exception as e:
            # One failing lookup must not hide the results of the others.
            results.append(f"KG Error for '{entity}': {e}")
    return "\n".join(results)

async def query_vector_db_async(question: str):
    """Fetch the documents closest to `question` from the vector store.

    The blocking retriever call runs in a worker thread.

    Returns:
        The `page_content` of the retrieved documents joined with newlines,
        "No documents found." when nothing was retrieved, or a
        "VectorDB Error: ..." string if retrieval raised. Exceptions are never
        propagated to the caller.
    """
    print("📚 Querying Vector DB...")
    try:
        # `invoke` is the retriever's standard entry point and replaces the
        # deprecated `get_relevant_documents`.
        docs = await asyncio.to_thread(retriever.invoke, question)
        return "\n".join(doc.page_content for doc in docs) if docs else "No documents found."
    except Exception as e:
        return f"VectorDB Error: {e}"

# --- Step 5: Main Hybrid Search Function ---
print("\n--- Step 5: Defining Test Function ---")

async def test_hybrid_search(user_question: str):
    """Run the hybrid search for one question and print every stage.

    The knowledge-graph and vector-store look-ups are awaited together, their
    raw results are printed, and both are embedded in a single prompt sent
    straight to `llm`. The answer text is printed; if the LLM call raises, the
    failure is printed instead. Nothing is returned.
    """
    print("=" * 80)
    print(f"❓ TESTING QUESTION: {user_question}")
    print("=" * 80)

    kg_task = query_knowledge_graph_async(user_question)
    rag_task = query_vector_db_async(user_question)
    kg_results, rag_results = await asyncio.gather(kg_task, rag_task)

    print("\n--- INTERMEDIATE RESULTS ---")
    print(f"🧠 KG Context:\n{kg_results}")
    print(f"\n📚 Vector DB Context:\n{rag_results}")
    print("----------------------------\n")

    prompt = f"""
You are an expert assistant for ISRO's MOSDAC portal.

Use the following info to answer the user's question clearly.

--- KG FACTS ---
{kg_results}

--- DOCUMENTS ---
{rag_results}

If KG has no data, answer using just the documents.
If both are weak, provide a helpful fallback answer.

USER QUESTION: {user_question}

ANSWER:
"""
    try:
        response = await llm.ainvoke(prompt)
        print("\n✅ FINAL OUTPUT:")
        # Print only the answer text. Printing the response object itself shows
        # its whole representation instead of the answer. The getattr fallback
        # keeps this working if the model returns something without `.content`.
        print(getattr(response, "content", response))
    except Exception as e:
        print(f"\n❌ Failed to generate final answer: {e}")

# --- Step 6: Run Demo Questions ---
print("\n--- Step 6: Executing Tests ---")
nest_asyncio.apply()

test_questions = [
    "What is Kalpana-1?"

]

async def run_all_tests():
    for q in test_questions:
        await test_hybrid_search(q)

# RUN TESTS
await run_all_tests()
print("\n✅ All tests completed.")


--- Step 1: Installing and Importing Libraries ---
✅ Installations and Imports Complete.

--- Step 2: Configuring Connections ---
Mounted at /content/drive

Copying ChromaDB from Google Drive to local Colab...
✅ ChromaDB copy complete.
✅ Environment and Paths Configured.

--- Step 3: Initializing Models ---
✅ LLM Initialized.
✅ Embedding Model Loaded.
✅ Connected to Neo4j KG.
✅ Vector DB Loaded: 61606 documents.

--- Step 4: Defining Query Functions ---

--- Step 5: Defining Test Function ---

--- Step 6: Executing Tests ---
❓ TESTING QUESTION: What is Kalpana-1?
🧠 Querying Knowledge Graph...
📚 Querying Vector DB...

--- INTERMEDIATE RESULTS ---
🧠 KG Context:
KG: No result found for 'Kalpana-1'.

📚 Vector DB Context:
Link Text KALPANA 1 Winds Target URL Context ...node 464 130 Wed 2017-01-04 09 46 alt text KALPANA 1 Winds Wed 2017-04-12 10 57 alt text INSAT-3D Sounder Cloud Mask https ...
Link Text KALPANA-1 Target URL Context ... INSAT-3DR INSAT-3D KALPANA-1 INSAT-3A MeghaTropiques ..