In [2]:
# 1. INSTALLATIONS & SETUP
# ==============================================================================
!pip install langchain langchain-community neo4j chromadb sentence-transformers langchain-together -q
!pip install python-dotenv nest_asyncio -q


[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/67.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m50.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m312.3/312.3 kB[0m [31m22.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.5/19.5 MB[0m [31m82.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m284.2/284.2 kB[0m [31m22.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m74.0 MB/s[0m eta [36m0:00:0

In [18]:
#Testing Hybrid Search

# --- Step 1: Install and Import Libraries ---
print("--- Step 1: Installing and Importing Libraries ---")

# Standard library
import asyncio
import json
import os
import re
import shutil
import subprocess
import sys
from typing import List

# Install third-party dependencies BEFORE importing them: running pip after the
# imports (as this cell used to) cannot help a fresh kernel, because the import
# statements would already have failed. `sys.executable -m pip` targets the
# interpreter that backs this kernel rather than whichever `pip` is on PATH.
_REQUIRED_PACKAGES = [
    "langchain",
    "langchain-community",
    "neo4j",
    "chromadb",
    "sentence-transformers",
    "langchain-together",
    "python-dotenv",
    "nest_asyncio",
]
_pip_result = subprocess.run(
    [sys.executable, "-m", "pip", "install", "-q", "--progress-bar", "off", *_REQUIRED_PACKAGES],
    check=False,  # best effort: packages may already be present from the setup cell
)
if _pip_result.returncode != 0:
    print(f"⚠️ pip exited with status {_pip_result.returncode}; the imports below may fail.")

# Third-party
import nest_asyncio

# LangChain core components
from langchain_community.vectorstores import Chroma
from langchain_community.graphs import Neo4jGraph # For KG connection
from langchain_together import ChatTogether # For Together AI LLM
from langchain_community.embeddings import SentenceTransformerEmbeddings # For embeddings
# No explicit PydanticOutputParser/RetryOutputParser (or Pydantic models):
# the final answer is plain text.
from langchain.prompts import PromptTemplate # For managing prompts
from langchain.chains import RetrievalQA # For the RAG chain

print("✅ Installations and Imports Complete.")

# --- Step 2: Configure Paths, Keys, Connections ---
print("\n--- Step 2: Configuring Connections ---")

import getpass


def _load_secret(env_name: str) -> str:
    """Return the secret stored in environment variable ``env_name``.

    If the variable is unset or empty, prompt for the value interactively
    (input is not echoed) so the secret never has to be written into the
    notebook source or its saved output.
    """
    value = os.environ.get(env_name)
    if value:
        return value
    return getpass.getpass(f"{env_name} is not set; enter it now: ")


# SECURITY: credentials must not be committed with the notebook. The API key and
# database password that were previously hardcoded here should be treated as
# leaked and rotated.
TOGETHER_API_KEY = _load_secret("TOGETHER_API_KEY")
# The URI and username are not secrets; keep the previous values as overridable defaults.
NEO4J_URI = os.environ.get("NEO4J_URI", "neo4j+s://9dae82f0.databases.neo4j.io")
NEO4J_USERNAME = os.environ.get("NEO4J_USERNAME", "neo4j")
NEO4J_PASSWORD = _load_secret("NEO4J_PASSWORD")

from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# Copy the persisted Chroma database from Drive to the Colab VM's local disk.
# The fallback message below indicates the local copy exists to avoid I/O
# errors when reading a large DB straight from Drive.
print("\nCopying ChromaDB from Google Drive to local Colab...")
DRIVE_DB_PATH = "/content/drive/MyDrive/chroma_db"
LOCAL_DB_PATH = "/content/local_chroma_db"

# Directory Chroma will open. Defaults to the local copy and is only redirected
# to Drive when the copy fails (see the except branch).
CHROMA_PERSIST_DIR = LOCAL_DB_PATH
CHROMA_COLLECTION_NAME = "mosdac_knowledge_unified"

os.makedirs(os.path.dirname(LOCAL_DB_PATH), exist_ok=True)
if os.path.exists(DRIVE_DB_PATH):
    # Start from a clean slate so a stale copy from an earlier run is never mixed in.
    if os.path.exists(LOCAL_DB_PATH):
        shutil.rmtree(LOCAL_DB_PATH)
    try:
        shutil.copytree(DRIVE_DB_PATH, LOCAL_DB_PATH)
        print("✅ ChromaDB copy complete.")
    except Exception as e:
        print(f"❌ Error copying ChromaDB from Drive: {e}")
        print("Proceeding without local copy. Expect potential I/O errors if DB is large.")
        # A failed copytree can leave a partial (corrupt) directory behind; remove
        # it and read the database directly from Drive, as the message promises.
        shutil.rmtree(LOCAL_DB_PATH, ignore_errors=True)
        CHROMA_PERSIST_DIR = DRIVE_DB_PATH
else:
    print("❌ ChromaDB not found on Drive. Please check path. Attempting to proceed with empty local dir.")
    os.makedirs(LOCAL_DB_PATH, exist_ok=True) # Ensure dir exists even if empty

print("✅ Environment and Paths Configured.")

# --- Step 3: Initialize LLM, KG, VectorDB ---
print("\n--- Step 3: Initializing Models ---")

# Chat model used to compose the final answer from KG facts and retrieved documents.
llm = ChatTogether(
    together_api_key=TOGETHER_API_KEY,
    model="mistralai/Mixtral-8x7B-Instruct-v0.1",
    temperature=0.1,
    max_tokens=2048
)
print("✅ LLM Initialized.")

embedding_model = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
print("✅ Embedding Model Loaded.")

# Reset the optional backends before (re)connecting. Without this, re-running the
# cell after a failed connection would silently keep the objects created by an
# earlier execution, and the query functions would use stale state.
graph = None
vector_store = None
retriever = None

try:
    graph = Neo4jGraph(url=NEO4J_URI, username=NEO4J_USERNAME, password=NEO4J_PASSWORD)
    # No explicit connectivity probe here: a previous graph.run("RETURN 1") call was
    # removed because LangChain's Neo4jGraph does not provide that method.
    print("✅ Connected to Neo4j KG.")
except Exception as e:
    print(f"❌ Failed to connect to Neo4j: {e}")
    # Do NOT raise here, as KG is accepted to be imperfect for submission.

try:
    vector_store = Chroma(
        collection_name=CHROMA_COLLECTION_NAME,
        persist_directory=CHROMA_PERSIST_DIR,
        embedding_function=embedding_model
    )
    # Retrieve the 3 most similar chunks per question.
    retriever = vector_store.as_retriever(search_kwargs={'k': 3})
    print(f"✅ Vector DB Loaded: {vector_store._collection.count()} documents.")
except Exception as e:
    print(f"❌ Failed to load ChromaDB: {e}")
    # Do NOT raise here, as VectorDB is accepted to be imperfect for submission.

# --- Step 4: Define KG and RAG Query Functions ---
print("\n--- Step 4: Defining Query Functions ---")

# Entity names that are looked up in the knowledge graph when they appear in a
# user question. Extend this list with the clean names you expect the KG to
# contain (taken from the KG creation script's output sample).
key_kg_entities = [
    # Portal / organisations
    "MOSDAC",
    # Satellites
    "Kalpana-1",
    "INSAT-3D",
    "INSAT-3DR",
    "Oceansat-2",
    "SARAL-AltiKa",
    # Instruments
    "OCM",
    "LISS-IV",
    # Agencies
    "ISRO",
    "NRSC",
    "Space Applications Centre",
]

async def query_knowledge_graph_async(question: str):
    """Look up known entities mentioned in ``question`` in the Neo4j knowledge graph.

    Each name in ``key_kg_entities`` that occurs in the question as a whole
    word (case-insensitive) is queried; at most one node is returned per
    entity, preferring the node whose name matches exactly over nodes that
    merely mention the entity in their description.

    Args:
        question: The user's natural-language question.

    Returns:
        A newline-joined string with one line per matched entity (a fact, a
        "no direct fact" notice, or a sanitized error line), or a fixed
        message when the graph is not connected or no entity is mentioned.
        The function never raises for query failures.
    """
    print("🧠 Querying Knowledge Graph...")
    # Handle case where graph connection failed at initialization
    if globals().get("graph") is None:
        return "KG is not connected."

    found_entities = [
        entity
        for entity in key_kg_entities
        if re.search(r'\b' + re.escape(entity) + r'\b', question, re.IGNORECASE)
    ]

    if not found_entities:
        return "KG: No relevant entities found for this query."

    # The entity is passed as a query parameter rather than interpolated into the
    # Cypher text, so names containing quotes cannot break (or alter) the query.
    # ORDER BY makes LIMIT 1 deterministic: an exact name match wins over a node
    # that only mentions the entity in its description.
    cypher = """
    MATCH (n)
    WHERE toLower(n.name) = toLower($entity)
       OR toLower(n.description) CONTAINS toLower($entity)
    RETURN n.name AS name, n.description AS description, labels(n) AS labels
    ORDER BY CASE WHEN toLower(name) = toLower($entity) THEN 0 ELSE 1 END
    LIMIT 1
    """

    results = []
    for entity in found_entities:
        try:
            # graph.query is blocking; run it in a worker thread to keep the loop free.
            query_result = await asyncio.to_thread(
                graph.query, cypher, params={"entity": entity}
            )
            if query_result:
                for record in query_result:
                    results.append(
                        f"KG Fact: Name='{record.get('name')}', Description='{record.get('description')}'"
                    )
            else:
                results.append(f"KG: No direct fact found for '{entity}'.")
        except Exception as e:
            # Show the real cause to the notebook user, but keep the text that is
            # forwarded to the LLM clean (no traceback / driver details).
            print(f"⚠️ KG query for '{entity}' failed: {e!r}")
            results.append(f"KG Error for '{entity}': Query execution failed.")
    return "\n".join(results)

async def query_vector_db_async(question: str):
    """Retrieve the document chunks most similar to ``question`` from the vector store.

    Args:
        question: The user's natural-language question.

    Returns:
        The retrieved chunks formatted as ``[source]\\ncontent`` and separated by
        blank lines; or a fixed message when the store is not loaded, nothing is
        found, or retrieval fails. The function never raises for retrieval
        failures.
    """
    print("📚 Querying Vector DB...")
    module_state = globals()
    # Both objects are created together at initialization; check the retriever too,
    # since it is the one actually used below.
    if module_state.get("vector_store") is None or module_state.get("retriever") is None:
        return "VectorDB is not loaded."

    try:
        # `invoke` is the supported retriever entry point (get_relevant_documents is
        # deprecated). It is blocking, so run it in a worker thread.
        docs = await asyncio.to_thread(retriever.invoke, question)
        if not docs:
            return "No documents found."

        formatted_docs = []
        for doc in docs:
            # Prefer "source", then "url"; fall back to a placeholder label.
            source = doc.metadata.get("source") or doc.metadata.get("url") or "No source info"
            formatted_docs.append(f"[{source}]\n{doc.page_content}")

        return "\n\n".join(formatted_docs)
    except Exception as e:
        # Surface the cause to the notebook user; keep the LLM-facing text clean.
        print(f"⚠️ Vector DB retrieval failed: {e!r}")
        return "VectorDB Error: Data retrieval failed."

# --- Step 5: Main Hybrid Search Function ---
print("\n--- Step 5: Defining Test Function ---")

async def test_hybrid_search(user_question: str):
    print("=" * 80)
    print(f"❓ TESTING QUESTION: {user_question}")
    print("=" * 80)

    kg_task = query_knowledge_graph_async(user_question)
    rag_task = query_vector_db_async(user_question)
    kg_results, rag_results = await asyncio.gather(kg_task, rag_task)

    print("\n--- INTERMEDIATE RESULTS ---")
    print(f"🧠 KG Context:\n{kg_results}")
    print(f"\n📚 Vector DB Context:\n{rag_results}")
    print("----------------------------\n")

    prompt = f"""
You are an expert assistant for ISRO's MOSDAC portal.

Use the following information to answer the user's question clearly and concisely.

--- KG FACTS ---
{kg_results}

--- DOCUMENTS ---
{rag_results}

If the 'KG FACTS' section contains 'KG Error' or 'No relevant entities found', disregard it and answer solely using 'DOCUMENTS'.
if the queton is about asking who are you or what are you answer that you are OrbitBot a smart ai assistant bot  which clarifies any question regaeding to Meteorological and Oceanographic Satellite Data Archival Center.(MOSDAC)with link (https://www.mosdac.gov.in/)
If both 'KG FACTS' and 'DOCUMENTS' are weak or indicate no results, provide a helpful fallback answer based on general knowledge about MOSDAC, clarifying that specific information wasn't found.
Ensure your answer directly addresses the USER QUESTION and avoids making up information.

USER QUESTION: {user_question}

ANSWER:
"""
    try:
        # Direct LLM invocation, no JSON parsing
        response = await llm.ainvoke(prompt)
        print("\n✅ FINAL OUTPUT:")
        print(response.content) # Print the content of the AI message
    except Exception as e:
        print(f"\n❌ Failed to generate final answer: {e}")

# --- Step 6: Run Demo Questions ---
# This cell only prepares the event loop; the demo questions themselves are
# defined and awaited in the following cell.
print("\n--- Step 6: Executing Tests ---")
# Patch asyncio via nest_asyncio so that nested event-loop use is permitted
# inside the notebook's already-running loop.
# NOTE(review): the next cell uses top-level `await`, so this patch may not be
# strictly required — confirm before removing.
nest_asyncio.apply()




--- Step 1: Installing and Importing Libraries ---
✅ Installations and Imports Complete.

--- Step 2: Configuring Connections ---
Mounted at /content/drive

Copying ChromaDB from Google Drive to local Colab...
✅ ChromaDB copy complete.
✅ Environment and Paths Configured.

--- Step 3: Initializing Models ---
✅ LLM Initialized.
✅ Embedding Model Loaded.
✅ Connected to Neo4j KG.
✅ Vector DB Loaded: 61606 documents.

--- Step 4: Defining Query Functions ---

--- Step 5: Defining Test Function ---

--- Step 6: Executing Tests ---


In [19]:
# Demo questions fed to the hybrid search, run in the order listed.
test_questions = ["What are you"]

async def run_all_tests():
    """Run the hybrid search for every entry of ``test_questions``, one at a time."""
    # Sequential on purpose: each question's printed output stays grouped together.
    for question in test_questions:
        await test_hybrid_search(question)

# RUN TESTS
# Top-level `await` relies on the notebook kernel's running event loop; this
# line is not valid in a plain Python script (wrap it in asyncio.run there).
await run_all_tests()
print("\n✅ All tests completed.")

❓ TESTING QUESTION: What are you
🧠 Querying Knowledge Graph...
📚 Querying Vector DB...

--- INTERMEDIATE RESULTS ---
🧠 KG Context:
KG: No relevant entities found for this query.

📚 Vector DB Context:
[https://www.mosdac.gov.in/node/464/27?sort=desc&order=Display+name]
Link Text INSAT-3D Sounder Cloud Mask Target URL Context ...4 127 Wed 2017-04-12 10 58 alt text INSAT-3D Sounder Cloud Mask Fri 2019-03-01 11 00 alt text KALPANA 1 Winds

[https://www.mosdac.gov.in/node/464/27]
Link Text alt text Target URL Context ... INSAT-3D-Winds Wed 2017-04-12 10 58 alt text Go up Wed 2018-06-13 10 08 6 folders Search Search Follow Us Facebook icon https ...

[https://www.mosdac.gov.in/node/464/27?sort=desc&order=Display+name]
Link Text INSAT-3D-Winds Target URL Context ...ports Wed 2018-06-13 10 08 alt text INSAT-3D-Winds Wed 2017-04-12 10 58 alt text INSAT-3D Sounder Cloud Mask https ...
----------------------------


✅ FINAL OUTPUT:
 You are interacting with OrbitBot, a smart AI assistant bot that