In [3]:
# Import necessary libraries
import os
import json
import numpy as np
from groq import Groq
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import faiss

# Read the Groq API key from the environment instead of embedding it in the
# source. A key that has been committed to a file or notebook is readable by
# anyone with access to it and should be treated as compromised and rotated.
groq_api_key = os.environ.get("GROQ_API_KEY")
if not groq_api_key:
    raise RuntimeError(
        "GROQ_API_KEY is not set; export it before running this script."
    )

# Initialize the Groq client
client = Groq(api_key=groq_api_key)

# === Helper Functions === #
def load_data(file_path):
    """Load the dataset from a JSON file.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The parsed JSON content. If the file cannot be opened or parsed, the
        error is printed and an empty list is returned instead of raising.
    """
    try:
        # JSON text is UTF-8; do not depend on the platform's default
        # encoding (which is not UTF-8 everywhere, e.g. on Windows).
        with open(file_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except Exception as e:
        # Deliberate best-effort: callers receive an empty dataset on failure.
        print(f"Error loading data: {e}")
        return []

def query_groq_llm(prompt, context, model_name="llama3-8b-8192"):
    """Send a prompt and its supporting context to the Groq chat completion API.

    Args:
        prompt: The user's question.
        context: Text appended to the prompt under a ``Context:`` heading.
        model_name: Identifier of the Groq-hosted model to use. The default
            keeps the model this script has always used; pass another id to
            switch models without editing the function.

    Returns:
        The content of the first completion choice, or a fixed error message
        string when the request fails (the error itself is printed).
    """
    messages = [
        {"role": "system", "content": "You are an assistant providing district insights."},
        {"role": "user", "content": f"{prompt}\n\nContext:\n{context}"},
    ]
    try:
        chat_completion = client.chat.completions.create(
            messages=messages,
            model=model_name,
        )
        return chat_completion.choices[0].message.content
    except Exception as e:
        # Best-effort boundary: the interactive loop prints whatever is
        # returned here, so report the failure instead of raising.
        print(f"Error communicating with Groq API: {e}")
        return "An error occurred while fetching a response from Groq."

def build_faiss_index(data, model):
    """Build a FAISS L2 index over one text description per dataset entry.

    Args:
        data: Sequence of district records (mappings) providing the keys
            ``district_name``, ``water``, ``medkits``, ``food_rations``,
            ``ammo`` and ``camp_exists``. Entries missing any of these keys
            are reported and left out of the index.
        model: Object whose ``encode(list_of_str)`` returns one embedding
            row per input string.

    Returns:
        ``(index, descriptions)``. ``descriptions`` holds the text of the
        entries that were indexed, in order. The labels returned by
        ``index.search`` are positions in ``data`` (not in ``descriptions``),
        so ``data[label]`` is always the matching record even when some
        entries were skipped. Returns ``(None, None)`` when no description
        could be built or the embeddings are not a non-empty 2-D array.
    """
    descriptions = []
    positions = []  # position in `data` of each indexed description
    skipped_any = False

    for position, d in enumerate(data):
        try:
            desc = f"District {d['district_name']} with water {d['water']}, medkits {d['medkits']}, food rations {d['food_rations']}, ammo {d['ammo']}, and camp exists {d['camp_exists']}"
        except KeyError as e:
            print(f"Missing field in data entry: {e}")
            skipped_any = True
            continue
        descriptions.append(desc)
        positions.append(position)

    if not descriptions:
        print("No valid descriptions were created. Check your data formatting.")
        return None, None

    # FAISS requires a C-contiguous float32 matrix.
    embeddings = np.ascontiguousarray(model.encode(descriptions), dtype=np.float32)

    # Print embedding shape for debugging
    print(f"Embeddings shape: {embeddings.shape}")

    # Guard against anything that is not a (n, dim) matrix with dim > 0;
    # indexing shape[1] on a 1-D array would raise IndexError.
    if embeddings.ndim != 2 or embeddings.shape[1] == 0:
        print("Embedding dimension is zero, possibly due to empty descriptions.")
        return None, None
    dim = embeddings.shape[1]

    index = faiss.IndexFlatL2(dim)
    if skipped_any:
        # Row numbers no longer match positions in `data`; store the original
        # positions as explicit ids so search results still index `data`.
        index = faiss.IndexIDMap(index)
        index.add_with_ids(embeddings, np.asarray(positions, dtype=np.int64))
    else:
        index.add(embeddings)
    return index, descriptions


def search_faiss(query, top_k=5):
    """Search the FAISS index for the entries most relevant to ``query``.

    Uses the module-level ``model`` to embed the query and the module-level
    ``index`` to search.

    Args:
        query: Free-text query string.
        top_k: Maximum number of neighbours to return.

    Returns:
        ``(distances, indices)`` for the single query, as 1-D arrays with at
        most ``top_k`` elements (fewer when the index stores fewer vectors,
        empty when nothing can be returned).

    Raises:
        RuntimeError: If the index was never built (``index`` is ``None``).
    """
    if index is None:
        raise RuntimeError(
            "FAISS index is not available; check that the dataset loaded correctly."
        )

    # FAISS pads the result with label -1 when asked for more neighbours than
    # it stores, so never request more than are available.
    k = min(top_k, index.ntotal)
    if k <= 0:
        return np.empty(0, dtype=np.float32), np.empty(0, dtype=np.int64)

    query_embedding = np.asarray(model.encode([query]), dtype=np.float32)
    distances, indices = index.search(query_embedding, k)
    return distances[0], indices[0]

def format_results(indices, distances):
    """Turn FAISS search output into a list of district records.

    Args:
        indices: Labels returned by the index search; each is used as a
            position in the module-level ``data`` list.
        distances: Distances paired element-wise with ``indices``.

    Returns:
        A list of shallow copies of the matching ``data`` entries, each with
        an added ``'similarity'`` key equal to ``1 / (1 + distance)``.
        Labels outside ``data`` are dropped.
    """
    results = []
    for idx, dist in zip(indices, distances):
        # FAISS reports "no neighbour" as label -1. Without the lower bound a
        # negative label would wrap around and return data[-1] as a bogus hit.
        if 0 <= idx < len(data):
            result = data[idx].copy()
            result['similarity'] = 1 / (1 + dist)  # Convert distance to similarity
            results.append(result)
    return results

# === Load Data === #
# `data` is read as a module-level global by format_results. load_data
# returns an empty list when the file cannot be read or parsed.
file_path = "parsed_districts.json"  # Adjust path as needed
data = load_data(file_path)

# === Initialize Model === #
# The same model instance embeds the dataset below and, as a module-level
# global, the user queries inside search_faiss.
model = SentenceTransformer('all-MiniLM-L6-v2')  # Load SentenceTransformer model

# === Build FAISS Index === #
# `index` is read as a module-level global by search_faiss. Both names are
# None when build_faiss_index could not create any description.
index, descriptions = build_faiss_index(data, model)

# === Interactive Query System === #
def _district_context_line(result):
    """Return the one-line summary of a district record that is sent to the LLM."""
    return (
        f"District {result['district_name']}: "
        f"Water={result['water']}, Medkits={result['medkits']}, "
        f"Food Rations={result['food_rations']}, Ammo={result['ammo']}, "
        f"Camps Exist={result['camp_exists']}\n"
    )


def query_system():
    """Run the interactive query loop with FAISS retrieval and LLM augmentation.

    Repeatedly reads a query from standard input, prints the top five
    districts returned by ``search_faiss`` / ``format_results``, then sends
    the query plus a summary of those districts to ``query_groq_llm`` and
    prints its answer. The loop ends when the user types ``exit``, when
    standard input is closed, or on Ctrl-C at the prompt. Blank input is
    ignored. Errors raised while handling a query are printed and the loop
    continues.
    """
    print("Welcome to the District Query System with LLM augmentation!")
    print("Type your query (e.g., 'Which districts have high water reserves, medkits, and ammo?'):")

    while True:
        try:
            user_query = input("\nEnter your query (or type 'exit' to quit): ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C at the prompt: leave like an explicit 'exit'.
            print("\nExiting. Thank you!")
            break

        if user_query.lower() == 'exit':
            print("Exiting. Thank you!")
            break
        if not user_query:
            # Nothing to search for; prompt again instead of querying the LLM.
            continue

        print("\nSearching for districts...\n")
        try:
            # Search FAISS index
            distances, indices = search_faiss(user_query, top_k=5)
            results = format_results(indices, distances)

            if not results:
                print("No matching districts found.")
                continue

            print("\nTop relevant districts:")
            context_lines = []
            for number, result in enumerate(results, start=1):
                print(f"\nResult {number}:")
                print(f"District Name: {result['district_name']}")
                print(f"Water Reserves: {result['water']}")
                print(f"Medkits: {result['medkits']}")
                print(f"Food Rations: {result['food_rations']}")
                print(f"Ammo Count: {result['ammo']}")
                print(f"Camps Exist: {result['camp_exists']}")
                print(f"Similarity Score: {result['similarity']:.4f}")
                # Add district info to context for LLM
                context_lines.append(_district_context_line(result))
            context = "".join(context_lines)

            # Query the Groq LLM with the user's query and context
            print("\nEnhancing results using Groq LLM...")
            llm_response = query_groq_llm(user_query, context)
            print("\nGroq LLM Response:")
            print(llm_response)
        except Exception as e:
            # Keep the session alive: report the failure and prompt again.
            print(f"An error occurred: {e}")

# Run the system
# Start the interactive loop only when this code runs as the main module,
# not when it is imported.
if __name__ == "__main__":
    query_system()




Embeddings shape: (30, 384)
Welcome to the District Query System with LLM augmentation!
Type your query (e.g., 'Which districts have high water reserves, medkits, and ammo?'):

Searching for districts...


Top relevant districts:

Result 1:
District Name: Kendrapara
Water Reserves: 21503
Medkits: None
Food Rations: None
Ammo Count: 0
Camps Exist: False
Similarity Score: 0.5196

Result 2:
District Name: Kendujhar
Water Reserves: None
Medkits: None
Food Rations: None
Ammo Count: 0
Camps Exist: False
Similarity Score: 0.5173

Result 3:
District Name: Kandhamal
Water Reserves: None
Medkits: None
Food Rations: None
Ammo Count: 0
Camps Exist: False
Similarity Score: 0.5133

Result 4:
District Name: Boudh
Water Reserves: None
Medkits: None
Food Rations: None
Ammo Count: 0
Camps Exist: False
Similarity Score: 0.5118

Result 5:
District Name: Puri
Water Reserves: 21798
Medkits: None
Food Rations: None
Ammo Count: 0
Camps Exist: False
Similarity Score: 0.5096

Enhancing results using Groq LLM...