In [10]:
import os
import glob
import math
import google.generativeai as genai

# --- 1. Setup Gemini ---
# The API key is read from the environment so the secret never lives in the
# notebook source, its saved outputs, or version control.
# Set it before starting Jupyter, e.g.:  export GOOGLE_API_KEY="..."
# NOTE(review): a key was previously committed in this cell; it should be
# revoked and rotated in the Google AI console.
_api_key = os.environ.get("GOOGLE_API_KEY")
if not _api_key:
    raise ValueError("❌ GOOGLE_API_KEY is not set. Export it before running this notebook.")
genai.configure(api_key=_api_key)
model_name = "gemini-1.5-flash"

# --- 2. Load + chunk docs ---
def chunk_text(text, max_length=500):
    """Split ``text`` into consecutive, non-overlapping character chunks.

    Args:
        text: The string to split.
        max_length: Maximum number of characters per chunk; must be positive.

    Returns:
        A list of substrings of ``text`` in order. Every chunk has exactly
        ``max_length`` characters except possibly the last one, which holds
        the remainder. An empty ``text`` yields an empty list.

    Raises:
        ValueError: If ``max_length`` is zero or negative. Without this check
            a negative value would silently return ``[]`` (dropping the whole
            document) and zero would surface as an opaque ``range()`` error.
    """
    if max_length <= 0:
        raise ValueError(f"max_length must be a positive integer, got {max_length!r}")
    return [text[i:i + max_length] for i in range(0, len(text), max_length)]

# Path to your docs. Override with the DOCS_DIR environment variable so the
# notebook is not tied to one machine's home directory.
docs_dir = os.environ.get("DOCS_DIR", "/home/harish/Desktop/chatbot/docs")
doc_extensions = (".md", ".txt", ".html")

# "*.*" also matches directories whose name contains a dot, so keep only
# regular files; compare extensions case-insensitively (README.MD, index.HTML).
# Sorting makes the chunk order reproducible across runs.
doc_files = sorted(
    path
    for path in glob.glob(os.path.join(docs_dir, "**", "*.*"), recursive=True)
    if os.path.isfile(path) and path.lower().endswith(doc_extensions)
)
if not doc_files:
    raise ValueError("❌ No documents found. Check your docs path and file extensions!")

# Each entry is {"text": <chunk string>, "vector": <embedding returned by the API>}.
chunks = []
for doc_path in doc_files:
    # errors="replace" keeps one stray non-UTF-8 byte from aborting the whole
    # run after many embedding calls have already been made.
    with open(doc_path, "r", encoding="utf-8", errors="replace") as doc_file:
        text = doc_file.read()
    for chunk in chunk_text(text):
        # Whitespace-only chunks carry no information; do not spend an
        # embedding request on them.
        if not chunk.strip():
            continue
        embedding = genai.embed_content(
            model="models/text-embedding-004",
            content=chunk
        )
        chunks.append({"text": chunk, "vector": embedding["embedding"]})

if not chunks:
    raise ValueError("❌ No chunks created. Are your documents empty?")

print(f"✅ Loaded {len(chunks)} chunks from {len(doc_files)} documents.")

# --- 3. Cosine similarity helper ---
def cosine_similarity(v1, v2):
    """Return the cosine similarity of two numeric vectors.

    Args:
        v1: Sequence of numbers.
        v2: Sequence of numbers.

    Returns:
        ``dot(v1, v2) / (|v1| * |v2|)`` as a float, or ``0.0`` when either
        vector has zero magnitude (the similarity is undefined there, and
        dividing would raise ``ZeroDivisionError``).

    Note:
        The dot product is computed with ``zip``, so if the lengths differ the
        extra elements of the longer vector are ignored in the dot product
        (but still counted in its magnitude).
    """
    dot = sum(a * b for a, b in zip(v1, v2))
    mag1 = math.sqrt(sum(x ** 2 for x in v1))
    mag2 = math.sqrt(sum(x ** 2 for x in v2))
    denom = mag1 * mag2
    if denom == 0:
        # An all-zero (or empty) vector has no direction; treat as "not similar".
        return 0.0
    return dot / denom

# --- 4. Setup Chat Model ---
# Build the generative model handle once at module level from `model_name`;
# ask_bot() reuses this single instance for every question.
chat_model = genai.GenerativeModel(model_name)

# --- 5. Ask Bot function ---
def ask_bot(question: str, top_k: int = 3) -> str:
    """Answer a question using the most relevant document chunks as context.

    The question is embedded with the same embedding model used for the
    documents, every entry in the module-level ``chunks`` list is scored by
    cosine similarity against it, and the ``top_k`` best chunks are placed in
    a prompt sent to ``chat_model``.

    Args:
        question: The user's question; must contain non-whitespace text.
        top_k: How many of the highest-scoring chunks to include as context.
            Defaults to 3.

    Returns:
        The text of the model's response.

    Raises:
        ValueError: If ``question`` is empty/blank or ``top_k`` is below 1.
            These are checked before any API call is made.
    """
    # Local import keeps this cell self-contained; heapq is standard library.
    import heapq

    if not question or not question.strip():
        raise ValueError("question must be a non-empty string")
    if top_k < 1:
        raise ValueError("top_k must be at least 1")

    # Embed question
    q_embed = genai.embed_content(
        model="models/text-embedding-004",
        content=question
    )["embedding"]

    # Rank chunks by similarity. nlargest gives the same order as sorting in
    # descending score and slicing (ties keep their original order) without
    # sorting the whole list.
    scored = ((c, cosine_similarity(q_embed, c["vector"])) for c in chunks)
    ranked = heapq.nlargest(top_k, scored, key=lambda pair: pair[1])

    # Build prompt
    context = "\n---\n".join(c["text"] for c, _ in ranked)
    prompt = f"""
    You are an assistant that answers questions based only on the following document excerpts.
    If the answer isn't clearly in the documents, reply with "I don't know."
    DOCUMENTS:
    {context}
    QUESTION: {question}
    """

    # Ask Gemini
    response = chat_model.generate_content([{"role": "user", "parts": [prompt]}])
    return response.text


✅ Loaded 855 chunks from 83 documents.


In [13]:
ask_bot("Development Setup Guide")

'The Development Setup Guide is located at `./development/setup.md`.\n'

In [16]:
ask_bot("explain about import system")

'The Import System is a robust, scalable solution for importing CSV data from ZIP files into the MGR platform.  It supports batch processing, progress tracking, and comprehensive error handling.  Business accounts can upload ZIP files containing CSV data, which is then processed asynchronously and imported as `ThirdPartyImportedDatum` records.  The system includes a `Imports::Process` command that orchestrates the import process.\n'

In [17]:
ask_bot("explain about development setup guilde")

'The development setup guide helps you set up the MGR API development environment locally.  Prerequisites include Ruby 3.0.0+, Rails 7.0.0+, PostgreSQL 12.0+, Redis 6.0+ (for Sidekiq), Node.js 16.0+, and Git (latest version).  Optional tools include Docker, pgAdmin, and the Redis CLI.  A link to a more detailed setup guide (`./development/setup.md`) is provided.\n'

In [18]:
ask_bot("explain about error handling")

'Error handling encompasses several aspects:\n\n**Error Types:**  The document identifies validation errors (business rule violations), authorization errors (permission failures), system errors (infrastructure issues), and external service errors (third-party API failures).  Another categorization includes transient errors (network issues, temporary unavailability), permanent errors (invalid data, missing records), and system errors (out of memory, disk space issues).\n\n**Error Handling Patterns:** The provided Ruby code demonstrates a `begin...rescue` block to catch and handle exceptions.  Specific error types can be rescued individually, with tailored responses. A general `rescue` clause catches unexpected errors.  The example also shows handling retry mechanisms and marking a process as failed after multiple attempts.  A consistent return value of `[boolean, object]` is used to indicate success or failure and return relevant data.  Error messages are added using `add_error`.  Loggi

In [19]:
ask_bot("you know about error recovery")

'Based on the provided text, error recovery is handled in a few ways:\n\n* **Retry mechanism:** The `find_in_batches` method processes large datasets in chunks, implicitly handling potential transient errors during processing of individual batches.  The code example with `retries_exhausted` shows explicit retry logic; if retries are exhausted, the import is marked as failed.\n\n* **Specific error handling:**  The `rescue SomeSpecificError` block catches and handles specific error types, allowing for custom error responses.\n\n* **Generic error handling:** The `rescue => e` block catches unexpected errors, logs them, and provides a generic error message.  This acts as a fallback mechanism for unforeseen issues.\n'

In [20]:
ask_bot("how to add new staff give all steps")

'1. **Access Staff Module:** Log into the admin dashboard and navigate to **Dashboard → Staff**. Click the **"Add New Staff"** button.  The URL is `https://coreology.staging.mgrapp.com/next/admin/staff`.\n\n2. **Enter General Details:** Fill in the required information.  The document does not specify what those required fields are.\n\n3.  **(Information missing from provided text):**  The document cuts off before detailing steps for adding additional information, assigning roles, and reviewing a summary.\n'

In [21]:
ask_bot("kya aapke ghar me pintu hai")

"I don't know.\n"

In [22]:
ask_bot("how can i deactivate my account")

"I don't know.  The provided text describes how to deactivate staff and client accounts, but not how to deactivate your own account.\n"

In [23]:
ask_bot("how can i deactivate account")

