In [4]:
import os
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer

# Single source of truth for the embedding checkpoint: the progress message
# and the constructor both read it, so they cannot drift apart when the model
# is swapped.
MODEL_NAME = 'all-MiniLM-L6-v2'

print(f"Loading the sentence-transformer model '{MODEL_NAME}'...")
# `model` is used by later cells to embed both the documents and the queries.
model = SentenceTransformer(MODEL_NAME)
print("Model loaded successfully.")

Loading the sentence-transformer model 'all-MiniLM-L6-v2'...
Model loaded successfully.


In [5]:
def load_documents(path, extensions=('.log', '.json', '.txt', '.md')):
    """
    Loads all .log, .json, .txt, and .md files from a directory.

    Walks `path` recursively and reads every file whose name ends with one of
    `extensions` as UTF-8 text. Directories and files are visited in sorted
    order, so the returned list has the same order on every run; os.walk on
    its own yields entries in arbitrary filesystem order, which would make
    list positions unstable between machines.

    Args:
        path: Root directory to scan. A missing directory yields an empty list.
        extensions: Filename suffix (or iterable of suffixes) to accept.
            Matching is case-sensitive. Defaults to the four text formats
            named above.

    Returns:
        A list of dicts, one per readable file, with keys 'name' (file name),
        'path' (joined path) and 'content' (full text). Files that cannot be
        opened or decoded are reported via print and skipped.
    """
    # A bare string would otherwise be split into single characters by tuple().
    if isinstance(extensions, str):
        suffixes = (extensions,)
    else:
        suffixes = tuple(extensions)

    documents = []
    for root, dirs, files in os.walk(path):
        # Sorting in place controls the order in which os.walk descends.
        dirs.sort()
        for file_name in sorted(files):
            # Only read file types we care about
            if not file_name.endswith(suffixes):
                continue
            file_path = os.path.join(root, file_name)
            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()
            except (OSError, UnicodeError) as e:
                # Best effort: one unreadable file must not abort the whole load.
                print(f"Error reading {file_path}: {e}")
                continue
            documents.append({
                'name': file_name,
                'path': file_path,
                'content': content,
            })
    return documents

# Corpus location, relative to the notebook's working directory. Kept in one
# place so the progress message and the actual load always agree.
DATA_DIR = '../data'

print(f"\nLoading all runbooks and logs from the '{DATA_DIR}' directory...")
all_docs = load_documents(DATA_DIR)

# Fail here with a clear message instead of reporting "0 documents" as a
# success and crashing later on an empty embedding matrix.
if not all_docs:
    raise FileNotFoundError(
        f"No .log, .json, .txt, or .md documents found under '{DATA_DIR}'. "
        "Check the notebook's working directory."
    )

# Parallel list of raw texts; position i corresponds to all_docs[i].
doc_contents = [doc['content'] for doc in all_docs]
print(f"Successfully loaded {len(all_docs)} documents.")



Loading all runbooks and logs from the '../data' directory...
Successfully loaded 12 documents.


In [6]:
# Tell the reader up front that this cell is the slow one.
print("\nCreating embeddings for all documents... (This may take a moment)")

# Encode every document body in a single batched call. Row i of the result
# belongs to doc_contents[i]; that positional link is what lets a search hit
# be mapped back to its document later on.
# NOTE(review): each whole file is embedded as one text; the model may
# truncate long inputs, so confirm long logs are represented adequately.
doc_embeddings = model.encode(
    doc_contents,
    convert_to_tensor=False,
)

print(f"Embeddings created successfully. Vector shape: {doc_embeddings.shape}")



Creating embeddings for all documents... (This may take a moment)
Embeddings created successfully. Vector shape: (12, 384)


In [7]:
# Embedding width, read from the matrix itself so it always matches the model.
d = doc_embeddings.shape[1]

print("\nBuilding the FAISS index...")
# Exact (brute-force) L2 index. Vectors get ids 0..n-1 in insertion order.
index = faiss.IndexFlatL2(d)
index.add(doc_embeddings)

# search() maps FAISS ids straight to positions in all_docs, so the two must
# stay the same size. A mismatch means all_docs was reloaded without
# re-running the embedding cell (stale kernel state).
assert index.ntotal == len(all_docs), (
    f"Index holds {index.ntotal} vectors but {len(all_docs)} documents are loaded; "
    "re-run the embedding cell."
)
print(f"FAISS index built. Total vectors in index: {index.ntotal}")



Building the FAISS index...
FAISS index built. Total vectors in index: 12


In [8]:
def search(query, k=3):
    """
    Takes a text query, embeds it, and searches the FAISS index for the top k results.

    Uses the notebook-level `model` (query encoder), `index` (FAISS index) and
    `all_docs` (documents, in the same order their vectors were added).
    Results are printed; nothing is returned.

    Args:
        query: Free-text description of the incident to look up.
        k: Maximum number of documents to show. Values larger than the number
           of indexed vectors are clamped, because FAISS pads missing results
           with id -1 and `all_docs[-1]` would otherwise be printed as a
           bogus hit.
    """
    print("\n==============================================================")
    print(f"Searching for top {k} documents matching query: '{query}'")
    print("==============================================================")

    # Never ask FAISS for more neighbours than the index can provide.
    top_k = min(k, index.ntotal)
    if top_k < 1:
        print("\nNo results: the index is empty or k is not positive.")
        return

    # encode() gets a one-element list so the result is a (1, dim) batch,
    # which is the shape index.search expects.
    query_embedding = model.encode([query])

    distances, indices = index.search(query_embedding, top_k)

    print("\n--- Search Results ---")
    for rank, (idx, distance) in enumerate(zip(indices[0], distances[0]), start=1):
        # Defensive: -1 is the FAISS "no result" sentinel, not a list position.
        if idx < 0:
            continue
        doc = all_docs[idx]
        # For an L2 index, smaller distance means a closer match.
        print(f"\n{rank}. Document: {doc['name']} (Score/Distance: {distance:.4f})")
        print("--------------------------------------------------")
        # Preview only: the first 450 characters of the document.
        print(f"{doc['content'][:450]}...")

In [9]:
# Sample incident descriptions used to eyeball retrieval quality: one about
# CPU exhaustion, one about database latency, one about a crash loop.
example_queries = (
    "The auth-service has high CPU usage and is exhausted",
    "I'm getting database connection timeouts and latency spikes from the payment gateway",
    "My search-engine service is in a crash loop after the last deployment and is unavailable",
)

# Run them in order; search() prints its own report for each query.
for example_query in example_queries:
    search(example_query)


Searching for top 3 documents matching query: 'The auth-service has high CPU usage and is exhausted'

--- Search Results ---

1. Document: incident_001_cpu_spike.log (Score/Distance: 0.6258)
--------------------------------------------------

2. Document: high_cpu_restart.md (Score/Distance: 0.9922)
--------------------------------------------------
# High CPU Restart Procedure

## 🔍 Incident Symptoms
- An incident is flagged as an anomaly with a score of **1.0**, indicating a `metricValue` (CPU Usage) of **> 90%**.
- The `eventType` in the alert payload is `ResourceExhaustion`.
- The `service` field identifies a specific component, like `auth-service`, as the source.
- P99 latency for the service is significantly elevated.

## 📈 Possible Causes
- **Stuck Process:** A worker thread or proces...

3. Document: database_latency.md (Score/Distance: 1.2996)
--------------------------------------------------
# High Database Query Latency

## 🔍 Incident Symptoms
- The `eventType` is `Latency