# Spark Recommender - Interactive Notebook

This notebook demonstrates the Spark Recommender system components.

## 1. Setup and Configuration

First, let's import the required libraries and set up our environment.

In [None]:
import os
from dotenv import load_dotenv
import asyncio

# Load environment variables (python-dotenv reads them from a .env file).
# NOTE(review): this runs before the project imports below, presumably so
# those modules can see credentials at import/construction time - confirm.
load_dotenv()

# Import the project modules used by the cells below: the Kusto client, the
# documentation indexer and retriever, and the recommender and judge agents.
from mcp_server.kusto_client import KustoClient
from rag.indexer import SparkDocIndexer
from rag.retriever import SparkDocRetriever
from agent.orchestrator import SparkRecommenderAgent
from agent.judge import JudgeAgent

# Reaching this line means every import above succeeded.
print("✓ All modules imported successfully")

## 2. Index Spark Documentation

Index documentation files into Azure AI Search.

In [None]:
# Initialize the indexer (constructed with no arguments; any configuration it
# needs is not passed from this notebook).
indexer = SparkDocIndexer()

# Create the search index before any documents are pushed into it.
# NOTE(review): whether create_index() is safe to re-run against an existing
# index is not visible here - confirm before re-executing this cell.
indexer.create_index()
print("✓ Search index created")

# Index documents from the docs folder.
# The path is relative, so it presumably resolves against the kernel's
# working directory - start the notebook from the repository root (TODO confirm).
docs_path = "rag/docs"
result = indexer.index_from_directory(docs_path)
print(f"✓ Indexed documents: {result}")

## 3. Test Document Retrieval

Test retrieving relevant documentation based on a query.

In [None]:
# Build the retriever used to query the indexed documentation.
retriever = SparkDocRetriever()

# Run a sample search and keep only the three best matches.
query = "How to optimize Spark shuffle operations?"
results = retriever.search(query, top_k=3)

# Show a numbered summary: title, two-decimal score, and the first
# 150 characters of each hit's content.
print(f"Found {len(results)} relevant documents:\n")
for i, doc in enumerate(results, start=1):
    print(
        f"{i}. {doc['title']} (score: {doc['score']:.2f})",
        f"   {doc['content'][:150]}...\n",
        sep="\n",
    )

## 4. Query Kusto for Telemetry Data

Retrieve Spark job telemetry from Kusto.

In [None]:
# Initialize Kusto client
kusto = KustoClient()

# KQL: rows from SparkTelemetry whose timestamp is within the last day,
# capped at 10 rows.
# Note: `query` and `results` rebind the names used by the retrieval cell.
query = """
SparkTelemetry
| where timestamp > ago(1d)
| take 10
"""

# Guard only the query call itself. The success output lives in `else` so an
# error raised while formatting/printing the results is not misreported as a
# Kusto failure.
try:
    results = kusto.query(query)
except Exception as e:
    # Notebook boundary: report the problem and let the remaining cells run.
    print(f"⚠️ Kusto query failed: {e}")
    print("Make sure your Kusto credentials are configured correctly")
else:
    print("✓ Kusto query executed successfully")
    print(f"Results: {results}")

## 5. Generate Recommendations

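Use the agent orchestrator to generate Spark optimization recommendations. The cell below uses top-level `await`, which works in Jupyter/IPython but is a syntax error in a plain Python script.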

In [None]:
# Initialize the recommender agent. This instance is shared: the end-to-end
# workflow cell further down calls `agent` as well.
agent = SparkRecommenderAgent()

# Example query. It is also passed to the judge as the evaluation context in
# the evaluation cell.
user_question = "My Spark job is taking too long. How can I optimize it?"

# Generate recommendation
async def get_recommendation():
    """Ask the shared ``agent`` for a recommendation for ``user_question``.

    Reads the notebook-level ``agent`` and ``user_question``. No job ID is
    supplied, so ``job_id`` is passed as ``None``.

    Returns:
        Whatever ``agent.get_recommendation`` resolves to.
    """
    # Replace None with a specific job ID if you have one.
    return await agent.get_recommendation(user_query=user_question, job_id=None)

# Run async function. This is a top-level `await`, so it relies on the
# notebook kernel's support for awaiting outside a coroutine.
# `recommendation` is kept as a notebook-level name because the evaluation
# cell passes it to the judge.
recommendation = await get_recommendation()
print("Recommendation:")
print("=" * 80)  # 80-character separator line
print(recommendation)

## 6. Evaluate Recommendations

Use the judge agent to evaluate recommendation quality.

In [None]:
# Initialize judge. This instance is shared: the end-to-end workflow cell
# further down calls `judge` as well.
judge = JudgeAgent()

# Evaluate the recommendation
async def evaluate():
    """Have the shared ``judge`` assess the current ``recommendation``.

    Reads the notebook-level ``judge``, ``recommendation`` and
    ``user_question``; the question is supplied as the evaluation context.

    Returns:
        Whatever ``judge.evaluate_recommendation`` resolves to.
    """
    return await judge.evaluate_recommendation(
        recommendation=recommendation,
        context=user_question,
    )

# Top-level `await`: relies on the notebook kernel's support for awaiting
# outside a coroutine.
evaluation_result = await evaluate()
# Derive the score from the judge's evaluation output; it is displayed below
# as "<score>/10".
score = judge.parse_score(evaluation_result)

print("Evaluation:")
print("=" * 80)  # 80-character separator line
print(evaluation_result)
print(f"\nQuality Score: {score}/10")

## 7. End-to-End Example

Complete workflow from query to evaluated recommendation.

In [None]:
from typing import Optional


async def full_recommendation_workflow(
    question: str, job_id: Optional[str] = None
) -> tuple:
    """Generate a recommendation for *question*, score it, and print both.

    Uses the notebook-level ``agent`` and ``judge`` instances created in the
    earlier cells, so those cells must have been run first.

    Args:
        question: The user's question; also passed to the judge as the
            evaluation context.
        job_id: Optional job ID forwarded to the agent; ``None`` when there
            is no specific job.

    Returns:
        A ``(recommendation, quality_score)`` tuple.
    """
    print(f"Question: {question}\n")

    # Step 1: Generate recommendation.
    # Keyword arguments match the call style of the earlier cells and avoid
    # depending on the positional order of the agent's parameters.
    print("Step 1: Generating recommendation...")
    rec = await agent.get_recommendation(user_query=question, job_id=job_id)

    # Step 2: Evaluate recommendation, using the question as context.
    print("Step 2: Evaluating quality...")
    eval_result = await judge.evaluate_recommendation(
        recommendation=rec,
        context=question,
    )
    quality_score = judge.parse_score(eval_result)

    # Step 3: Display results
    print("\n" + "=" * 80)
    print("RECOMMENDATION:")
    print("=" * 80)
    print(rec)
    print("\n" + "=" * 80)
    print(f"Quality Score: {quality_score}/10")

    return rec, quality_score

# Run example. The workflow prints its own output; the returned
# (recommendation, score) tuple is not assigned to a name here.
# Top-level `await`: relies on the notebook kernel's support for awaiting
# outside a coroutine.
test_question = "What are the best practices for partitioning in Spark?"
await full_recommendation_workflow(test_question)