# Music RAG System - Interactive Demo

This notebook demonstrates the Music RAG (Retrieval-Augmented Generation) system for music discovery.

## Features Demonstrated
- Text-based music search
- Dual-track retrieval (broad + targeted)
- Metadata filtering
- Hybrid search strategies
- Evaluation metrics

In [None]:
# Setup - Add parent directory to path
# The parent of the current working directory is placed at the front of
# sys.path, presumably so the `music_rag` package imports that follow can be
# resolved without installing the project.
# NOTE(review): this relies on the notebook's working directory sitting
# directly under the project root — confirm for your launch setup.
import sys
from pathlib import Path
sys.path.insert(0, str(Path.cwd().parent))

# Imports
from music_rag.cli import MusicRAGSystem
from music_rag.src.models.music_item import RetrievalQuery
from music_rag.src.data.sample_data_generator import generate_sample_music_data
from music_rag.src.retrieval.evaluation import evaluate_retrieval
import pandas as pd

## 1. Initialize the System

In [None]:
# Initialize the Music RAG system, pointing it at a notebook-specific db path
system = MusicRAGSystem(db_path="../data/chromadb_notebook")

# Generate the sample music items that the rest of the notebook works with
sample_items = generate_sample_music_data()

# Preview the first three generated items
print(f"Generated {len(sample_items)} sample music items\n")
for item in sample_items[:3]:
    # Join the moods into a readable string instead of printing the raw list
    # repr; fall back to 'N/A' when an item has no mood set.
    mood_text = ', '.join(item.metadata.mood) if item.metadata.mood else 'N/A'
    print(f"- {item.title} by {item.artist}")
    print(f"  Genre: {item.metadata.genre}, Mood: {mood_text}")
    print()

## 2. Index Music Items

In [None]:
# Feed all generated sample items into the system's index
system.index_music_items(sample_items)

# Ask the database for its statistics and report the two embedding counts
stats = system.db.get_stats()
print("\nDatabase Statistics:")
for label, stat_key in (
    ("Text", 'text_embeddings_count'),
    ("Audio", 'audio_embeddings_count'),
):
    print(f"  {label} embeddings: {stats[stat_key]}")

## 3. Basic Text Search

In [None]:
# Simple text search: a free-text query asking for the top five matches
query = RetrievalQuery(
    text_query="upbeat energetic dance music",
    top_k=5
)

results = system.search(query)

print("Search: 'upbeat energetic dance music'\n")
for i, result in enumerate(results, 1):
    item = result.music_item
    # Mood can be empty or missing (the dataset-exploration cell guards for
    # this as well); joining None would raise TypeError, so fall back to 'N/A'.
    mood_text = ', '.join(item.metadata.mood) if item.metadata.mood else 'N/A'
    print(f"{i}. {item.title} - {item.artist}")
    print(f"   Score: {result.score:.3f} ({result.retrieval_type})")
    print(f"   Genre: {item.metadata.genre}")
    print(f"   Mood: {mood_text}")
    print()

## 4. Search with Metadata Filters

In [None]:
# Same kind of text search, but restricted to two genres via genre_filter
query = RetrievalQuery(
    text_query="meditative spiritual music",
    top_k=3,
    genre_filter=["Indian Classical", "Middle Eastern"],
)
results = system.search(query)

print("Search: 'meditative spiritual music' with genre filters\n")
for rank, hit in enumerate(results, start=1):
    track = hit.music_item
    print(f"{rank}. {track.title} - {track.artist}")
    print(f"   Score: {hit.score:.3f}")
    print(f"   Genre: {track.metadata.genre}")
    print()

## 5. Dual-Track Retrieval Comparison

In [None]:
# Run one query text through three retrieval configurations and compare:
# broad-only, targeted-only, and both tracks together (dual-track).
query_text = "powerful orchestral symphony"

# Broad track only; no genre filter is supplied
query_broad = RetrievalQuery(
    text_query=query_text,
    top_k=3,
    use_broad_retrieval=True,
    use_targeted_retrieval=False,
)

# Targeted track only, with a genre filter
query_targeted = RetrievalQuery(
    text_query=query_text,
    top_k=3,
    use_broad_retrieval=False,
    use_targeted_retrieval=True,
    genre_filter=["Classical", "Rock"],
)

# Both tracks enabled, same genre filter as the targeted query
query_dual = RetrievalQuery(
    text_query=query_text,
    top_k=3,
    use_broad_retrieval=True,
    use_targeted_retrieval=True,
    genre_filter=["Classical", "Rock"],
)

# Execute the three searches in the same order the queries were built
results_broad = system.search(query_broad)
results_targeted = system.search(query_targeted)
results_dual = system.search(query_dual)

print("Comparison of Retrieval Strategies\n")
print("=" * 60)

# The single-track listings share one format: title plus score
single_track_sections = (
    ("\nBroad Retrieval Only:", results_broad),
    ("\nTargeted Retrieval Only:", results_targeted),
)
for heading, hits in single_track_sections:
    print(heading)
    for r in hits:
        print(f"  - {r.music_item.title} (score: {r.score:.3f})")

# The dual-track listing also shows each result's retrieval_type
print("\nDual-Track Retrieval:")
for r in results_dual:
    print(f"  - {r.music_item.title} (score: {r.score:.3f}, type: {r.retrieval_type})")

## 6. Evaluation Metrics

In [None]:
# Run a query and score the returned ids against a hand-written ground truth
query = RetrievalQuery(text_query="jazz saxophone music", top_k=10)
results = system.search(query)
retrieved_ids = [hit.music_item.id for hit in results]

# Ground truth (for demo purposes): graded relevance per item id
relevance_scores = {'2': 1.0, '8': 0.8}  # Miles Davis and Bossa Nova
# The relevant set is exactly the ids that carry a relevance score
relevant_ids = set(relevance_scores)

# Compute the metrics at cut-offs k = 1, 3 and 5
metrics = evaluate_retrieval(
    retrieved_ids=retrieved_ids,
    relevant_ids=relevant_ids,
    relevance_scores=relevance_scores,
    k_values=[1, 3, 5],
)

print("Evaluation Metrics:")
for metric_name, metric_value in metrics.items():
    print(f"  {metric_name}: {metric_value:.3f}")

## 7. Explore Dataset

In [None]:
# Build a tabular overview of the sample items, one row per item
def _to_row(track):
    """Flatten one music item into a dict keyed by display column name."""
    meta = track.metadata
    return {
        'Title': track.title,
        'Artist': track.artist,
        'Genre': meta.genre,
        'Cultural Origin': meta.cultural_origin,
        'Tempo (BPM)': meta.tempo,
        # Show at most the first two moods; 'N/A' when none are set
        'Mood': ', '.join(meta.mood[:2]) if meta.mood else 'N/A',
        'Duration (s)': meta.duration,
        'Live': meta.is_live_performance,
    }


data = [_to_row(track) for track in sample_items]

df = pd.DataFrame(data)
df

## 8. Custom Query Examples

In [None]:
# Try your own queries! Edit this list and re-run the cell.
custom_queries = [
    "relaxing acoustic guitar",
    "fast tempo percussion",
    "romantic ballad",
    "electronic synthesizer music",
]

for query_text in custom_queries:
    # Header for this query's result listing
    print(f"\nQuery: '{query_text}'")
    print("-" * 50)

    # Fetch the top three matches for the query text
    query = RetrievalQuery(text_query=query_text, top_k=3)
    results = system.search(query)

    for rank, hit in enumerate(results, start=1):
        track = hit.music_item
        print(f"{rank}. {track.title} - {track.artist}")
        print(f"   Score: {hit.score:.3f}")