In [None]:
# Cell 1: Header and Setupimport syssys.path.insert(0, '..')from utils.notebook_utils import display_header, display_toc, check_dependency, conclusion_box, info_box, warning_boxfrom utils.system_info import display_system_infofrom utils.benchmark import Benchmark, BenchmarkResult, ComparisonTablefrom utils.charts import setup_style, bar_comparison, COLORSdisplay_header('Haystack & Semantic Kernel', 'SynaDB Multi-Framework Integration')

In [None]:
# Cell 2: Table of Contents
# Map each section title to the anchor id declared in that section's markdown
# heading (<a id="...">), then hand the (title, anchor) pairs to the TOC helper.
sections = list({
    'Introduction': 'introduction',
    'Setup': 'setup',
    'SynaDocumentStore Demo': 'documentstore',
    'Haystack Pipeline Demo': 'haystack-pipeline',
    'Semantic Kernel Memory Demo': 'sk-memory',
    'SK Semantic Functions': 'sk-functions',
    'Cross-Framework Comparison': 'comparison',
    'Data Portability Demo': 'portability',
    'Results Summary': 'results',
    'Conclusions': 'conclusions',
}.items())

display_toc(sections)

## 📌 Introduction <a id="introduction"></a>

This notebook demonstrates **SynaDB's integration with Haystack and Semantic Kernel**, showing how SynaDB can serve as a unified backend across multiple LLM frameworks.

### Frameworks Covered

| Framework | Component | Purpose |
|-----------|-----------|---------|
| **Haystack** | `SynaDocumentStore` | Document storage and retrieval for RAG |
| **Semantic Kernel** | Memory Store | Semantic memory for AI agents |

### Why Multi-Framework Support?

| Benefit | Description |
|---------|-------------|
| **Flexibility** | Choose the best framework for each use case |
| **Data Portability** | Share data between frameworks via SynaDB |
| **Unified Storage** | Single database for all LLM applications |
| **Easy Migration** | Switch frameworks without data migration |

### What We'll Demonstrate

1. **SynaDocumentStore** - Haystack document storage
2. **Haystack Pipeline** - Building retrieval pipelines
3. **Semantic Kernel Memory** - AI agent memory storage
4. **SK Semantic Functions** - Using memory in semantic functions
5. **Cross-Framework Comparison** - Same task in different frameworks
6. **Data Portability** - Sharing data between frameworks

In [None]:
# Cell 4: System Infodisplay_system_info()

## 🔧 Setup <a id="setup"></a>

Let's set up our environment and check for required dependencies.

In [None]:
# Cell 6: Check Dependencies and Imports
import numpy as np
import time
import os
import tempfile
from pathlib import Path

# Probe the three integrations. Later cells branch on these flags, so a
# missing package makes its demo print a notice instead of raising.
HAS_SYNADB = check_dependency('synadb', 'pip install synadb')
HAS_HAYSTACK = check_dependency('haystack', 'pip install haystack-ai')
HAS_SK = check_dependency('semantic_kernel', 'pip install semantic-kernel')  # optional

# Apply consistent styling
setup_style()

# One status line per dependency, emitted as a single block of text.
print("\n✓ Dependencies checked")
print("\n".join(
    f"  {label}: {'✓' if available else '✗'}"
    for label, available in (
        ('SynaDB', HAS_SYNADB),
        ('Haystack', HAS_HAYSTACK),
        ('Semantic Kernel', HAS_SK),
    )
))

In [None]:
# Cell 7: Create Temp Directory for Databases
# Every database file used by the demos lives in one scratch directory, which
# the final cleanup cell removes with shutil.rmtree.
temp_dir = tempfile.mkdtemp(prefix='synadb_multiframework_')
print(f'Using temp directory: {temp_dir}')

# Paths for databases: Haystack store, Semantic Kernel memory, shared store.
haystack_path, sk_path, shared_path = (
    os.path.join(temp_dir, db_file)
    for db_file in ('haystack.db', 'semantic_kernel.db', 'shared.db')
)

In [None]:
# Cell 8: Create Mock Embedding Function (for demo without API keys)
import hashlib


def mock_embed(text: str, dimensions: int = 384) -> list:
    """Generate a deterministic, unit-length pseudo-embedding for ``text``.

    The vector is drawn from a private NumPy generator whose seed is derived
    from a SHA-256 digest of the text. The built-in ``hash()`` is deliberately
    not used: for ``str`` it is salted per interpreter process, so seeds based
    on it change on every kernel restart. A private generator also leaves
    NumPy's global random state untouched.

    Args:
        text: Text to embed; identical text always yields an identical vector.
        dimensions: Length of the returned vector.

    Returns:
        A list of ``dimensions`` Python floats with (approximately) unit L2 norm.
    """
    # "surrogatepass" keeps any str encodable, as hash() accepted any str.
    digest = hashlib.sha256(text.encode("utf-8", "surrogatepass")).digest()
    seed = int.from_bytes(digest[:8], "little")
    rng = np.random.default_rng(seed)

    embedding = rng.standard_normal(dimensions).astype(np.float32)
    norm = np.linalg.norm(embedding)
    if norm > 0:  # guard the degenerate all-zero / zero-length case
        embedding = embedding / norm
    return embedding.tolist()


def mock_embed_batch(texts: list, dimensions: int = 384) -> list:
    """Generate embeddings for multiple texts.

    Args:
        texts: Iterable of strings to embed.
        dimensions: Length of each returned vector.

    Returns:
        A list with one ``mock_embed`` vector per input text, in input order.
    """
    return [mock_embed(text, dimensions) for text in texts]


print(f"✓ Created mock embedding functions")
print(f"  Sample embedding dimension: {len(mock_embed('test'))}")

## 📦 SynaDocumentStore Demo <a id="documentstore"></a>

The `SynaDocumentStore` class implements Haystack's DocumentStore interface, enabling:

- Document storage with embeddings
- Metadata filtering
- Integration with Haystack pipelines

In [None]:
# Cell 10: SynaDocumentStore Demo
if not (HAS_SYNADB and HAS_HAYSTACK):
    print("⚠️ SynaDB or Haystack not available, skipping document store demo")
    doc_store = None  # later cells use None as the "no store" sentinel
else:
    from synadb.integrations.haystack import SynaDocumentStore
    from haystack import Document
    from haystack.document_stores.types import DuplicatePolicy

    print("Creating SynaDocumentStore...")

    # Store backed by the Haystack database file created in Cell 7.
    doc_store = SynaDocumentStore(path=haystack_path, embedding_dim=384, metric="cosine")

    # Each spec is (content, metadata, text fed to the mock embedder).
    # The embedded text is a shortened form of the content.
    sample_docs = [
        Document(content=content, meta=meta, embedding=mock_embed(embed_text))
        for content, meta, embed_text in (
            (
                "Machine learning is a subset of artificial intelligence that enables systems to learn from data.",
                {"category": "ml", "topic": "basics"},
                "Machine learning is a subset of artificial intelligence",
            ),
            (
                "Deep learning uses neural networks with multiple layers to process complex patterns.",
                {"category": "ml", "topic": "deep_learning"},
                "Deep learning uses neural networks",
            ),
            (
                "Natural language processing allows computers to understand human language.",
                {"category": "nlp", "topic": "basics"},
                "Natural language processing",
            ),
            (
                "SynaDB is an AI-native embedded database for machine learning workloads.",
                {"category": "database", "topic": "synadb"},
                "SynaDB is an AI-native embedded database",
            ),
            (
                "Haystack is a framework for building NLP applications and search systems.",
                {"category": "framework", "topic": "haystack"},
                "Haystack is a framework for building NLP applications",
            ),
        )
    ]

    # Time the write; OVERWRITE is passed as the duplicate policy.
    start = time.perf_counter()
    written = doc_store.write_documents(sample_docs, policy=DuplicatePolicy.OVERWRITE)
    write_time = (time.perf_counter() - start) * 1000

    print(f"✓ Wrote {written} documents in {write_time:.2f}ms")
    print(f"  Total documents: {doc_store.count_documents()}")
    print(f"  Path: {haystack_path}")

In [None]:
# Cell 11: Filter Documents Demo
# Cell 10 sets doc_store to None when the demo is skipped. Compare against
# that sentinel explicitly: a bare truthiness test would also skip a valid
# store object that defines __len__/__bool__ and happens to be empty.
if doc_store is not None:
    print("Filtering documents by metadata...\n")

    # Get all documents (no filter argument)
    print("All documents:")
    print("-" * 50)
    all_docs = doc_store.filter_documents()
    for doc in all_docs:
        print(f"  [{doc.meta.get('category', 'N/A')}] {doc.content[:50]}...")

    # Filter by category
    print("\nML documents only:")
    print("-" * 50)
    ml_docs = doc_store.filter_documents(filters={"category": "ml"})
    for doc in ml_docs:
        print(f"  [{doc.meta.get('topic', 'N/A')}] {doc.content[:50]}...")

    print(f"\n✓ Found {len(all_docs)} total, {len(ml_docs)} ML documents")
else:
    print("⚠️ Document store not available")

## 🔗 Haystack Pipeline Demo <a id="haystack-pipeline"></a>

Let's demonstrate building a Haystack retrieval pipeline with SynaDB as the document store.

> **Note:** This demo simulates pipeline components. In production, you would use real retrievers and generators.

In [None]:
# Cell 13: Haystack Pipeline Demo
# doc_store is None when Cell 10 skipped the demo; test the sentinel explicitly
# rather than relying on the store object's truthiness.
if doc_store is not None and HAS_HAYSTACK:
    print("Building Haystack Pipeline with SynaDB...\n")

    # Simulate a retrieval pipeline
    class MockRetriever:
        """Mock retriever for demonstration.

        Scores every stored document against the query by brute force and
        returns the best matches.
        """

        def __init__(self, document_store, top_k=3):
            """Keep the store to read from and the number of hits to return."""
            self.document_store = document_store
            self.top_k = top_k

        def run(self, query: str):
            """Retrieve documents based on query.

            Args:
                query: Free-text query; embedded with ``mock_embed``.

            Returns:
                ``{"documents": [...]}`` holding at most ``top_k`` documents,
                ordered by descending dot product with the query embedding.
                Documents without an embedding are ignored.
            """
            all_docs = self.document_store.filter_documents()
            query_embedding = np.array(mock_embed(query))

            scored_docs = []
            for doc in all_docs:
                # `if doc.embedding:` raises "truth value of an array is
                # ambiguous" when the embedding is an ndarray, so test for
                # missing/empty explicitly.
                if doc.embedding is None or len(doc.embedding) == 0:
                    continue
                doc_embedding = np.asarray(doc.embedding)
                similarity = np.dot(query_embedding, doc_embedding)
                scored_docs.append((doc, similarity))

            # Sort by similarity and return top_k
            scored_docs.sort(key=lambda x: x[1], reverse=True)
            return {"documents": [doc for doc, _ in scored_docs[:self.top_k]]}

    # Create retriever
    retriever = MockRetriever(doc_store, top_k=3)

    # Test queries
    queries = [
        "What is machine learning?",
        "Tell me about SynaDB",
        "How does Haystack work?",
    ]

    print("Running retrieval pipeline:")
    print("=" * 60)

    for query in queries:
        print(f"\nQuery: '{query}'")
        print("-" * 50)

        start = time.perf_counter()
        result = retriever.run(query)
        retrieval_time = (time.perf_counter() - start) * 1000

        for i, doc in enumerate(result["documents"], 1):
            print(f"  {i}. [{doc.meta.get('category')}] {doc.content[:60]}...")

        print(f"  ⏱️ Retrieval time: {retrieval_time:.2f}ms")

    print("\n✓ Pipeline demonstration complete")
else:
    print("⚠️ Document store not available, skipping pipeline demo")

## 🧠 Semantic Kernel Memory Demo <a id="sk-memory"></a>

Semantic Kernel uses memory stores for semantic memory in AI agents. Let's demonstrate how SynaDB can serve as a memory backend.

> **Note:** This demo simulates Semantic Kernel memory patterns. In production, you would use the actual SK memory interface.

In [None]:
# Cell 15: Semantic Kernel Memory Demo
if HAS_SYNADB:
    from synadb import SynaDB, VectorStore

    print("Demonstrating Semantic Kernel Memory Pattern with SynaDB...\n")

    # Create a memory store using SynaDB
    class SynaMemoryStore:
        """Semantic Kernel-style memory store backed by SynaDB.

        Embeddings are written to a ``VectorStore`` under the key
        ``"<collection>/<id>"``. The text and description of each memory are
        kept only in an in-process dict, so they do not survive the object.
        """

        def __init__(self, path: str, dimensions: int = 384):
            """Open the vector store at ``path`` for ``dimensions``-wide embeddings."""
            self.vector_store = VectorStore(path, dimensions=dimensions, metric="cosine")
            # NOTE(review): a second handle on the same path; no method of this
            # class reads or writes through it.
            self.db = SynaDB(path)
            self._memories = {}  # In-memory cache for metadata

        def save_information(self, collection: str, id: str, text: str, description: str = ""):
            """Save information to memory.

            Args:
                collection: Namespace the memory belongs to.
                id: Identifier within the collection.
                text: Content to remember; also the text that gets embedded.
                description: Optional human-readable note.

            Returns:
                The storage key ``"<collection>/<id>"``.
            """
            key = f"{collection}/{id}"
            embedding = np.array(mock_embed(text), dtype=np.float32)
            self.vector_store.insert(key, embedding)
            self._memories[key] = {"text": text, "description": description}
            return key

        def search(self, collection: str, query: str, limit: int = 3):
            """Search memory for relevant information.

            Args:
                collection: Only memories in this collection are returned.
                query: Free-text query; embedded with ``mock_embed``.
                limit: Maximum number of hits to return.

            Returns:
                Up to ``limit`` dicts with keys ``id``, ``text`` and
                ``relevance``, in the order the vector store ranked them.
            """
            if limit <= 0:
                return []

            query_embedding = np.array(mock_embed(query), dtype=np.float32)
            # The collection filter runs after the vector search. Fetching only
            # limit * 2 neighbours can leave fewer than `limit` hits when other
            # collections crowd the top ranks, so ask for every memory this
            # instance has saved when that is the larger number.
            k = max(limit * 2, len(self._memories))
            results = self.vector_store.search(query_embedding, k=k)

            # Filter by collection
            prefix = f"{collection}/"
            filtered = []
            for r in results:
                if r.key.startswith(prefix):
                    memory = self._memories.get(r.key, {})
                    filtered.append({
                        "id": r.key,
                        "text": memory.get("text", ""),
                        "relevance": r.score
                    })
                    if len(filtered) >= limit:
                        break
            return filtered

        def get(self, collection: str, id: str):
            """Get specific memory by ID.

            Returns:
                The ``{"text", "description"}`` dict saved for
                ``"<collection>/<id>"``, or ``None`` if nothing was saved.
            """
            key = f"{collection}/{id}"
            return self._memories.get(key)

    # Create memory store
    memory = SynaMemoryStore(sk_path, dimensions=384)

    # Save some memories
    print("Saving memories to different collections...")

    # Facts collection
    memory.save_information("facts", "ml_def",
        "Machine learning is a type of AI that learns from data",
        "Definition of machine learning")
    memory.save_information("facts", "dl_def",
        "Deep learning uses neural networks with many layers",
        "Definition of deep learning")
    memory.save_information("facts", "synadb_def",
        "SynaDB is an embedded database for AI applications",
        "Definition of SynaDB")

    # User preferences collection
    memory.save_information("preferences", "language",
        "User prefers Python for programming",
        "Programming language preference")
    memory.save_information("preferences", "style",
        "User likes concise explanations with examples",
        "Communication style preference")

    print("✓ Saved 5 memories across 2 collections")

    # Search memories
    print("\nSearching memories:")
    print("-" * 50)

    search_queries = [
        ("facts", "What is machine learning?"),
        ("facts", "Tell me about SynaDB"),
        ("preferences", "How does the user like to learn?"),
    ]

    for collection, query in search_queries:
        print(f"\nCollection: '{collection}', Query: '{query}'")
        results = memory.search(collection, query, limit=2)
        for r in results:
            print(f"  • {r['text'][:50]}... (relevance: {r['relevance']:.3f})")

    print("\n✓ Memory search demonstration complete")
else:
    print("⚠️ SynaDB not available, skipping SK memory demo")
    memory = None

## ⚡ SK Semantic Functions <a id="sk-functions"></a>

Let's demonstrate how semantic functions can use SynaDB memory for context-aware responses.

In [None]:
# Cell 17: SK Semantic Functions Demo
# memory is None when Cell 15 skipped the demo.
if memory is not None:
    print("Demonstrating Semantic Functions with SynaDB Memory...\n")

    # Simulate a semantic function that uses memory
    def answer_with_memory(query: str, collection: str = "facts"):
        """Answer a question using memory context.

        Args:
            query: The user's question.
            collection: Memory collection to search for supporting context.

        Returns:
            A dict with ``response`` (the simulated answer text),
            ``memories_used`` (number of memories retrieved) and
            ``context_length`` (character count of the joined context).
        """
        # Retrieve relevant memories
        memories = memory.search(collection, query, limit=3)

        # Build context from memories, one bullet per memory
        context = "\n".join([f"- {m['text']}" for m in memories])

        # Simulate LLM response (in production, this would call an LLM).
        # Each section sits on its own line so the context bullets do not run
        # into the surrounding header and answer text.
        response = (
            "Based on my memory, here's what I know:\n\n"
            "Context retrieved:\n"
            f"{context}\n\n"
            f"Answer: The query '{query}' relates to the information above. "
            "In a real application, an LLM would synthesize this into a coherent response."
        )

        return {
            "response": response,
            "memories_used": len(memories),
            "context_length": len(context)
        }

    # Test the semantic function
    test_queries = [
        "Explain machine learning",
        "What database should I use for AI?",
    ]

    print("Running semantic functions with memory:")
    print("=" * 60)

    for query in test_queries:
        print(f"\nQuery: '{query}'")
        print("-" * 50)

        result = answer_with_memory(query)
        print(f"Memories used: {result['memories_used']}")
        print(f"Context length: {result['context_length']} chars")
        print(f"\nResponse preview:")
        print(result['response'][:300] + "...")

    print("\n✓ Semantic function demonstration complete")
else:
    print("⚠️ Memory store not available, skipping SK functions demo")

## 📊 Cross-Framework Comparison <a id="comparison"></a>

Let's compare how the same RAG task is implemented across different frameworks using SynaDB.

In [None]:
# Cell 19: Cross-Framework Comparison
print("Cross-Framework Comparison: RAG with SynaDB\n")
print("=" * 70)

# Common test data
test_docs = [
    "Python is a versatile programming language for data science.",
    "JavaScript runs in web browsers and enables interactive websites.",
    "Rust provides memory safety without garbage collection.",
]
test_query = "What programming language is good for data science?"

# Haystack approach
# doc_store is None when Cell 10 skipped the demo; compare against the sentinel
# instead of relying on the store object's truthiness.
if doc_store is not None and HAS_HAYSTACK:
    print("\n📦 HAYSTACK APPROACH")
    print("-" * 50)

    # Already have documents in doc_store, so only the read is timed here
    # (the other two sections time insert + search together).
    start = time.perf_counter()
    all_docs = doc_store.filter_documents()
    haystack_time = (time.perf_counter() - start) * 1000

    print(f"  Documents in store: {len(all_docs)}")
    print(f"  Retrieval time: {haystack_time:.2f}ms")
    print("  ✓ Uses SynaDocumentStore with filter_documents()")
else:
    print("\n📦 HAYSTACK: Not available")

# LlamaIndex approach (simulated)
print("\n📚 LLAMAINDEX APPROACH")
print("-" * 50)
if HAS_SYNADB:
    from synadb import VectorStore

    # Create a vector store
    llamaindex_path = os.path.join(temp_dir, 'llamaindex_compare.db')
    li_store = VectorStore(llamaindex_path, dimensions=384, metric="cosine")

    # Add documents
    start = time.perf_counter()
    for i, doc in enumerate(test_docs):
        embedding = np.array(mock_embed(doc), dtype=np.float32)
        li_store.insert(f"doc_{i}", embedding)

    # Query
    query_embedding = np.array(mock_embed(test_query), dtype=np.float32)
    results = li_store.search(query_embedding, k=2)
    llamaindex_time = (time.perf_counter() - start) * 1000

    print(f"  Documents indexed: {len(test_docs)}")
    print(f"  Results found: {len(results)}")
    print(f"  Total time: {llamaindex_time:.2f}ms")
    # NOTE(review): the label names SynaVectorStore/query(), while this
    # simulation calls VectorStore.search() directly — confirm intended wording.
    print("  ✓ Uses SynaVectorStore with query()")
else:
    print("  Not available")

# Semantic Kernel approach
print("\n🧠 SEMANTIC KERNEL APPROACH")
print("-" * 50)
# memory is None when Cell 15 skipped the demo.
if memory is not None:
    start = time.perf_counter()

    # Save to memory
    for i, doc in enumerate(test_docs):
        memory.save_information("comparison", f"doc_{i}", doc)

    # Search memory
    results = memory.search("comparison", test_query, limit=2)
    sk_time = (time.perf_counter() - start) * 1000

    print(f"  Memories saved: {len(test_docs)}")
    print(f"  Results found: {len(results)}")
    print(f"  Total time: {sk_time:.2f}ms")
    print("  ✓ Uses SynaMemoryStore with search()")
else:
    print("  Not available")

# Summary
print("\n" + "=" * 70)
print("COMPARISON SUMMARY")
print("-" * 70)
# One table row per line so the table is readable in plain-text output.
print("""
| Framework | Storage Class | Query Method | SynaDB Backend |
|-----------|---------------|--------------|----------------|
| Haystack | SynaDocumentStore | filter_documents() | ✓ |
| LlamaIndex | SynaVectorStore | query() | ✓ |
| Semantic Kernel | SynaMemoryStore | search() | ✓ |

All frameworks use the same underlying SynaDB storage!
""")

## 🔄 Data Portability Demo <a id="portability"></a>

One of SynaDB's key advantages is data portability between frameworks. Let's demonstrate sharing data.

In [None]:
# Cell 21: Data Portability Demo
if not HAS_SYNADB:
    print("⚠️ SynaDB not available, skipping portability demo")
else:
    print("Demonstrating Data Portability with SynaDB...\n")

    # One file, two handles: vectors go through the VectorStore, the original
    # text goes through SynaDB under a "meta/<key>" entry.
    shared_store = VectorStore(shared_path, dimensions=384, metric="cosine")
    shared_db = SynaDB(shared_path)

    # Documents written by "Framework A" (simulating Haystack)
    framework_a_docs = [
        ("haystack/doc1", "Haystack enables building search pipelines"),
        ("haystack/doc2", "Document stores are central to Haystack"),
    ]

    # Documents written by "Framework B" (simulating LlamaIndex)
    framework_b_docs = [
        ("llamaindex/node1", "LlamaIndex provides data connectors"),
        ("llamaindex/node2", "Query engines power LlamaIndex applications"),
    ]

    # Both writers follow the same steps, so run them through one loop.
    for banner, docs in (
        ("Framework A (Haystack-style) writes data:", framework_a_docs),
        ("\nFramework B (LlamaIndex-style) writes data:", framework_b_docs),
    ):
        print(banner)
        print("-" * 50)
        for key, text in docs:
            embedding = np.array(mock_embed(text), dtype=np.float32)
            shared_store.insert(key, embedding)
            shared_db.put_text(f"meta/{key}", text)
            print(f"  ✓ Wrote: {key}")

    # Now read from "Framework C" (simulating Semantic Kernel)
    print("\nFramework C (SK-style) reads all data:")
    print("-" * 50)

    # Search across everything both writers stored
    query = "How do I build search applications?"
    query_embedding = np.array(mock_embed(query), dtype=np.float32)
    results = shared_store.search(query_embedding, k=4)

    print(f"Query: '{query}'")
    print(f"Results from shared database:")
    for hit in results:
        # The key's leading path segment names the framework that wrote it.
        framework, _, _ = hit.key.partition('/')
        text = shared_db.get_text(f"meta/{hit.key}") or "N/A"
        print(f"  • [{framework}] {text[:50]}... (score: {hit.score:.3f})")

    print("\n✓ Data portability demonstrated!")
    print("  All frameworks can read/write to the same SynaDB database")

## 📊 Results Summary <a id="results"></a>

Let's summarize the multi-framework integration capabilities demonstrated.

In [None]:
# Cell 23: Results Summary
from IPython.display import display, Markdown

# Markdown needs each heading and each table row on its own line, with a blank
# line between blocks; with everything on one line the tables cannot render.
# NOTE(review): the Status column is static text — it reads "Working" even when
# a dependency was missing and the matching demo was skipped.
summary_table = """
### Multi-Framework Integration Summary

| Framework | Component | Status | Features Demonstrated |
|-----------|-----------|--------|----------------------|
| **Haystack** | SynaDocumentStore | ✅ Working | Document storage, filtering, pipeline integration |
| **Semantic Kernel** | Memory Store | ✅ Working | Semantic memory, collections, search |
| **Cross-Framework** | Data Sharing | ✅ Working | Unified storage, portability |

### Key Advantages

| Feature | Benefit |
|---------|---------|
| **Unified Backend** | Single database for all frameworks |
| **Data Portability** | Share data between frameworks seamlessly |
| **Consistent API** | Similar patterns across integrations |
| **Single File** | No database server needed |
| **Offline** | Works without network connectivity |

### Framework Comparison

| Aspect | Haystack | LlamaIndex | Semantic Kernel |
|--------|----------|------------|-----------------|
| Primary Use | Search Pipelines | Document Indexing | AI Agents |
| SynaDB Component | DocumentStore | VectorStore | MemoryStore |
| Query Style | Filters | Vector Query | Semantic Search |
| Best For | NLP Apps | RAG Systems | Conversational AI |
"""

display(Markdown(summary_table))

## 🎯 Conclusions <a id="conclusions"></a>

In [None]:
# Cell 25: Conclusions
# Collect the bullet points first so the call to the box helper stays short.
key_takeaways = [
    "SynaDB provides unified storage across Haystack, LlamaIndex, and Semantic Kernel",
    "Data portability enables sharing information between frameworks",
    "Single-file storage simplifies deployment and management",
    "Consistent patterns make it easy to switch between frameworks",
    "Zero configuration required - works out of the box",
    "Ideal for multi-framework applications and experimentation",
    "Enables gradual migration between LLM frameworks",
]

conclusion_box(
    title="Key Takeaways",
    points=key_takeaways,
    summary=(
        "SynaDB serves as a universal backend for LLM frameworks, "
        "enabling data portability and unified storage."
    ),
)

In [None]:
# Cell 26: Cleanup
import shutil

print("Cleaning up temporary files...")

# Best effort: a failed removal is reported, never raised, so the notebook
# still finishes.
try:
    shutil.rmtree(temp_dir)
    print(f"✓ Removed temp directory: {temp_dir}")
except Exception as cleanup_error:
    print(f"⚠️ Could not remove temp directory: {cleanup_error}")

print("\n✓ Notebook complete!")