In [None]:
# Cell 1: Header and Setup
import sys
# Put the parent directory first on the import path. This has to run before the
# `utils.*` imports below (presumably `utils` lives one level above this
# notebook - TODO confirm the repository layout).
sys.path.insert(0, '..')

# Project-local helpers for notebook display, system info, benchmarking and charts.
from utils.notebook_utils import display_header, display_toc, check_dependency, conclusion_box, info_box, warning_box
from utils.system_info import display_system_info
from utils.benchmark import Benchmark, BenchmarkResult, ComparisonTable
from utils.charts import setup_style, bar_comparison, throughput_comparison, COLORS

# Show the notebook title and subtitle through the project helper.
display_header('Experiment Tracking Comparison', 'SynaDB vs MLflow')

In [None]:
# Cell 2: Table of Contents
# Each entry is (heading text, anchor id). The anchor ids match the
# `<a id="...">` tags on the markdown section headings of this notebook.
sections = [
    ('Introduction', 'introduction'),
    ('Setup', 'setup'),
    ('Benchmark: Parameter Logging', 'benchmark-params'),
    ('Benchmark: Metric Logging', 'benchmark-metrics'),
    ('Benchmark: Artifact Storage', 'benchmark-artifacts'),
    ('Benchmark: Query Performance', 'benchmark-query'),
    ('Demo: Offline Usage', 'demo-offline'),
    ('Results Summary', 'results'),
    ('Conclusions', 'conclusions'),
]
# Hand the list to the project helper that displays the table of contents.
display_toc(sections)

## 📌 Introduction <a id="introduction"></a>

This notebook compares **SynaDB's ExperimentTracker** against **MLflow**, the most popular open-source experiment tracking platform.

| System | Type | Key Features |
|--------|------|-------------|
| **SynaDB** | Embedded | Single-file, zero config, offline-first, AI-native |
| **MLflow** | Server-based (run here in local file-store mode) | Industry standard, rich UI, model registry |

### What We'll Measure

- **Parameter logging** latency
- **Metric logging** throughput (100 epochs)
- **Artifact storage** performance
- **Query performance** for retrieving runs
- **Setup complexity** comparison

### Test Configuration

- **Experiments**: 10 experiment runs
- **Parameters**: 20 hyperparameters per run
- **Metrics**: 100 epochs × 5 metrics per run
- **Artifacts**: Model checkpoints (1MB each)

In [None]:
# Cell 4: System Info
# Record the machine the benchmarks run on (via the project helper), so the
# timings further down can be read in context.
display_system_info()

## 🔧 Setup <a id="setup"></a>

Let's set up the test environment for the experiment-tracking comparison.

In [None]:
# Cell 6: Check Dependencies and Imports
# Standard-library and third-party modules used by the benchmark cells below.
import numpy as np
import time
import os
import shutil
import tempfile
from pathlib import Path
import matplotlib.pyplot as plt
import uuid

# Check for SynaDB
# The result is used as a truthy flag: every SynaDB cell below is guarded by
# `if HAS_SYNADB`. The second argument looks like an install hint for the user.
HAS_SYNADB = check_dependency('synadb', 'pip install synadb')

# Check for MLflow
# Same pattern: every MLflow cell below is guarded by `if HAS_MLFLOW`.
HAS_MLFLOW = check_dependency('mlflow', 'pip install mlflow')

# Apply consistent styling
setup_style()

In [None]:
# Cell 7: Configuration
# Workload knobs for the whole notebook; change them here to rescale the run.
NUM_RUNS = 10           # experiment runs per system
NUM_PARAMS = 20         # hyperparameters logged per run
NUM_EPOCHS = 100        # simulated training epochs per run
NUM_METRICS = 5         # metrics logged per epoch
ARTIFACT_SIZE_MB = 1    # size of each simulated model checkpoint
SEED = 42               # seed for the global NumPy RNG

# Echo the effective configuration, one setting per output line.
print(
    'Test Configuration:',
    f'  Runs: {NUM_RUNS}',
    f'  Parameters per run: {NUM_PARAMS}',
    f'  Epochs per run: {NUM_EPOCHS}',
    f'  Metrics per epoch: {NUM_METRICS}',
    f'  Total metric logs: {NUM_RUNS * NUM_EPOCHS * NUM_METRICS:,}',
    f'  Artifact size: {ARTIFACT_SIZE_MB} MB',
    sep='\n',
)

# Seed the global NumPy RNG so the generated test data is reproducible.
np.random.seed(SEED)

In [None]:
# Cell 8: Create Temp Directory for Databases
# Everything the benchmarks write goes under one scratch directory, which the
# final cleanup cell removes.
temp_dir = tempfile.mkdtemp(prefix='synadb_exp_benchmark_')
print(f'Using temp directory: {temp_dir}')

# Storage locations for each system: a database file path for SynaDB and a
# directory path for MLflow.
synadb_path, mlflow_path = (
    os.path.join(temp_dir, leaf)
    for leaf in ('synadb_experiments.db', 'mlruns')
)

In [None]:
# Cell 9: Generate Test Data
# All random values below come from the global NumPy RNG that the configuration
# cell seeds, so the order of the np.random calls determines the generated data.
# Keep that order unchanged if results must stay comparable between runs.

# Generate hyperparameters for each run
# One dict per run with keys param_0 .. param_{NUM_PARAMS-1}:
#   param_0 -> optimizer name, param_1 -> uniform in [0.0001, 0.1),
#   param_2 -> integer in [16, 256), all others -> uniform in [0, 1).
hyperparams = [
    {
        f'param_{j}': np.random.choice(['adam', 'sgd', 'rmsprop']) if j == 0
        else np.random.uniform(0.0001, 0.1) if j == 1
        else np.random.randint(16, 256) if j == 2
        else np.random.uniform(0, 1)
        for j in range(NUM_PARAMS)
    }
    for _ in range(NUM_RUNS)
]

# Generate metrics for each run (simulating training)
# metrics_data[run][epoch] is a dict with exactly five metrics. The number of
# keys is fixed by this literal and is not derived from NUM_METRICS.
metrics_data = []
for run_idx in range(NUM_RUNS):
    run_metrics = []
    for epoch in range(NUM_EPOCHS):
        epoch_metrics = {
            # 1/(epoch+1) decay plus noise; at late epochs the noise can exceed
            # the decay term, so simulated losses may dip slightly below zero.
            'loss': 1.0 / (epoch + 1) + np.random.uniform(-0.05, 0.05),
            # Linear ramp plus noise, capped at 0.99.
            'accuracy': min(0.99, 0.5 + epoch * 0.005 + np.random.uniform(-0.02, 0.02)),
            'val_loss': 1.0 / (epoch + 1) + np.random.uniform(-0.1, 0.1),
            # Same ramp shifted lower, capped at 0.98.
            'val_accuracy': min(0.98, 0.45 + epoch * 0.005 + np.random.uniform(-0.03, 0.03)),
            # Deterministic exponential decay (no RNG draw).
            'learning_rate': 0.001 * (0.95 ** epoch),
        }
        run_metrics.append(epoch_metrics)
    metrics_data.append(run_metrics)

# Generate artifact data (simulated model checkpoint)
# One shared random byte string; the artifact benchmarks store this same payload
# once per run.
artifact_data = np.random.bytes(ARTIFACT_SIZE_MB * 1024 * 1024)

print(f'✓ Generated {NUM_RUNS} sets of hyperparameters')
print(f'✓ Generated {NUM_RUNS * NUM_EPOCHS} epochs of metrics')
print(f'✓ Generated {ARTIFACT_SIZE_MB}MB artifact data')

## ⚡ Benchmark: Parameter Logging <a id="benchmark-params"></a>

Let's measure how fast each system can log hyperparameters.

In [None]:
# Cell 11: SynaDB Parameter Logging Benchmark
# Outputs consumed by later cells:
#   synadb_param_times - elapsed ms per run for logging all of its parameters
#   synadb_tracker     - tracker instance reused by the metric and query cells
#   synadb_run_ids     - run ids, reused by the metric-logging cell
synadb_param_times = []
synadb_tracker = None
synadb_run_ids = []

if not HAS_SYNADB:
    print('⚠️ SynaDB not available, skipping...')
else:
    from synadb import ExperimentTracker

    print('Benchmarking SynaDB parameter logging...')

    # One tracker backed by the database file chosen in the temp-dir cell.
    synadb_tracker = ExperimentTracker(synadb_path)

    for run_idx in range(NUM_RUNS):
        # Opening the run is deliberately outside the timed region.
        run_id = synadb_tracker.start_run('benchmark_exp', tags=[f'run_{run_idx}'])
        synadb_run_ids.append(run_id)

        run_params = hyperparams[run_idx]

        # Timed region: log every parameter of this run (values sent as strings).
        start = time.perf_counter()
        for param_name, param_value in run_params.items():
            synadb_tracker.log_param(run_id, param_name, str(param_value))
        elapsed = (time.perf_counter() - start) * 1000  # ms
        synadb_param_times.append(elapsed)

        # Progress message every fifth run.
        completed = run_idx + 1
        if completed % 5 == 0:
            print(f'  Completed {completed} runs...')

    print(f'✓ SynaDB: {NUM_RUNS} runs, {NUM_PARAMS} params each')
    print(f'  Mean time per run: {np.mean(synadb_param_times):.2f}ms')
    print(f'  Total params logged: {NUM_RUNS * NUM_PARAMS}')

In [None]:
# Cell 12: MLflow Parameter Logging Benchmark
# Outputs consumed by later cells:
#   mlflow_param_times - elapsed ms per run for logging all of its parameters
#   mlflow_run_ids     - ids of the runs created here
mlflow_param_times = []
mlflow_run_ids = []

if HAS_MLFLOW:
    import mlflow
    
    print('Benchmarking MLflow parameter logging...')
    
    # Set tracking URI to local directory.
    # Build the file URI with pathlib instead of string concatenation:
    # 'file://' + path is malformed for Windows paths (drive letter, backslashes)
    # and does not percent-encode special characters. For a plain POSIX path the
    # result is the same 'file:///...' URI as before. `Path` comes from the
    # dependency/import cell; mkdtemp returns an absolute path, which as_uri()
    # requires.
    mlflow.set_tracking_uri(Path(mlflow_path).as_uri())
    mlflow.set_experiment('benchmark_exp')
    
    # Benchmark parameter logging for each run
    for run_idx in range(NUM_RUNS):
        # Opening/closing the run is outside the timed region.
        with mlflow.start_run() as run:
            mlflow_run_ids.append(run.info.run_id)
            
            # Timed region: log every parameter of this run.
            start = time.perf_counter()
            for param_name, param_value in hyperparams[run_idx].items():
                mlflow.log_param(param_name, param_value)
            elapsed = (time.perf_counter() - start) * 1000  # ms
            mlflow_param_times.append(elapsed)
        
        # Progress message every fifth run.
        if (run_idx + 1) % 5 == 0:
            print(f'  Completed {run_idx + 1} runs...')
    
    print(f'✓ MLflow: {NUM_RUNS} runs, {NUM_PARAMS} params each')
    print(f'  Mean time per run: {np.mean(mlflow_param_times):.2f}ms')
    print(f'  Total params logged: {NUM_RUNS * NUM_PARAMS}')
else:
    print('⚠️ MLflow not available, skipping...')

In [None]:
# Cell 13: Parameter Logging Results Visualization
# Mean per-run latency (ms) for each system that produced timings.
param_latencies = {}

if synadb_param_times:
    param_latencies['SynaDB'] = np.mean(synadb_param_times)

if mlflow_param_times:
    param_latencies['MLflow'] = np.mean(mlflow_param_times)

if param_latencies:
    fig = bar_comparison(
        param_latencies,
        title=f'Parameter Logging Latency ({NUM_PARAMS} params/run)',
        ylabel='Latency (ms)',
        lower_is_better=True
    )
    plt.show()
    
    # Calculate speedup (only meaningful when both systems were measured).
    if 'SynaDB' in param_latencies and 'MLflow' in param_latencies:
        # Latency ratio: a value above 1 means SynaDB took less time.
        speedup = param_latencies['MLflow'] / param_latencies['SynaDB']
        # Name whichever system actually won instead of assuming SynaDB did;
        # otherwise a ratio below 1 would be reported as "SynaDB is 0.5x faster".
        if speedup >= 1:
            print(f'\n📊 SynaDB is {speedup:.1f}x faster for parameter logging')
        else:
            print(f'\n📊 MLflow is {1 / speedup:.1f}x faster for parameter logging')
else:
    print('No parameter logging results to display.')

## 📈 Benchmark: Metric Logging <a id="benchmark-metrics"></a>

Now let's measure metric logging throughput over 100 epochs.

In [None]:
# Cell 15: SynaDB Metric Logging Benchmark
# synadb_metric_times collects the elapsed ms per run for logging every metric
# of every epoch. Reuses the tracker and run ids from the parameter benchmark.
synadb_metric_times = []

if not (HAS_SYNADB and synadb_tracker and synadb_run_ids):
    print('⚠️ SynaDB not available, skipping...')
else:
    print('Benchmarking SynaDB metric logging...')

    for run_idx, run_id in enumerate(synadb_run_ids):
        run_history = metrics_data[run_idx]

        # Timed region: one log_metric call per (epoch, metric) pair.
        start = time.perf_counter()
        for epoch, epoch_metrics in enumerate(run_history):
            for metric_name, metric_value in epoch_metrics.items():
                synadb_tracker.log_metric(run_id, metric_name, metric_value, step=epoch)
        elapsed = (time.perf_counter() - start) * 1000  # ms
        synadb_metric_times.append(elapsed)

        # Close the run outside the timed region.
        synadb_tracker.end_run(run_id, 'Completed')

        # Progress message every fifth run.
        completed = run_idx + 1
        if completed % 5 == 0:
            print(f'  Completed {completed} runs...')

    # Totals over all runs; times are in ms, so divide by 1000 for seconds.
    total_metrics = NUM_RUNS * NUM_EPOCHS * NUM_METRICS
    total_time = sum(synadb_metric_times)
    print(f'✓ SynaDB: {total_metrics:,} metrics in {total_time:.2f}ms')
    print(f'  Throughput: {total_metrics / (total_time / 1000):,.0f} metrics/sec')

In [None]:
# Cell 16: MLflow Metric Logging Benchmark
# mlflow_metric_times collects the elapsed ms per run for logging every metric
# of every epoch.
mlflow_metric_times = []

if not HAS_MLFLOW:
    print('⚠️ MLflow not available, skipping...')
else:
    import mlflow

    print('Benchmarking MLflow metric logging...')

    # The runs from the parameter benchmark were closed by their `with` blocks,
    # so metrics go into fresh runs under a separate experiment.
    mlflow.set_experiment('benchmark_exp_metrics')

    for run_idx in range(NUM_RUNS):
        with mlflow.start_run():
            # Timed region: one log_metric call per (epoch, metric) pair.
            start = time.perf_counter()
            for epoch, epoch_metrics in enumerate(metrics_data[run_idx]):
                for metric_name, metric_value in epoch_metrics.items():
                    mlflow.log_metric(metric_name, metric_value, step=epoch)
            elapsed = (time.perf_counter() - start) * 1000  # ms
            mlflow_metric_times.append(elapsed)

        # Progress message every fifth run.
        completed = run_idx + 1
        if completed % 5 == 0:
            print(f'  Completed {completed} runs...')

    # Totals over all runs; times are in ms, so divide by 1000 for seconds.
    total_metrics = NUM_RUNS * NUM_EPOCHS * NUM_METRICS
    total_time = sum(mlflow_metric_times)
    print(f'✓ MLflow: {total_metrics:,} metrics in {total_time:.2f}ms')
    print(f'  Throughput: {total_metrics / (total_time / 1000):,.0f} metrics/sec')

In [None]:
# Cell 17: Metric Logging Results Visualization
# Throughput (metrics per second) for each system that produced timings.
metric_throughput = {}
total_metrics = NUM_RUNS * NUM_EPOCHS * NUM_METRICS

# The recorded times are in ms, hence the division by 1000.
if synadb_metric_times:
    metric_throughput['SynaDB'] = total_metrics / (sum(synadb_metric_times) / 1000)

if mlflow_metric_times:
    metric_throughput['MLflow'] = total_metrics / (sum(mlflow_metric_times) / 1000)

if metric_throughput:
    fig = throughput_comparison(
        metric_throughput,
        title=f'Metric Logging Throughput ({total_metrics:,} total metrics)',
        ylabel='Metrics/second'
    )
    plt.show()
    
    # Calculate speedup (only meaningful when both systems were measured).
    if 'SynaDB' in metric_throughput and 'MLflow' in metric_throughput:
        # Throughput ratio: a value above 1 means SynaDB logged more per second.
        speedup = metric_throughput['SynaDB'] / metric_throughput['MLflow']
        # Name whichever system actually won instead of assuming SynaDB did;
        # otherwise a ratio below 1 would be reported as "SynaDB is 0.5x faster".
        if speedup >= 1:
            print(f'\n📊 SynaDB is {speedup:.1f}x faster for metric logging')
        else:
            print(f'\n📊 MLflow is {1 / speedup:.1f}x faster for metric logging')
else:
    print('No metric logging results to display.')

## 📦 Benchmark: Artifact Storage <a id="benchmark-artifacts"></a>

Let's compare artifact (model checkpoint) storage performance.

In [None]:
# Cell 19: SynaDB Artifact Storage Benchmark
# synadb_artifact_times collects the elapsed ms per run for storing one artifact.
synadb_artifact_times = []

if not HAS_SYNADB:
    print('⚠️ SynaDB not available, skipping...')
else:
    from synadb import ExperimentTracker

    print('Benchmarking SynaDB artifact storage...')

    # Artifacts go into their own database file, separate from the one used by
    # the parameter/metric benchmarks.
    synadb_artifact_tracker = ExperimentTracker(os.path.join(temp_dir, 'synadb_artifacts.db'))

    for run_idx in range(NUM_RUNS):
        run_id = synadb_artifact_tracker.start_run('artifact_exp', tags=[f'run_{run_idx}'])
        artifact_name = f'model_checkpoint_{run_idx}.bin'

        # Timed region: store the in-memory payload under this run.
        start = time.perf_counter()
        synadb_artifact_tracker.log_artifact(run_id, artifact_name, artifact_data)
        elapsed = (time.perf_counter() - start) * 1000  # ms
        synadb_artifact_times.append(elapsed)

        synadb_artifact_tracker.end_run(run_id, 'Completed')

        # Progress message every fifth artifact.
        completed = run_idx + 1
        if completed % 5 == 0:
            print(f'  Completed {completed} artifacts...')

    print(f'✓ SynaDB: {NUM_RUNS} artifacts ({ARTIFACT_SIZE_MB}MB each)')
    print(f'  Mean time: {np.mean(synadb_artifact_times):.2f}ms')
    # MB divided by mean ms, times 1000, gives MB per second.
    print(f'  Throughput: {ARTIFACT_SIZE_MB * 1000 / np.mean(synadb_artifact_times):.1f} MB/s')

In [None]:
# Cell 20: MLflow Artifact Storage Benchmark
# mlflow_artifact_times collects the elapsed ms per run for storing one artifact.
mlflow_artifact_times = []

if not HAS_MLFLOW:
    print('⚠️ MLflow not available, skipping...')
else:
    import mlflow

    print('Benchmarking MLflow artifact storage...')

    mlflow.set_experiment('artifact_exp')

    # log_artifact is given a file path below, so write the payload to disk once
    # up front (outside the timed region).
    artifact_file = os.path.join(temp_dir, 'temp_artifact.bin')
    with open(artifact_file, 'wb') as handle:
        handle.write(artifact_data)

    for run_idx in range(NUM_RUNS):
        with mlflow.start_run():
            # Timed region: store the file under this run's 'checkpoints' path.
            start = time.perf_counter()
            mlflow.log_artifact(artifact_file, 'checkpoints')
            elapsed = (time.perf_counter() - start) * 1000  # ms
            mlflow_artifact_times.append(elapsed)

        # Progress message every fifth artifact.
        completed = run_idx + 1
        if completed % 5 == 0:
            print(f'  Completed {completed} artifacts...')

    print(f'✓ MLflow: {NUM_RUNS} artifacts ({ARTIFACT_SIZE_MB}MB each)')
    print(f'  Mean time: {np.mean(mlflow_artifact_times):.2f}ms')
    # MB divided by mean ms, times 1000, gives MB per second.
    print(f'  Throughput: {ARTIFACT_SIZE_MB * 1000 / np.mean(mlflow_artifact_times):.1f} MB/s')

In [None]:
# Cell 21: Artifact Storage Results Visualization
# Throughput (MB per second) for each system that produced timings.
artifact_throughput = {}

# Mean times are in ms: MB / ms * 1000 gives MB per second.
if synadb_artifact_times:
    artifact_throughput['SynaDB'] = ARTIFACT_SIZE_MB * 1000 / np.mean(synadb_artifact_times)

if mlflow_artifact_times:
    artifact_throughput['MLflow'] = ARTIFACT_SIZE_MB * 1000 / np.mean(mlflow_artifact_times)

if artifact_throughput:
    fig = throughput_comparison(
        artifact_throughput,
        title=f'Artifact Storage Throughput ({ARTIFACT_SIZE_MB}MB artifacts)',
        ylabel='MB/second'
    )
    plt.show()
    
    # Calculate speedup (only meaningful when both systems were measured).
    if 'SynaDB' in artifact_throughput and 'MLflow' in artifact_throughput:
        # Throughput ratio: a value above 1 means SynaDB stored more per second.
        speedup = artifact_throughput['SynaDB'] / artifact_throughput['MLflow']
        # Name whichever system actually won instead of assuming SynaDB did;
        # otherwise a ratio below 1 would be reported as "SynaDB is 0.5x faster".
        if speedup >= 1:
            print(f'\n📊 SynaDB is {speedup:.1f}x faster for artifact storage')
        else:
            print(f'\n📊 MLflow is {1 / speedup:.1f}x faster for artifact storage')
else:
    print('No artifact storage results to display.')

## 🔍 Benchmark: Query Performance <a id="benchmark-query"></a>

Let's compare how fast each system can retrieve experiment runs and metrics.

In [None]:
# Cell 23: SynaDB Query Performance Benchmark
# synadb_query_times collects the elapsed ms of each timed list_runs call.
synadb_query_times = []

if not (HAS_SYNADB and synadb_tracker):
    print('⚠️ SynaDB not available, skipping...')
else:
    print('Benchmarking SynaDB query performance...')

    # Three untimed calls first, so the timed loop does not include first-call cost.
    for _ in range(3):
        synadb_tracker.list_runs('benchmark_exp')

    # 100 timed calls listing the runs of the parameter/metric experiment.
    for _ in range(100):
        start = time.perf_counter()
        runs = synadb_tracker.list_runs('benchmark_exp')
        elapsed = (time.perf_counter() - start) * 1000  # ms
        synadb_query_times.append(elapsed)

    print('✓ SynaDB: 100 list_runs queries')
    print(f'  Mean latency: {np.mean(synadb_query_times):.2f}ms')
    print(f'  P95 latency: {np.percentile(synadb_query_times, 95):.2f}ms')

In [None]:
# Cell 24: MLflow Query Performance Benchmark
# mlflow_query_times collects the elapsed ms of each timed search_runs call.
mlflow_query_times = []

if not HAS_MLFLOW:
    print('⚠️ MLflow not available, skipping...')
else:
    import mlflow
    from mlflow.tracking import MlflowClient

    print('Benchmarking MLflow query performance...')

    # Look up the experiment populated by the parameter-logging benchmark.
    client = MlflowClient()
    experiment = client.get_experiment_by_name('benchmark_exp')

    if not experiment:
        print('⚠️ MLflow experiment not found')
    else:
        # Three untimed calls first, so the timed loop does not include
        # first-call cost.
        for _ in range(3):
            client.search_runs(experiment.experiment_id)

        # 100 timed calls listing the runs of that experiment.
        for _ in range(100):
            start = time.perf_counter()
            runs = client.search_runs(experiment.experiment_id)
            elapsed = (time.perf_counter() - start) * 1000  # ms
            mlflow_query_times.append(elapsed)

        print('✓ MLflow: 100 search_runs queries')
        print(f'  Mean latency: {np.mean(mlflow_query_times):.2f}ms')
        print(f'  P95 latency: {np.percentile(mlflow_query_times, 95):.2f}ms')

In [None]:
# Cell 25: Query Performance Results Visualization
# Mean query latency (ms) for each system that produced timings.
query_latencies = {}

if synadb_query_times:
    query_latencies['SynaDB'] = np.mean(synadb_query_times)

if mlflow_query_times:
    query_latencies['MLflow'] = np.mean(mlflow_query_times)

if query_latencies:
    fig = bar_comparison(
        query_latencies,
        title='Query Latency (list runs)',
        ylabel='Latency (ms)',
        lower_is_better=True
    )
    plt.show()
    
    # Calculate speedup (only meaningful when both systems were measured).
    if 'SynaDB' in query_latencies and 'MLflow' in query_latencies:
        # Latency ratio: a value above 1 means SynaDB answered faster.
        speedup = query_latencies['MLflow'] / query_latencies['SynaDB']
        # Name whichever system actually won instead of assuming SynaDB did;
        # otherwise a ratio below 1 would be reported as "SynaDB is 0.5x faster".
        if speedup >= 1:
            print(f'\n📊 SynaDB is {speedup:.1f}x faster for queries')
        else:
            print(f'\n📊 MLflow is {1 / speedup:.1f}x faster for queries')
else:
    print('No query performance results to display.')

## 🔌 Demo: Offline Usage <a id="demo-offline"></a>

One of SynaDB's key advantages is its offline-first design. Let's demonstrate this.

In [None]:
# Cell 27: Offline Usage Demonstration
from IPython.display import display, Markdown, HTML

# Static markdown shown to the reader; nothing in this cell is measured.
# NOTE(review): the "No server required" row says MLflow needs a tracking
# server, yet the benchmark cells in this notebook log to a local file-store
# URI without starting one - confirm the wording of that row. The
# "Must set URI" / "Must set experiment" comments in the MLflow snippet should
# also be checked against MLflow's defaults.
offline_comparison = '''
### Offline Capability Comparison

| Feature | SynaDB | MLflow |
|---------|--------|--------|
| **Works offline** | ✅ Always | ⚠️ Local mode only |
| **No server required** | ✅ Embedded | ❌ Needs tracking server |
| **Single file storage** | ✅ One .db file | ❌ Directory structure |
| **Zero configuration** | ✅ Just import | ⚠️ Set tracking URI |
| **Air-gapped environments** | ✅ Perfect fit | ⚠️ Limited support |
| **Edge deployment** | ✅ Lightweight | ❌ Heavy dependencies |

### SynaDB Offline Example

```python
# SynaDB works anywhere - no network, no server, no config
from synadb import ExperimentTracker

tracker = ExperimentTracker("experiments.db")  # That's it!
run_id = tracker.start_run("my_experiment")
tracker.log_metric(run_id, "accuracy", 0.95)
tracker.end_run(run_id, "Completed")
```

### MLflow Local Mode

```python
# MLflow requires explicit configuration for local mode
import mlflow

mlflow.set_tracking_uri("file:///path/to/mlruns")  # Must set URI
mlflow.set_experiment("my_experiment")  # Must set experiment
with mlflow.start_run():
    mlflow.log_metric("accuracy", 0.95)
```
'''

# Render the string as markdown in the cell output.
display(Markdown(offline_comparison))

In [None]:
# Cell 28: Storage Size Comparison
def get_dir_size(path):
    """Return the size in bytes of a file, or of all files under a directory.

    Args:
        path: File or directory path.

    Returns:
        The file's own size when `path` is a regular file, 0 when `path` does
        not exist, and otherwise the summed size of every file found by
        walking the directory tree.
    """
    if os.path.isfile(path):
        return os.path.getsize(path)
    if not os.path.exists(path):
        return 0
    return sum(
        os.path.getsize(os.path.join(root, name))
        for root, _subdirs, names in os.walk(path)
        for name in names
    )

def count_files(path):
    """Return how many files live at or under `path`.

    Args:
        path: File or directory path.

    Returns:
        1 when `path` is a regular file, 0 when it does not exist, and
        otherwise the number of files found by walking the directory tree
        (directories themselves are not counted).
    """
    if os.path.isfile(path):
        return 1
    if not os.path.exists(path):
        return 0
    return sum(len(names) for _root, _subdirs, names in os.walk(path))

print('Storage Comparison\n')
print('=' * 60)

# On-disk footprint in MB per system, keyed by system name.
storage_sizes = {}

# SynaDB
# The benchmarks wrote two SynaDB databases: the params/metrics one at
# `synadb_path` and the artifact one next to it in the temp directory. The
# MLflow directory measured below holds params, metrics AND artifacts, so both
# SynaDB files must be counted for a like-for-like comparison.
synadb_files = [
    db_path
    for db_path in (synadb_path, os.path.join(temp_dir, 'synadb_artifacts.db'))
    if os.path.exists(db_path)
]
if synadb_files:
    size = sum(get_dir_size(db_path) for db_path in synadb_files)
    files = sum(count_files(db_path) for db_path in synadb_files)
    storage_sizes['SynaDB'] = size / (1024 * 1024)  # MB
    print(f'SynaDB: {size / (1024 * 1024):.2f} MB ({files} file{"" if files == 1 else "s"})')

# MLflow
if os.path.exists(mlflow_path):
    size = get_dir_size(mlflow_path)
    files = count_files(mlflow_path)
    storage_sizes['MLflow'] = size / (1024 * 1024)  # MB
    print(f'MLflow: {size / (1024 * 1024):.2f} MB ({files} files)')

print('\n' + '=' * 60)
# Wording adjusted to match what is measured: one file per tracker database.
print('\nNote: each SynaDB tracker stores everything in a single file.')
print('MLflow uses a directory structure with many files.')

## 📊 Results Summary <a id="results"></a>

Let's summarize all benchmark results.

In [None]:
# Cell 30: Results Summary Table
from IPython.display import display, Markdown, HTML

# One markdown table row per benchmark for which BOTH systems have timings.
# In every row the speedup is MLflow time divided by SynaDB time, so a value
# above 1 means SynaDB was faster.
summary_rows = []

# Parameter logging: mean ms per run.
if synadb_param_times and mlflow_param_times:
    synadb_param, mlflow_param = np.mean(synadb_param_times), np.mean(mlflow_param_times)
    speedup = mlflow_param / synadb_param
    summary_rows += [f'| Parameter Logging | {synadb_param:.2f}ms | {mlflow_param:.2f}ms | **{speedup:.1f}x** |']

# Metric logging: total ms over all runs (not a mean).
if synadb_metric_times and mlflow_metric_times:
    synadb_metric, mlflow_metric = sum(synadb_metric_times), sum(mlflow_metric_times)
    speedup = mlflow_metric / synadb_metric
    summary_rows += [f'| Metric Logging | {synadb_metric:.0f}ms | {mlflow_metric:.0f}ms | **{speedup:.1f}x** |']

# Artifact storage: mean ms per artifact.
if synadb_artifact_times and mlflow_artifact_times:
    synadb_artifact, mlflow_artifact = np.mean(synadb_artifact_times), np.mean(mlflow_artifact_times)
    speedup = mlflow_artifact / synadb_artifact
    summary_rows += [f'| Artifact Storage | {synadb_artifact:.2f}ms | {mlflow_artifact:.2f}ms | **{speedup:.1f}x** |']

# Query performance: mean ms per query.
if synadb_query_times and mlflow_query_times:
    synadb_query, mlflow_query = np.mean(synadb_query_times), np.mean(mlflow_query_times)
    speedup = mlflow_query / synadb_query
    summary_rows += [f'| Query Performance | {synadb_query:.2f}ms | {mlflow_query:.2f}ms | **{speedup:.1f}x** |']

if not summary_rows:
    print('No results to summarize.')
else:
    # Heading, blank line, table header and separator, then the data rows.
    summary_md = '\n'.join([
        '### Performance Summary',
        '',
        '| Benchmark | SynaDB | MLflow | Speedup |',
        '|-----------|--------|--------|--------|',
        *summary_rows,
    ])

    display(Markdown(summary_md))

In [None]:
# Cell 31: Feature Comparison Table
# Static, hand-written markdown; none of these rows is derived from the
# benchmark measurements above.
# NOTE(review): the SynaDB feature claims (model registry, Jupyter integration,
# export/import) are not exercised anywhere in this notebook - confirm against
# the SynaDB documentation.
feature_comparison = '''
### Feature Comparison

| Feature | SynaDB | MLflow |
|---------|--------|--------|
| **Type** | Embedded | Server-based |
| **Storage** | Single file | Directory structure |
| **Setup** | Zero config | Requires tracking URI |
| **Offline** | ✅ Always works | ⚠️ Local mode only |
| **UI** | Jupyter integration | Web dashboard |
| **Model Registry** | ✅ Built-in | ✅ Built-in |
| **Artifact Storage** | ✅ In database | ✅ File system |
| **Query API** | ✅ Simple | ✅ Rich filtering |
| **Collaboration** | Export/Import | Server sharing |
| **Dependencies** | Minimal | Many |
'''

# Render the string as markdown in the cell output.
display(Markdown(feature_comparison))

## 🎯 Conclusions <a id="conclusions"></a>

In [None]:
# Cell 33: Conclusions
# NOTE(review): these takeaways are fixed text. They are shown regardless of
# what the benchmark cells measured (including when a system was skipped), so
# check them against the summary table before publishing results.
conclusions = [
    'SynaDB provides significantly faster experiment tracking operations',
    'Zero configuration makes SynaDB ideal for quick prototyping',
    'Single-file storage simplifies backup and sharing',
    'Offline-first design perfect for edge and air-gapped environments',
    'MLflow offers richer UI and collaboration features for teams',
]

# Closing paragraph passed to the conclusion box below the bullet points.
summary = '''SynaDB ExperimentTracker excels in performance and simplicity, 
making it ideal for individual practitioners and offline scenarios. 
MLflow remains valuable for teams needing rich collaboration features and web UI.'''

# Display the takeaways via the project helper.
conclusion_box(
    title='Key Takeaways',
    points=conclusions,
    summary=summary
)

In [None]:
# Cell 34: Cleanup
# Clean up temporary files
# Removes the scratch directory created in the temp-dir cell, i.e. every
# database, MLflow run directory and artifact written by this notebook.
import shutil

# Best effort: a failure is reported but does not raise, so the notebook still
# finishes (re-running this cell after a successful cleanup prints the warning).
# NOTE(review): the SynaDB trackers created above are never explicitly closed
# in this notebook; if they hold the files open, removal may fail on some
# platforms - confirm whether ExperimentTracker offers a close method.
try:
    shutil.rmtree(temp_dir)
    print(f'✓ Cleaned up temp directory: {temp_dir}')
except Exception as e:
    print(f'⚠️ Could not clean up temp directory: {e}')